Code example #1
def nms(class_pred, box_pred, coef_pred, proto_out, anchors, cfg):
    class_p = class_pred.squeeze()  # [19248, 81]
    box_p = box_pred.squeeze()  # [19248, 4]
    coef_p = coef_pred.squeeze()  # [19248, 32]
    proto_p = proto_out.squeeze()  # [138, 138, 32]

    class_p = class_p.transpose(1, 0).contiguous()  # [81, 19248]

    # exclude the background class
    class_p = class_p[1:, :]
    # get the max score class of 19248 predicted boxes
    class_p_max, _ = torch.max(class_p, dim=0)  # [19248]

    # filter predicted boxes according to the class score
    keep = (class_p_max > cfg.nms_score_thre)
    class_thre = class_p[:, keep]
    box_thre = decode(box_p[keep, :], anchors[keep, :])
    coef_thre = coef_p[keep, :]

    if class_thre.shape[1] == 0:
        return None, None, None, None, None
    else:
        box_thre, coef_thre, class_ids, class_thre = fast_nms(
            box_thre, coef_thre, class_thre, cfg)

        return class_ids, class_thre, box_thre, coef_thre, proto_p
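
Every example on this page relies on a project-specific `decode` helper that turns regression offsets plus priors/anchors into boxes (the YOLACT variant above takes no variances argument). As a point of reference only, a minimal sketch of the common SSD-style decoding, assuming priors in (cx, cy, w, h) form and the usual (0.1, 0.2) variances:

import torch

def decode(loc, priors, variances=(0.1, 0.2)):
    """Undo the SSD encoding: offsets + priors -> point-form boxes."""
    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), dim=1)
    boxes[:, :2] -= boxes[:, 2:] / 2  # (cx, cy) -> (xmin, ymin)
    boxes[:, 2:] += boxes[:, :2]      # (w, h)   -> (xmax, ymax)
    return boxes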
Code example #2
    def forward(self, loc_data, conf_data, priors):

        #         loc_data   = prediction[:,:,:4]
        #         conf_data  = prediction[:,:,4:]

        num_priors = priors.shape[0]
        batch_size = loc_data.shape[0]

        output = np.zeros(shape=(batch_size, self.num_classes, self.top_k, 5),
                          dtype=np.float32)

        conf_preds = conf_data.swapaxes(2, 1)

        for i in range(batch_size):
            decoded_boxes = decode(loc=loc_data[i],
                                   priors=priors,
                                   variances=self.variances)

            conf_scores = conf_preds[i].copy()

            for cl in range(1, self.num_classes):
                c_mask = np.greater(conf_scores[cl], self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                scores = np.float32(scores)

                if scores.shape[0] == 0:
                    continue

                l_mask = c_mask.reshape(-1, 1).repeat(4, axis=-1)
                boxes = decoded_boxes[l_mask].reshape(-1, 4).astype(np.float32)
                ids, count = non_maximum_supression(boxes=boxes,
                                                    scores=scores,
                                                    overlap=self.nms_thresh,
                                                    top_k=self.top_k)

                ids = np.int32(ids)
                count = np.int32(count)

                scores = scores[ids[:count]]
                scores = np.expand_dims(scores, axis=1)

                output[i, cl, :count] = np.concatenate(
                    (scores, boxes[ids[:count]]), axis=-1)

#         Optional global top-k across classes (left disabled), the NumPy
#         analogue of the torch versions' rank-based filtering:
#         flt = output.reshape(batch_size, -1, 5)
#         idx = np.argsort(-flt[:, :, 0], axis=-1)
#         rank = np.argsort(idx, axis=-1)
#         flt[rank >= self.top_k] = 0
        return output
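
The `non_maximum_supression` helper called above is not part of this excerpt. A self-contained NumPy sketch with the same (boxes, scores, overlap, top_k) signature, assuming point-form boxes, might look like this:

import numpy as np

def non_maximum_supression(boxes, scores, overlap=0.5, top_k=200):
    """Greedy NMS; returns (kept indices, number kept)."""
    order = scores.argsort()[::-1][:top_k]  # best scores first
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the current best box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.clip(xx2 - xx1, 0, None) * np.clip(yy2 - yy1, 0, None)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= overlap]  # drop boxes that overlap too much
    return np.array(keep, dtype=np.int64), len(keep)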
Code example #3
File: test.py  Project: tpatel0409/ssd-pytorch
def test(ssd, data, default_boxes):
    """ Execute test on one image
        then perform NMS algorithm
        then rescale boxes back to normal size

    Args:
        ssd: trained SSD model
        img: Torch array of shape (1, 3, H, W)
        default_boxes: Torch array of shape (num_default, 4)

    Returns:
        img: numpy array of (3, H, W)
        all_boxes: final boxes, numpy array of (num_boxes, 4)
        all_scores: final scores, numpy array of (num_boxes,)
        all_names: final class names, numpy array of (num_boxes,)
    """
    img, _, _ = data
    img = img.to(device)
    default_boxes = default_boxes.to(device)
    with torch.no_grad():
        out_confs, out_locs = ssd(img)
    out_confs = out_confs.squeeze(0)
    out_locs = out_locs.squeeze(0)
    out_boxes = decode(default_boxes, out_locs)
    out_labels = F.softmax(out_confs, dim=1)

    all_boxes = []
    all_scores = []
    all_names = []

    for c in range(1, NUM_CLASSES):
        cls_scores = out_labels[:, c]
        score_idx = cls_scores > args.score_thresh
        cls_boxes = out_boxes[score_idx]
        cls_scores = cls_scores[score_idx]

        box_idx = compute_nms(cls_boxes, cls_scores, args.nms_thresh,
                              args.max_num_boxes_per_class)

        cls_boxes = cls_boxes[box_idx]
        cls_scores = cls_scores[box_idx]
        cls_names = [c] * cls_boxes.size(0)

        all_boxes.append(cls_boxes)
        all_scores.append(cls_scores)
        all_names.extend(cls_names)

    all_boxes = torch.cat(all_boxes, dim=0)
    all_scores = torch.cat(all_scores, dim=0)

    img = img.squeeze(0).cpu().numpy()
    all_boxes *= img.shape[-1]
    all_boxes = all_boxes.cpu().numpy()
    all_scores = all_scores.cpu().numpy()
    all_names = np.array(all_names)

    return img, all_boxes, all_scores, all_names
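
`compute_nms` is defined elsewhere in the tpatel0409/ssd-pytorch project. With torchvision available, a plausible drop-in sketch (an assumption, not the project's actual helper) is:

import torch
from torchvision.ops import nms

def compute_nms(boxes, scores, nms_thresh, limit=200):
    """Greedy IoU-based NMS, capped at `limit` boxes per class."""
    if boxes.numel() == 0:
        return torch.zeros(0, dtype=torch.long, device=boxes.device)
    keep = nms(boxes, scores, nms_thresh)  # indices, sorted by score
    return keep[:limit]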
Code example #4
    def conf_objectness_loss(self, conf_data, conf_t, batch_size, loc_p, loc_t,
                             priors):
        """
        Instead of using softmax, treat class[0] as p(obj) * p(IoU), as in
        YOLO. Then softmax the remaining classes and apply cross-entropy
        to the positive examples only.
        """

        conf_t = conf_t.view(-1)  # [batch_size*num_priors]
        conf_data = conf_data.view(
            -1, conf_data.size(-1))  # [batch_size*num_priors, num_classes]

        pos_mask = conf_t > 0
        neg_mask = conf_t == 0

        obj_data = conf_data[:, 0]
        obj_data_pos = obj_data[pos_mask]
        obj_data_neg = obj_data[neg_mask]

        obj_neg_loss = -F.logsigmoid(-obj_data_neg).sum()

        with torch.no_grad():
            pos_priors = (priors.unsqueeze(0).expand(
                batch_size, -1, -1).reshape(-1, 4)[pos_mask, :])

            boxes_pred = decode(loc_p, pos_priors, cfg.use_yolo_regressors)
            boxes_targ = decode(loc_t, pos_priors, cfg.use_yolo_regressors)

            iou_targets = elemwise_box_iou(boxes_pred, boxes_targ)

        obj_pos_loss = -iou_targets * F.logsigmoid(obj_data_pos) - (
            1 - iou_targets) * F.logsigmoid(-obj_data_pos)
        obj_pos_loss = obj_pos_loss.sum()

        conf_data_pos = (
            conf_data[:, 1:])[pos_mask]  # Now this has just 80 classes
        conf_t_pos = conf_t[pos_mask] - 1  # So subtract 1 here

        class_loss = F.cross_entropy(conf_data_pos,
                                     conf_t_pos,
                                     reduction="sum")

        return cfg.conf_alpha * (class_loss + obj_pos_loss + obj_neg_loss)
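
The objectness target above needs an elementwise IoU between corresponding predicted and target boxes. A sketch of `elemwise_box_iou`, assuming point-form (x1, y1, x2, y2) boxes (YOLACT's own version may differ in details):

import torch

def elemwise_box_iou(box_a, box_b, eps=1e-9):
    """IoU between corresponding rows of box_a and box_b ([n, 4] each)."""
    x1 = torch.max(box_a[:, 0], box_b[:, 0])
    y1 = torch.max(box_a[:, 1], box_b[:, 1])
    x2 = torch.min(box_a[:, 2], box_b[:, 2])
    y2 = torch.min(box_a[:, 3], box_b[:, 3])
    inter = (x2 - x1).clamp(min=0) * (y2 - y1).clamp(min=0)
    area_a = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
    area_b = (box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1])
    return inter / (area_a + area_b - inter + eps)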
Code example #5
    def __call__(self, predictions):
        """
        Args:
             loc_data: (tensor) Loc preds from loc layers
                Shape: [batch, num_priors, 4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch, num_priors, num_classes]
            mask_data: (tensor) Mask preds from mask layers
                Shape: [batch, num_priors, mask_dim]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [num_priors, 4]
            proto_data:
                (tensor) If using mask_type.lincomb, the prototype masks
                Shape: [batch, mask_h, mask_w, mask_dim]
        Returns:
            output of shape (batch_size, top_k, 1 + 1 + 4 + mask_dim)
            These outputs are in the order:
            class idx, confidence, bbox coords, and mask.

            Note that the outputs are sorted only if cross_class_nms is False
        """

        loc_data = predictions["loc"]
        conf_data = predictions["conf"]
        mask_data = predictions["mask"]
        prior_data = predictions["priors"]

        proto_data = predictions.get("proto")
        inst_data = predictions.get("inst")

        out = []

        with timer.env("Detect"):
            batch_size = loc_data.size(0)
            num_priors = prior_data.size(0)

            conf_preds = (conf_data.view(batch_size, num_priors,
                                         self.num_classes).transpose(
                                             2, 1).contiguous())

            for batch_idx in range(batch_size):
                decoded_boxes = decode(loc_data[batch_idx], prior_data)
                result = self.detect(batch_idx, conf_preds, decoded_boxes,
                                     mask_data, inst_data)

                if result is not None and proto_data is not None:
                    result["proto"] = proto_data[batch_idx]

                out.append(result)

        return out
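
`timer.env` is YOLACT's profiling context manager. A minimal stand-in, just to make the snippet self-contained (the real utils/timer.py does more):

import time
from contextlib import contextmanager

class _Timer:
    """Accumulates wall-clock time per named environment."""
    def __init__(self):
        self.times = {}

    @contextmanager
    def env(self, name):
        start = time.perf_counter()
        yield
        self.times[name] = self.times.get(name, 0.0) + time.perf_counter() - start

timer = _Timer()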
Code example #6
    def forward(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh,
                loc_data, conf_data, prior_data):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4]
        """
        self.num_classes = num_classes
        self.background_label = bkg_label
        self.top_k = top_k
        # Parameters used in nms.
        self.nms_thresh = nms_thresh
        if nms_thresh <= 0:
            raise ValueError('nms_thresh must be positive.')
        self.conf_thresh = conf_thresh
        self.variance = cfg['variance']
        num = loc_data.size(0)  # batch size
        num_priors = prior_data.size(0)
        output = torch.zeros(num, self.num_classes, self.top_k, 5)
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)

        # Decode predictions into bboxes.
        for i in range(num):
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()
            # num_det = 0
            for cl in range(1, self.num_classes):
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].unsqueeze(1),
                               boxes[ids[:count]]), 1)
        flt = output.contiguous().view(num, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        # Keep only the overall top_k detections across classes; boolean
        # assignment zeroes the rest of the view (and hence `output`) in place.
        flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0
        return output
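
The sort/rank idiom at the end is easy to misread: sorting the flattened scores once gives `idx`, and argsorting `idx` gives each detection's rank, so a rank below top_k marks exactly the overall best detections. A tiny demonstration:

import torch

scores = torch.tensor([[0.1, 0.9, 0.4, 0.8, 0.2, 0.7]])
_, idx = scores.sort(1, descending=True)  # positions ordered by score
_, rank = idx.sort(1)                     # rank of each position in that order
print(rank)       # tensor([[5, 0, 3, 1, 4, 2]])
print(rank < 3)   # True exactly for the three highest scores (0.9, 0.8, 0.7)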
Code example #7
def post_process(confs, locs, scores, default_boxes, mode=1):
    # confs = tf.squeeze(confs, 0)
    # locs = tf.squeeze(locs, 0)
    # The locs have to be turned back into boxes before filtering.
    newres = decode(default_boxes, locs).numpy()
    if mode == 2:
        confs = tf.math.softmax(confs, axis=-1)
        classes = tf.math.argmax(confs, axis=-1)
        scores = tf.math.reduce_max(confs, axis=-1)

    out_boxes = []
    out_labels = []
    out_scores = []
    for c in range(1, NUM_CLASSES):
        if mode == 1:
            cls_scores = np.zeros(np.shape(confs))
            cls_scores[confs == c] = confs[confs == c]
        else:
            cls_scores = confs[:, c]

        score_idx = cls_scores > 0.5
        cls_boxes = newres[score_idx]
        cls_scores = cls_scores[score_idx]

        nms_idx = compute_nms(cls_boxes, cls_scores, 0.35, 200)
        cls_boxes = tf.gather(cls_boxes, nms_idx)
        cls_scores = tf.gather(cls_scores, nms_idx)
        cls_labels = [c] * cls_boxes.shape[0]

        out_boxes.append(cls_boxes)
        out_labels.extend(cls_labels)
        out_scores.append(cls_scores)

    out_boxes = tf.concat(out_boxes, axis=0)
    out_scores = tf.concat(out_scores, axis=0)

    boxes = tf.clip_by_value(out_boxes, 0.0, 1.0).numpy()
    classes = np.array(out_labels)
    scores = out_scores.numpy()

    return boxes, classes, scores
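
As in the PyTorch examples, `compute_nms(cls_boxes, cls_scores, 0.35, 200)` is project code that is not shown. A sketch built on tf.image.non_max_suppression (IoU is unchanged by consistently swapping the x/y coordinate order, so (x1, y1, x2, y2) boxes work here too):

import tensorflow as tf

def compute_nms(boxes, scores, nms_thresh, limit=200):
    """Return the indices of the boxes kept by greedy NMS."""
    if tf.shape(boxes)[0] == 0:
        return tf.zeros([0], dtype=tf.int32)
    return tf.image.non_max_suppression(
        boxes, scores, max_output_size=limit, iou_threshold=nms_thresh)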
Code example #8
    def direct_mask_loss(self, pos_idx, idx_t, loc_data, mask_data, priors,
                         masks):
        """ Crops the gt masks using the predicted bboxes,
        scales them down, and outputs the BCE loss. """
        loss_m = 0
        for idx in range(mask_data.size(0)):
            with torch.no_grad():
                cur_pos_idx = pos_idx[idx, :, :]
                cur_pos_idx_squeezed = cur_pos_idx[:, 1]

                # Shape: [num_priors, 4], decoded predicted bboxes
                pos_bboxes = decode(loc_data[idx, :, :], priors.data,
                                    cfg.use_yolo_regressors)
                pos_bboxes = pos_bboxes[cur_pos_idx].view(-1, 4).clamp(0, 1)
                pos_lookup = idx_t[idx, cur_pos_idx_squeezed]

                cur_masks = masks[idx]
                pos_masks = cur_masks[pos_lookup, :, :]

                # Convert bboxes to absolute coordinates
                num_pos, img_height, img_width = pos_masks.size()

                x1, x2 = sanitize_coordinates(pos_bboxes[:, 0],
                                              pos_bboxes[:, 2], img_width)
                y1, y2 = sanitize_coordinates(pos_bboxes[:, 1],
                                              pos_bboxes[:, 3], img_height)

                scaled_masks = []
                for jdx in range(num_pos):
                    tmp_mask = pos_masks[jdx, y1[jdx]:y2[jdx], x1[jdx]:x2[jdx]]

                    while tmp_mask.dim() < 2:
                        tmp_mask = tmp_mask.unsqueeze(0)

                    new_mask = F.adaptive_avg_pool2d(tmp_mask.unsqueeze(0),
                                                     cfg.mask_size)
                    scaled_masks.append(new_mask.view(1, -1))

                mask_t = (torch.cat(scaled_masks, 0).gt(0.5).float()
                          )  # Threshold downsampled mask

            pos_mask_data = mask_data[idx, cur_pos_idx_squeezed, :]
            loss_m += (F.binary_cross_entropy(
                torch.clamp(pos_mask_data, 0, 1), mask_t, reduction="sum") *
                       cfg.mask_alpha)

        return loss_m
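
`sanitize_coordinates` turns a normalized coordinate pair into ordered, clamped pixel indices. A sketch close to YOLACT's helper (treat the exact padding and casting behavior as an assumption):

import torch

def sanitize_coordinates(_x1, _x2, img_size, padding=0):
    """Scale normalized coords to pixels, order them, clamp to [0, img_size]."""
    _x1 = (_x1 * img_size).long()
    _x2 = (_x2 * img_size).long()
    x1 = torch.clamp(torch.min(_x1, _x2) - padding, min=0)
    x2 = torch.clamp(torch.max(_x1, _x2) + padding, max=img_size)
    return x1, x2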
Code example #9
File: net_utils.py  Project: neilctwu/Workable_SSD
    def forward(self, loc_data, conf_data, prior_data):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4]
        """
        num = loc_data.size(0)  # batch size
        num_priors = prior_data.size(0)
        # Create a zero tensor of shape [batch N, num_classes (21), top 200, conf + loc]
        output = torch.zeros(num, self.num_classes, self.top_k, 5)
        # Rearrange the confidences to [batch N, num_classes, num_boxes]
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)

        # Decode predictions into bboxes.
        for i in range(num):
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()

            for cl in range(1, self.num_classes):
                # Remove boxes below the confidence threshold
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                # Gather the decoded boxes that passed the threshold
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                # Remove overlapping boxes from boxes with NMS
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].unsqueeze(1),
                               boxes[ids[:count]]), 1)
        flt = output.contiguous().view(num, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        # Keep only the overall top_k detections; boolean assignment zeroes
        # the rest of the view (and hence `output`) in place.
        flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0
        return output
Code example #10
    def __call__(self, loc_data, conf_data, prior_data):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4] 
        """
        num = loc_data.size(0)
        num_priors = prior_data.size(0)
        conf_data = self.softmax(conf_data)

        conf_preds = conf_data.view(
            num, num_priors, self.num_classes).transpose(2, 1)
        batch_priors = prior_data.view(-1, num_priors,
                                       4).expand(num, num_priors, 4)
        batch_priors = batch_priors.contiguous().view(-1, 4)

        decoded_boxes = decode(loc_data.view(-1, 4),
                               batch_priors, self.variance)
        decoded_boxes = decoded_boxes.view(num, num_priors, 4)

        # output = torch.zeros(num, self.num_classes, self.top_k, 5)
        output = list()
        for i in range(num):
            boxes = decoded_boxes[i].clone()
            conf_scores = conf_preds[i].clone()
            c_mask = conf_scores[1].gt(self.conf_thresh)
            scores = conf_scores[1][c_mask]
            if scores.numel() == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(boxes)
            boxes_ = boxes[l_mask].view(-1, 4)
            # NMS is skipped here: every box above the confidence threshold is
            # returned together with its score.
            # ids, count = nms(boxes_, scores, self.nms_thresh, self.nms_top_k)
            box_score = [boxes_.detach().numpy(), scores.detach().numpy()]
            output.append(box_score)
        return output
Code example #11
    def forward(self, loc, conf, prior):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4]
        """

        loc_data = loc.data
        conf_data = conf.data
        prior_data = prior.data
        num = loc_data.size(0)  # batch size
        self.num_priors = prior_data.size(0)
        # Allocate output buffers for the whole batch.
        self.boxes = torch.zeros(num, self.num_priors, 4)
        self.scores = torch.zeros(num, self.num_priors, self.num_classes)
        if loc_data.is_cuda:
            self.boxes = self.boxes.cuda()
            self.scores = self.scores.cuda()

        # size: batch x num_priors x num_classes
        conf_preds = conf_data.view(num, self.num_priors, self.num_classes)

        # Decode predictions into bboxes.
        for i in range(num):
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            conf_scores = conf_preds[i].clone()

            self.boxes[i] = decoded_boxes
            self.scores[i] = conf_scores

        return self.boxes, self.scores
Code example #12
    def forward(self, loc_data, conf_data, prior_data, conf_thresh):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4]
        """
        batch_size = loc_data.size(0)
        num_priors = prior_data.size(0)
        output = torch.zeros(batch_size, self.num_classes, self.top_k, 5)
        if loc_data.is_cuda:
            output = output.cuda()
        conf_preds = conf_data.transpose(2, 1)  # group by classes

        # Decode predictions into bboxes.
        for i in range(batch_size):
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()

            for cl in range(self.num_classes):
                c_mask = conf_scores[cl].gt(conf_thresh)
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                thresholded_boxes = decoded_boxes[l_mask]
                if len(thresholded_boxes) > 0:
                    boxes = thresholded_boxes.view(-1, 4)
                    # idx of highest scoring and non-overlapping boxes per class
                    ids, count = nms(boxes, scores, self.nms_thresh,
                                     self.top_k)
                    output[i, cl, :count] = \
                        torch.cat((scores[ids[:count]].unsqueeze(1),
                                   boxes[ids[:count]]), 1)
        flt = output.contiguous().view(batch_size, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        # Keep only the overall top_k detections; boolean assignment zeroes
        # the rest of the view (and hence `output`) in place.
        flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0
        return output
Code example #13
def predict(confs, locs, thresh, default_boxes):

    confs = tf.squeeze(confs, 0)
    locs = tf.squeeze(locs, 0)

    confs = tf.math.softmax(confs, axis=-1)
    boxes = decode(default_boxes, locs)

    out_boxes = []
    out_labels = []
    out_scores = []

    for c in range(1, num_classes):
        cls_scores = confs[:, c]

        score_idx = cls_scores > thresh
        cls_boxes = boxes[score_idx]
        cls_scores = cls_scores[score_idx]

        nms_idx = compute_nms(cls_boxes, cls_scores, 0.45, 200)
        cls_boxes = tf.gather(cls_boxes, nms_idx)
        cls_scores = tf.gather(cls_scores, nms_idx)
        cls_labels = [c] * cls_boxes.shape[0]

        out_boxes.append(cls_boxes)
        out_labels.extend(cls_labels)
        out_scores.append(cls_scores)

    out_boxes = tf.concat(out_boxes, axis=0)
    out_scores = tf.concat(out_scores, axis=0)

    boxes = tf.clip_by_value(out_boxes, 0.0, 1.0).numpy()
    classes = np.array(out_labels)
    scores = out_scores.numpy()

    return boxes, classes, scores
Code example #14
def predict(imgs, default_boxes):
    confs, locs = ssd(imgs)

    confs = tf.squeeze(confs, 0)
    locs = tf.squeeze(locs, 0)

    confs = tf.math.softmax(confs, axis=-1)
    classes = tf.math.argmax(confs, axis=-1)
    scores = tf.math.reduce_max(confs, axis=-1)

    boxes = decode(default_boxes, locs)

    out_boxes = []
    out_labels = []
    out_scores = []

    for c in range(1, NUM_CLASSES):
        cls_scores = confs[:, c]

        score_idx = cls_scores > 0.6
        cls_boxes = boxes[score_idx]
        cls_scores = cls_scores[score_idx]

        nms_idx = compute_nms(cls_boxes, cls_scores, 0.45, 200)
        cls_boxes = tf.gather(cls_boxes, nms_idx)
        cls_scores = tf.gather(cls_scores, nms_idx)
        cls_labels = [c] * cls_boxes.shape[0]

        out_boxes.append(cls_boxes)
        out_labels.extend(cls_labels)
        out_scores.append(cls_scores)

    out_boxes = tf.concat(out_boxes, axis=0)
    out_scores = tf.concat(out_scores, axis=0)

    boxes = tf.clip_by_value(out_boxes, 0.0, 1.0).numpy()
    classes = np.array(out_labels)
    scores = out_scores.numpy()

    return boxes, classes, scores
Code example #15
    def forward(self, arm_loc_data, arm_conf_data, odm_loc_data, odm_conf_data, prior_data):
        """
        Args:
            arm_loc_data / odm_loc_data: (tensor) Loc preds from the ARM / ODM
                loc layers. Shape: [batch, num_priors, 4]
            arm_conf_data / odm_conf_data: (tensor) Conf preds from the ARM /
                ODM conf layers. Shape: [batch, num_priors, num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [num_priors, 4]
        """
        loc_data = odm_loc_data
        conf_data = F.softmax(odm_conf_data, dim=2)
        arm_conf_data = F.softmax(arm_conf_data, dim=2)

        arm_object_conf = arm_conf_data.data[:, :, 1:]
        no_object_index = arm_object_conf <= self.objectness_thre
        conf_data[no_object_index.expand_as(conf_data)] = 0

        num = loc_data.size(0)  # batch size
        num_priors = prior_data.size(0)
        output = torch.zeros(num, self.num_classes, self.top_k, 5)
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)
        # Decode predictions into bboxes.
        if torch.cuda.is_available():
            prior_data = prior_data.cuda()
        for i in range(num):
            default = decode(arm_loc_data[i], prior_data, self.variance)
            default = center_size(default)
            decoded_boxes = decode(loc_data[i], default, self.variance)
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()
            for cl in range(1, self.num_classes):
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                ids = torch.tensor(ids, dtype=torch.long)
                if count == 0:
                    continue
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].view(-1, 1),
                               boxes[ids[:count]].view(-1, 4)), 1)
        return output
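
This RefineDet-style detector decodes twice: the ARM offsets first refine the priors, `center_size` converts the refined boxes back to center form, and the ODM offsets are then decoded against those refined anchors. A sketch of `center_size`, assuming point-form input as in ssd.pytorch:

import torch

def center_size(boxes):
    """Convert point-form (x1, y1, x2, y2) boxes to (cx, cy, w, h)."""
    return torch.cat(((boxes[:, :2] + boxes[:, 2:]) / 2,  # centers
                      boxes[:, 2:] - boxes[:, :2]), dim=1)  # sizes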
Code example #16
# Dequantize the raw TFLite outputs: real = (quant - zero_point) * scale,
# where each output's 'quantization' field is a (scale, zero_point) tuple.
p_conf = (p_conf - np.float32(
    interpreter.get_output_details()[0]['quantization'][1])) * np.float32(
        interpreter.get_output_details()[0]['quantization'][0])
p_boxes = (p_boxes - np.float32(
    interpreter.get_output_details()[1]['quantization'][1])) * np.float32(
        interpreter.get_output_details()[1]['quantization'][0])

with open('./config.yml') as f:
    cfg = yaml.safe_load(f)

try:
    config = cfg['SSD300']  #[args.arch.upper()]
except KeyError:
    raise ValueError('Unknown architecture: SSD300')
default_boxes = generate_default_boxes(config)
newres = decode(default_boxes, p_boxes[0]).numpy()
conf = softmax(p_conf, -1)[0]
classes = np.argmax(conf, -1)
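
`softmax` is not imported in this excerpt. A standard NumPy version (an assumption; the project may use scipy.special.softmax instead):

import numpy as np

def softmax(x, axis=-1):
    """Numerically stable softmax along `axis`."""
    e = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e / np.sum(e, axis=axis, keepdims=True)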


# sort and filter to threshold > 0.5, keep the top-n detections
def det_sort_filt(boxes, conf, classes, topn=100, threshold=0.5):
    # one class
    conf = conf[:, 1:]
    scores = np.squeeze(conf)

    # keep only class 1
    mask1 = classes == 1
    mask2 = scores >= threshold
    mask = np.logical_and(mask1, mask2)
    boxes = boxes[mask]
Code example #17
    def lincomb_mask_loss(
        self,
        pos,
        idx_t,
        loc_data,
        mask_data,
        priors,
        proto_data,
        masks,
        gt_box_t,
        score_data,
        inst_data,
        interpolation_mode="bilinear",
    ):
        mask_h = proto_data.size(1)
        mask_w = proto_data.size(2)

        process_gt_bboxes = (cfg.mask_proto_normalize_emulate_roi_pooling
                             or cfg.mask_proto_crop)

        if cfg.mask_proto_remove_empty_masks:
            pos = pos.clone()

        loss_m = 0
        loss_d = 0  # Coefficient diversity loss

        for idx in range(mask_data.size(0)):
            with torch.no_grad():
                downsampled_masks = F.interpolate(
                    masks[idx].unsqueeze(0),
                    (mask_h, mask_w),
                    mode=interpolation_mode,
                    align_corners=False,
                ).squeeze(0)
                downsampled_masks = downsampled_masks.permute(1, 2,
                                                              0).contiguous()

                if cfg.mask_proto_binarize_downsampled_gt:
                    downsampled_masks = downsampled_masks.gt(0.5).float()

                if cfg.mask_proto_remove_empty_masks:
                    very_small_masks = (downsampled_masks.sum(dim=(0, 1)) <=
                                        0.0001)
                    for i in range(very_small_masks.size(0)):
                        if very_small_masks[i]:
                            pos[idx, idx_t[idx] == i] = 0

                if cfg.mask_proto_reweight_mask_loss:
                    # Ensure that the gt is binary
                    if not cfg.mask_proto_binarize_downsampled_gt:
                        bin_gt = downsampled_masks.gt(0.5).float()
                    else:
                        bin_gt = downsampled_masks

                    gt_foreground_norm = bin_gt / (
                        torch.sum(bin_gt, dim=(0, 1), keepdim=True) + 0.0001)
                    gt_background_norm = (1 - bin_gt) / (torch.sum(
                        1 - bin_gt, dim=(0, 1), keepdim=True) + 0.0001)

                    mask_reweighting = (
                        gt_foreground_norm * cfg.mask_proto_reweight_coeff +
                        gt_background_norm)
                    mask_reweighting *= mask_h * mask_w

            cur_pos = pos[idx]
            pos_idx_t = idx_t[idx, cur_pos]

            if process_gt_bboxes:
                # Note: this is in point-form
                if cfg.mask_proto_crop_with_pred_box:
                    pos_gt_box_t = decode(
                        loc_data[idx, :, :],
                        priors.data,
                        cfg.use_yolo_regressors,
                    )[cur_pos]
                else:
                    pos_gt_box_t = gt_box_t[idx, cur_pos]

            if pos_idx_t.size(0) == 0:
                continue

            proto_masks = proto_data[idx]
            proto_coef = mask_data[idx, cur_pos, :]
            if cfg.use_mask_scoring:
                mask_scores = score_data[idx, cur_pos, :]

            if cfg.mask_proto_coeff_diversity_loss:
                if inst_data is not None:
                    div_coeffs = inst_data[idx, cur_pos, :]
                else:
                    div_coeffs = proto_coef

                loss_d += self.coeff_diversity_loss(div_coeffs, pos_idx_t)

            old_num_pos = proto_coef.size(0)
            if old_num_pos > cfg.masks_to_train:
                perm = torch.randperm(proto_coef.size(0))
                select = perm[:cfg.masks_to_train]

                proto_coef = proto_coef[select, :]
                pos_idx_t = pos_idx_t[select]

                if process_gt_bboxes:
                    pos_gt_box_t = pos_gt_box_t[select, :]
                if cfg.use_mask_scoring:
                    mask_scores = mask_scores[select, :]

            num_pos = proto_coef.size(0)
            mask_t = downsampled_masks[:, :, pos_idx_t]

            # Size: [mask_h, mask_w, num_pos]
            pred_masks = proto_masks @ proto_coef.t()
            pred_masks = cfg.mask_proto_mask_activation(pred_masks)

            if cfg.mask_proto_double_loss:
                if cfg.mask_proto_mask_activation == activation_func.sigmoid:
                    pre_loss = F.binary_cross_entropy(torch.clamp(
                        pred_masks, 0, 1),
                                                      mask_t,
                                                      reduction="sum")
                else:
                    pre_loss = F.smooth_l1_loss(pred_masks,
                                                mask_t,
                                                reduction="sum")

                loss_m += cfg.mask_proto_double_loss_alpha * pre_loss

            if cfg.mask_proto_crop:
                pred_masks = crop(pred_masks, pos_gt_box_t)

            if cfg.mask_proto_mask_activation == activation_func.sigmoid:
                pre_loss = F.binary_cross_entropy(torch.clamp(
                    pred_masks, 0, 1),
                                                  mask_t,
                                                  reduction="none")
            else:
                pre_loss = F.smooth_l1_loss(pred_masks,
                                            mask_t,
                                            reduction="none")

            if cfg.mask_proto_normalize_mask_loss_by_sqrt_area:
                gt_area = torch.sum(mask_t, dim=(0, 1), keepdim=True)
                pre_loss = pre_loss / (torch.sqrt(gt_area) + 0.0001)

            if cfg.mask_proto_reweight_mask_loss:
                pre_loss = pre_loss * mask_reweighting[:, :, pos_idx_t]

            if cfg.mask_proto_normalize_emulate_roi_pooling:
                weight = mask_h * mask_w if cfg.mask_proto_crop else 1
                pos_get_csize = center_size(pos_gt_box_t)
                gt_box_width = pos_get_csize[:, 2] * mask_w
                gt_box_height = pos_get_csize[:, 3] * mask_h
                pre_loss = (pre_loss.sum(dim=(0, 1)) / gt_box_width /
                            gt_box_height * weight)

            # If the number of masks was limited, scale the loss accordingly
            if old_num_pos > num_pos:
                pre_loss *= old_num_pos / num_pos

            loss_m += torch.sum(pre_loss)

        losses = {"M": loss_m * cfg.mask_alpha / mask_h / mask_w}

        if cfg.mask_proto_coeff_diversity_loss:
            losses["D"] = loss_d

        return losses
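
`crop` zeroes every mask pixel outside its (normalized, point-form) box so the loss only counts pixels the box covers. A self-contained sketch (YOLACT's own version also pads and sanitizes the coordinates):

import torch

def crop(masks, boxes):
    """masks: [h, w, n]; boxes: [n, 4] normalized (x1, y1, x2, y2)."""
    h, w, n = masks.size()
    x1 = (boxes[:, 0] * w).clamp(0, w)
    x2 = (boxes[:, 2] * w).clamp(0, w)
    y1 = (boxes[:, 1] * h).clamp(0, h)
    y2 = (boxes[:, 3] * h).clamp(0, h)
    cols = torch.arange(w, device=masks.device, dtype=masks.dtype).view(1, -1, 1)
    rows = torch.arange(h, device=masks.device, dtype=masks.dtype).view(-1, 1, 1)
    keep = (cols >= x1) & (cols < x2) & (rows >= y1) & (rows < y2)
    return masks * keep.to(masks.dtype)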
Code example #18
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        tic = time.time()
        loc, conf, landms = net(img)
        print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              cfg['variance'])
        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2]
        ])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
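
The snippet cuts off here. For reference, the `decode_landm` it calls mirrors `decode` but applies only the center-offset part to five landmark points; a sketch assuming RetinaFace-style (cx, cy, w, h) priors:

import torch

def decode_landm(pre, priors, variances):
    """Decode 5 facial landmarks ([n, 10]) from offsets and priors."""
    return torch.cat(tuple(
        priors[:, :2] + pre[:, 2 * i:2 * i + 2] * variances[0] * priors[:, 2:]
        for i in range(5)), dim=1)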