def clip_to_image(self, remove_empty=True):
    # Clamp box coordinates to the image extent; optionally drop degenerate boxes.
    if self.jittor and not isinstance(self.bbox, jt.Var):
        self.to_jittor()
    if self.jittor:
        if self.bbox.numel() == 0:
            return self
        TO_REMOVE = 1
        self.bbox[:, 0] = jt.clamp(self.bbox[:, 0], min_v=0, max_v=self.size[0] - TO_REMOVE)
        self.bbox[:, 1] = jt.clamp(self.bbox[:, 1], min_v=0, max_v=self.size[1] - TO_REMOVE)
        self.bbox[:, 2] = jt.clamp(self.bbox[:, 2], min_v=0, max_v=self.size[0] - TO_REMOVE)
        self.bbox[:, 3] = jt.clamp(self.bbox[:, 3], min_v=0, max_v=self.size[1] - TO_REMOVE)
        if remove_empty:
            box = self.bbox
            keep = jt.logical_and(box[:, 3] > box[:, 1], box[:, 2] > box[:, 0])
            return self[keep]
    else:
        if self.bbox.size == 0:
            return self
        TO_REMOVE = 1
        self.bbox[:, 0] = np.clip(self.bbox[:, 0], 0, self.size[0] - TO_REMOVE)
        self.bbox[:, 1] = np.clip(self.bbox[:, 1], 0, self.size[1] - TO_REMOVE)
        self.bbox[:, 2] = np.clip(self.bbox[:, 2], 0, self.size[0] - TO_REMOVE)
        self.bbox[:, 3] = np.clip(self.bbox[:, 3], 0, self.size[1] - TO_REMOVE)
        if remove_empty:
            box = self.bbox
            keep = np.where((box[:, 3] > box[:, 1]) & (box[:, 2] > box[:, 0]))[0]
            return self[keep]
    return self
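
A minimal usage sketch for the example above, assuming a maskrcnn-benchmark-style BoxList container whose bbox is an N x 4 xyxy tensor and whose size is (image_width, image_height); the BoxList construction itself is hypothetical here:

    import jittor as jt
    # Two boxes on a 640x480 image: one sticking past the left border, and one
    # degenerate box (x2 < x1) that remove_empty should drop.
    boxes = BoxList(jt.array([[-5.0, 10.0, 50.0, 30.0],
                              [20.0, 20.0, 10.0, 25.0]]), (640, 480), mode="xyxy")
    clipped = boxes.clip_to_image(remove_empty=True)
    # coordinates now lie in [0, W-1] x [0, H-1]; only the first box survives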
Example #2
def clip_to_image(self, remove_empty=True):
    if not isinstance(self.bbox, jt.Var):
        self.to_jittor()
    if self.bbox.numel() == 0:
        return self
    TO_REMOVE = 1
    self.bbox[:, 0] = jt.clamp(self.bbox[:, 0], min_v=0, max_v=self.size[0] - TO_REMOVE)
    self.bbox[:, 1] = jt.clamp(self.bbox[:, 1], min_v=0, max_v=self.size[1] - TO_REMOVE)
    self.bbox[:, 2] = jt.clamp(self.bbox[:, 2], min_v=0, max_v=self.size[0] - TO_REMOVE)
    self.bbox[:, 3] = jt.clamp(self.bbox[:, 3], min_v=0, max_v=self.size[1] - TO_REMOVE)
    if remove_empty:
        box = self.bbox
        keep = jt.logical_and(box[:, 3] > box[:, 1], box[:, 2] > box[:, 0])
        return self[keep]
    return self
Example #3
def sample(self, sample_shape=()):
    shape = sample_shape + self.probs.shape[:-1] + (1, )
    rand = jt.rand(shape)
    # rand lands in exactly one cumulative-probability bin (l, r]
    one_hot = jt.logical_and(self.cum_probs_l < rand,
                             rand <= self.cum_probs_r)
    index = one_hot.index(one_hot.ndim - 1)
    return (one_hot * index).sum(-1)
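
A hedged sketch of the cumulative-bin trick the sampler relies on; cum_probs_l and cum_probs_r are assumed to hold the left and right cumulative edges of self.probs:

    import jittor as jt
    probs = jt.array([0.2, 0.5, 0.3])
    cum_r = jt.cumsum(probs)                                    # [0.2, 0.7, 1.0]
    cum_l = jt.contrib.concat([jt.zeros((1,)), cum_r[:-1]], 0)  # [0.0, 0.2, 0.7]
    rand = jt.rand((1,))
    # exactly one bin satisfies l < rand <= r, so the mask is one-hot
    one_hot = jt.logical_and(cum_l < rand, rand <= cum_r)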
Example #4
def remove_small_boxes(boxlist, min_size):
    """
    Only keep boxes with both sides >= min_size

    Arguments:
        boxlist (Boxlist)
        min_size (int)
    """
    # TODO maybe add an API for querying the ws / hs
    xywh_boxes = boxlist.convert("xywh").bbox
    _, _, ws, hs = xywh_boxes.unbind(dim=1)
    keep = jt.where(jt.logical_and((ws >= min_size), (hs >= min_size)))[0]
    return boxlist[keep]
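
Hypothetical usage, again assuming a BoxList-style container with a convert("xywh") method as in maskrcnn-benchmark ports:

    boxes = BoxList(jt.array([[0.0, 0.0, 100.0, 100.0],
                              [10.0, 10.0, 12.0, 11.0]]), (640, 480), mode="xyxy")
    kept = remove_small_boxes(boxes, min_size=8)
    # the second box is 2x1, below min_size on both sides, so it is dropped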
Example #5
def cross_entropy_loss(output, target, ignore_index=None):
    # Flatten (N, C, H, W) logits to (N*H*W, C)
    if len(output.shape) == 4:
        c_dim = output.shape[1]
        output = output.transpose((0, 2, 3, 1))
        output = output.reshape((-1, c_dim))
    target = target.reshape((-1, ))
    if ignore_index is not None:
        # Map ignored labels to -1 so they never match a one-hot index below
        target = jt.ternary(target == ignore_index,
                            jt.array(-1).broadcast(target), target)
        mask = jt.logical_and(target >= 0, target < output.shape[1])
    target = target.broadcast(output, [1])
    target = target.index(1) == target  # one-hot encode the labels

    # numerically stable log-sum-exp cross entropy
    output = output - output.max([1], keepdims=True)
    loss = output.exp().sum(1).log()
    loss = loss - (output * target).sum(1)
    if ignore_index is None:
        return loss.mean()
    # zero out ignored positions, then average over the valid ones only
    return (loss * mask.float()).sum() / jt.maximum(mask.int().sum(), 1)
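
A quick smoke test, under the assumption that targets hold integer class ids and 255 marks ignored pixels (shapes and values here are arbitrary):

    import jittor as jt
    logits = jt.randn(2, 5, 8, 8)           # (N, C, H, W)
    labels = jt.randint(0, 5, (2, 8, 8))    # per-pixel class ids
    labels[0, 0, 0] = 255                   # one ignored pixel
    loss = cross_entropy_loss(logits, labels, ignore_index=255)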
Example #6
    def __call__(self, match_quality_matrix):
        """
        Args:
            match_quality_matrix (Tensor[float]): an MxN tensor, containing the
            pairwise quality between M ground-truth elements and N predicted elements.

        Returns:
            matches (Tensor[int64]): a length-N tensor where matches[i] is the index
            of a matched gt in [0, M - 1], or a negative value indicating that
            prediction i could not be matched.
        """
        if match_quality_matrix.numel() == 0:
            # empty targets or proposals not supported during training
            if match_quality_matrix.shape[0] == 0:
                raise ValueError(
                    "No ground-truth boxes available for one of the images "
                    "during training")
            else:
                raise ValueError(
                    "No proposal boxes available for one of the images "
                    "during training")

        # match_quality_matrix is M (gt) x N (predicted)
        # Max over gt elements (dim 0) to find best gt candidate for each prediction
        matches, matched_vals = match_quality_matrix.argmax(dim=0)  # jittor argmax returns (indices, values)
        if self.allow_low_quality_matches:
            all_matches = matches.clone()

        # Assign candidate matches with low quality to negative (unassigned) values
        below_low_threshold = matched_vals < self.low_threshold
        between_thresholds = jt.logical_and(matched_vals >= self.low_threshold,
                                            matched_vals < self.high_threshold)
        matches[below_low_threshold] = Matcher.BELOW_LOW_THRESHOLD
        matches[between_thresholds] = Matcher.BETWEEN_THRESHOLDS

        if self.allow_low_quality_matches:
            self.set_low_quality_matches_(matches, all_matches, match_quality_matrix)

        return matches
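
A hedged usage sketch; the constructor signature (high threshold, low threshold, allow_low_quality_matches) is assumed from the torchvision-style Matcher API this port mirrors:

    matcher = Matcher(0.7, 0.3, allow_low_quality_matches=True)
    iou = jt.rand((3, 100))  # stand-in for a real M x N IoU matrix (3 gt, 100 preds)
    matches = matcher(iou)
    # matches[i] in [0, 2] names the assigned gt; the negative sentinels
    # BELOW_LOW_THRESHOLD / BETWEEN_THRESHOLDS mark unassigned predictions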
Example #7
def sample(self, sample_shape=()):
    # a list default would raise TypeError when concatenated with the tuple shape below
    shape = sample_shape + self.probs.shape[:-1] + (1, )
    rand = jt.rand(shape)
    one_hot = jt.logical_and(self.cum_probs_l < rand,
                             rand <= self.cum_probs_r).float()
    return one_hot
Example #8
def match(pos_thresh, neg_thresh, truths, priors, labels, crowd_boxes, loc_t,
          conf_t, idx_t, idx, loc_data):
    """Match each prior box with the ground truth box of the highest jaccard
    overlap, encode the bounding boxes, then return the matched indices
    corresponding to both confidence and location preds.
    Args:
        pos_thresh: (float) IoU > pos_thresh ==> positive.
        neg_thresh: (float) IoU < neg_thresh ==> negative.
        truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
        priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
        labels: (tensor) All the class labels for the image, Shape: [num_obj].
        crowd_boxes: (tensor) All the crowd box annotations or None if there are none.
        loc_t: (tensor) Tensor to be filled w/ encoded location targets.
        conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. Note: -1 means neutral.
        idx_t: (tensor) Tensor to be filled w/ the index of the matched gt box for each prior.
        idx: (int) current batch index.
        loc_data: (tensor) The predicted bbox regression coordinates for this batch.
    Return:
        The matched indices corresponding to 1) location and 2) confidence preds.
    """
    decoded_priors = (decode(loc_data, priors, cfg.use_yolo_regressors)
                      if cfg.use_prediction_matching else point_form(priors))

    # Size [num_objects, num_priors]
    overlaps = (jaccard(truths, decoded_priors)
                if not cfg.use_change_matching
                else change(truths, decoded_priors))

    # Size [num_priors] best ground truth for each prior
    best_truth_idx, best_truth_overlap = overlaps.argmax(dim=0)

    # We want to ensure that each gt gets used at least once so that we don't
    # waste any training data. In order to do that, find the max overlap anchor
    # with each gt, and force that anchor to use that gt.
    for _ in range(overlaps.shape[0]):
        # Find j, the gt with the highest overlap with a prior
        # In effect, this will loop through overlaps.shape[0] in a "smart" order,
        # always choosing the highest overlap first.
        best_prior_idx, best_prior_overlap = overlaps.argmax(dim=1)

        j, _ = best_prior_overlap.argmax(dim=0)

        # Find i, the highest overlap anchor with this gt
        i = best_prior_idx[j]

        # Set all other overlaps with i to be -1 so that no other gt uses it
        overlaps[:, i] = -1
        # Set all other overlaps with j to be -1 so that this loop never uses j again
        overlaps[j, :] = -1

        # Overwrite i's score to be 2 so it doesn't get thresholded ever
        best_truth_overlap[i] = 2
        # Set the gt to be used for i to be j, overwriting whatever was there
        best_truth_idx[i] = j

    matches = truths[best_truth_idx]  # Shape: [num_priors,4]
    conf = labels[best_truth_idx] + 1  # Shape: [num_priors]

    conf[best_truth_overlap < pos_thresh] = -1  # label as neutral
    conf[best_truth_overlap < neg_thresh] = 0  # label as background
    # Deal with crowd annotations for COCO
    if crowd_boxes is not None and cfg.crowd_iou_threshold < 1:
        # Size [num_priors, num_crowds]
        crowd_overlaps = jaccard(decoded_priors, crowd_boxes, iscrowd=True)
        # Size [num_priors]
        best_crowd_idx, best_crowd_overlap = crowd_overlaps.argmax(1)
        # Set non-positives with crowd iou of over the threshold to be neutral.
        conf[jt.logical_and(conf <= 0,
                            best_crowd_overlap > cfg.crowd_iou_threshold)] = -1

    loc = encode(matches, priors, cfg.use_yolo_regressors)
    loc_t[idx] = loc  # [num_priors,4] encoded offsets to learn
    conf_t[idx] = conf  # [num_priors] top class label for each prior
    idx_t[idx] = best_truth_idx  # [num_priors] indices for lookup
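
A sketch of how match() is usually driven, one image at a time; the buffer shapes and the 0.5/0.4 thresholds below are illustrative, not taken from the source:

    loc_t = jt.zeros((batch_size, num_priors, 4))
    conf_t = jt.zeros((batch_size, num_priors)).int32()
    idx_t = jt.zeros((batch_size, num_priors)).int32()
    for idx in range(batch_size):
        # fills row `idx` of each buffer in place for one image
        match(0.5, 0.4, truths[idx], priors, labels[idx], None,
              loc_t, conf_t, idx_t, idx, loc_data[idx])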
Example #9
def build_targets(p, targets, model):
    # Build targets for compute_loss(); input targets are (image, class, x, y, w, h)
    det = model.model[-1]  # Detect() module
    na, nt = det.na, targets.shape[0]  # number of anchors, targets
    tcls, tbox, indices, anch = [], [], [], []
    gain = jt.ones((7, ))  # normalized to gridspace gain
    # anchor-index column; same as .repeat_interleave(nt)
    ai = jt.index((na, ), dim=0).float().view(na, 1).repeat(1, nt)

    # append anchor indices: targets becomes (na, nt, 7)
    targets = jt.contrib.concat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)

    g = 0.5  # bias
    off = jt.array([
        [0, 0],
        # [1, 0], [0, 1], [-1, 0], [0, -1],  # j,k,l,m
        # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
    ]).float() * g  # offsets

    for i in range(det.nl):
        anchors = det.anchors[i]
        gain[2:6] = jt.array([p[i].shape[3], p[i].shape[2],
                              p[i].shape[3], p[i].shape[2]])  # xyxy gain

        # Match targets to anchors
        t = targets * gain

        if nt:
            # Matches
            r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
            j = jt.maximum(r, 1. / r).max(2) < model.hyp['anchor_t']  # compare
            # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
            t = t[j]  # filter

            # Offsets
            gxy = t[:, 2:4]  # grid xy
            gxi = gain[jt.array([2, 3])] - gxy  # inverse
            # j, k = jt.logical_and((gxy % 1. < g), (gxy > 1.)).int().transpose(1,0).bool()
            # l, m = jt.logical_and((gxi % 1. < g),(gxi > 1.)).int().transpose(1,0).bool()
            jk = jt.logical_and((gxy % 1. < g), (gxy > 1.))
            lm = jt.logical_and((gxi % 1. < g), (gxi > 1.))
            j, k = jk[:, 0], jk[:, 1]
            l, m = lm[:, 0], lm[:, 1]

            # with only the [0, 0] offset enabled above, this keeps every candidate
            j = jt.stack((jt.ones_like(j), ))
            t = t.repeat((off.shape[0], 1, 1))[j]
            offsets = (jt.zeros_like(gxy)[None] + off[:, None])[j]
        else:
            t = targets[0]
            offsets = 0

        # Define
        b = t[:, 0].int32()  # image index
        c = t[:, 1].int32()  # class
        gxy = t[:, 2:4]  # grid xy
        gwh = t[:, 4:6]  # grid wh
        gij = (gxy - offsets).int32()
        gi, gj = gij[:, 0], gij[:, 1]  # grid xy indices

        # Append
        a = t[:, 6].int32()  # anchor indices
        indices.append((b, a, gj.clamp(0, gain[3] - 1),
                        gi.clamp(0, gain[2] - 1)))  # image, anchor, grid indices
        tbox.append(jt.contrib.concat((gxy - gij, gwh), 1))  # box
        anch.append(anchors[a])  # anchors
        tcls.append(c)  # class

    return tcls, tbox, indices, anch
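
For reference, a hedged call under the assumptions of a YOLOv5-style port: each row of targets is (image_index, class, x, y, w, h) with xywh normalized to [0, 1], and p/model come from the detection model's forward pass:

    targets = jt.array([[0, 1, 0.50, 0.50, 0.20, 0.30],
                        [0, 3, 0.10, 0.90, 0.05, 0.10]])
    tcls, tbox, indices, anchors = build_targets(p, targets, model)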