Exemple #1
0
    def hybrid_forward(self, F, *x):
        """Turn per-level feature maps into a ranked set of RPN proposals.

        Parameters
        ----------
        F : module
            MXNet operator namespace supplied by Gluon.
        *x : feature maps, one per level. Each one is fed to ``self.head`` and
            to the anchor generator that shares its index.

        Returns
        -------
        tuple
            ``(rpn_scores, rpn_boxes)`` when not training. In training mode the
            concatenated raw head scores, the concatenated raw head boxes and
            the list of per-level anchors are appended to that tuple.
        """
        training = autograd.is_training()
        if training:
            pre_nms, post_nms = self.train_pre_nms, self.train_post_nms
        else:
            pre_nms, post_nms = self.test_pre_nms, self.test_post_nms

        num_levels = len(x)
        anchors = []
        level_proposals = []
        level_raw_scores = []
        level_raw_boxes = []
        for level, feat in enumerate(x):
            # The head yields decoded-ready outputs plus the raw ones that are
            # handed back to the caller during training.
            score, box, raw_score, raw_box = self.head(feat)
            with autograd.pause():
                level_anchor = self.anchor_generator[level](feat).reshape((-1, 4))
                anchors.append(level_anchor)
                box = self.box_clip(self.box_decoder(box, level_anchor))
                # Column 0 holds the score, the box coordinates follow it.
                proposal = F.concat(score, box, dim=-1)
                if self.per_level_nms:
                    proposal = F.contrib.box_nms(
                        proposal, overlap_thresh=self.nms_thresh,
                        topk=pre_nms // num_levels, coord_start=1,
                        score_index=0, id_index=-1)

                level_proposals.append(proposal)
                level_raw_scores.append(raw_score)
                level_raw_boxes.append(raw_box)

        proposals = F.concat(*level_proposals, dim=1)
        raw_rpn_scores = F.concat(*level_raw_scores, dim=1)
        raw_rpn_boxes = F.concat(*level_raw_boxes, dim=1)

        if self.per_level_nms:
            # NMS already ran per level; an overlap threshold of 2 makes this
            # call act as a sort by score only.
            overlap_thresh, topk = 2, pre_nms + 1
        else:
            overlap_thresh, topk = self.nms_thresh, pre_nms
        with autograd.pause():
            ranked = F.contrib.box_nms(proposals, overlap_thresh=overlap_thresh,
                                       topk=topk, coord_start=1,
                                       score_index=0, id_index=-1)

        kept = F.slice_axis(ranked, axis=1, begin=0, end=post_nms)
        rpn_scores = F.slice_axis(kept, axis=-1, begin=0, end=1)
        rpn_boxes = F.slice_axis(kept, axis=-1, begin=1, end=None)

        if not training:
            return rpn_scores, rpn_boxes
        return rpn_scores, rpn_boxes, raw_rpn_scores, raw_rpn_boxes, anchors
Exemple #2
0
 def hybrid_forward(self, F, x, basis=None, level_codes=None, thrs_multiplier=None):
     """Quantize ``x`` onto levels derived from ``basis``, with a straight-through gradient.

     The levels are ``dot(level_codes, basis)`` sorted in ascending order and the
     thresholds are ``dot(thrs_multiplier, levels)``. In training mode a new
     basis is solved for from the binary codes assigned to ``x``, blended into
     ``basis`` with ``MOVING_AVERAGES_FACTOR`` and written to ``self.basis``.

     Parameters
     ----------
     F : MXNet operator namespace supplied by Gluon.
     x : input to quantize.
     basis : current basis; when ``None`` the input is returned unchanged.
     level_codes : multiplied with ``basis`` to obtain the levels; rows are also
         used as per-level codes. NOTE(review): presumably a (num_levels, nbits)
         matrix of binary codes - confirm against the caller.
     thrs_multiplier : multiplied with the sorted levels to obtain thresholds.

     Returns
     -------
     ``x`` itself when ``basis`` is ``None``; otherwise a tensor whose forward
     value is the quantized ``y`` and whose gradient is that of
     ``minimum(x, largest level)``.

     NOTE(review): ``BxBT[i, j] = ...``, ``BxX[i] = ...`` and
     ``self.basis.set_data`` look like NDArray-only operations; confirm this
     block is never hybridized.
     """
     # print('basis:{}'.format(basis))
     if basis is None:
         return x
     # calculate levels and sort
     with autograd.pause():
         levels = F.dot(level_codes, basis)
         levels, sort_id = F.topk(F.transpose(levels), ret_typ='both', k=self.num_levels, is_ascend=1)  # ascend
         levels = F.transpose(levels)
         # TODO: levels need backward
         sort_id = F.transpose(sort_id)
         # calculate threshold
         thrs = F.dot(thrs_multiplier, levels)
         # calculate output y and its binary code
         y = F.zeros_like(x)  # output
         reshape_x = F.reshape(x, [-1])
         BT = F.zeros_like(reshape_x)
         BT = F.reshape(F.repeat(BT, self.nbits), shape=(-1, self.nbits))  # (N, k)
         zero_y = F.zeros_like(x)
         zero_bits_y = F.zeros_like(BT)
         # Walk the thresholds in ascending order: every element above thrs[i]
         # is moved up to levels[i + 1] and receives that level's code.
         # NOTE(review): elements not above thrs[0] keep the initial value 0 in
         # y and an all-zero code in BT, not levels[0] - confirm this is intended.
         for i in range(self.num_levels - 1):
             g = F.broadcast_greater(x, thrs[i])  # module 'mxnet.symbol' has no attribute 'greater'
             y = F.where(g, zero_y + levels[i + 1], y)
             BT = F.where(F.reshape(g, [-1]), zero_bits_y + level_codes[sort_id[i + 1][0]], BT)
     if autograd.is_training():
         with autograd.pause():
             # calculate BxBT, one entry at a time; EPS is added on the diagonal
             # before the matrix is inverted
             B = F.transpose(BT)
             BxBT = F.zeros([self.nbits, self.nbits])
             for i in range(self.nbits):
                 for j in range(self.nbits):
                     BxBTij = F.multiply(B[i], B[j])
                     BxBTij = F.sum(BxBTij)
                     if i == j:
                         BxBTij += EPS
                     BxBT[i, j] = BxBTij
             # The custom 'matrix_inverse' op is given a leading batch axis,
             # which is stripped again from its result.
             BxBT_inv = F.Custom(BxBT.expand_dims(0), op_type='matrix_inverse')
             BxBT_inv = BxBT_inv[0]
             # BxBT_inv = BxBT
             # calculate BxX
             BxX = F.zeros([self.nbits])
             for i in range(self.nbits):
                 BxXi0 = F.multiply(B[i], reshape_x)
                 BxXi0 = F.sum(BxXi0)
                 BxX[i] = BxXi0
             BxX = F.reshape(BxX, [self.nbits, 1])
             # new_basis = (B B^T + EPS * I)^-1 (B x)
             new_basis = F.dot(BxBT_inv, BxX)
             # create moving averages op
             basis = MOVING_AVERAGES_FACTOR * basis + new_basis * (1 - MOVING_AVERAGES_FACTOR)
             self.basis.set_data(basis)
     x_clip = F.minimum(x, levels[self.num_levels - 1])  # gradient clip
     # x_clip cancels against its own stopped negative, so the forward value is
     # y while the backward pass only sees x_clip.
     y = x_clip + F.stop_gradient(-x_clip) + F.stop_gradient(y)  # gradient: y=clip(x)
     return y
Exemple #3
0
 def hybrid_forward(self, F, box_preds, gt_boxes, obj_t, centers_t,
                    scales_t, weights_t, clas_t):
     """Merge dynamically generated targets with prefetched fixed targets.

     Wherever the prefetched objectness is positive the prefetched targets
     win; elsewhere the targets computed by ``self._dynamic_target`` are used.

     Parameters
     ----------
     F : mxnet.nd or mxnet.sym
         `F` is mxnet.sym if hybridized or mxnet.nd if not.
     box_preds : mxnet.nd.NDArray
         Predicted bounding boxes.
     gt_boxes : mxnet.nd.NDArray
         Ground-truth bounding boxes.
     obj_t : mxnet.nd.NDArray
         Prefetched Objectness targets.
     centers_t : mxnet.nd.NDArray
         Prefetched regression target for center x and y.
     scales_t : mxnet.nd.NDArray
         Prefetched regression target for scale x and y.
     weights_t : mxnet.nd.NDArray
         Prefetched element-wise gradient weights for center_targets and scale_targets.
     clas_t : mxnet.nd.NDArray
         Prefetched one-hot vector for classification.

     Returns
     -------
     list of mxnet.nd.NDArray
         Six gradient-blocked arrays, in this order:
         objectness: 0 for negative, 1 for positive, -1 for ignore.
         center_targets: regression target for center x and y.
         scale_targets: regression target for scale x and y.
         weights: element-wise gradient weights for center_targets and scale_targets.
         class_targets: class targets (label-smoothed when ``self._label_smooth``).
         class_mask: 1 where the prefetched objectness is positive and the
         class target is non-negative, 0 elsewhere.
     """
     with autograd.pause():
         dynamic_t = self._dynamic_target(box_preds, gt_boxes)
         # Pair each dynamic target (index 0) with its fixed counterpart
         # (index 1); the fixed target overrides the dynamic one.
         obj, centers, scales, weights, clas = zip(
             dynamic_t, [obj_t, centers_t, scales_t, weights_t, clas_t])
         mask = obj[1] > 0
         objectness = F.where(mask, obj[1], obj[0])
         mask2 = mask.tile(reps=(2, ))
         center_targets = F.where(mask2, centers[1], centers[0])
         scale_targets = F.where(mask2, scales[1], scales[0])
         weight_targets = F.where(mask2, weights[1], weights[0])
         mask3 = mask.tile(reps=(self._num_class, ))
         class_targets = F.where(mask3, clas[1], clas[0])
         if self._label_smooth:
             smooth_weight = 1. / self._num_class
             # Classify every entry BEFORE modifying it. The previous code
             # smoothed the positives and then overwrote everything that was
             # not "ignored" with smooth_weight, which erased the positives.
             positive = class_targets > 0.5
             ignored = class_targets < -0.5
             smoothed = F.where(positive,
                                class_targets - smooth_weight,
                                F.ones_like(class_targets) * smooth_weight)
             class_targets = F.where(ignored, class_targets, smoothed)
         class_mask = mask3 * (class_targets >= 0)
         return [
             F.stop_gradient(x) for x in [
                 objectness, center_targets, scale_targets, weight_targets,
                 class_targets, class_mask
             ]
         ]
Exemple #4
0
    def hybrid_forward(self, F, features, labels):
        """Triplet-style loss computed over every sample triple in the batch.

        Adapted from triplet loss: the distance between same-class samples
        should be smaller than the distance to other-class samples by a
        margin. This is the second variant, which enumerates all sample pairs
        in the batch instead of mining triplets.

        Parameters
        ----------
        F : MXNet operator namespace supplied by Gluon.
        features : per-sample feature vectors; pairwise squared Euclidean
            distances are taken over the last axis.
        labels : per-sample class labels.

        Returns
        -------
        Sum of ``relu(d_ij - d_jk + margin)`` over all triples where ``i`` and
        ``j`` share a class (``i != j``) and ``j`` and ``k`` do not, divided by
        the number of such triples. When the batch contains no such triple the
        result is 0 instead of the NaN produced by a 0 / 0 division.
        """
        with autograd.pause():
            same = (labels.expand_dims(axis=1) == labels.expand_dims(axis=0))
            # Remove the diagonal so a sample is never paired with itself.
            same = same - F.diag(F.diag(same))
            different = (labels.expand_dims(axis=1) != labels.expand_dims(axis=0))
            # w_ij: 1 for same class, -1 for different class, 0 when i == j
            w = same - different
            # w_ijk: 1 when (i, j) share a class and (j, k) do not, else 0
            w = (w.expand_dims(axis=2) - w.expand_dims(axis=0) - 1).relu()
            w = w.astype(np.float32)

        distance = ((features.expand_dims(axis=1) -
                     features.expand_dims(axis=0))**2).sum(axis=-1)
        # loss_ijk = d_ij - d_jk + margin, clamped at zero
        loss = (distance.expand_dims(axis=2) - distance.expand_dims(axis=0) +
                self._margin).relu()
        loss = w * loss
        # w only holds 0/1, so clamping the count at 1 changes nothing unless
        # there are no valid triples at all.
        num_triplets = F.maximum(w.sum(), 1)
        return loss.sum() / num_triplets
    def hybrid_forward(self, F, roi, gt_box):
        """Pick ``self._num_sample`` RoIs out of the proposals plus ground truths.

        Only batch_size=1 is supported: the leading axis of ``roi`` and
        ``gt_box`` is squeezed away and re-added on the outputs.

        Returns
        -------
        tuple
            ``(new_roi, new_samples, new_matches)`` - the selected boxes, their
            sampler values and their matched ground-truth indices, each with a
            leading axis of size 1.
        """
        with autograd.pause():
            # Ground-truth boxes are appended to the proposals so they can be
            # sampled as well.
            candidates = F.concat(roi.squeeze(axis=0), gt_box.squeeze(axis=0), dim=0)
            # IoU between every candidate and every ground-truth box, with the
            # first two axes swapped afterwards.
            iou = F.contrib.box_iou(candidates, gt_box, format='corner')
            iou = iou.transpose((1, 0, 2))
            matches = self._matcher(iou)
            samples = F.Custom(matches,
                               iou,
                               op_type='quota_sampler',
                               num_sample=self._num_sample,
                               pos_thresh=self._pos_iou_thresh,
                               neg_thresh_high=self._neg_iou_thresh_high,
                               neg_thresh_low=self._neg_iou_thresh_low,
                               pos_ratio=self._pos_ratio)
            # Drop the batch axis from both results.
            samples = samples.squeeze(axis=0)
            matches = matches.squeeze(axis=0)

            # Entries whose sampler value is 0 get key -999 so the descending
            # sort places them last; the first num_sample entries are kept.
            sort_key = F.where(samples == 0, F.ones_like(samples) * -999, samples)
            order = F.argsort(sort_key, is_ascend=False)
            keep = order.slice_axis(axis=0, begin=0, end=self._num_sample)
            new_roi = candidates.take(keep).expand_dims(0)
            new_samples = samples.take(keep).expand_dims(0)
            new_matches = matches.take(keep).expand_dims(0)
        return new_roi, new_samples, new_matches
    def forward(self, bbox, anchor, width, height):
        """Build RPN training targets for one image.

        Only batch_size=1 is supported. Note that part of the work is done in
        NumPy (the invalid-anchor indices are computed on the host).

        Returns
        -------
        tuple
            ``(cls_target, box_target, box_mask)`` as produced by
            ``self._cls_encoder`` and ``self._box_encoder``.
        """
        F = mx.nd
        with autograd.pause():
            a_xmin, a_ymin, a_xmax, a_ymax = self._bbox_split(anchor)
            border = self._allowed_border
            # An anchor is usable only if all four edges stay within the image
            # enlarged by the allowed border.
            inside = ((a_xmin >= -border) *
                      (a_ymin >= -border) *
                      (a_xmax <= (width + border)) *
                      (a_ymax <= (height + border)))
            outside = inside <= 0
            outside_rows = np.where(outside.asnumpy() > 0)[0]
            outside_idx = mx.nd.array(outside_rows, ctx=anchor.context)

            # IoU between anchors and ground-truth boxes, first two axes
            # swapped; rejected anchors get IoU -1.
            ious = F.contrib.box_iou(anchor, bbox, format='corner')
            ious = ious.transpose((1, 0, 2))
            ious[:, outside_idx, :] = -1
            matches = self._matcher(ious)
            samples = self._sampler(matches, ious).as_in_context(anchor.context)
            # Encode the sampled anchors into classification / box targets.
            cls_target, _ = self._cls_encoder(samples)
            batched_anchor = anchor.expand_dims(axis=0)
            box_target, box_mask = self._box_encoder(samples, matches,
                                                     batched_anchor, bbox)
        return cls_target, box_target, box_mask
Exemple #7
0
    def forward(self, bboxes, anchors, height, width):
        """Label anchors against ground-truth boxes for a single image.

        Returns
        -------
        tuple
            ``(cls_label, targets[0], masks[0])`` - the class labels from
            ``self._cls_encoder`` and the first batch entry of the box targets
            and masks from ``self._bbox_encoder``.
        """
        with autograd.pause():
            # IoU between every anchor and every ground-truth box.
            ious = mx.nd.contrib.box_iou(anchors, bboxes)
            # Flag anchors that cross the image boundary.
            x_min, y_min, x_max, y_max = self._spliter(anchors)
            out_of_bounds = ((x_min < 0) + (y_min < 0) + (x_max >= width) +
                             (y_max >= height))
            # Repeat the flag once per ground-truth box, then set the IoU of
            # every flagged anchor to -1.
            out_of_bounds = nd.repeat(out_of_bounds,
                                      repeats=bboxes.shape[0],
                                      axis=-1)
            minus_one = nd.ones_like(ious) * -1
            ious = nd.where(out_of_bounds > 0, minus_one, ious)

            # Sample anchors, then encode the labels.
            samples, matches = self._sampler(ious)
            cls_label, _ = self._cls_encoder(samples)
            # The box encoder is called with a leading batch axis on each input.
            batched = [
                arr.expand_dims(axis=0)
                for arr in (samples, matches, anchors, bboxes)
            ]
            targets, masks = self._bbox_encoder(*batched)

        return cls_label, targets[0], masks[0]
    def sample(
        self, num_samples: Optional[int] = None, dtype=np.float32
    ) -> Tensor:
        r"""
        Draw samples from the distribution with gradients blocked.

        The samples come from ``sample_rep`` and are passed through
        ``BlockGrad`` before being returned. If ``num_samples`` is given, the
        first dimension of the output will be ``num_samples``.

        Parameters
        ----------
        num_samples
            Number of samples to be drawn.
        dtype
            Data-type of the samples.

        Returns
        -------
        Tensor
            A tensor containing samples. This has shape
            `(*batch_shape, *eval_shape)` if `num_samples = None`
            and  `(num_samples, *batch_shape, *eval_shape)` otherwise.
        """
        with autograd.pause():
            draws = self.sample_rep(num_samples=num_samples, dtype=dtype)
            return getF(draws).BlockGrad(draws)
    def hybrid_forward(self, F, anchor, score, bbox_pred, img):
        """
        Generate region proposals. Limited to batch-size=1 in the current
        implementation.

        Boxes are decoded from ``bbox_pred`` and ``anchor``, clipped with
        ``self._clipper``, filtered by minimum size, passed through NMS and
        cut down to the post-NMS count.

        Returns
        -------
        tuple
            ``(rpn_scores, rpn_bbox)`` - the score column and the coordinate
            columns of the boxes kept after NMS.
        """
        training = autograd.is_training()
        pre_nms = self._train_pre_nms if training else self._test_pre_nms
        post_nms = self._train_post_nms if training else self._test_post_nms

        with autograd.pause():
            # Decode the predictions relative to the anchors, then clip the
            # result against the image.
            decoded = self._box_decoder(bbox_pred, self._box_to_center(anchor))
            roi = self._clipper(decoded, img)

            # Boxes below the minimum size are not removed; their score and
            # coordinates are overwritten with -1.
            # TODO:(zhreshold), there's im_ratio to handle here, but it requires
            # add' info, and we don't expect big difference
            xmin, ymin, xmax, ymax = roi.split(axis=-1, num_outputs=4)
            too_narrow = (xmax - xmin) < self._min_size
            too_short = (ymax - ymin) < self._min_size
            invalid = too_narrow + too_short

            # NOTE: out-of-bound anchors are not filtered here; an earlier
            # attempt at doing so inside a HybridBlock was left disabled.
            score = F.where(invalid, F.ones_like(invalid) * -1, score)
            invalid = F.repeat(invalid, axis=-1, repeats=4)
            roi = F.where(invalid, F.ones_like(invalid) * -1, roi)

            # Non-maximum suppression on [score, box] rows.
            candidates = F.concat(score, roi, dim=-1)
            suppressed = F.contrib.box_nms(candidates,
                                           overlap_thresh=self._nms_thresh,
                                           topk=pre_nms,
                                           coord_start=1,
                                           score_index=0,
                                           id_index=-1,
                                           force_suppress=True)

            # Keep the first post_nms rows and split score from coordinates.
            kept = F.slice_axis(suppressed, axis=1, begin=0, end=post_nms)
            rpn_scores = F.slice_axis(kept, axis=-1, begin=0, end=1)
            rpn_bbox = F.slice_axis(kept, axis=-1, begin=1, end=None)

        return rpn_scores, rpn_bbox
Exemple #10
0
    def hybrid_forward(self, F, x, *args, **kw):
        """
        Run the wrapped InferenceAlgorithm, handling its pre- and postprocessing.

        Tied variables are copied, data and parameters receive a sample
        dimension, parameter transformations are applied, and after the
        computation any parameter values the algorithm asked to set directly
        are stored in ``self._infr_params``.

        :param F: the MXNet computation mode
        :type F: mxnet.symbol or mxnet.ndarray
        :param x: a dummy variable to enable the execution of this Gluon block
        :type x: MXNet NDArray or MXNet Symbol
        :param *arg: all the positional arguments, which correspond to the data provided to the InferenceAlgorithm.
        :type *arg: list of MXNet NDArray or MXNet Symbol
        :param **kw: all the keyword arguments, which correspond to the parameters that may require gradients.
        :type kw: {str(UUID): MXNet NDArray or MXNet Symbol}
        :returns: the outcome of the InferenceAlgorithm that are determined by the inference algorithm.
        :rtypes: {str: MXNet NDArray or MXNet Symbol}
        """
        # Tied variables share the value of the variable they are tied to.
        for to_uuid, from_uuid in self._var_ties.items():
            kw[to_uuid] = kw[from_uuid]

        data = dict(zip(self._data_def, args))
        variables = add_sample_dimension_to_arrays(F, data)

        for name, transformation in self._var_trans.items():
            kw[name] = transformation.transform(kw[name], F=F)

        # Parameters and constants are merged into the same variable dict.
        add_sample_dimension_to_arrays(F, kw, out=variables)
        add_sample_dimension_to_arrays(F, self._constants, out=variables)

        obj = self._infr_method.compute(F=F, variables=variables)

        with autograd.pause():
            # An inference algorithm may set a parameter value directly instead
            # of computing its gradient; such requests are stored under keys
            # carrying SET_PARAMETER_PREFIX as (parameter, value) pairs.
            for key, value in variables.items():
                if not key.startswith(SET_PARAMETER_PREFIX):
                    continue
                self._infr_params[value[0]] = value[1]
        return obj
Exemple #11
0
    def hybrid_forward(self, F, roi, samples, matches, gt_label, gt_box):
        """Encode classification and box-regression targets for sampled RoIs.

        Components can handle batch images.

        Parameters
        ----------
        roi: (B, N, 4), input proposals
        samples: (B, N), value +1: positive / -1: negative.
        matches: (B, N), value [0, M), index to gt_label and gt_box.
        gt_label: (B, M), value [0, num_class), excluding background class.
        gt_box: (B, M, 4), input ground truth box corner coordinates.

        Returns
        -------
        cls_target: (B, N), value [0, num_class + 1), including background.
        box_target: (B, N, C, 4), only foreground class has nonzero target.
        box_weight: (B, N, C, 4), only foreground class has nonzero weight.

        NOTE(review): the box encoder output is returned without any
        transpose; confirm it really is (B, N, C, 4) and not (C, B, N, 4).
        """
        with autograd.pause():
            cls_target = self._cls_encoder(samples, matches, gt_label)
            encoded_boxes = self._box_encoder(samples, matches, roi,
                                              gt_label, gt_box)
            box_target, box_mask = encoded_boxes
        return cls_target, box_target, box_mask
Exemple #12
0
    def _spectral_norm(self, weight: Tensor, u: Tensor) -> Tensor:
        """
        Rescale ``weight`` by a clamped estimate of its spectral norm.

        Adapted from https://github.com/apache/incubator-
        mxnet/blob/master/example/gluon/sn_gan/model.py.

        The weight is flattened to a matrix with ``weight.shape[0]`` rows and
        its largest singular value is estimated with ``self._num_power_iter``
        rounds of power iteration starting from ``u``. The final ``u`` is
        stored back into ``self._u``.

        Parameters
        ----------
        weight
            Weight tensor to normalize.
        u
            Current power-iteration vector.

        Returns
        -------
        Tensor
            ``weight / max(1, sigma / self._coeff)``. Unlike standard spectral
            normalization the divisor never drops below 1.

        NOTE(review): with ``self._num_power_iter == 0`` the vector ``_v`` is
        never computed and the sigma estimate fails; callers presumably always
        use at least one iteration.
        """
        w_mat = nd.reshape(weight, [weight.shape[0], -1])

        _u = u
        _v = None

        for _ in range(self._num_power_iter):
            _v = nd.L2Normalization(nd.dot(_u, w_mat))
            _u = nd.L2Normalization(nd.dot(_v, w_mat.T))

        sigma = nd.sum(nd.dot(_u, w_mat) * _v)

        # this is different from standard spectral normalization
        # The clamp keeps sigma >= 1, so no zero guard is needed. The former
        # `if sigma == 0.0` check could never fire and only converted the
        # NDArray to a Python bool.
        sigma = nd.maximum(nd.ones(1, ctx=self._ctx), sigma / self._coeff)

        with autograd.pause():
            self._u.set_data(_u)

        return weight / sigma
Exemple #13
0
    def forward(self, roi, samples, matches, gt_label, gt_box):
        """Encode RCNN training targets and reorder them to match predictions.

        Components can handle batch images.

        Parameters
        ----------
        roi: (B, N, 4), input proposals
        samples: (B, N), value +1: positive / -1: negative.
        matches: (B, N), value [0, M), index to gt_label and gt_box.
        gt_label: (B, M), value [0, num_class), excluding background class.
        gt_box: (B, M, 4), input ground truth box corner coordinates.

        Returns
        -------
        cls_target: (B, N), value [0, num_class + 1), including background.
        box_target: (B, N, C, 4), only foreground class has nonzero target.
        box_weight: (B, N, C, 4), only foreground class has nonzero weight.

        """
        # Axis order that moves the leading class axis behind batch and RoI:
        # (C, B, N, 4) -> (B, N, C, 4).
        batch_major = (1, 2, 0, 3)
        with autograd.pause():
            # Positives become gt_label class + 1, negatives 0, ignored -1.
            cls_target = self._cls_encoder(samples, matches, gt_label)
            # Encoder output is (C, B, N, 4); negative and ignored boxes are
            # set to zero.
            encoded_target, encoded_mask = self._box_encoder(
                samples, matches, roi, gt_label, gt_box)
            box_target = encoded_target.transpose(batch_major)
            box_mask = encoded_mask.transpose(batch_major)
        return cls_target, box_target, box_mask
Exemple #14
0
    def __call__(self, module):
        """Clip ``module.rho`` in place to ``[self.clip_min, self.clip_max]``.

        Modules without a ``rho`` attribute are left untouched.
        """
        if not hasattr(module, 'rho'):
            return
        with autograd.pause():
            clipped = module.rho.data().clip(self.clip_min, self.clip_max)
            # Write through the existing parameter array instead of rebinding it.
            module.rho.data()[:] = clipped
    def hybrid_forward(self, F, box_preds, gt_boxes):
        """Generate prediction-dependent (dynamic) training targets.

        Only the objectness target depends on the inputs: a prediction whose
        best IoU with any ground-truth box exceeds ``self._ignore_iou_thresh``
        is marked -1 (ignored), every other prediction 0. The remaining
        targets are constant fill values.

        Parameters
        ----------
        F : mxnet.nd or mxnet.sym
            `F` is mxnet.sym if hybridized or mxnet.nd if not.
        box_preds : mxnet.nd.NDArray
            Predicted bounding boxes.
        gt_boxes : mxnet.nd.NDArray
            Ground-truth bounding boxes.

        Returns
        -------
        (tuple of) mxnet.nd.NDArray
            objectness: 0 for negative, -1 for ignore.
            center_targets: zeros, two values per prediction.
            scale_targets: zeros, two values per prediction.
            weights: zeros, two values per prediction.
            class_targets: filled with -1, ``self._num_class`` values per prediction.

        """
        with autograd.pause():
            box_preds = box_preds.reshape((0, -1, 4))
            # Slices of the predictions serve purely as shape templates.
            one_wide = box_preds.slice_axis(axis=-1, begin=0, end=1)
            two_wide = box_preds.slice_axis(axis=-1, begin=0, end=2)
            objness_t = F.zeros_like(one_wide)
            center_t = F.zeros_like(two_wide)
            scale_t = F.zeros_like(two_wide)
            weight_t = F.zeros_like(two_wide)
            class_t = F.ones_like(objness_t.tile(reps=self._num_class)) * -1
            batch_ious = self._batch_iou(box_preds, gt_boxes)  # (B, N, M)
            ious_max = batch_ious.max(axis=-1, keepdims=True)  # (B, N, 1)
            # A high best-IoU marks the prediction as ignored (-1).
            objness_t = (ious_max > self._ignore_iou_thresh) * -1
        return objness_t, center_t, scale_t, weight_t, class_t
Exemple #16
0
    def hybrid_forward(self, F, box_preds, gt_boxes):
        """Generate prediction-dependent (dynamic) training targets.

        Only the objectness target depends on the inputs: a prediction whose
        best IoU with any ground-truth box exceeds ``self._ignore_iou_thresh``
        is marked -1 (ignored), every other prediction 0. The remaining
        targets are constant fill values.

        Parameters
        ----------
        F : mxnet.nd or mxnet.sym
            `F` is mxnet.sym if hybridized or mxnet.nd if not.
        box_preds : mxnet.nd.NDArray
            Predicted bounding boxes.
        gt_boxes : mxnet.nd.NDArray
            Ground-truth bounding boxes.

        Returns
        -------
        (tuple of) mxnet.nd.NDArray
            objectness: 0 for negative, -1 for ignore.
            center_targets: zeros, two values per prediction.
            scale_targets: zeros, two values per prediction.
            weights: zeros, two values per prediction.
            class_targets: filled with -1, ``self._num_class`` values per prediction.

        """
        with autograd.pause():
            box_preds = box_preds.reshape((0, -1, 4))
            # Slices of the predictions serve purely as shape templates.
            one_wide = box_preds.slice_axis(axis=-1, begin=0, end=1)
            two_wide = box_preds.slice_axis(axis=-1, begin=0, end=2)
            objness_t = F.zeros_like(one_wide)
            center_t = F.zeros_like(two_wide)
            scale_t = F.zeros_like(two_wide)
            weight_t = F.zeros_like(two_wide)
            class_t = F.ones_like(objness_t.tile(reps=self._num_class)) * -1
            batch_ious = self._batch_iou(box_preds, gt_boxes)  # (B, N, M)
            ious_max = batch_ious.max(axis=-1, keepdims=True)  # (B, N, 1)
            # A high best-IoU marks the prediction as ignored (-1).
            objness_t = (ious_max > self._ignore_iou_thresh) * -1
        return objness_t, center_t, scale_t, weight_t, class_t
Exemple #17
0
 def hybrid_forward(self, F, x, gamma, beta, running_mean, running_var):
     """Batch normalization with statistics reduced through ``AllReduce``.

     Outside training this delegates to ``F.BatchNorm`` with the running
     statistics. In training mode the per-input sum and sum of squares are
     all-reduced, mean and standard deviation are derived from the reduced
     values, the running statistics are updated through ``self.updater``, and
     ``F.DecoupleBatchNorm`` produces the output.

     NOTE(review): the element count multiplies ``x.shape`` axes 0, 2 and 3,
     so the input is presumably 4-D with channels on axis 1 - confirm.
     """
     if not autograd.is_training():
         return F.BatchNorm(x, gamma, beta, running_mean, running_var,
                            name='fwd', **self._kwargs)

     local_sum, local_sqsum = F.SumSquare(x)
     count = self.ndevices * x.shape[0] * x.shape[2] * x.shape[3]
     reducer = AllReduce(self._prefix)
     total_sum, total_sqsum = reducer(local_sum, local_sqsum)

     # Mean and (biased) standard deviation from the reduced sums; the
     # variance is floored at eps before the square root.
     mean = total_sum / count
     sq_dev_sum = total_sqsum - total_sum * total_sum / count
     biased_var = sq_dev_sum / count
     std = F.sqrt(F.maximum(biased_var, self.eps))

     # The running statistics use the unbiased variance and are updated with
     # gradient recording paused.
     with autograd.pause():
         unbiased_var = sq_dev_sum / (count - 1)
         self.updater(self.running_mean, self.running_var, mean,
                      unbiased_var, self.momentum, x.context)

     return F.DecoupleBatchNorm(x, gamma, beta, mean, std)
Exemple #18
0
    def train_the(self, batch_xs, batch_ys):
        """Run one training step over the per-device batch slices.

        For each device slice the network output is compared against the
        labels and mask from ``self.loss_mask``. Three losses (score, box,
        class) are summed per device, back-propagated, and the trainer is
        stepped. The three losses of the LAST device slice are handed to
        ``self.record_to_tensorboard_and_save``.
        """
        device_losses = []
        with autograd.record():
            device_batches = enumerate(zip(batch_xs, batch_ys))
            for gpu_index, (batch_x, batch_y) in device_batches:
                C_pred = self.net(batch_x)
                with autograd.pause():
                    C_label, C_mask = self.loss_mask(batch_y, gpu_index)

                    # Weight 10.0 where the mask is positive, 0.1 elsewhere.
                    ones = nd.ones_like(C_mask)
                    C_score_weight = nd.where(C_mask > 0,
                                              ones * 10.0,
                                              ones * 0.1,
                                              ctx=ctx[gpu_index])

                score_loss = self.LG_loss(C_pred[0], C_label[0],
                                          C_score_weight * 0.1)
                box_loss = self.L2_loss(C_pred[1], C_label[1], C_mask * 1.0)
                # Class-loss weight: 0.1 (original note: "after 1day:1.0").
                class_loss = self.CE_loss(C_pred[2], C_label[2], C_mask * 0.1)
                device_losses.append(score_loss + box_loss + class_loss)

        for device_loss in device_losses:
            device_loss.backward()
        self.trainer.step(batch_size)

        self.record_to_tensorboard_and_save([score_loss, box_loss, class_loss])
Exemple #19
0
    def compute(self, F, variables):
        """Compute the log-likelihood term and store posterior quantities.

        The kernel matrix plus a noise term on the diagonal is
        Cholesky-factorized. The returned value combines the log-determinant
        of the factor with the squared whitened targets. ``X``, ``L`` and
        ``LinvY`` (first sample each) are stored on the posterior via
        ``self.set_parameter``.

        :param F: the MXNet computation mode
        :param variables: mapping from model variables to their values
        :returns: ``-logdet(L) * D - 0.5 * sum(LinvY**2 + log(2 * pi))``
        """
        X = variables[self.model.X]
        Y = variables[self.model.Y]
        noise_var = variables[self.model.noise_var]
        # Sizes are read before the arrays are expanded to samples.
        D, N = Y.shape[-1], X.shape[-2]
        kern = self.model.kernel
        kern_params = kern.fetch_parameters(variables)

        X, Y, noise_var, kern_params = arrays_as_samples(
            F, [X, Y, noise_var, kern_params])

        # K = kernel(X, X) + I * noise_var
        K = kern.K(F, X, **kern_params)
        identity = F.expand_dims(F.eye(N, dtype=X.dtype), axis=0)
        K = K + identity * F.expand_dims(noise_var, axis=-2)
        L = F.linalg.potrf(K)

        if self.model.mean_func is not None:
            Y = Y - self.model.mean_func(F, X)
        LinvY = F.linalg.trsm(L, Y)
        logdet_l = F.linalg.sumlogdiag(F.abs(L))
        per_entry = F.square(LinvY) + np.log(2. * np.pi)
        tmp = F.sum(F.reshape(per_entry, shape=(Y.shape[0], -1)), axis=-1)
        logL = - logdet_l * D - tmp/2

        with autograd.pause():
            posterior_values = (
                (self.posterior.X, X[0]),
                (self.posterior.L, L[0]),
                (self.posterior.LinvY, LinvY[0]),
            )
            for target, value in posterior_values:
                self.set_parameter(variables, target, value)
        return logL
Exemple #20
0
    def forward(self, rcnn_cls_pred, rcnn_bbox_pred, rcnn_cls_gt,
                rcnn_bbox_gt):
        """Compute the RCNN classification and box-regression losses.

        Returns
        -------
        tuple
            ``(cls_log_loss, bbox_smooth_l1_loss)``, both divided by
            ``self._roi_batch_size``.
        """
        with autograd.pause():
            ctx = rcnn_cls_pred.context
            roi_num = rcnn_cls_pred.shape[0]
            roi_idx = nd.arange(roi_num, ctx=ctx).reshape(-1, 1)
            # Only RoIs with a class label above 0 contribute to the box loss.
            fg_bbox_mask = (rcnn_cls_gt > 0).reshape(0, 1, 1)
            bbox_weights = nd.zeros_like(rcnn_bbox_gt).reshape(0, -1, 4)
            # For each RoI, write the weights into the slot of its ground-truth
            # class; all other class slots stay zero.
            roi_weights = self._bbox_weights.data(ctx).broadcast_to(
                (roi_num, 1, 4))
            bbox_weights[roi_idx, rcnn_cls_gt[:], :] = roi_weights * fg_bbox_mask
            bbox_weights = bbox_weights.reshape(0, -1)

        # rcnn_cls_pred.shape (roi_num, num_classes)
        # Negative log of the (clipped) prediction at the ground-truth class.
        clipped_pred = nd.clip(rcnn_cls_pred, 1e-14, 1)
        rcnn_cls_log = nd.log(clipped_pred)
        picked_log = rcnn_cls_log[roi_idx, rcnn_cls_gt]
        cls_log_loss = -nd.sum(picked_log) / self._roi_batch_size.data(ctx)

        # rcnn_bbox_pred.shape (roi_num, num_classes*4)
        bbox_residual = rcnn_bbox_pred - rcnn_bbox_gt
        rcnn_bbox_smooth_l1 = nd.smooth_l1(bbox_residual, scalar=1.0)
        weighted_l1 = rcnn_bbox_smooth_l1 * bbox_weights
        bbox_smooth_l1_loss = nd.sum(weighted_l1) / self._roi_batch_size.data(ctx)

        return cls_log_loss, bbox_smooth_l1_loss
 def forward(self, roi, samples, matches, gt_label, gt_box):
     """Encode RCNN targets; box outputs get a new axis inserted at position 2.

     Components can handle batch images.

     Parameters
     ----------
     roi: (B, N, 4), input proposals
     samples: (B, N), value +1: positive / -1: negative.
     matches: (B, N), value [0, M), index to gt_label and gt_box.
     gt_label: (B, M), value [0, num_class), excluding background class.
     gt_box: (B, M, 4), input ground truth box corner coordinates.

     Returns
     -------
     cls_target: (B, N), value [0, num_class + 1), including background.
     box_target: (B, N, C, 4), only foreground class has nonzero target.
     box_weight: (B, N, C, 4), only foreground class has nonzero weight.

     NOTE(review): the box encoder is called without ``gt_label`` and its
     outputs only receive ``expand_dims(axis=2)``, so C is presumably 1 here -
     confirm against the encoder.
     """
     with autograd.pause():
         cls_target = self._cls_encoder(samples, matches, gt_label)
         encoded_target, encoded_mask = self._box_encoder(
             samples, matches, roi, gt_box)
         # Insert the class axis expected by the predictions.
         box_target = encoded_target.expand_dims(axis=2)
         box_mask = encoded_mask.expand_dims(axis=2)
     return cls_target, box_target, box_mask
Exemple #22
0
    def batch_forward(self, batch_data, validation=False):
        """Run the network and its losses on one batch across all contexts.

        Every entry of ``batch_data`` is split across ``self.args.ctx``. The
        forward pass and losses run under ``autograd.record(True)`` for
        training or ``autograd.pause(False)`` for validation. Metrics are
        updated per context with recording paused.

        Returns
        -------
        tuple
            ``(losses, losses_logging, splitted_batch, outputs)`` - the
            per-context total loss, the logging dict passed to
            ``self.add_loss``, the split inputs and the raw network outputs.
        """
        splitted_batch = {}
        for key, value in batch_data.items():
            splitted_batch[key] = gluon.utils.split_and_load(
                value, ctx_list=self.args.ctx, even_split=False)

        if 'instances' in splitted_batch:
            # Merge the first two axes of each per-context instance-mask array.
            splitted_batch['instances'] = [
                masks.reshape(shape=(-3, -2))
                for masks in splitted_batch['instances']
            ]

        if validation:
            metrics = self.val_metrics
            grad_scope = autograd.pause(False)
        else:
            metrics = self.train_metrics
            grad_scope = autograd.record(True)

        losses_logging = defaultdict(list)
        with grad_scope:
            outputs = [
                self.net(image, points) for image, points in zip(
                    splitted_batch['images'], splitted_batch['points'])
            ]

            losses = []
            for ictx, ctx_output in enumerate(outputs):
                # Each add_loss call receives the running total and a thunk
                # producing that loss's inputs.
                loss = self.add_loss(
                    'instance_loss', 0.0, losses_logging, validation,
                    lambda: (ctx_output.instances,
                             splitted_batch['instances'][ictx]))
                loss = self.add_loss(
                    'segmentation_loss', loss, losses_logging, validation,
                    lambda: (ctx_output.semantic,
                             splitted_batch['semantic'][ictx]))
                loss = self.add_loss(
                    'proposals_loss', loss, losses_logging, validation,
                    lambda: (ctx_output.instances, ctx_output.proposals,
                             splitted_batch['instances'][ictx]))

                with autograd.pause():
                    for metric in metrics:
                        predictions = (getattr(ctx_output, name)
                                       for name in metric.pred_outputs)
                        ground_truths = (splitted_batch[name][ictx]
                                         for name in metric.gt_outputs)
                        metric.update(*predictions, *ground_truths)

                losses.append(loss)

        return losses, losses_logging, splitted_batch, outputs
Exemple #23
0
    def hybrid_forward(self, F, x, num=0, fix_conv=False):
        """Embed ``x`` and evaluate the classifier heads ``fc2 .. fc{num+2}``.

        Parameters
        ----------
        F : mxnet.nd or mxnet.sym
            Operator namespace supplied by Gluon.
        x : input tensor.
        num : int
            Index of the newest head; heads ``fc2`` through ``fc{num + 2}``
            are evaluated and concatenated along axis 1.
        fix_conv : bool
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        (out, output)
            ``out`` is the L2-normalised embedding, ``output`` the
            concatenated head outputs.

        Raises
        ------
        ValueError
            If ``self.fix_layers`` is not one of 0..3 or ``num`` is negative
            (previously these surfaced as an UnboundLocalError).

        Notes
        -----
        ``self.fix_layers`` selects how many of ``feats1..feats3`` run with
        autograd paused (no gradient recorded). With ``fix_layers == 3`` and
        ``self.fix_fc`` set, ``fc1`` is paused as well. When ``self.fw`` is
        set, all heads except the newest one are evaluated with autograd
        paused.
        """
        if self.fix_layers not in (0, 1, 2, 3):
            raise ValueError(
                'fix_layers must be 0, 1, 2 or 3, got %r' % (self.fix_layers,))
        if num < 0:
            raise ValueError('num must be non-negative, got %r' % (num,))

        stages = (self.feats1, self.feats2, self.feats3)
        n_frozen = self.fix_layers
        freeze_fc = n_frozen == 3 and self.fix_fc

        # Frozen prefix of the backbone: evaluated without recording.
        with ag.pause():
            for stage in stages[:n_frozen]:
                x = stage(x)
            if freeze_fc:
                x = self.fc1(x)
        # Trainable remainder.
        for stage in stages[n_frozen:]:
            x = stage(x)
        if not freeze_fc:
            x = self.fc1(x)
        out = F.L2Normalization(x)

        heads = []
        for i in range(num + 1):
            # getattr replaces eval('self.fc' + ...): same lookup, no code
            # evaluation of a constructed string.
            fc = getattr(self, 'fc%d' % (i + 2))
            if self.fw and i < num:
                with ag.pause():
                    heads.append(fc(out))
            else:
                heads.append(fc(out))

        # Use F (not mx.nd) so the block also works when hybridized.
        output = heads[0] if len(heads) == 1 else F.concat(*heads, dim=1)
        return out, output
Exemple #24
0
 def add_batchid(self, F, bbox):
     """Prepend a batch index column to every box.

     The number of boxes per image is ``self._num_sample`` in training mode
     and ``self._rpn_test_post_nms`` otherwise. The result is a 2-D array
     whose rows are (batch_id, 4 box values); gradients are stopped.
     """
     if autograd.is_training():
         rois_per_image = self._num_sample
     else:
         rois_per_image = self._rpn_test_post_nms
     with autograd.pause():
         batch_ids = F.arange(0, self._max_batch, repeat=rois_per_image)
         # Flatten the batch dimension because ROIPooling requires 2-D input.
         id_column = batch_ids.reshape((-1, 1))
         flat_boxes = bbox.reshape((-1, 4))
         stacked = F.concat(id_column, flat_boxes, dim=-1)
         return F.stop_gradient(stacked)
    def hybrid_forward(self, F, box_preds, gt_boxes, obj_t, centers_t,
                       scales_t, weights_t, clas_t):
        """Combine dynamically generated targets with prefetched targets.

        Wherever the prefetched objectness is positive, the prefetched
        (fixed) target overrides the dynamic one. All computation happens
        with autograd paused and every returned tensor has its gradient
        stopped.

        Returns
        -------
        list
            ``[objectness, center_targets, scale_targets, weights,
            class_targets, class_mask]``.
        """
        with autograd.pause():
            dynamic_t = self._dynamic_target(box_preds, gt_boxes)
            fixed_t = [obj_t, centers_t, scales_t, weights_t, clas_t]
            # Each name below is a (dynamic, fixed) pair.
            obj, centers, scales, weights, clas = zip(dynamic_t, fixed_t)

            # Notes carried over (translated) from the original author; the
            # shapes quoted are the author's and are not checked here:
            #   has_object = obj[1] > 0, shape [batch, h*w*9, 1]:
            #     1 where the grid cell holds an object, 0 otherwise. A
            #     value <= 0 means the cell has no object: it is either
            #     ignored or background (an ignored object's centre is not
            #     inside this cell).
            #   objectness values:
            #      1: positive sample, the box really contains a target.
            #      0: negative sample (ious_max < self._ignore_iou_thresh);
            #         kept and counted in the negative-sample loss.
            #     -1: ignored (ious_max > self._ignore_iou_thresh) with no
            #         positive sample in the cell; excluded from the loss.
            #     If a cell is positive and ious_max exceeds the threshold,
            #     ignoring it or not makes little difference.
            #   pair mask, shape [batch, 3*(h*w*3), 2]: where a cell has an
            #     object take that object's centre / scale / weight, else
            #     the dynamic value. weights_t balances the (x, y) and
            #     (width, height) terms in the loss.
            #   class mask, shape [batch, 3*(h*w*3), 20]: where a cell has
            #     an object take its one-hot ground truth; elsewhere the
            #     dynamic value, which may be -1 (ignore).
            #   Of the returned tensors, objectness and class_targets may
            #   contain -1.
            has_object = obj[1] > 0
            objectness = F.where(has_object, obj[1], obj[0])

            pair_mask = has_object.tile(reps=(2, ))
            center_targets = F.where(pair_mask, centers[1], centers[0])
            scale_targets = F.where(pair_mask, scales[1], scales[0])
            merged_weights = F.where(pair_mask, weights[1], weights[0])

            per_class_mask = has_object.tile(reps=(self._num_class, ))
            # One-hot class targets.
            class_targets = F.where(per_class_mask, clas[1], clas[0])
            class_mask = per_class_mask * (class_targets >= 0)

            merged = [
                objectness, center_targets, scale_targets, merged_weights,
                class_targets, class_mask
            ]
            return [F.stop_gradient(tensor) for tensor in merged]
Exemple #26
0
 def sample(self,
            num_samples: Optional[int] = None,
            dtype=np.float32) -> Tensor:
     """Draw samples from the transformed distribution.

     A draw is taken from ``self.base_distribution`` and then passed
     through the ``f`` map of every entry in ``self.transforms``, in order.
     The whole computation runs with autograd paused.
     """
     with autograd.pause():
         draw = self.base_distribution.sample(num_samples=num_samples,
                                              dtype=dtype)
         for transform in self.transforms:
             draw = transform.f(draw)
     return draw
Exemple #27
0
    def compute(self, F, variables):
        """Compute the log-likelihood term ``logL`` and cache solver factors.

        NOTE(review): the structure (inducing inputs ``Z``, ``Kuu``/``Kuf``
        kernels, noise variance) looks like a sparse Gaussian-process
        regression bound -- confirm against the model definition.

        Parameters
        ----------
        F : operator namespace used for all array operations.
        variables : mapping from model variables to their current values;
            it is also passed to ``self.set_parameter``, which stores
            ``wv``, ``L`` and ``LA`` for ``self.graphs[1]``.

        Returns
        -------
        logL : the accumulated log-likelihood value.
        """
        X = variables[self.model.X]
        Y = variables[self.model.Y]
        Z = variables[self.model.inducing_inputs]
        noise_var = variables[self.model.noise_var]
        # D: size of the last axis of Y; M: size of the second-to-last axis
        # of Z (presumably output dimension and number of inducing points).
        D = Y.shape[-1]
        M = Z.shape[-2]
        kern = self.model.kernel
        kern_params = kern.fetch_parameters(variables)

        X, Y, Z, noise_var, kern_params = arrays_as_samples(
            F, [X, Y, Z, noise_var, kern_params])

        # Extra axis so the noise variance broadcasts against matrices.
        noise_var_m = F.expand_dims(noise_var, axis=-2)

        Kuu = kern.K(F, Z, **kern_params)
        # Optional diagonal jitter added before the Cholesky factorisation.
        if self.jitter > 0.:
            Kuu = Kuu + F.expand_dims(F.eye(M, dtype=Z.dtype), axis=0) * \
                self.jitter

        Kuf = kern.K(F, Z, X, **kern_params)
        Kff_diag = kern.Kdiag(F, X, **kern_params)

        # L: Cholesky factor of Kuu; LinvKuf: triangular solve with L on Kuf.
        L = F.linalg.potrf(Kuu)
        LinvKuf = F.linalg.trsm(L, Kuf)

        # A = I + (LinvKuf LinvKuf^T) / noise_var, then its Cholesky factor.
        A = F.expand_dims(F.eye(M, dtype=Z.dtype), axis=0) + \
            F.broadcast_div(F.linalg.syrk(LinvKuf), noise_var_m)
        LA = F.linalg.potrf(A)

        # Subtract the mean function from the targets when one is defined.
        if self.model.mean_func is not None:
            mean = self.model.mean_func(F, X)
            Y = Y - mean
        LAInvLinvKufY = F.linalg.trsm(LA, F.linalg.gemm2(LinvKuf, Y))

        # Accumulate logL term by term; each F.sum pair reduces the last
        # two axes.
        logL = -D * F.linalg.sumlogdiag(LA)
        logL = logL - F.sum(F.sum(F.square(Y) / noise_var_m +
                                  np.log(2. * np.pi) + F.log(noise_var_m),
                                  axis=-1),
                            axis=-1) / 2
        logL = logL + F.sum(F.sum(
            F.square(LAInvLinvKufY) / (2 * F.square(noise_var_m)), axis=-1),
                            axis=-1)
        logL = logL - D * F.sum(Kff_diag / (2 * noise_var), axis=-1)
        logL = logL + D * F.sum(
            F.sum(F.square(LinvKuf) / (2. * noise_var_m), axis=-1), axis=-1)

        # The cached quantities are computed without recording gradients.
        # Only the first entry along the leading axis of each is stored.
        with autograd.pause():
            wv = F.broadcast_div(
                F.linalg.trsm(L,
                              F.linalg.trsm(LA, LAInvLinvKufY, transpose=True),
                              transpose=True), noise_var_m)
            self.set_parameter(variables, self.graphs[1].wv, wv[0])
            self.set_parameter(variables, self.graphs[1].L, L[0])
            self.set_parameter(variables, self.graphs[1].LA, LA[0])

        return logL
Exemple #28
0
    def hybrid_forward(self, F, x, num=0, fix_cnn=False):
        """Embed ``x`` and evaluate the classifier heads ``fc2 .. fc{num+2}``.

        Parameters
        ----------
        F : mxnet.nd or mxnet.sym
            Operator namespace supplied by Gluon.
        x : input tensor.
        num : int
            Index of the newest head; heads ``fc2`` through ``fc{num + 2}``
            are evaluated and concatenated along axis 1.
        fix_cnn : bool
            When True, ``self.features[:7]`` and ``self.features[7][0]`` run
            with autograd paused (no gradient recorded); the remaining
            layers run normally.

        Returns
        -------
        (feat, output)
            ``feat`` is the L2-normalised feature, ``output`` the
            concatenated head outputs.

        Raises
        ------
        ValueError
            If ``num`` is negative (previously an UnboundLocalError).

        Notes
        -----
        When ``self.fw`` is set, all heads except the newest one are
        evaluated with autograd paused.
        """
        if num < 0:
            raise ValueError('num must be non-negative, got %r' % (num,))

        if fix_cnn:
            with ag.pause():
                x = self.features[:7](x)
                x = self.features[7][0](x)
            x = self.features[7][1](x)
            x = self.features[8:](x)
        else:
            x = self.features(x)
        feat = F.L2Normalization(x)

        heads = []
        for i in range(num + 1):
            # getattr replaces eval('self.fc' + ...): same lookup, no code
            # evaluation of a constructed string.
            fc = getattr(self, 'fc%d' % (i + 2))
            if self.fw and i < num:
                with ag.pause():
                    heads.append(fc(feat))
            else:
                heads.append(fc(feat))

        # Use F (not mx.nd) so the block also works when hybridized.
        output = heads[0] if len(heads) == 1 else F.concat(*heads, dim=1)
        return feat, output
 def forward(self, roi):
     """Trim padded proposals from ``roi`` along axis 1.

     The first coordinate of the first batch element is scanned for the
     sentinel value -1, and everything from the first sentinel onwards is
     dropped. At most ``self._rpn_train_pre_nms`` entries are examined.

     Parameters
     ----------
     roi : NDArray indexed as ``roi[batch, index, coord]``.

     Returns
     -------
     ``roi`` sliced to ``[0, end)`` on axis 1, where ``end`` is the index
     of the first sentinel, or the number of examined entries when no
     sentinel is present.

     Notes
     -----
     The earlier loop-based version dropped the last valid proposal when
     no sentinel was present (the loop index stopped at ``n - 1``) and
     synchronised with the device once per element. The scan now copies
     the column to NumPy once.
     """
     with autograd.pause():
         limit = min(self._rpn_train_pre_nms, roi.shape[1])
         end = limit
         if limit > 0:
             first_coord = roi[0, :limit, 0].asnumpy()
             sentinel_at = (first_coord == -1).nonzero()[0]
             if sentinel_at.size:
                 end = int(sentinel_at[0])
         roi = roi.slice_axis(axis=1, begin=0, end=end)
     return roi
Exemple #30
0
 def add_batchid(self, F, bbox):
     """Prepend a batch index column to every box.

     A (-1, ``self._max_roi``) grid of batch indices is built and cropped
     to the first two axes of ``bbox``. It is then flattened and
     concatenated with the flattened boxes, giving rows of
     (batch_id, 4 box values). Runs with autograd paused.
     """
     with autograd.pause():
         id_grid = F.arange(0, self._max_batch, repeat=self._max_roi)
         id_grid = id_grid.reshape((-1, self._max_roi))
         # bbox * 0 is only used as a shape reference for the crop.
         id_grid = F.slice_like(id_grid, bbox * 0, axes=(0, 1))
         id_column = id_grid.reshape((-1, 1))
         flat_boxes = bbox.reshape((-1, 4))
         roi = F.concat(id_column, flat_boxes, dim=-1)
     return roi
Exemple #31
0
 def update(self, rpn_cls_label, pred_rpn_box_cls):
     """Append the RPN classification accuracy of one batch to ``self.preds``.

     Predictions are reshaped to two rows and the argmax over those rows
     is compared with the flattened labels. Labels equal to -1 are
     excluded from both the numerator and the denominator.
     """
     with ag.pause():
         predicted = pred_rpn_box_cls.reshape(2, -1).argmax(axis=0)
         labels = rpn_cls_label.reshape(-1)
         valid = (labels != -1).astype('f')
         hits = (predicted == labels).astype('f')
         n_correct = mx.nd.sum(valid * hits)
         n_valid = mx.nd.sum(valid)
         accuracy = n_correct / n_valid
     self.preds.append(accuracy.asscalar())
    def hybrid_forward(self, F, anchor, score, bbox_pred, img):
        """
        Generate proposals. Limit to batch-size=1 in current implementation.

        Boxes are decoded from ``bbox_pred`` and ``anchor``, clipped with
        ``img``, filtered by minimum size, passed through NMS and truncated
        to the post-NMS count. Pre/post-NMS counts come from the train or
        test settings depending on ``autograd.is_training()``.

        Returns
        -------
        (rpn_scores, rpn_bbox)
            Column 0 and columns 1.. of the NMS result, respectively.
        """
        training = autograd.is_training()
        pre_nms = self._train_pre_nms if training else self._test_pre_nms
        post_nms = self._train_post_nms if training else self._test_post_nms

        with autograd.pause():
            # Restore bounding boxes, then clip them to the image boundary.
            decoded = self._box_decoder(bbox_pred, self._box_to_center(anchor))
            clipped = self._clipper(decoded, img)

            # Flag boxes that violate the min_size constraint.
            # TODO:(zhreshold), there's im_ratio to handle here, but it requires
            # add' info, and we don't expect big difference
            xmin, ymin, xmax, ymax = clipped.split(axis=-1, num_outputs=4)
            too_narrow = (xmax - xmin) < self._min_size
            too_short = (ymax - ymin) < self._min_size
            invalid = too_narrow + too_short

            # Invalid entries get score -1 and coordinates (-1, -1, -1, -1)
            # so they cannot suppress boxes with 0 confidence.
            score = F.where(invalid, F.ones_like(invalid) * -1, score)
            invalid = F.repeat(invalid, axis=-1, repeats=4)
            filtered = F.where(invalid, F.ones_like(invalid) * -1, clipped)

            # Non-maximum suppression on (score, box) rows.
            candidates = F.concat(score, filtered, dim=-1)
            kept = F.contrib.box_nms(
                candidates, overlap_thresh=self._nms_thresh, topk=pre_nms,
                coord_start=1, score_index=0, id_index=-1, force_suppress=True)

            # Keep the first post_nms rows and split scores from boxes.
            top = F.slice_axis(kept, axis=1, begin=0, end=post_nms)
            rpn_scores = F.slice_axis(top, axis=-1, begin=0, end=1)
            rpn_bbox = F.slice_axis(top, axis=-1, begin=1, end=None)

        return rpn_scores, rpn_bbox
    def hybrid_forward(self, F, box_preds, gt_boxes, obj_t, centers_t, scales_t, weights_t, clas_t):
        """Combine dynamically generated targets with prefetched targets.

        Wherever the prefetched objectness is positive, the prefetched
        (fixed) target overrides the dynamic one. All computation happens
        with autograd paused and every returned tensor has its gradient
        stopped.

        Parameters
        ----------
        F : mxnet.nd or mxnet.sym
            `F` is mxnet.sym if hybridized or mxnet.nd if not.
        box_preds : mxnet.nd.NDArray
            Predicted bounding boxes.
        gt_boxes : mxnet.nd.NDArray
            Ground-truth bounding boxes.
        obj_t : mxnet.nd.NDArray
            Prefetched Objectness targets.
        centers_t : mxnet.nd.NDArray
            Prefetched regression target for center x and y.
        scales_t : mxnet.nd.NDArray
            Prefetched regression target for scale x and y.
        weights_t : mxnet.nd.NDArray
            Prefetched element-wise gradient weights for center_targets and scale_targets.
        clas_t : mxnet.nd.NDArray
            Prefetched one-hot vector for classification.

        Returns
        -------
        (list of) mxnet.nd.NDArray
            objectness: 0 for negative, 1 for positive, -1 for ignore.
            center_targets: regression target for center x and y.
            scale_targets: regression target for scale x and y.
            weights: element-wise gradient weights for center_targets and scale_targets.
            class_targets: a one-hot vector for classification
                (label-smoothed when ``self._label_smooth`` is set).
            class_mask: object mask tiled over classes, zeroed where the
                class target is negative (ignored).

        Notes
        -----
        With label smoothing, positives become ``1 - 1/num_class``,
        negatives become ``1/num_class`` and ignored entries (-1) are left
        untouched. The previous implementation overwrote the smoothed
        positives with ``1/num_class`` in its second ``where``, which made
        positive and negative class targets identical.
        """
        with autograd.pause():
            dynamic_t = self._dynamic_target(box_preds, gt_boxes)
            # use fixed target to override dynamic targets
            obj, centers, scales, weights, clas = zip(
                dynamic_t, [obj_t, centers_t, scales_t, weights_t, clas_t])
            mask = obj[1] > 0
            objectness = F.where(mask, obj[1], obj[0])
            mask2 = mask.tile(reps=(2,))
            center_targets = F.where(mask2, centers[1], centers[0])
            scale_targets = F.where(mask2, scales[1], scales[0])
            weights = F.where(mask2, weights[1], weights[0])
            mask3 = mask.tile(reps=(self._num_class,))
            class_targets = F.where(mask3, clas[1], clas[0])
            if self._label_smooth:
                smooth_weight = 1. / self._num_class
                # Classify entries before modifying them so the smoothed
                # positives are never mistaken for negatives.
                is_positive = class_targets > 0.5
                is_ignored = class_targets < -0.5
                smoothed = F.where(
                    is_positive, class_targets - smooth_weight,
                    F.ones_like(class_targets) * smooth_weight)
                class_targets = F.where(is_ignored, class_targets, smoothed)
            class_mask = mask3 * (class_targets >= 0)
            return [F.stop_gradient(x) for x in [objectness, center_targets, scale_targets,
                                                 weights, class_targets, class_mask]]
Exemple #34
0
    def _spectral_norm(self):
        """Return the layer weight divided by an estimate of its spectral norm.

        The weight is flattened to 2-D and ``POWER_ITERATION`` rounds of
        power iteration refine the stored vector ``self.u``. The refined
        vector is written back to ``self.u`` with autograd paused. A zero
        estimate is replaced by ``EPSILON`` to avoid division by zero.
        """
        weight = self.params.get('weight').data(self.ctx)
        flat_weight = nd.reshape(weight, [weight.shape[0], -1])

        left_vec = self.u.data(self.ctx)
        right_vec = None

        for _ in range(POWER_ITERATION):
            right_vec = nd.L2Normalization(nd.dot(left_vec, flat_weight))
            left_vec = nd.L2Normalization(nd.dot(right_vec, flat_weight.T))

        projected = nd.dot(left_vec, flat_weight)
        sigma = nd.sum(projected * right_vec)
        if sigma == 0.:
            sigma = EPSILON

        # Persist the refined vector without recording the assignment.
        with autograd.pause():
            self.u.set_data(left_vec)

        return weight / sigma
    def forward(self, bbox, anchor, width, height):
        """
        RPNTargetGenerator is only used in data transform with no batch dimension.
        Be careful there's numpy operations inside

        Parameters
        ----------
        bbox: (M, 4) ground truth boxes with corner encoding.
        anchor: (N, 4) anchor boxes with corner encoding.
        width: int width of input image
        height: int height of input image

        Returns
        -------
        cls_target: (N,) value +1: pos, 0: neg, -1: ignore
        box_target: (N, 4) only anchors whose cls_target > 0 has nonzero box target
        box_mask: (N, 4) only anchors whose cls_target > 0 has nonzero mask

        """
        ops = mx.nd
        with autograd.pause():
            # IoU between every anchor (N) and every ground truth (M): (N, M)
            ious = ops.contrib.box_iou(anchor, bbox, format='corner')

            # Anchors that leave the image get IoU -1 against every box.
            a_xmin, a_ymin, a_xmax, a_ymax = ops.split(anchor, num_outputs=4, axis=-1)
            outside = (a_xmin < 0) + (a_ymin < 0) + (a_xmax >= width) + (a_ymax >= height)
            outside = ops.repeat(outside, repeats=bbox.shape[0], axis=-1)
            ious = ops.where(outside, ops.ones_like(ious) * -1, ious)

            samples, matches = self._sampler(ious)

            # Training targets for RPN; the box encoder expects a leading
            # batch axis, which is added here and stripped on return.
            cls_target, _ = self._cls_encoder(samples)
            batched = [tensor.expand_dims(axis=0)
                       for tensor in (samples, matches, anchor, bbox)]
            box_target, box_mask = self._box_encoder(*batched)
        return cls_target, box_target[0], box_mask[0]
Exemple #36
0
 def hybrid_forward(self, F, x, gamma, beta, running_mean, running_var):
     """Batch normalisation with statistics reduced across devices.

     Outside training mode this is a plain ``F.BatchNorm`` using the
     running statistics. In training mode the per-device sum and sum of
     squares are all-reduced, mean/std are derived from them, the running
     statistics are updated with autograd paused, and the input is
     normalised with ``F.DecoupleBatchNorm``.
     """
     if not autograd.is_training():
         return F.BatchNorm(x, gamma, beta, running_mean, running_var, name='fwd',
                            **self._kwargs)

     local_sum, local_sqsum = F.SumSquare(x)
     # Element count per channel over all devices (reads axes 0, 2, 3 of x).
     count = self.ndevices * x.shape[0] * x.shape[2] * x.shape[3]
     reducer = AllReduce(self._prefix)
     total_sum, total_sqsum = reducer(local_sum, local_sqsum)

     # Mean and (biased) standard deviation from the reduced sums.
     mean = total_sum / count
     sum_sq_dev = total_sqsum - total_sum * total_sum / count
     biased_var = sum_sq_dev / count
     std = F.sqrt(F.maximum(biased_var, self.eps))

     # Running statistics use the unbiased variance and are not recorded.
     with autograd.pause():
         unbiased_var = sum_sq_dev / (count - 1)
         self.updater(self.running_mean, self.running_var, mean, unbiased_var,
                      self.momentum, x.context)

     return F.DecoupleBatchNorm(x, gamma, beta, mean, std)
    def hybrid_forward(self, F, x, gt_box=None):
        """Forward Faster-RCNN network.

        The behavior during training and inference is different.

        Parameters
        ----------
        x : mxnet.nd.NDArray or mxnet.symbol
            The network input tensor.
        gt_box : type, only required during training
            The ground-truth bbox tensor with shape (1, N, 4).

        Returns
        -------
        (ids, scores, bboxes)
            During inference, returns final class id, confidence scores, bounding
            boxes. ``feat`` is appended when ``self._additional_output`` is set.
        (cls_pred, box_pred, rpn_box, samples, matches, raw_rpn_score, raw_rpn_box, anchors)
            During training, returns the raw predictions and sampling results
            without decoding. ``top_feat`` is appended when
            ``self._additional_output`` is set.

        """
        def _split(x, axis, num_outputs, squeeze_axis):
            # F.split may return a single array instead of a list (handled
            # below); always hand back a list so callers can iterate.
            x = F.split(x, axis=axis, num_outputs=num_outputs, squeeze_axis=squeeze_axis)
            if isinstance(x, list):
                return x
            else:
                return [x]

        feat = self.features(x)
        # RPN proposals
        if autograd.is_training():
            rpn_score, rpn_box, raw_rpn_score, raw_rpn_box, anchors = \
                self.rpn(feat, F.zeros_like(x))
            # sample proposals against the ground truth for RCNN training
            rpn_box, samples, matches = self.sampler(rpn_box, rpn_score, gt_box)
        else:
            _, rpn_box = self.rpn(feat, F.zeros_like(x))

        # create batchid for roi
        num_roi = self._num_sample if autograd.is_training() else self._rpn_test_post_nms
        with autograd.pause():
            roi_batchid = F.arange(0, self._max_batch, repeat=num_roi)
            # remove batch dim because ROIPooling require 2d input
            rpn_roi = F.concat(*[roi_batchid.reshape((-1, 1)), rpn_box.reshape((-1, 4))], dim=-1)
            rpn_roi = F.stop_gradient(rpn_roi)

        # ROI features
        if self._roi_mode == 'pool':
            pooled_feat = F.ROIPooling(feat, rpn_roi, self._roi_size, 1. / self._stride)
        elif self._roi_mode == 'align':
            pooled_feat = F.contrib.ROIAlign(feat, rpn_roi, self._roi_size, 1. / self._stride,
                                             sample_ratio=2)
        else:
            raise ValueError("Invalid roi mode: {}".format(self._roi_mode))

        # RCNN prediction
        top_feat = self.top_features(pooled_feat)
        avg_feat = self.global_avg_pool(top_feat)
        cls_pred = self.class_predictor(avg_feat)
        box_pred = self.box_predictor(avg_feat)
        # cls_pred (B * N, C) -> (B, N, C)
        cls_pred = cls_pred.reshape((self._max_batch, num_roi, self.num_class + 1))
        # box_pred (B * N, C * 4) -> (B, N, C, 4)
        box_pred = box_pred.reshape((self._max_batch, num_roi, self.num_class, 4))

        # no need to convert bounding boxes in training, just return
        if autograd.is_training():
            if self._additional_output:
                return (cls_pred, box_pred, rpn_box, samples, matches,
                        raw_rpn_score, raw_rpn_box, anchors, top_feat)
            return (cls_pred, box_pred, rpn_box, samples, matches,
                    raw_rpn_score, raw_rpn_box, anchors)

        # cls_ids (B, N, C), scores (B, N, C)
        cls_ids, scores = self.cls_decoder(F.softmax(cls_pred, axis=-1))
        # cls_ids, scores (B, N, C) -> (B, C, N) -> (B, C, N, 1)
        cls_ids = cls_ids.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
        scores = scores.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
        # box_pred (B, N, C, 4) -> (B, C, N, 4)
        box_pred = box_pred.transpose((0, 2, 1, 3))

        # rpn_boxes (B, N, 4) -> B * (1, N, 4)
        rpn_boxes = _split(rpn_box, axis=0, num_outputs=self._max_batch, squeeze_axis=False)
        # cls_ids, scores (B, C, N, 1) -> B * (C, N, 1)
        cls_ids = _split(cls_ids, axis=0, num_outputs=self._max_batch, squeeze_axis=True)
        scores = _split(scores, axis=0, num_outputs=self._max_batch, squeeze_axis=True)
        # box_preds (B, C, N, 4) -> B * (C, N, 4)
        box_preds = _split(box_pred, axis=0, num_outputs=self._max_batch, squeeze_axis=True)

        # per batch predict, nms, each class has topk outputs
        results = []
        for rpn_box, cls_id, score, box_pred in zip(rpn_boxes, cls_ids, scores, box_preds):
            # box_pred (C, N, 4) rpn_box (1, N, 4) -> bbox (C, N, 4)
            bbox = self.box_decoder(box_pred, self.box_to_center(rpn_box))
            # res (C, N, 6)
            res = F.concat(*[cls_id, score, bbox], dim=-1)
            # res (C, self.nms_topk, 6)
            res = F.contrib.box_nms(
                res, overlap_thresh=self.nms_thresh, topk=self.nms_topk, valid_thresh=0.0001,
                id_index=0, score_index=1, coord_start=2, force_suppress=True)
            # res (C * self.nms_topk, 6)
            res = res.reshape((-3, 0))
            results.append(res)

        # result B * (C * topk, 6) -> (B, C * topk, 6)
        result = F.stack(*results, axis=0)
        # columns: 0 = class id, 1 = score, 2..5 = box coordinates
        ids = F.slice_axis(result, axis=-1, begin=0, end=1)
        scores = F.slice_axis(result, axis=-1, begin=1, end=2)
        bboxes = F.slice_axis(result, axis=-1, begin=2, end=6)
        if self._additional_output:
            return ids, scores, bboxes, feat
        return ids, scores, bboxes
Exemple #38
0
# Weighting constants; presumably used to balance the loss terms later in
# the script (their use is not visible in this fragment).
positive_weight = 5.0
negative_weight = 0.1
class_weight = 1.0
xywh_weight = 5.0

# Training loop: one pass over trainIter per epoch.
for epoch in range(maxEpoch):
    trainIter.reset()
    tic = time.time()
    for batchidx, batch in enumerate(trainIter):
        # Move labels and data to the target context.
        Y0 = batch.label[0].as_in_context(ctx)
        X = batch.data[0].as_in_context(ctx)
        with autograd.record():
            Y1 = net(X)
            predCls, predObj, predXYWH = parse_net_output(Y1,numClasses, box_per_cell)
            with autograd.pause(): # generate ground-truth targets online, without recording gradients
                boxMask, boxCls, boxObj, boxXYWH = parse_groundtruth_for_target(Y0,box_per_cell,predXYWH)
            # Disabled debug branch (`if 0`): walks a 16x16 grid and formats
            # the target box of whichever of the two box slots is active.
            if 0:
                lines = []
                for y in range(16):
                    for x in range(16):
                        a = boxMask[0,y,x,0,0].asnumpy()[0]
                        b = boxMask[0,y,x,1,0].asnumpy()[0]
                        c = '-'
                        #pdb.set_trace()
                        if a > 0.5:
                            c = boxXYWH[0,y,x,0,:].asnumpy().tolist()
                            c = ['%.2f'%cc for cc in c]
                            c = '-'.join(c)
                        elif b > 0.5:
                            c = boxXYWH[0,y,x,1,:].asnumpy().tolist()
                            # NOTE(review): the snippet is truncated here; the rest of the
                            # debug branch and of the training step is not visible.
Exemple #39
0
    def hybrid_forward(self, F, img, *x):
        """Forward RPN.

        The behavior during training and inference is different.

        Parameters
        ----------
        img : mxnet.nd.NDArray or mxnet.symbol
            The original input image.
        x : mxnet.nd.NDArray or mxnet.symbol(s)
            Feature tensor(s).

        Returns
        -------
        (rpn_score, rpn_box)
            Returns predicted scores and regions which are candidates of objects.
            In training mode the raw scores, raw boxes and anchors are
            returned as well.

        """
        training = autograd.is_training()
        if training:
            pre_nms, post_nms = self._train_pre_nms, self._train_post_nms
        else:
            pre_nms, post_nms = self._test_pre_nms, self._test_post_nms

        if self._multi_level:
            anchors = []
            level_proposals = []
            level_raw_scores = []
            level_raw_boxes = []
            # Generate anchors in [P2, P3, P4, P5, P6] order
            for level, feat in enumerate(x):
                level_anchor = self.anchor_generator[level](feat)
                level_score, level_box, raw_score, raw_box = self.rpn_head(feat)
                proposal = self.region_proposer(level_anchor, level_score,
                                                level_box, img)
                anchors.append(level_anchor)
                level_proposals.append(proposal)
                level_raw_scores.append(raw_score)
                level_raw_boxes.append(raw_box)
            # Merge all levels along the proposal axis.
            rpn_pre_nms_proposals = F.concat(*level_proposals, dim=1)
            raw_rpn_scores = F.concat(*level_raw_scores, dim=1)
            raw_rpn_boxes = F.concat(*level_raw_boxes, dim=1)
        else:
            feat = x[0]
            anchors = self.anchor_generator(feat)
            hidden = self.conv1(feat)
            # Channels-last layout flattened to (batch, -1, 1) and (batch, -1, 4).
            raw_rpn_scores = self.score(hidden).transpose(axes=(0, 2, 3, 1)).reshape((0, -1, 1))
            detached_scores = F.sigmoid(F.stop_gradient(raw_rpn_scores))
            raw_rpn_boxes = self.loc(hidden).transpose(axes=(0, 2, 3, 1)).reshape((0, -1, 4))
            detached_boxes = F.stop_gradient(raw_rpn_boxes)
            rpn_pre_nms_proposals = self.region_proposer(
                anchors, detached_scores, detached_boxes, img)

        # Non-maximum suppression
        with autograd.pause():
            suppressed = F.contrib.box_nms(
                rpn_pre_nms_proposals, overlap_thresh=self._nms_thresh,
                topk=pre_nms, coord_start=1, score_index=0, id_index=-1,
                force_suppress=True)

            # slice post_nms number of boxes
            top = F.slice_axis(suppressed, axis=1, begin=0, end=post_nms)
            rpn_scores = F.slice_axis(top, axis=-1, begin=0, end=1)
            rpn_boxes = F.slice_axis(top, axis=-1, begin=1, end=None)

        if training:
            # return raw predictions as well in training for bp
            return rpn_scores, rpn_boxes, raw_rpn_scores, raw_rpn_boxes, anchors
        return rpn_scores, rpn_boxes
    def forward(self, img, xs, anchors, offsets, gt_boxes, gt_ids, gt_mixratio=None):
        """Build YOLO3 training targets from ground truth only (no network output needed).

        Every valid ground-truth box is matched to the single anchor shape that
        overlaps it best once both are centred on the origin, and the targets are
        written at the grid cell that contains the box centre on the feature map
        owning that anchor.

        Parameters
        ----------
        img : mxnet.nd.NDArray
            Original image tensor; only ``shape[2]`` (height) and ``shape[3]``
            (width) are read.
        xs : list or tuple of mxnet.nd.NDArray
            Feature maps, one per output level; only ``shape[2]`` and ``shape[3]``
            are read.
        anchors : list or tuple of mxnet.nd.NDArray
            YOLO3 anchors, one entry per feature map. Each entry is flattened to
            rows of two values, indexed as (width, height) below.
        offsets : list or tuple of mxnet.nd.NDArray
            Pre-generated x and y offsets for YOLO3, one entry per feature map.
        gt_boxes : mxnet.nd.NDArray
            Ground-truth boxes. A box with any negative coordinate is treated as
            padding and ends the scan of that sample.
        gt_ids : mxnet.nd.NDArray
            Ground-truth class IDs, read as ``gt_ids[b, m, 0]``.
        gt_mixratio : mxnet.nd.NDArray, optional
            Mixup ratio, read as ``gt_mixratio[b, m, 0]``. When given it replaces
            1 as the objectness value of matched positions.

        Returns
        -------
        tuple of mxnet.nd.NDArray
            objectness: 0 by default, 1 (or the mixup ratio) at matched positions.
            center_targets: regression target for center x and y.
            scale_targets: regression target for scale x and y.
            weights: element-wise gradient weights for center_targets and scale_targets.
            class_targets: -1 by default (ignore), one-hot rows at matched positions.
            Each output is passed through ``self._slice`` before being returned.

        """
        assert isinstance(anchors, (list, tuple))
        flat_anchors = nd.concat(*[a.reshape(-1, 2) for a in anchors], dim=0)
        assert isinstance(offsets, (list, tuple))
        flat_offsets = nd.concat(*[o.reshape(-1, 2) for o in offsets], dim=0)
        # running totals: entry i counts the anchors / grid cells of levels 0..i
        num_anchors = np.cumsum([a.size // 2 for a in anchors])
        num_offsets = np.cumsum([o.size // 2 for o in offsets])
        # first flat cell index of every level
        level_starts = [0] + num_offsets.tolist()
        assert isinstance(xs, (list, tuple))
        assert len(xs) == len(anchors) == len(offsets)

        # size of the original image
        img_h, img_w = img.shape[2], img.shape[3]
        with autograd.pause():
            # allocate the outputs; the product below is only used for its broadcast shape
            batch_size = gt_ids.shape[0]
            tiled_offsets = flat_offsets.reshape((-1, 1, 2)).expand_dims(0).repeat(
                repeats=batch_size, axis=0)
            template = flat_anchors.reshape((1, -1, 2)) * tiled_offsets
            center_targets = nd.zeros_like(template)
            scale_targets = nd.zeros_like(center_targets)
            weights = nd.zeros_like(center_targets)
            objectness = nd.zeros_like(weights.split(axis=-1, num_outputs=2)[0])
            class_targets = nd.one_hot(objectness.squeeze(axis=-1), depth=self._num_class)
            class_targets[:] = -1  # everything is "ignore" until a match overwrites it

            # Match each ground truth to an anchor purely by shape: both the boxes and
            # the anchors are moved to the origin before their IoU is taken. The grid
            # cell is chosen afterwards from the box centre, e.g. an object centred in
            # cell (3, 4) of a (16, 16) feature map only ever uses the anchor in (3, 4).
            ctr_x, ctr_y, box_w, box_h = self.bbox2center(gt_boxes)
            origin_gt_boxes = nd.concat(
                -0.5 * box_w, -0.5 * box_h, 0.5 * box_w, 0.5 * box_h, dim=-1)
            # anchors written as (0, 0, w, h), then converted by bbox2corner
            origin_anchor_boxes = self.bbox2corner(
                nd.concat(0 * flat_anchors, flat_anchors, dim=-1))
            ious = nd.contrib.box_iou(origin_anchor_boxes, origin_gt_boxes).transpose((1, 0, 2))
            # concrete values are needed from here on, so switch to NumPy
            matches = ious.argmax(axis=1).asnumpy()  # (B, M)
            valid_gts = (gt_boxes >= 0).asnumpy().prod(axis=-1)  # (B, M)
            np_ctr_x = ctr_x.asnumpy()
            np_ctr_y = ctr_y.asnumpy()
            np_box_w = box_w.asnumpy()
            np_box_h = box_h.asnumpy()
            np_anchors = flat_anchors.asnumpy()
            np_gt_ids = gt_ids.asnumpy()
            np_gt_mixratios = gt_mixratio.asnumpy() if gt_mixratio is not None else None
            # TODO(zhreshold): the number of valid gt is not a big number, therefore for loop
            # should not be a problem right now. Switch to better solution is needed.
            for b, sample_matches in enumerate(matches):
                for m, best_anchor in enumerate(sample_matches):
                    if valid_gts[b, m] < 1:
                        # first padded box reached: nothing valid follows in this sample
                        break
                    match = int(best_anchor)
                    # first level whose cumulative anchor count exceeds the anchor index
                    level = np.nonzero(num_anchors > match)[0][0]
                    fm_h = xs[level].shape[2]
                    fm_w = xs[level].shape[3]
                    cx = np_ctr_x[b, m, 0]
                    cy = np_ctr_y[b, m, 0]
                    bw = np_box_w[b, m, 0]
                    bh = np_box_h[b, m, 0]
                    # box centre expressed in feature-map cells
                    grid_x = cx / img_w * fm_w
                    grid_y = cy / img_h * fm_h
                    col = int(grid_x)
                    row = int(grid_y)
                    # flat position of that cell across all levels
                    cell = level_starts[level] + row * fm_w + col
                    center_targets[b, cell, match, 0] = grid_x - col  # tx
                    center_targets[b, cell, match, 1] = grid_y - row  # ty
                    scale_targets[b, cell, match, 0] = np.log(bw / np_anchors[match, 0])
                    scale_targets[b, cell, match, 1] = np.log(bh / np_anchors[match, 1])
                    # the smaller the box relative to the image, the closer the weight is to 2
                    weights[b, cell, match, :] = 2.0 - bw * bh / img_w / img_h
                    if np_gt_mixratios is None:
                        obj_value = 1
                    else:
                        obj_value = np_gt_mixratios[b, m, 0]
                    objectness[b, cell, match, 0] = obj_value
                    class_targets[b, cell, match, :] = 0
                    class_targets[b, cell, match, int(np_gt_ids[b, m, 0])] = 1
            # since some stages won't see partial anchors, the targets are cut down by
            # self._slice before being handed back
            sliced_targets = tuple(
                self._slice(target, num_anchors, num_offsets)
                for target in (objectness, center_targets, scale_targets,
                               weights, class_targets))
        return sliced_targets