Code example #1
 def forward(self, x):
     
     # scale = self.weight.reshape(1, -1, 1, 1) * (1.0 / (self.running_var + self.eps).sqrt())
     # zero out any negative running_var entries before taking the square root
     mask = self.running_var >= 0
     scale = self.weight.reshape(1, -1, 1, 1) * (1.0 / F.sqrt(self.running_var * mask + self.eps))
     bias = self.bias.reshape(1, -1, 1, 1) - self.running_mean * scale
     return x * scale + bias
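
Note: the mask simply guards against negative running_var entries (the commented-out line is the unguarded original). As a quick sanity check, here is a minimal NumPy-only sketch (hypothetical shapes and values, not part of the original module) showing that the folded scale/bias reproduce inference-mode batch normalization:

import numpy as np

rng = np.random.default_rng(0)
C, eps = 4, 1e-5
x = rng.normal(size=(2, C, 3, 3)).astype(np.float32)
weight = rng.normal(size=C).astype(np.float32)
bias = rng.normal(size=C).astype(np.float32)
running_mean = rng.normal(size=C).astype(np.float32)
running_var = rng.uniform(0.1, 2.0, size=C).astype(np.float32)

# folded form, mirroring forward() above
scale = weight.reshape(1, -1, 1, 1) / np.sqrt(running_var.reshape(1, -1, 1, 1) + eps)
b = bias.reshape(1, -1, 1, 1) - running_mean.reshape(1, -1, 1, 1) * scale
folded = x * scale + b

# reference: normalize with running statistics, then apply the affine transform
ref = weight.reshape(1, -1, 1, 1) * (
    x - running_mean.reshape(1, -1, 1, 1)
) / np.sqrt(running_var.reshape(1, -1, 1, 1) + eps) + bias.reshape(1, -1, 1, 1)
np.testing.assert_allclose(folded, ref, rtol=1e-5, atol=1e-5)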
Code example #2
File: try.py Project: Songtingt/MgeEditing
def get_cls_reg_ctr_targets(points, gt_bboxes, bbox_scale = 0.25):
    """
        Compute regression, classification targets for points in multiple images.
        Args:
            points (Tensor): (1, 2, 19, 19).
            gt_bboxes (Tensor): Ground truth bboxes of each image, (B,4), in [tl_x, tl_y, br_x, br_y] format.
        Returns:
            cls_labels (Tensor): Labels. (B, 1, 19, 19)   0 or 1, 0 means background, 1 means in the box.
            bbox_targets (Tensor): BBox targets. (B, 4, 19, 19)  only consider the foreground, for the background should set loss as 0!
            centerness_targets (Tensor): (B, 1, 19, 19)  only consider the foreground, for the background should set loss as 0!
    """
    gt_bboxes = F.add_axis(gt_bboxes, axis=-1)
    gt_bboxes = F.add_axis(gt_bboxes, axis=-1)  # (B,4,1,1)
    # cls_labels
    # Compute four bounds to decide whether each point lies inside; since the template is fairly large, shrink the bbox to 1/2 of its original size
    gap = (gt_bboxes[:, 2, ...] - gt_bboxes[:, 0, ...]) * (1-bbox_scale) / 2
    up_bound = points[:, 0, ...] > gt_bboxes[:, 0, ...] + gap
    left_bound = points[:, 1, ...] > gt_bboxes[:, 1, ...] + gap
    down_bound = points[:, 0, ...] < gt_bboxes[:, 2, ...] - gap
    right_bound = points[:, 1, ...] < gt_bboxes[:, 3, ...] - gap
    cls_labels = up_bound * left_bound * down_bound * right_bound
    cls_labels = F.add_axis(cls_labels, axis=1)  # (B,1,19,19)

    # bbox_targets
    # For each coordinate in points, compute its offset from the box (every coordinate is computed, so negative values appear)
    up_left = points - gt_bboxes[:, 0:2, ...]  # (B, 2, 19, 19)
    bottom_right = gt_bboxes[:, 2:4, ...] - points
    bbox_targets = F.concat([up_left, bottom_right], axis = 1)  # (B, 4, 19, 19)

    # centerness_targets
    up_bottom = F.minimum(up_left[:, 0, ...], bottom_right[:, 0, ...]) / F.maximum(up_left[:, 0, ...], bottom_right[:, 0, ...])
    left_right = F.minimum(up_left[:, 1, ...], bottom_right[:, 1, ...]) / F.maximum(up_left[:, 1, ...], bottom_right[:, 1, ...])
    centerness_targets = F.sqrt(F.abs(up_bottom * left_right))
    return cls_labels, bbox_targets, centerness_targets
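
Note: a quick property check of the centerness formula above (a NumPy sketch with made-up offsets, not part of the project). sqrt(min/max * min/max) equals 1 exactly at the box center and decays toward 0 near the edges:

import numpy as np

def centerness(up, bottom, left, right):
    ub = np.minimum(up, bottom) / np.maximum(up, bottom)
    lr = np.minimum(left, right) / np.maximum(left, right)
    return np.sqrt(np.abs(ub * lr))

print(centerness(5.0, 5.0, 5.0, 5.0))  # 1.0: point at the exact center
print(centerness(1.0, 9.0, 5.0, 5.0))  # ~0.33: point far off-center vertically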
Code example #3
File: test_rng.py Project: mozre/MegEngine
def test_GammaRNG():
    m1 = RNG(seed=111, device="xpu0")
    m2 = RNG(seed=111, device="xpu1")
    m3 = RNG(seed=222, device="xpu0")
    out1 = m1.gamma(2, size=(100, ))
    out1_ = m1.uniform(size=(100, ))
    out2 = m2.gamma(2, size=(100, ))
    out3 = m3.gamma(2, size=(100, ))

    np.testing.assert_allclose(out1.numpy(), out2.numpy(), atol=1e-6)
    assert out1.device == "xpu0" and out2.device == "xpu1"
    assert not (out1.numpy() == out3.numpy()).all()
    assert not (out1.numpy() == out1_.numpy()).all()

    shape = Tensor([[2, 3, 4], [9, 10, 11]], dtype=np.float32, device="xpu0")
    scale = Tensor([0.5, 1, 1.5], dtype=np.float32, device="xpu0")
    expected_mean = (shape * scale).numpy()
    expected_std = (F.sqrt(shape) * scale).numpy()
    out = m1.gamma(shape=shape, scale=scale, size=(20, 30, 40))
    out_shp = out.shape
    if isinstance(out_shp, tuple):
        assert out_shp == (20, 30, 40, 2, 3)
    else:
        assert all(out.shape.numpy() == np.array([20, 30, 40, 2, 3]))
    assert (np.abs(out.mean(axis=(0, 1)).numpy() - expected_mean) /
            expected_std).mean() < 0.1
    assert (np.abs(np.std(out.numpy(), axis=(0, 1)) -
                   expected_std)).mean() < 0.1
Code example #4
File: test_rng.py Project: mozre/MegEngine
def test_BetaRNG():
    m1 = RNG(seed=111, device="xpu0")
    m2 = RNG(seed=111, device="xpu1")
    m3 = RNG(seed=222, device="xpu0")
    out1 = m1.beta(2, 1, size=(100, ))
    out1_ = m1.uniform(size=(100, ))
    out2 = m2.beta(2, 1, size=(100, ))
    out3 = m3.beta(2, 1, size=(100, ))

    np.testing.assert_allclose(out1.numpy(), out2.numpy(), atol=1e-6)
    assert out1.device == "xpu0" and out2.device == "xpu1"
    assert not (out1.numpy() == out3.numpy()).all()
    assert not (out1.numpy() == out1_.numpy()).all()

    alpha = Tensor([[2, 3, 4], [9, 10, 11]], dtype=np.float32, device="xpu0")
    beta = Tensor([0.5, 1, 1.5], dtype=np.float32, device="xpu0")
    expected_mean = (alpha / (alpha + beta)).numpy()
    expected_std = (F.sqrt(alpha * beta / (F.pow(alpha + beta, 2) *
                                           (alpha + beta + 1)))).numpy()
    out = m1.beta(alpha=alpha, beta=beta, size=(20, 30))
    out_shp = out.shape
    if isinstance(out_shp, tuple):
        assert out_shp == (20, 30, 2, 3)
    else:
        assert all(out.shape.numpy() == np.array([20, 30, 2, 3]))
    assert (np.abs(out.mean(axis=(0, 1)).numpy() - expected_mean) /
            expected_std).mean() < 0.1
    assert (np.abs(np.std(out.numpy(), axis=(0, 1)) -
                   expected_std)).mean() < 0.1
Code example #5
 def get_plane_anchors(self, anchor_scales: np.ndarray):
     """get anchors per location on feature map.
     The anchor number is anchor_scales x anchor_ratios
     """
     base_anchor = Tensor([0, 0, self.base_size - 1, self.base_size - 1])
     base_anchor = base_anchor.reshape(1, -1)
     w, h, x_ctr, y_ctr = self._whctrs(base_anchor)
     # ratio enumerate
     size = w * h
     size_ratios = size / self.anchor_ratios
     ws = F.sqrt(size_ratios)
     hs = ws * self.anchor_ratios
     # ws = size_ratios.sqrt().round()
     # hs = (ws * self.anchor_ratios).round()
     # scale enumerate
     anchor_scales = anchor_scales.reshape(1, -1).astype(np.float32)
     ws = F.expand_dims(ws, 1)
     hs = F.expand_dims(hs, 1)
     ws = (ws * anchor_scales).reshape(-1, 1)
     hs = (hs * anchor_scales).reshape(-1, 1)
     # make anchors
     anchors = F.concat(
         [
             x_ctr - 0.5 * (ws - 1),
             y_ctr - 0.5 * (hs - 1),
             x_ctr + 0.5 * (ws - 1),
             y_ctr + 0.5 * (hs - 1),
         ],
         axis=1,
     )
     return anchors.astype(np.float32)
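
Note: the ratio enumeration relies on ws = sqrt(size / ratio) and hs = ws * ratio keeping the base area constant while hitting the requested aspect ratio. A small stand-alone NumPy check (hypothetical values, independent of the class above):

import numpy as np

base_size = 16.0
size = base_size * base_size
ratios = np.array([0.5, 1.0, 2.0])

ws = np.sqrt(size / ratios)
hs = ws * ratios
np.testing.assert_allclose(ws * hs, size)    # area is preserved for every ratio
np.testing.assert_allclose(hs / ws, ratios)  # aspect ratio h/w matches the request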
Code example #6
    def _ternary_transform_mge(image):
        n, c, h, w = image.shape
        if c == 3:
            R, G, B = F.split(image, 3, 1)
            intensities = (0.2989 * R + 0.5870 * G + 0.1140 * B
                           )  # * 255  # convert to gray
        elif c == 1:
            intensities = image
        else:
            raise ValueError('image channel should be 3 or 1: %s' % c)
        # intensities = tf.image.rgb_to_grayscale(image) * 255
        out_channels = patch_size * patch_size
        w = np.eye(out_channels).reshape(
            (patch_size, patch_size, 1, out_channels))  # h,w,1,out_c
        w_ = np.transpose(w, (3, 2, 0, 1))  # 1,out_c,h,w
        # weight = torch.from_numpy(w_).float()
        weight = mge.tensor(w_.astype(np.float32))  # need check cuda?

        # if image.is_cuda:
        #     weight = weight.cuda()
        # patches_torch = torch.conv2d(input=out_channels, weight=weight, bias=None, stride=[1, 1], padding=[max_distance, max_distance])
        patches_mge = F.nn.conv2d(inp=intensities,
                                  weight=weight,
                                  bias=None,
                                  stride=[1, 1],
                                  padding=[max_distance, max_distance])
        transf_mge = patches_mge - intensities
        transf_norm_mge = transf_mge / F.sqrt(0.81 + transf_mge**2)
        return transf_norm_mge
Code example #7
File: pooler.py Project: zzh7982/Models
def roi_pool(
    rpn_fms,
    rois,
    stride,
    pool_shape,
    pooler_type="roi_align",
):
    rois = rois.detach()
    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = int(math.log2(stride[0]))
    max_level = int(math.log2(stride[-1]))

    num_fms = len(rpn_fms)
    box_area = (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2])
    assigned_level = F.floor(canonical_level +
                             F.log(F.sqrt(box_area) / canonical_box_size) /
                             np.log(2)).astype("int32")
    assigned_level = F.minimum(assigned_level, max_level)
    assigned_level = F.maximum(assigned_level, min_level)
    assigned_level = assigned_level - min_level

    # avoid empty assignment
    assigned_level = F.concat([
        assigned_level,
        F.arange(num_fms, dtype="int32", device=assigned_level.device)
    ], )
    rois = F.concat([rois, F.zeros((num_fms, rois.shape[-1]))])

    pool_list, inds_list = [], []
    for i in range(num_fms):
        _, inds = F.cond_take(assigned_level == i, assigned_level)
        level_rois = rois[inds]

        if pooler_type == "roi_pool":
            pool_fm = F.nn.roi_pooling(rpn_fms[i],
                                       level_rois,
                                       pool_shape,
                                       mode="max",
                                       scale=1.0 / stride[i])
        elif pooler_type == "roi_align":
            pool_fm = F.nn.roi_align(
                rpn_fms[i],
                level_rois,
                pool_shape,
                mode="average",
                spatial_scale=1.0 / stride[i],
                sample_points=2,
                aligned=True,
            )
        pool_list.append(pool_fm)
        inds_list.append(inds)

    fm_order = F.argsort(F.concat(inds_list, axis=0))
    pool_feature = F.concat(pool_list, axis=0)
    pool_feature = pool_feature[fm_order][:-num_fms]

    return pool_feature
Code example #8
File: frn.py Project: Feynman1999/MAI-VSR-Diggers
 def forward(self, x):
     B, C, _, _ = x.shape
     # avg_dims = tuple(range(2, len(x.shape)))  # [2 ,3 ]
     nu2 = F.expand_dims(F.pow(x, 2).reshape(B, C, -1).mean(axis=-1,
                                                            keepdims=True),
                         axis=-1)  # [B, C, 1, 1]
     x = x / F.sqrt(nu2 + F.abs(self.eps))
     return F.maximum(self.gamma * x + self.beta, self.tau)
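
Note: Filter Response Normalization divides each channel by the root of its mean squared activation (nu2) instead of subtracting a mean, and F.abs(self.eps) keeps the learned epsilon positive; the final F.maximum is the thresholded linear unit (TLU). A NumPy sketch of the same computation with hypothetical scalar parameters:

import numpy as np

def frn(x, gamma, beta, tau, eps=1e-6):
    # nu2: per-channel mean of squared activations, shape (B, C, 1, 1)
    nu2 = np.mean(x ** 2, axis=(2, 3), keepdims=True)
    x = x / np.sqrt(nu2 + np.abs(eps))
    return np.maximum(gamma * x + beta, tau)  # TLU in place of ReLU

x = np.random.default_rng(0).normal(size=(2, 3, 8, 8)).astype(np.float32)
y = frn(x, gamma=1.0, beta=0.0, tau=0.0)
print(y.shape)  # (2, 3, 8, 8)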
Code example #9
def layernorm(x):
    original_shape = x.shape
    x = x.reshape(original_shape[0], -1)
    m = F.mean(x, axis=1, keepdims=True)
    v = F.mean((x - m)**2, axis=1, keepdims=True)
    x = (x - m) / F.maximum(F.sqrt(v), 1e-6)
    x = x.reshape(original_shape)
    return x
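
Note: a quick check (NumPy sketch, not from the source) that this standardizes everything but the batch axis to zero mean and unit variance, up to the 1e-6 floor on the standard deviation:

import numpy as np

x = np.random.default_rng(0).normal(loc=3.0, scale=2.0, size=(4, 8, 8))
flat = x.reshape(4, -1)
m = flat.mean(axis=1, keepdims=True)
v = ((flat - m) ** 2).mean(axis=1, keepdims=True)
out = (flat - m) / np.maximum(np.sqrt(v), 1e-6)

np.testing.assert_allclose(out.mean(axis=1), 0.0, atol=1e-12)
np.testing.assert_allclose(out.std(axis=1), 1.0, atol=1e-6)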
Code example #10
def gelu(x):
    """Implementation of the gelu activation function.
        For information: OpenAI GPT's gelu is slightly different
        (and gives slightly different results):
        x * 0.5 * (1.0 + F.tanh((F.sqrt(2 / math.pi) * (x + 0.044715 * (x **  3)))))
        Also see https://arxiv.org/abs/1606.08415
    """
    return x * 0.5 * (1.0 + F.tanh(F.sqrt(2 / math.pi) * (x + 0.044715 * (x ** 3))))
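
Note: the returned expression is the tanh approximation of GELU; the exact form uses the Gaussian CDF, x * 0.5 * (1 + erf(x / sqrt(2))). A sketch comparing the two (scipy is an assumption here, used only for erf):

import math
import numpy as np
from scipy.special import erf

x = np.linspace(-4, 4, 1001)
gelu_tanh = x * 0.5 * (1.0 + np.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * x ** 3)))
gelu_exact = x * 0.5 * (1.0 + erf(x / math.sqrt(2)))
print(np.max(np.abs(gelu_tanh - gelu_exact)))  # small, on the order of 1e-3 or below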
Code example #11
 def calc(self, X, Y, mask=None):
     diff = X - Y
     error = F.sqrt(diff * diff + self.eps)
     if mask is not None:
         error = error * mask
     if self.reduction == "mean":
         loss = F.mean(error)
     else:
         loss = F.sum(error)
     return loss
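
Note: this is the Charbonnier penalty sqrt(d^2 + eps), a smooth approximation of the L1 loss that stays differentiable at d = 0 and behaves like |d| once |d| is large relative to sqrt(eps). A minimal NumPy illustration with made-up values:

import numpy as np

eps = 1e-6
d = np.array([0.0, 0.001, 0.1, 1.0])
print(np.sqrt(d * d + eps))  # ~[1.0e-3, 1.4e-3, 0.1, 1.0]: close to |d| away from 0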
Code example #12
    def forward(self, x):
        output = x.reshape(x.shape[0], self.num_groups, -1)
        mean = F.mean(output, axis=2, keepdims=True)
        mean2 = F.mean(output**2, axis=2, keepdims=True)
        var = mean2 - mean * mean

        output = (output - mean) / F.sqrt(var + self.eps)
        output = output.reshape(x.shape)
        if self.affine:
            output = self.weight.reshape(1, -1, 1, 1) * output + \
                self.bias.reshape(1, -1, 1, 1)

        return output
Code example #13
def fold_linear_bn(linear_weight, linear_bias, gamma, beta, bn_mean, bn_var, eps):
    linear_bias = linear_bias.reshape(1, -1)
    gamma = gamma.reshape(1, -1)
    beta = beta.reshape(1, -1)
    bn_mean = bn_mean.reshape(1, -1)
    bn_var = bn_var.reshape(1, -1)
    # bn_istd = 1 / bn_std
    bn_istd = 1.0 / sqrt(bn_var + eps)  # type: ignore[attr-defined]
    # w_fold = gamma / bn_std * W
    scale_factor = gamma * bn_istd
    w_fold = linear_weight * scale_factor.reshape(-1, 1)
    b_fold = beta + gamma * (linear_bias - bn_mean) * bn_istd
    return w_fold, b_fold
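
Note: a NumPy sketch (hypothetical shapes, not from the source) checking that the folded weight and bias reproduce Linear followed by inference-mode BatchNorm:

import numpy as np

rng = np.random.default_rng(0)
in_f, out_f, eps = 5, 3, 1e-5
W = rng.normal(size=(out_f, in_f))
b = rng.normal(size=out_f)
gamma, beta = rng.normal(size=out_f), rng.normal(size=out_f)
mean, var = rng.normal(size=out_f), rng.uniform(0.1, 2.0, size=out_f)

istd = 1.0 / np.sqrt(var + eps)
w_fold = W * (gamma * istd)[:, None]
b_fold = beta + gamma * (b - mean) * istd

x = rng.normal(size=(4, in_f))
ref = gamma * ((x @ W.T + b) - mean) * istd + beta  # Linear -> eval-mode BN
np.testing.assert_allclose(x @ w_fold.T + b_fold, ref, rtol=1e-6, atol=1e-9)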
Code example #14
File: roi_pool.py Project: zymale/CrowdDetection
def roi_pool(rpn_fms, rois, stride, pool_shape, roi_type='roi_align', 
             labels=None, bbox_targets=None):
    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = math.log2(stride[0])
    max_level = math.log2(stride[-1])

    num_fms = len(rpn_fms)
    box_sizes = F.sqrt((rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2]))
    level_assignments = F.floor(
        canonical_level + F.log(box_sizes / canonical_box_size) / np.log(2)
    )
    level_assignments = F.minimum(level_assignments, max_level)
    level_assignments = F.maximum(level_assignments, min_level)
    level_assignments = level_assignments - min_level
    available_masks = F.concat(
        [mge.ones(level_assignments.shapeof()[0]), mge.zeros(num_fms)], axis=0)
    level_assignments = F.concat([level_assignments, mge.tensor(np.arange(num_fms, dtype=np.int32))], axis=0)
    rois = F.concat([rois, mge.zeros((num_fms, rois.shapeof()[-1]))], axis=0)
    if labels is not None:
        labels = F.concat([labels, mge.ones((num_fms, labels.shapeof()[-1]))], axis=0)
        bbox_targets = F.concat([bbox_targets, mge.zeros((num_fms, bbox_targets.shapeof()[-1]))], axis=0)
    pool_list, inds_list = [], []
    for i in range(len(rpn_fms)):
        mask = level_assignments == i
        inds = mask_to_inds(mask)
        rois_fm = rois.ai[inds]
        if roi_type == 'roi_pool':
            pool_fm = F.roi_pooling(
                    rpn_fms[i], rois_fm, pool_shape, mode='max', scale=1.0/stride[i])
        elif roi_type == 'roi_align':
            pool_fm = F.roi_align(
                    rpn_fms[i], rois_fm, pool_shape, mode='average', 
                    spatial_scale=1.0/stride[i], sample_points=2, aligned=True)
        pool_list.append(pool_fm)
        inds_list.append(inds)

    fm_order = F.concat(inds_list, axis=0)
    pool_feature = F.concat(pool_list, axis=0)

    ordered_available_masks = available_masks.ai[fm_order]
    available_inds = mask_to_inds(ordered_available_masks)
    pool_feature = pool_feature.ai[available_inds]
    rois = rois.ai[fm_order, :].ai[available_inds, :]
    if labels is not None:
        labels = labels.ai[fm_order].ai[available_inds]
        bbox_targets = bbox_targets.ai[fm_order, :].ai[available_inds, :]
        return pool_feature, rois, F.zero_grad(labels), F.zero_grad(bbox_targets)
    else:
        return pool_feature, rois, None, None
Code example #15
    def forward(self, x):
        N, C, H, W = x.shape
        assert C == self.num_channels
        x = x.reshape(N, C, -1)
        mean = x.mean(axis=2, keepdims=True)
        var = (x**2).mean(axis=2, keepdims=True) - mean * mean

        x = (x - mean) / F.sqrt(var + self.eps)
        x = x.reshape(N, C, H, W)
        if self.affine:
            x = self.weight.reshape(1, -1, 1, 1) * x + self.bias.reshape(
                1, -1, 1, 1)

        return x
Code example #16
    def get_sample_code(self, gaussian, mean, var, onehot):

        #z = mge.random.gaussian(mean.shape, mean=0, std=1)
        #mean = mean.reshape(*mean.shape, 1, 1)
        #mean = F.add_axis(F.add_axis(mean, 2), 3)
        #var = F.add_axis(F.add_axis(var, 2), 3)

        z = gaussian
        z = z * F.sqrt(var) + mean

        print('gaussian, mean, var, z', gaussian.shape, mean.shape, var.shape,
              z.shape)
        z = F.concat([z, onehot], axis=1)

        return z
Code example #17
    def forward(self, x):
        N, C, H, W = x.shape
        assert C == self.num_channels
        x = x.reshape(x.shape[0], -1)
        # NOTE mean will keepdims in next two lines.
        mean = x.mean(axis=1, keepdims=1)
        var = (x**2).mean(axis=1, keepdims=1) - mean * mean

        x = (x - mean) / F.sqrt(var + self.eps)
        x = x.reshape(N, C, H, W)
        if self.affine:
            x = self.weight.reshape(1, -1, 1, 1) * x + self.bias.reshape(
                1, -1, 1, 1)

        return x
Code example #18
    def get_cls_reg_ctr_targets(self, points, gt_bboxes, bbox_scale=0.15):
        """
            Compute regression, classification targets for points in multiple images.
            Args:
                points (Tensor): (1, 2, 37, 37). Position of each point mapped back onto the original image.
                gt_bboxes (Tensor): Ground truth bboxes of each image, (B,4), in [tl_x, tl_y, br_x, br_y] format. Top-left / bottom-right corners on the original image.
            Returns:
                cls_labels (Tensor): Labels. (B, 1, 37, 37)   0 or 1, 0 means background, 1 means in the box.
                bbox_targets (Tensor): BBox targets. (B, 4, 37, 37)  only consider the foreground, for the background should set loss as 0!
                centerness_targets (Tensor): (B, 1, 37, 37)  only consider the foreground, for the background should set loss as 0!
        """
        B, _ = gt_bboxes.shape
        gt_bboxes = F.add_axis(gt_bboxes, axis=-1)
        gt_bboxes = F.add_axis(gt_bboxes, axis=-1)  # (B,4,1,1)
        # cls_labels
        # Compute four bounds to decide whether each point lies inside; since the template is fairly large, shrink the bbox to 1/4 of its original size
        gap = (gt_bboxes[:, 2, ...] -
               gt_bboxes[:, 0, ...]) * (1 - bbox_scale) / 2  # margin derived from the bbox side length
        up_bound = points[:, 0, ...] > gt_bboxes[:, 0, ...] + gap
        left_bound = points[:, 1, ...] > gt_bboxes[:, 1, ...] + gap
        down_bound = points[:, 0, ...] < gt_bboxes[:, 2, ...] - gap
        right_bound = points[:, 1, ...] < gt_bboxes[:, 3, ...] - gap
        cls_labels = up_bound * left_bound * down_bound * right_bound
        cls_labels = F.add_axis(cls_labels, axis=1)  # (B, 1, 37, 37)
        cls_labels.requires_grad = False

        # bbox_targets
        # For each coordinate in points, compute its offset from the box (every coordinate is computed, so negative values appear)
        up_left = points - gt_bboxes[:, 0:2, ...]  # (B, 2, 37, 37) offset of each score-map point from the top-left corner
        bottom_right = gt_bboxes[:, 2:4, ...] - points
        bbox_targets = F.concat([up_left, bottom_right],
                                axis=1)  # (B, 4, 37, 37)
        bbox_targets.requires_grad = False

        # centerness_targets
        up_bottom = F.minimum(up_left[:, 0, ...],
                              bottom_right[:, 0, ...]) / F.maximum(
                                  up_left[:, 0, ...], bottom_right[:, 0, ...])
        left_right = F.minimum(up_left[:, 1, ...],
                               bottom_right[:, 1, ...]) / F.maximum(
                                   up_left[:, 1, ...], bottom_right[:, 1, ...])
        centerness_targets = F.sqrt(F.abs(up_bottom * left_right))
        centerness_targets = F.add_axis(centerness_targets,
                                        axis=1)  # (B,1,37,37)
        centerness_targets.requires_grad = False
        return cls_labels, bbox_targets, centerness_targets
Code example #19
def fold_conv_bn(
    conv_weight, conv_bias, conv_groups, gamma, beta, bn_mean, bn_var, eps
):
    conv_bias = conv_bias.reshape(1, -1, 1, 1)
    gamma = gamma.reshape(1, -1, 1, 1)
    beta = beta.reshape(1, -1, 1, 1)
    bn_mean = bn_mean.reshape(1, -1, 1, 1)
    bn_var = bn_var.reshape(1, -1, 1, 1)

    # bn_istd = 1 / bn_std
    bn_istd = 1.0 / sqrt(bn_var + eps)  # type: ignore[attr-defined]
    # w_fold = gamma / bn_std * W
    scale_factor = gamma * bn_istd
    if conv_groups == 1:
        w_fold = conv_weight * scale_factor.reshape(-1, 1, 1, 1)
    else:
        w_fold = conv_weight * scale_factor.reshape(conv_groups, -1, 1, 1, 1)
    # b_fold = gamma * (b - bn_mean) / bn_std + beta
    b_fold = beta + gamma * (conv_bias - bn_mean) * bn_istd

    return w_fold, b_fold
Code example #20
File: pixelwise_loss.py Project: xxoox168/MgeEditing
 def forward(self, X, Y):
     diff = X - Y
     error = F.sqrt(diff * diff + self.eps)
     loss = F.mean(error)
     return loss
Code example #21
File: utils.py Project: lhaippp/GyroFlow
def euclidean(t):
    return F.sqrt(F.sum(t**2, axis=(1, ), keepdims=True))
Code example #22
File: atss.py Project: zzh7982/Models
    def get_ground_truth(self, anchors_list, batched_gt_boxes,
                         batched_num_gts):
        labels_list = []
        offsets_list = []
        ctrness_list = []

        all_level_anchors = F.concat(anchors_list, axis=0)
        for bid in range(batched_gt_boxes.shape[0]):
            gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

            ious = []
            candidate_idxs = []
            base = 0
            for stride, anchors_i in zip(self.cfg.stride, anchors_list):
                ious.append(
                    layers.get_iou(
                        gt_boxes[:, :4],
                        F.concat([
                            anchors_i - stride * self.cfg.anchor_scale / 2,
                            anchors_i + stride * self.cfg.anchor_scale / 2,
                        ],
                                 axis=1)))
                gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2
                distances = F.sqrt(
                    F.sum((F.expand_dims(gt_centers, axis=1) - anchors_i)**2,
                          axis=2))
                _, topk_idxs = F.topk(distances, self.cfg.anchor_topk)
                candidate_idxs.append(base + topk_idxs)
                base += anchors_i.shape[0]
            ious = F.concat(ious, axis=1)
            candidate_idxs = F.concat(candidate_idxs, axis=1)

            candidate_ious = F.gather(ious, 1, candidate_idxs)
            ious_thr = (F.mean(candidate_ious, axis=1, keepdims=True) +
                        F.std(candidate_ious, axis=1, keepdims=True))
            is_foreground = F.scatter(
                F.zeros(ious.shape), 1, candidate_idxs,
                F.ones(candidate_idxs.shape)).astype(bool) & (ious >= ious_thr)

            is_in_boxes = F.min(self.point_coder.encode(
                all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1)),
                                axis=2) > 0

            ious[~is_foreground] = -1
            ious[~is_in_boxes] = -1

            match_indices = F.argmax(ious, axis=0)
            gt_boxes_matched = gt_boxes[match_indices]
            anchor_max_iou = F.indexing_one_hot(ious, match_indices, axis=0)

            labels = gt_boxes_matched[:, 4].astype(np.int32)
            labels[anchor_max_iou == -1] = 0
            offsets = self.point_coder.encode(all_level_anchors,
                                              gt_boxes_matched[:, :4])

            left_right = offsets[:, [0, 2]]
            top_bottom = offsets[:, [1, 3]]
            ctrness = F.sqrt(
                F.clip(F.min(left_right, axis=1) / F.max(left_right, axis=1),
                       lower=0) *
                F.clip(F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1),
                       lower=0))

            labels_list.append(labels)
            offsets_list.append(offsets)
            ctrness_list.append(ctrness)

        return (
            F.stack(labels_list, axis=0).detach(),
            F.stack(offsets_list, axis=0).detach(),
            F.stack(ctrness_list, axis=0).detach(),
        )
Code example #23
 def forward(self, x):
     scale = self.weight.reshape(
         1, -1, 1, 1) * (1.0 / F.sqrt(self.running_var + self.eps))
     bias = self.bias.reshape(1, -1, 1, 1) - self.running_mean * scale
     return x * scale.detach() + bias.detach()
Code example #24
def isru(input, alpha):
    return input / (F.sqrt(1 + alpha * F.pow(input, 2)))
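
Note: ISRU (inverse square root unit) maps inputs smoothly into (-1/sqrt(alpha), 1/sqrt(alpha)) and is approximately linear near the origin. A NumPy sketch:

import numpy as np

def isru_np(x, alpha):
    return x / np.sqrt(1 + alpha * x ** 2)

x = np.linspace(-10.0, 10.0, 5)
print(isru_np(x, alpha=1.0))  # bounded in (-1, 1), ~x near 0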
Code example #25
    def get_ground_truth(self, anchors_list, batched_gt_boxes,
                         batched_num_gts):
        labels_list = []
        offsets_list = []
        ctrness_list = []

        all_level_anchors = F.concat(anchors_list, axis=0)
        for bid in range(batched_gt_boxes.shape[0]):
            gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

            offsets = self.point_coder.encode(
                all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1))

            object_sizes_of_interest = F.concat([
                F.broadcast_to(
                    F.expand_dims(mge.tensor(size, dtype=np.float32), axis=0),
                    (anchors_i.shape[0], 2)) for anchors_i, size in zip(
                        anchors_list, self.cfg.object_sizes_of_interest)
            ],
                                                axis=0)
            max_offsets = F.max(offsets, axis=2)
            is_cared_in_the_level = (
                (max_offsets >= F.expand_dims(object_sizes_of_interest[:, 0],
                                              axis=0))
                & (max_offsets <= F.expand_dims(object_sizes_of_interest[:, 1],
                                                axis=0)))

            if self.cfg.center_sampling_radius > 0:
                gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2
                is_in_boxes = []
                for stride, anchors_i in zip(self.cfg.stride, anchors_list):
                    radius = stride * self.cfg.center_sampling_radius
                    center_boxes = F.concat([
                        F.maximum(gt_centers - radius, gt_boxes[:, :2]),
                        F.minimum(gt_centers + radius, gt_boxes[:, 2:4]),
                    ],
                                            axis=1)
                    center_offsets = self.point_coder.encode(
                        anchors_i, F.expand_dims(center_boxes, axis=1))
                    is_in_boxes.append(F.min(center_offsets, axis=2) > 0)
                is_in_boxes = F.concat(is_in_boxes, axis=1)
            else:
                is_in_boxes = F.min(offsets, axis=2) > 0

            gt_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] -
                                                           gt_boxes[:, 1])
            # FIXME: use repeat instead of broadcast_to
            areas = F.broadcast_to(F.expand_dims(gt_area, axis=1),
                                   offsets.shape[:2])
            areas[~is_cared_in_the_level] = float("inf")
            areas[~is_in_boxes] = float("inf")

            match_indices = F.argmin(areas, axis=0)
            gt_boxes_matched = gt_boxes[match_indices]
            anchor_min_area = F.indexing_one_hot(areas, match_indices, axis=0)

            labels = gt_boxes_matched[:, 4].astype(np.int32)
            labels[anchor_min_area == float("inf")] = 0
            offsets = self.point_coder.encode(all_level_anchors,
                                              gt_boxes_matched[:, :4])

            left_right = offsets[:, [0, 2]]
            top_bottom = offsets[:, [1, 3]]
            ctrness = F.sqrt(
                F.maximum(
                    F.min(left_right, axis=1) / F.max(left_right, axis=1), 0) *
                F.maximum(
                    F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1), 0))

            labels_list.append(labels)
            offsets_list.append(offsets)
            ctrness_list.append(ctrness)

        return (
            F.stack(labels_list, axis=0).detach(),
            F.stack(offsets_list, axis=0).detach(),
            F.stack(ctrness_list, axis=0).detach(),
        )
Code example #26
def _anchor_double_target(gt_boxes, im_info, all_anchors):

    gt_boxes, im_info = gt_boxes.detach(), im_info.detach()
    all_anchors = all_anchors.detach()

    gt_boxes = gt_boxes[:im_info[5].astype(np.int32), :]
    dummy = -F.ones([1, gt_boxes.shape[1]]).to(gt_boxes.device)
    gt_boxes = F.concat([gt_boxes, dummy], axis=0)
    valid_mask = 1 - (gt_boxes[:, 4] < 0).astype(np.float32)

    anchor_centers = _compute_center(all_anchors)
    gtboxes_centers = _compute_center(gt_boxes)
    # gtboxes_centers = gtboxes_centers * valid_mask.unsqueeze(1)
    gtboxes_centers = gtboxes_centers * F.expand_dims(valid_mask, axis=1)

    N, K = all_anchors.shape[0], gt_boxes.shape[0]
    an_centers = F.expand_dims(anchor_centers, axis=1)
    gt_centers = F.expand_dims(gtboxes_centers, axis=0)
    # an_centers = anchor_centers.unsqueeze(1).repeat(1, K, 1)
    # gt_centers = gtboxes_centers.unsqueeze(0).repeat(N, 1, 1)

    distance = F.abs(an_centers - gt_centers)
    distance = F.sqrt(F.pow(distance, 2).sum(axis=2))

    start = 0
    end = 5
    overlaps = box_overlap_opr(all_anchors[:, :4], gt_boxes[:, :4])
    overlaps *= F.expand_dims(valid_mask, axis=0)
    default_num = 16

    ious_list = []

    for l in range(start, end):

        _, index = F.cond_take(all_anchors[:, 4] == l, all_anchors[:, 4])

        level_dist = distance[index, :].transpose(1, 0)
        ious = overlaps[index, :].transpose(1, 0)
        sorted_index = F.argsort(level_dist, descending=False)
        n = min(sorted_index.shape[1], default_num)
        ious = F.gather(ious, 1, sorted_index[:, :n]).transpose(1, 0)

        ious_list.append(ious)

    ious = F.concat(ious_list, axis=0)
    mean_var = F.mean(ious, axis=0)
    std_var = F.std(ious, 0)
    iou_thresh_per_gt = mean_var + std_var

    iou_thresh_per_gt = F.maximum(iou_thresh_per_gt, 0.2)

    # limits the anchor centers in the gtboxes
    N, K = all_anchors.shape[0], gt_boxes.shape[0]
    anchor_points = an_centers
    pos_area = _compute_pos_area(gt_boxes, 0.3)
    # pos_area = pos_area.unsqueeze(0).repeat(N, 1, 1)
    pos_area = F.broadcast_to(F.expand_dims(pos_area, axis=0),
                              (N, K, pos_area.shape[-1]))

    l = anchor_points[:, :, 0] - pos_area[:, :, 0]
    r = pos_area[:, :, 2] - anchor_points[:, :, 0]
    t = anchor_points[:, :, 1] - pos_area[:, :, 1]
    b = pos_area[:, :, 3] - anchor_points[:, :, 1]

    is_in_gt = F.stack([l, r, t, b], axis=2)
    is_in_gt = is_in_gt.min(axis=2) > 0.1
    valid_mask = (overlaps >= F.expand_dims(
        iou_thresh_per_gt, axis=0)) * is_in_gt.astype(np.float32)
    ious = overlaps * valid_mask

    sorted_index = F.argsort(ious, 1)
    sorted_overlaps = F.gather(ious, 1, sorted_index)
    max_overlaps = sorted_overlaps[:, :2].flatten()
    argmax_overlaps = sorted_index[:, :2].flatten()

    n, c = all_anchors.shape
    device = all_anchors.device
    labels = -F.ones(2 * n).to(device)
    positive_mask = (max_overlaps >= 0.2).to(device).astype(np.float32)
    negative_mask = (max_overlaps < 0.2).to(device).astype(np.float32)
    labels = positive_mask + labels * (1 - positive_mask) * (1 - negative_mask)

    bbox_targets = gt_boxes[argmax_overlaps, :4]
    all_anchors = F.broadcast_to(F.expand_dims(all_anchors, axis=1),
                                 (n, 2, c)).reshape(-1, c)

    bbox_targets = bbox_transform_opr(all_anchors[:, :4], bbox_targets)

    labels_cat = gt_boxes[argmax_overlaps, 4]
    labels_cat = labels_cat * (1 - F.equal(labels, -1).astype(
        np.float32)) - F.equal(labels, -1).astype(np.float32)

    return labels, bbox_targets, labels_cat
Code example #27
    def forward(self, image, im_info, gt_boxes=None):
        image = self.preprocess_image(image)
        features = self.backbone(image)
        features = [features[f] for f in self.in_features]

        box_logits, box_offsets, box_ctrness = self.head(features)

        box_logits_list = [
            _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1,
                                            self.cfg.num_classes)
            for _ in box_logits
        ]
        box_offsets_list = [
            _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, 4)
            for _ in box_offsets
        ]
        box_ctrness_list = [
            _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, 1)
            for _ in box_ctrness
        ]

        anchors_list = self.anchor_generator(features)

        all_level_box_logits = F.concat(box_logits_list, axis=1)
        all_level_box_offsets = F.concat(box_offsets_list, axis=1)
        all_level_box_ctrness = F.concat(box_ctrness_list, axis=1)

        if self.training:
            gt_labels, gt_offsets, gt_ctrness = self.get_ground_truth(
                anchors_list,
                gt_boxes,
                im_info[:, 4].astype(np.int32),
            )

            all_level_box_logits = all_level_box_logits.reshape(
                -1, self.cfg.num_classes)
            all_level_box_offsets = all_level_box_offsets.reshape(-1, 4)
            all_level_box_ctrness = all_level_box_ctrness.flatten()

            gt_labels = gt_labels.flatten()
            gt_offsets = gt_offsets.reshape(-1, 4)
            gt_ctrness = gt_ctrness.flatten()

            valid_mask = gt_labels >= 0
            fg_mask = gt_labels > 0
            num_fg = fg_mask.sum()
            sum_ctr = gt_ctrness[fg_mask].sum()
            # add detach() to avoid syncing across ranks in backward
            num_fg = layers.all_reduce_mean(num_fg).detach()
            sum_ctr = layers.all_reduce_mean(sum_ctr).detach()

            gt_targets = F.zeros_like(all_level_box_logits)
            gt_targets[fg_mask, gt_labels[fg_mask] - 1] = 1

            loss_cls = layers.sigmoid_focal_loss(
                all_level_box_logits[valid_mask],
                gt_targets[valid_mask],
                alpha=self.cfg.focal_loss_alpha,
                gamma=self.cfg.focal_loss_gamma,
            ).sum() / F.maximum(num_fg, 1)

            loss_bbox = (layers.iou_loss(
                all_level_box_offsets[fg_mask],
                gt_offsets[fg_mask],
                box_mode="ltrb",
                loss_type=self.cfg.iou_loss_type,
            ) * gt_ctrness[fg_mask]).sum() / F.maximum(
                sum_ctr, 1e-5) * self.cfg.loss_bbox_weight

            loss_ctr = layers.binary_cross_entropy(
                all_level_box_ctrness[fg_mask],
                gt_ctrness[fg_mask],
            ).sum() / F.maximum(num_fg, 1)

            total = loss_cls + loss_bbox + loss_ctr
            loss_dict = {
                "total_loss": total,
                "loss_cls": loss_cls,
                "loss_bbox": loss_bbox,
                "loss_ctr": loss_ctr,
            }
            self.cfg.losses_keys = list(loss_dict.keys())
            return loss_dict
        else:
            # currently not support multi-batch testing
            assert image.shape[0] == 1

            all_level_anchors = F.concat(anchors_list, axis=0)
            pred_boxes = self.point_coder.decode(all_level_anchors,
                                                 all_level_box_offsets[0])
            pred_boxes = pred_boxes.reshape(-1, 4)

            scale_w = im_info[0, 1] / im_info[0, 3]
            scale_h = im_info[0, 0] / im_info[0, 2]
            pred_boxes = pred_boxes / F.concat(
                [scale_w, scale_h, scale_w, scale_h], axis=0)
            clipped_boxes = layers.get_clipped_boxes(pred_boxes,
                                                     im_info[0, 2:4]).reshape(
                                                         -1, 4)
            pred_score = F.sqrt(
                F.sigmoid(all_level_box_logits) *
                F.sigmoid(all_level_box_ctrness))[0]
            return pred_score, clipped_boxes
Code example #28
def _anchor_target(gt_boxes, im_info, all_anchors):

    gt_boxes, im_info = gt_boxes.detach(), im_info.detach()
    all_anchors = all_anchors.detach()

    gt_boxes = gt_boxes[:im_info[5].astype(np.int32), :]
    valid_mask = 1 - (gt_boxes[:, 4] < 0).astype(np.float32)

    anchor_centers = _compute_center(all_anchors)
    gtboxes_centers = _compute_center(gt_boxes) * F.expand_dims(valid_mask,
                                                                axis=1)

    N, K = all_anchors.shape[0], gt_boxes.shape[0]
    # an_centers = anchor_centers.unsqueeze(1).repeat(1, K, 1)
    an_centers = F.expand_dims(anchor_centers, axis=1)
    gt_centers = F.expand_dims(gtboxes_centers, axis=0)
    # gt_centers = gtboxes_centers.unsqueeze(0).repeat(N, 1, 1)

    distance = F.abs(an_centers - gt_centers)
    distance = F.sqrt(F.pow(distance, 2).sum(axis=2))

    start = 0
    end = 5
    overlaps = box_overlap_opr(all_anchors[:, :4], gt_boxes[:, :4])
    overlaps = overlaps * F.expand_dims(valid_mask, axis=0)
    default_num = 9

    ious_list = []
    for l in range(start, end):

        _, index = F.cond_take(all_anchors[:, 4] == l, all_anchors[:, 4])
        level_dist = distance[index, :].transpose(1, 0)
        ious = overlaps[index, :].transpose(1, 0)
        sorted_index = F.argsort(level_dist, descending=False)
        n = min(default_num, sorted_index.shape[1])
        ious = F.gather(ious, 1, sorted_index[:, :n]).transpose(1, 0)
        ious_list.append(ious)

    ious = F.concat(ious_list, axis=0)
    mean_var = F.mean(ious, axis=0)
    std_var = F.std(ious, 0)
    iou_thresh_per_gt = mean_var + std_var

    iou_thresh_per_gt = F.maximum(iou_thresh_per_gt, 0.35)
    n = iou_thresh_per_gt.shape[0]

    # limits the anchor centers in the gtboxes
    N, K = all_anchors.shape[0], gt_boxes.shape[0]
    anchor_points = an_centers
    proxies = F.broadcast_to(F.expand_dims(gt_boxes, axis=0),
                             (N, K, gt_boxes.shape[-1]))
    l = anchor_points[:, :, 0] - proxies[:, :, 0]
    r = proxies[:, :, 2] - anchor_points[:, :, 0]
    t = anchor_points[:, :, 1] - proxies[:, :, 1]
    b = proxies[:, :, 3] - anchor_points[:, :, 1]

    is_in_gt = F.stack([l, r, t, b], axis=2)
    is_in_gt = is_in_gt.min(axis=2) > 0.1
    valid_mask = (overlaps >= F.expand_dims(
        iou_thresh_per_gt, axis=0)) * is_in_gt.astype(np.float32)
    ious = overlaps * valid_mask

    argmax_overlaps = F.argmax(ious, axis=1)
    max_overlaps = F.indexing_one_hot(ious, argmax_overlaps, axis=1)

    n = all_anchors.shape[0]
    labels = -F.ones(n)
    positive_mask = (max_overlaps > 0).astype(np.float32)
    negative_mask = (max_overlaps < config.rpn_negative_overlap).astype(
        np.float32)
    labels = positive_mask + labels * (1 - positive_mask) * (1 - negative_mask)

    bbox_targets = gt_boxes[argmax_overlaps, :4]
    bbox_targets = bbox_transform_opr(all_anchors[:, :4], bbox_targets)

    labels_cat = gt_boxes[argmax_overlaps, 4]
    labels_cat = labels_cat * (1 - F.equal(labels, 0).astype(np.float32))
    labels_cat = labels_cat * (1 - F.equal(labels, -1).astype(
        np.float32)) - F.equal(labels, -1).astype(np.float32)

    return labels, bbox_targets, labels_cat