def test_mem_stats():
    """Check the maxima MemStat reports after one arange on ``xpux:0``.

    A 1024-element arange is created on the first device, everything is
    synchronised, and the values returned by ``get_max`` for both devices
    must be equal and lie within ``[4096, 4096 + 128]``.
    """
    first_dev, second_dev = "xpux:0", "xpux:1"
    lower_bound = 4096
    upper_bound = 4096 + 128

    memstat = MemStat(first_dev, second_dev)
    F.arange(1024, device=first_dev)
    mge._full_sync()

    assert (
        lower_bound
        <= memstat.get_max(first_dev)
        == memstat.get_max(second_dev)
        <= upper_bound
    )
def create_anchor_grid(featmap_size, offsets, stride, device):
    """Return flattened anchor-centre coordinates for one feature map.

    Args:
        featmap_size: pair unpacked as ``(step_x, step_y)``.
        offsets: offset of the first point, expressed in units of ``stride``.
        stride: spacing between neighbouring grid points.
        device: device on which the coordinate ranges are created.

    Returns:
        The two outputs of ``meshgrid`` (called with the y range first and
        the x range second), each reshaped to 1-D.
    """
    num_x, num_y = featmap_size
    start = offsets * stride

    def _axis(count):
        # Evenly spaced coordinates: start, start + stride, ...
        return F.arange(start, count * stride + start, step=stride, device=device)

    axis_x = _axis(num_x)
    axis_y = _axis(num_y)
    mesh_first, mesh_second = meshgrid(axis_y, axis_x)
    return mesh_first.reshape(-1), mesh_second.reshape(-1)
def mesh_grid(B, H, W):
    """Build a batched pixel-coordinate grid.

    Args:
        B: batch size.
        H: grid height.
        W: grid width.

    Returns:
        A tensor laid out as (B, 2, H, W): channel 0 holds the column index
        ``0..W-1`` and channel 1 holds the row index ``0..H-1``.
    """
    # Column indices repeated over batch and rows -> (B, H, W).
    col_idx = F.tile(F.arange(0, W), (B, H, 1))
    # Row indices are tiled as (B, W, H) and then swapped into (B, H, W).
    row_idx = F.tile(F.arange(0, H), (B, W, 1)).transpose(0, 2, 1)
    return F.stack([col_idx, row_idx], 1)
def mesh_grid_mge(B, H, W):
    """Build a batched homogeneous pixel-coordinate grid.

    Args:
        B: batch size.
        H: grid height.
        W: grid width.

    Returns:
        A tensor laid out as (B, 3, H, W): channel 0 is the column index,
        channel 1 the row index and channel 2 is filled with ones.
    """
    # Column indices repeated over batch and rows -> (B, H, W).
    col_idx = F.tile(F.arange(0, W), (B, H, 1))
    # Row indices are tiled as (B, W, H) and then swapped into (B, H, W).
    row_idx = F.tile(F.arange(0, H), (B, W, 1)).transpose(0, 2, 1)
    unit_plane = F.ones_like(col_idx)
    channels = [col_idx, row_idx, unit_plane]
    return F.stack(channels, 1)
def decode_outputs(self, outputs):
    """Decode raw predictions in place using per-level grids and strides.

    For every ``(height, width)`` in ``self.hw`` paired with its stride in
    ``self.strides`` a coordinate grid is built; the grids and strides of all
    levels are concatenated along axis 1. Entries ``[..., :2]`` of ``outputs``
    become ``(value + grid) * stride`` and entries ``[..., 2:4]`` become
    ``exp(value) * stride``.

    Args:
        outputs: prediction tensor; it is modified in place.

    Returns:
        The same ``outputs`` tensor after decoding.
    """
    grid_parts, stride_parts = [], []
    for fm_hw, fm_stride in zip(self.hw, self.strides):
        fm_h, fm_w = fm_hw
        mesh_a, mesh_b = meshgrid(F.arange(fm_h), F.arange(fm_w))
        level_grid = F.stack((mesh_a, mesh_b), 2).reshape(1, -1, 2)
        grid_parts.append(level_grid)
        # One stride value per grid point, with a trailing singleton axis.
        lead_dims = level_grid.shape[:2]
        stride_parts.append(F.full((*lead_dims, 1), fm_stride))

    all_grids = F.concat(grid_parts, axis=1)
    all_strides = F.concat(stride_parts, axis=1)

    outputs[..., :2] = (outputs[..., :2] + all_grids) * all_strides
    outputs[..., 2:4] = F.exp(outputs[..., 2:4]) * all_strides
    return outputs
def forward(self, mid, ref):
    """Gather, for every pixel of ``mid``, its top-K best matching ``ref`` features.

    Both inputs are L2-normalised along the channel axis, a cost volume over a
    ``(2d + 1) x (2d + 1)`` displacement window is computed, and for each pixel
    the ``self.K`` highest-scoring displacements are selected. The ``ref``
    features at those positions are gathered, concatenated along the channel
    axis and passed through ``self.conv``.

    Args:
        mid: tensor unpacked as (B, C, H, W).
        ref: reference tensor; NOTE(review): assumed to have the same shape
            as ``mid`` before ``compute_cost_volume`` is applied - confirm.

    Returns:
        The output of ``self.conv`` applied to the K gathered (B, C, H, W)
        tensors concatenated on axis 1.
    """
    B, C, H, W = mid.shape
    window = 2 * self.d + 1  # side length of the displacement window
    # The reshape below treats the ``ref`` returned by compute_cost_volume as
    # (H + 2d) x (W + 2d), so this is the row stride of its flattened plane.
    padded_w = W + 2 * self.d

    mid = F.normalize(mid, p=2, axis=1)
    ref = F.normalize(ref, p=2, axis=1)
    cost_volume, ref = compute_cost_volume(
        mid, ref, max_displacement=self.d)  # [B, (2d+1)**2, H, W]
    cost_volume = F.dimshuffle(cost_volume, (0, 2, 3, 1))
    cost_volume = cost_volume.reshape((-1, window ** 2))

    # argmax: keep only the indices of the K best displacements per pixel
    indices = F.top_k(cost_volume, k=self.K, descending=True)[1]  # [B*H*W, K]
    del cost_volume

    ref_list = []  # each entry is [B, C, H, W]
    origin_i_j = F.arange(0, H * W, 1)  # float32
    origin_i = F.floor(origin_i_j / W)  # (H*W, ) row of each output pixel
    origin_j = F.mod(origin_i_j, W)  # (H*W, ) column of each output pixel
    del origin_i_j

    # Flatten the spatial plane of ref so a single gather index addresses it.
    ref = ref.reshape((B, C, (H + 2 * self.d) * padded_w))

    for i in range(self.K):
        index = indices[:, i]  # [B*H*W, ]
        index = index.reshape((-1, H * W))
        # Split the window index into row/column displacement and shift it
        # by the pixel's own position.
        index_i = F.floor(index / window) + origin_i  # [B, H*W]
        index_j = F.mod(index, window) + origin_j  # [B, H*W]
        # Compute the flat index from each pixel's (i, j). The row stride is
        # the width of the flattened ref plane (W + 2d), not W.
        index = index_i * padded_w + index_j  # [B, H*W]
        index = index.astype('int32')
        # add axis
        index = F.add_axis(index, axis=1)  # [B, 1, H*W]
        # broadcast over channels
        index = F.broadcast_to(index, (B, C, H * W))
        # gather
        output = F.gather(ref, axis=2, index=index)  # [B, C, H*W]
        ref_list.append(output.reshape((B, C, H, W)))

    return self.conv(F.concat(ref_list, axis=1))
def fun(inp):
    """Return the length of ``arange(4, 8 * n + 4, 8)`` where ``n = inp.shape[-1]``."""
    last_dim = inp.shape[-1]
    stop = last_dim * 8 + 4
    samples = F.arange(4, stop, 8)
    return samples.shape[0]
def roi_pool(
    rpn_fms,
    rois,
    stride,
    pool_shape,
    pooler_type="roi_align",
):
    """Pool ROI features from a feature pyramid, one level per ROI.

    Each ROI is assigned to the level
    ``floor(4 + log2(sqrt(area) / 224))``, clamped to the levels implied by
    ``stride[0]`` and ``stride[-1]``. One dummy all-zero ROI is appended per
    level so that no level receives an empty batch; the dummy results are
    dropped before returning.

    Args:
        rpn_fms: sequence of feature maps, one per pyramid level.
        rois: 2-D tensor of ROIs; columns 1..4 are used as box coordinates
            (column 0 is presumably the batch index - confirm with caller).
        stride: sequence of strides, same length as ``rpn_fms``.
        pool_shape: output shape handed to the pooling operator.
        pooler_type: ``"roi_pool"`` or ``"roi_align"``.

    Returns:
        Pooled features ordered like the input ``rois``.

    Raises:
        ValueError: if ``pooler_type`` is not one of the supported values.
        AssertionError: if ``stride`` and ``rpn_fms`` differ in length.
    """
    # Fail early: previously an unknown pooler left ``pool_fm`` unbound and
    # surfaced as an UnboundLocalError inside the loop.
    if pooler_type not in ("roi_pool", "roi_align"):
        raise ValueError(
            "unsupported pooler_type: {!r}; expected 'roi_pool' or "
            "'roi_align'".format(pooler_type)
        )

    rois = rois.detach()
    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = int(math.log2(stride[0]))
    max_level = int(math.log2(stride[-1]))

    num_fms = len(rpn_fms)
    box_area = (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2])
    assigned_level = F.floor(
        canonical_level
        + F.log(F.sqrt(box_area) / canonical_box_size) / np.log(2)
    ).astype("int32")
    assigned_level = F.minimum(assigned_level, max_level)
    assigned_level = F.maximum(assigned_level, min_level)
    assigned_level = assigned_level - min_level  # zero-based level index

    # avoid empty assignment: append one dummy ROI per level
    assigned_level = F.concat(
        [
            assigned_level,
            F.arange(num_fms, dtype="int32", device=assigned_level.device),
        ],
    )
    rois = F.concat([rois, F.zeros((num_fms, rois.shape[-1]))])

    pool_list, inds_list = [], []
    for i in range(num_fms):
        _, inds = F.cond_take(assigned_level == i, assigned_level)
        level_rois = rois[inds]

        if pooler_type == "roi_pool":
            pool_fm = F.nn.roi_pooling(
                rpn_fms[i],
                level_rois,
                pool_shape,
                mode="max",
                scale=1.0 / stride[i],
            )
        else:  # "roi_align" (validated above)
            pool_fm = F.nn.roi_align(
                rpn_fms[i],
                level_rois,
                pool_shape,
                mode="average",
                spatial_scale=1.0 / stride[i],
                sample_points=2,
                aligned=True,
            )
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # Restore the original ROI order; the dummy ROIs carry the largest
    # indices, so they end up last and are sliced off.
    fm_order = F.argsort(F.concat(inds_list, axis=0))
    pool_feature = F.concat(pool_list, axis=0)
    pool_feature = pool_feature[fm_order][:-num_fms]

    return pool_feature
def test_slice():
    """Strided slicing inside a traced function must match NumPy slicing."""

    @trace
    def take_odd_columns(t):
        return t[:, 1::2]

    x = F.arange(8).reshape(2, 4)

    # The traced function is invoked twice; only the second result is checked.
    take_odd_columns(x)
    y = take_odd_columns(x)

    actual = y.numpy()
    expected = x.numpy()[:, 1::2]
    np.testing.assert_array_equal(actual, expected)

    # The traced output must still be usable in a further operation.
    y + y
def get_focal_loss(
    logits: Tensor,
    labels: Tensor,
    ignore_label: int = -1,
    background: int = 0,
    alpha: float = 0.5,
    gamma: float = 0,
    norm_type: str = "fg",
) -> Tensor:
    r"""Sigmoid focal loss from "Focal Loss for Dense Object Detection":
    <https://arxiv.org/pdf/1708.02002.pdf>

    .. math::

        FL(p_t) = -\alpha_t(1-p_t)^\gamma \log(p_t)

    Args:
        logits (Tensor):
            predicted logits with the shape of :math:`(B, A, C)`
        labels (Tensor):
            labels assigned to the boxes, with shape of :math:`(B, A)`
        ignore_label (int):
            label value marking samples to ignore. Default: -1
        background (int):
            label value of the background class. Default: 0
        alpha (float):
            weight balancing positive vs. negative terms. Default: 0.5
        gamma (float):
            focusing exponent balancing easy vs. hard samples. Default: 0
        norm_type (str): one of "fg" or "none":
            "fg": divide the summed loss by the number of foreground samples
                  (at least 1)
            "none": return the summed loss unnormalized

    Returns:
        the calculated focal loss.

    Raises:
        NotImplementedError: if ``norm_type`` is neither "fg" nor "none".
    """
    # Class ids 1..C are compared against labels broadcast on a new last axis.
    class_range = F.arange(1, logits.shape[2] + 1)
    labels = F.add_axis(labels, axis=2)

    scores = F.sigmoid(logits)
    pos_part = (1 - scores)**gamma * layers.logsigmoid(logits)
    neg_part = scores**gamma * layers.logsigmoid(-logits)

    is_target = labels == class_range
    not_target = labels != class_range
    not_ignored = labels != ignore_label

    pos_loss = -is_target * pos_part * alpha
    neg_loss = -not_target * not_ignored * neg_part * (1 - alpha)
    loss = (pos_loss + neg_loss).sum()

    if norm_type == "none":
        return loss
    if norm_type != "fg":
        raise NotImplementedError

    fg_mask = (labels != background) * (labels != ignore_label)
    return loss / F.maximum(fg_mask.sum(), 1)
def get_output_and_grid(self, output, k, stride, dtype):
    """Decode one level's raw output and return it with its coordinate grid.

    The grid cached in ``self.grids[k]`` is rebuilt (and stored back) when its
    spatial dims ``shape[2:4]`` differ from those of ``output``. The output is
    rearranged to ``(batch, n_anchors * h * w, channels)``; entries
    ``[..., :2]`` become ``(value + grid) * stride`` and entries ``[..., 2:4]``
    become ``exp(value) * stride``.

    Args:
        output: raw prediction tensor whose last two dims are (h, w).
        k: index of the level inside ``self.grids``.
        stride: stride used to scale the decoded values.
        dtype: type passed to ``.type(...)`` for a newly built grid.

    Returns:
        Tuple ``(output, grid)`` with ``grid`` viewed as ``(1, -1, 2)``.
    """
    cached_grid = self.grids[k]
    n_batch = output.shape[0]
    channels = 5 + self.num_classes
    fm_h, fm_w = output.shape[-2:]

    if cached_grid.shape[2:4] != output.shape[2:4]:
        rows, cols = meshgrid([F.arange(fm_h), F.arange(fm_w)])
        stacked = F.stack((cols, rows), 2)
        cached_grid = stacked.reshape(1, 1, fm_h, fm_w, 2).type(dtype)
        self.grids[k] = cached_grid

    output = output.view(n_batch, self.n_anchors, channels, fm_h, fm_w)
    output = output.permute(0, 1, 3, 4, 2)
    output = output.reshape(n_batch, self.n_anchors * fm_h * fm_w, -1)

    flat_grid = cached_grid.view(1, -1, 2)
    output[..., :2] = (output[..., :2] + flat_grid) * stride
    output[..., 2:4] = F.exp(output[..., 2:4]) * stride
    return output, flat_grid
import numpy as np  # required by np.arange below; was missing

import megengine as mge  # MegEngine is conventionally abbreviated as mge
import megengine.functional as F  # functional is conventionally abbreviated as F
from megengine import tensor
from megengine import Tensor

# 1. Build a Python sequence, then convert it into a MegEngine Tensor
py_list = range(5)
print(mge.tensor(py_list))

# 2. Build a NumPy ndarray, then convert it into a MegEngine Tensor
np_ndarray = np.arange(5).astype("float32")
print(mge.tensor(np_ndarray))

# 3. Create a MegEngine Tensor directly with the functional module
mge_tensor = F.arange(5)
print(mge_tensor)

# Inspect the dtype and the Python type of the tensor
print(mge_tensor.dtype)
print(type(mge_tensor))

# astype returns a new tensor with the requested dtype
new_tensor = mge_tensor.astype("float16")
print(new_tensor)

# numpy() converts the tensor into a NumPy object
print(type(mge_tensor.numpy()))

print(tensor([1, 2, 3]))  # in practice we prefer Tensors of float32 type
print(Tensor([1., 2., 3.]))  # so we habitually add a dot to mark floats

# A nested list gives a 2-D tensor; print its shape
matrix_tensor = mge.tensor([[1., 2., 3.], [4., 5., 6.]])
print(matrix_tensor.shape)
def forward(self, features, label=None, mask=None):
    """Compute the supervised contrastive loss.

    If label and mask are both None, the loss degenerates to the SimCLR
    unsupervised loss.

    Reference:
        "A Simple Framework for Contrastive Learning of Visual Representations"<https://arxiv.org/pdf/2002.05709.pdf>
        "Supervised Contrastive Learning"<https://arxiv.org/abs/2004.11362>

    Args:
        features(tensor): The embedding feature. shape=[bs, n_views, ...]
        label(tensor): The label of images, shape=[bs]
        mask(tensor): contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if
            sample j has the same class as sample i. Can be asymmetric.

    Returns:
        The scalar loss.

    Raises:
        ValueError: if ``features`` has fewer than 3 dimensions, if both
            ``label`` and ``mask`` are given, or if ``self.contrast_mode``
            is neither "one" nor "all".
        RuntimeError: if the number of labels differs from the batch size.
    """
    if len(features.shape) < 3:
        raise ValueError("Features need have 3 dimensions at least")
    bs, num_view = features.shape[:2]
    # if dimension > 3, flatten the features to [bs, num_view, D]
    if len(features.shape) > 3:
        features = features.reshape(bs, num_view, -1)

    # label and mask cannot be provided at the same time
    if (label is not None) and (mask is not None):
        raise ValueError("label and mask cannot provided at the same time")
    elif (label is None) and (mask is None):
        mask = F.eye(bs, dtype="float32")
    elif label is not None:
        label = label.reshape(-1, 1)
        if label.shape[0] != bs:
            raise RuntimeError(
                "Num of labels does not match num of features")
        # Cast to float32 so the mask behaves like the other two branches
        # in the arithmetic below.
        mask = F.equal(label, label.T).astype("float32")
    else:
        mask = mask.astype("float32")

    contrast_count = features.shape[1]
    # Keep ``features`` as a tensor: the "one" mode indexes it below, which
    # is impossible on the list returned by F.split.
    views = F.split(features, contrast_count, axis=1)
    # Views stacked along the batch axis -> [bs * n_views, D]
    contrast_feature = F.squeeze(F.concat(views, axis=0), axis=1)
    if self.contrast_mode == "one":
        anchor_feature = features[:, 0]
        anchor_count = 1
    elif self.contrast_mode == "all":
        anchor_feature = contrast_feature
        anchor_count = contrast_count
    else:
        raise ValueError("Unknown mode:{}".format(self.contrast_mode))

    # compute logits
    anchor_dot_contrast = F.div(
        F.matmul(anchor_feature, contrast_feature.T), self.temperate)
    # for numerical stability
    logits_max = F.max(anchor_dot_contrast, axis=-1, keepdims=True)
    logits = anchor_dot_contrast - logits_max

    # Tile the mask block-wise to [bs * anchor_count, bs * contrast_count],
    # matching the view-major layout of the features. A stack followed by a
    # reshape would scramble the entries instead of repeating the blocks.
    mask = F.tile(mask, (anchor_count, contrast_count))

    # mask-out self-contrast cases: zero the diagonal of an all-ones matrix
    logits_mask = F.scatter(
        F.ones_like(mask), 1,
        F.arange(0, int(bs * anchor_count), dtype="int32").reshape(-1, 1),
        F.zeros(int(bs * anchor_count), dtype="int32").reshape(-1, 1))
    mask = mask * logits_mask

    # compute log_prob
    exp_logits = F.exp(logits) * logits_mask
    log_prob = logits - F.log(F.sum(exp_logits, axis=1,
                                    keepdims=True))  # equation 2

    # mean log-likelihood over the positives of each anchor
    # NOTE(review): an anchor without any positive gives a 0 / 0 here.
    mean_log_prob_pos = F.sum(mask * log_prob, axis=1) / F.sum(mask, axis=1)

    # loss
    loss = -(self.temperate / self.base_temperate) * mean_log_prob_pos
    loss = F.mean(loss.reshape(anchor_count, bs))
    return loss