def get_iou(boxes1: Tensor, boxes2: Tensor, return_ignore=False) -> Tensor:
    """Compute the pairwise IoU between two sets of boxes.

    Boxes are laid out as (xmin, ymin, xmax, ymax).

    Args:
        boxes1 (Tensor): N boxes. They are broadcast to (N, M, 4), so four
            columns are expected.
        boxes2 (Tensor): M boxes. Only the first four columns are used as
            coordinates. When ``return_ignore`` is set, column 4 is read as
            well and a value of -1 marks the box as "ignore".
        return_ignore (bool): also return the overlaps against ignored boxes.

    Returns:
        Tensor: IoU sized [N, M] when ``return_ignore`` is False. Otherwise a
        tuple ``(overlaps, overlaps_ignore)`` where ``overlaps`` is the IoU
        with the columns of ignored boxes zeroed, and ``overlaps_ignore`` is
        intersection divided by the ``boxes1`` area, kept only for ignored
        boxes.
    """
    num_rows = boxes1.shapeof(0)
    num_cols = boxes2.shapeof(0)

    # Expand both sets to a common (N, M, 4) grid of box pairs.
    pair_shape = (num_rows, num_cols, 4)
    lhs = F.add_axis(boxes1, 1).broadcast(*pair_shape)
    rhs = F.add_axis(boxes2[:, :4], 0).broadcast(*pair_shape)

    def _overlap_len(lo, hi):
        # Signed extent of the intersection along one axis.
        return F.minimum(lhs[:, :, hi], rhs[:, :, hi]) - F.maximum(
            lhs[:, :, lo], rhs[:, :, lo]
        )

    def _area(b):
        return (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    # Negative extents mean "no overlap" and are clamped to zero.
    inter = F.maximum(_overlap_len(0, 2), 0) * F.maximum(_overlap_len(1, 3), 0)

    matrix_shape = (num_rows, num_cols)
    area1 = F.add_axis(_area(boxes1), 1).broadcast(*matrix_shape)
    area2 = F.add_axis(_area(boxes2), 0).broadcast(*matrix_shape)

    overlaps = F.maximum(inter / (area1 + area2 - inter), 0)
    if not return_ignore:
        return overlaps

    overlaps_ignore = F.maximum(inter / area1, 0)
    ignore_mask = F.add_axis((boxes2[:, 4] == -1), 0).broadcast(*matrix_shape)
    overlaps *= (1 - ignore_mask)
    overlaps_ignore *= ignore_mask
    return overlaps, overlaps_ignore
def get_cls_reg_ctr_targets(self, points, gt_bboxes, bbox_scale=0.15):
    """Build classification, regression and centerness targets.

    Args:
        points (Tensor): (1, 2, 37, 37) per the original notes; the position
            in the original image that each score-map location maps to.
        gt_bboxes (Tensor): ground-truth box of each image, (B, 4), in
            [tl_x, tl_y, br_x, br_y] format, in original-image coordinates.
        bbox_scale (float): fraction of the box width kept as the positive
            region around the box centre.

    Returns:
        cls_labels (Tensor): (B, 1, H, W); 1 inside the shrunken box, 0 for
            background.
        bbox_targets (Tensor): (B, 4, H, W); distances from every point to
            the top-left and bottom-right corners. Computed for every
            location (so values can be negative); only foreground locations
            should contribute to the loss.
        centerness_targets (Tensor): (B, 1, H, W); only meaningful on the
            foreground.

    Note:
        The shrink margin is derived from the x extent of the box only and
        is applied to both axes.
    """
    B, _ = gt_bboxes.shape  # unpacking also requires gt_bboxes to be 2-D
    boxes = F.add_axis(F.add_axis(gt_bboxes, axis=-1), axis=-1)  # (B, 4, 1, 1)

    # --- classification labels -------------------------------------------
    coord0, coord1 = points[:, 0, ...], points[:, 1, ...]
    low0, low1 = boxes[:, 0, ...], boxes[:, 1, ...]
    high0, high1 = boxes[:, 2, ...], boxes[:, 3, ...]
    # The template is comparatively large, so the positive region is the box
    # shrunk by this margin on every side.
    margin = (high0 - low0) * (1 - bbox_scale) / 2
    inside = (
        (coord0 > low0 + margin)
        * (coord1 > low1 + margin)
        * (coord0 < high0 - margin)
        * (coord1 < high1 - margin)
    )
    cls_labels = F.add_axis(inside, axis=1)  # (B, 1, H, W)
    cls_labels.requires_grad = False

    # --- regression targets ----------------------------------------------
    # Offsets are evaluated at every location, hence possibly negative.
    to_top_left = points - boxes[:, 0:2, ...]  # (B, 2, H, W)
    to_bottom_right = boxes[:, 2:4, ...] - points
    bbox_targets = F.concat([to_top_left, to_bottom_right], axis=1)  # (B, 4, H, W)
    bbox_targets.requires_grad = False

    # --- centerness targets ----------------------------------------------
    def _balance(channel):
        # min/max ratio of the two opposite distances along one axis
        near = to_top_left[:, channel, ...]
        far = to_bottom_right[:, channel, ...]
        return F.minimum(near, far) / F.maximum(near, far)

    centerness = F.sqrt(F.abs(_balance(0) * _balance(1)))
    centerness_targets = F.add_axis(centerness, axis=1)  # (B, 1, H, W)
    centerness_targets.requires_grad = False

    return cls_labels, bbox_targets, centerness_targets
def get_center_offsets(self, featmap, stride):
    """Return the (x, y, x, y) image-space shift of every feature-map cell.

    Args:
        featmap: tensor whose last two dimensions are (height, width).
        stride: multiplier that maps a feature-map index to image space.

    Returns:
        Tensor of shape (height * width, 4); each row is
        ``[x, y, x, y]`` with ``x = col * stride`` and ``y = row * stride``,
        rows ordered row-major over the feature map.
    """
    shape = featmap.shape
    height, width = shape[-2], shape[-1]
    grid = (height, width)

    xs = F.linspace(0, width - 1, width) * stride
    ys = F.linspace(0, height - 1, height) * stride

    # Mesh grid: xs varies along columns, ys along rows.
    grid_x = xs.reshape(-1, xs.shape[0]).broadcast(*grid)
    grid_y = ys.reshape(ys.shape[0], -1).broadcast(*grid)

    col_x = F.add_axis(grid_x.reshape(-1), 1)
    col_y = F.add_axis(grid_y.reshape(-1), 1)
    return F.concat([col_x, col_y, col_x, col_y], axis=1)
def get_focal_loss(
    logits: Tensor,
    labels: Tensor,
    ignore_label: int = -1,
    background: int = 0,
    alpha: float = 0.5,
    gamma: float = 0,
    norm_type: str = "fg",
) -> Tensor:
    r"""Sigmoid focal loss, see <https://arxiv.org/pdf/1708.02002.pdf>.

    .. math::

        FL(p_t) = -\alpha_t(1-p_t)^\gamma \log(p_t)

    Args:
        logits (Tensor): predicted logits with shape :math:`(B, A, C)`.
        labels (Tensor): assigned labels with shape :math:`(B, A)`. Class
            ``c`` (1-based) is matched against channel ``c - 1`` of
            ``logits``.
        ignore_label (int): label value excluded from the loss. Default: -1
        background (int): label value of the background class. Default: 0
        alpha (float): weight of the positive term; negatives get
            ``1 - alpha``. Default: 0.5
        gamma (float): focusing exponent. Default: 0
        norm_type (str): ``"fg"`` divides the summed loss by the number of
            foreground samples (at least 1); ``"none"`` returns the plain
            sum.

    Returns:
        The calculated focal loss.

    Raises:
        NotImplementedError: for any other ``norm_type``.
    """
    num_classes = logits.shape[2]
    class_ids = F.arange(1, num_classes + 1)
    labels = F.add_axis(labels, axis=2)

    prob = F.sigmoid(logits)
    pos_term = (1 - prob)**gamma * layers.logsigmoid(logits)
    neg_term = prob**gamma * layers.logsigmoid(-logits)

    is_target = labels == class_ids
    not_target = labels != class_ids
    not_ignored = labels != ignore_label

    pos_loss = -is_target * pos_term * alpha
    neg_loss = -not_target * not_ignored * neg_term * (1 - alpha)
    total = (pos_loss + neg_loss).sum()

    if norm_type == "none":
        return total
    if norm_type != "fg":
        raise NotImplementedError

    foreground = (labels != background) * (labels != ignore_label)
    return total / F.maximum(foreground.sum(), 1)
def box_overlap_opr(boxes1: Tensor, boxes2: Tensor) -> Tensor:
    """Compute the IoU between all N x M pairs of two box sets.

    Boxes are laid out as (xmin, ymin, xmax, ymax).

    Args:
        boxes1 (Tensor): N boxes.
        boxes2 (Tensor): M boxes.

    Returns:
        Tensor: IoU, sized [N, M].
    """
    # Expand both sets to a common (N, M, 4) grid of box pairs.
    pair_shape = (boxes1.shape[0], boxes2.shapeof()[0], 4)
    lhs = F.add_axis(boxes1, 1).broadcast(*pair_shape)
    rhs = F.add_axis(boxes2, 0).broadcast(*pair_shape)

    def _overlap_len(lo, hi):
        # Signed extent of the intersection along one axis.
        return F.minimum(lhs[:, :, hi], rhs[:, :, hi]) - F.maximum(
            lhs[:, :, lo], rhs[:, :, lo])

    def _area(b):
        return (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    # Negative extents mean "no overlap" and are clamped to zero.
    inter = F.maximum(_overlap_len(0, 2), 0) * F.maximum(_overlap_len(1, 3), 0)

    matrix_shape = (boxes1.shape[0], boxes2.shapeof()[0])
    area1 = F.add_axis(_area(boxes1), 1).broadcast(*matrix_shape)
    area2 = F.add_axis(_area(boxes2), 0).broadcast(*matrix_shape)

    return F.maximum(inter / (area1 + area2 - inter), 0)
def get_cls_reg_ctr_targets(points, gt_bboxes, bbox_scale=0.25):
    """Build classification, regression and centerness targets.

    Args:
        points (Tensor): (1, 2, 19, 19) per the original notes.
        gt_bboxes (Tensor): ground-truth box of each image, (B, 4), in
            [tl_x, tl_y, br_x, br_y] format.
        bbox_scale (float): fraction of the box width kept as the positive
            region around the box centre.

    Returns:
        cls_labels (Tensor): (B, 1, H, W); 1 inside the shrunken box, 0 for
            background.
        bbox_targets (Tensor): (B, 4, H, W); computed for every location (so
            values can be negative); only foreground locations should
            contribute to the loss.
        centerness_targets (Tensor): (B, H, W) -- unlike ``cls_labels`` no
            channel axis is added here; only meaningful on the foreground.

    Note:
        The shrink margin is derived from the x extent of the box only and
        is applied to both axes.
    """
    boxes = F.add_axis(F.add_axis(gt_bboxes, axis=-1), axis=-1)  # (B, 4, 1, 1)

    # --- classification labels -------------------------------------------
    coord0, coord1 = points[:, 0, ...], points[:, 1, ...]
    low0, low1 = boxes[:, 0, ...], boxes[:, 1, ...]
    high0, high1 = boxes[:, 2, ...], boxes[:, 3, ...]
    # The template is comparatively large, so the positive region is the box
    # shrunk by this margin on every side.
    margin = (high0 - low0) * (1 - bbox_scale) / 2
    inside = (
        (coord0 > low0 + margin)
        * (coord1 > low1 + margin)
        * (coord0 < high0 - margin)
        * (coord1 < high1 - margin)
    )
    cls_labels = F.add_axis(inside, axis=1)  # (B, 1, H, W)

    # --- regression targets ----------------------------------------------
    # Offsets are evaluated at every location, hence possibly negative.
    to_top_left = points - boxes[:, 0:2, ...]  # (B, 2, H, W)
    to_bottom_right = boxes[:, 2:4, ...] - points
    bbox_targets = F.concat([to_top_left, to_bottom_right], axis=1)  # (B, 4, H, W)

    # --- centerness targets ----------------------------------------------
    def _balance(channel):
        # min/max ratio of the two opposite distances along one axis
        near = to_top_left[:, channel, ...]
        far = to_bottom_right[:, channel, ...]
        return F.minimum(near, far) / F.maximum(near, far)

    centerness_targets = F.sqrt(F.abs(_balance(0) * _balance(1)))

    return cls_labels, bbox_targets, centerness_targets
def forward(self, fpn_fms, rcnn_rois, im_info=None, gt_boxes=None):
    """Run the RCNN head on pooled ROI features.

    Args:
        fpn_fms: feature maps indexable by the entries of
            ``self.in_features``.
        rcnn_rois: proposals; columns 1:5 are used as box coordinates.
        im_info, gt_boxes: forwarded to ``self.get_ground_truth``.

    Returns:
        In training mode a dict with ``loss_rcnn_cls`` and
        ``loss_rcnn_loc``; otherwise ``(pred_bbox, pred_scores)`` with the
        background class removed.
    """
    rcnn_rois, labels, bbox_targets = self.get_ground_truth(
        rcnn_rois, im_info, gt_boxes)

    selected_fms = [fpn_fms[name] for name in self.in_features]
    pooled = layers.roi_pool(
        selected_fms, rcnn_rois, self.stride, self.pooling_size,
        self.pooling_method,
    )
    hidden = F.flatten(pooled, start_axis=1)
    hidden = F.relu(self.fc1(hidden))
    hidden = F.relu(self.fc2(hidden))
    pred_cls = self.pred_cls(hidden)
    pred_delta = self.pred_delta(hidden)

    if not self.training:
        # Column 0 of the scores / first 4 deltas belong to the background.
        pred_scores = F.softmax(pred_cls, axis=1)[:, 1:]
        class_deltas = pred_delta[:, 4:].reshape(-1, 4)
        # rois (N, 4) -> (N, 1, 4) -> (N, num_classes, 4) -> (N * num_classes, 4)
        tiled_shape = (rcnn_rois.shapeof(0), self.cfg.num_classes, 4)
        anchors = F.add_axis(rcnn_rois[:, 1:5], 1)
        anchors = anchors.broadcast(tiled_shape).reshape(-1, 4)
        pred_bbox = self.box_coder.decode(anchors, class_deltas)
        return pred_bbox, pred_scores

    # Classification loss.
    loss_rcnn_cls = layers.softmax_loss(pred_cls, labels)

    # Regression loss: pick, per ROI, the 4 deltas of its labelled class.
    per_class = pred_delta.reshape(-1, self.cfg.num_classes + 1, 4)
    selector = labels.reshape(-1, 1).broadcast((labels.shapeof(0), 4))
    chosen = F.indexing_one_hot(per_class, selector, axis=1)
    loss_rcnn_loc = layers.get_smooth_l1_loss(
        chosen,
        bbox_targets,
        labels,
        self.cfg.rcnn_smooth_l1_beta,
        norm_type="all",
    )
    return {
        'loss_rcnn_cls': loss_rcnn_cls,
        'loss_rcnn_loc': loss_rcnn_loc,
    }
def test_generator_batch(optical, sar, *, netG):
    """Predict one box per sample by taking the best-scoring location.

    Args:
        optical, sar: the two network inputs; passed as ``netG(sar, optical)``.
        netG: the network; switched to eval mode. Its ``fm_ctr`` attribute is
            handed to ``get_box`` together with the predicted offsets.

    Returns:
        Tensor of shape [B, 4]: for every sample, the box decoded at the
        location with the highest classification score.
    """
    netG.eval()
    # Shapes per the original notes: [B,1,19,19], [B,2,19,19], [B,1,19,19].
    # The centerness score is not used for weighting.
    cls_score, offsets, _ctr_score = netG(sar, optical)
    B, _, _, _ = cls_score.shape

    flat_scores = cls_score.reshape(B, -1)  # [B, H*W]
    best = F.argmax(flat_scores, axis=1)  # (B,)

    boxes = get_box(netG.fm_ctr, offsets)  # (B, 4, H, W)
    boxes = boxes.reshape(B, 4, -1)

    picked = [
        F.add_axis(boxes[i, :, best[i]], axis=0)  # (1, 4)
        for i in range(B)
    ]
    return F.concat(picked, axis=0)  # [B, 4]
def forward(self, input_ids, token_type_ids=None):
    """Embed token ids by summing word, position and token-type embeddings.

    Args:
        input_ids: token ids; dimension 1 is the sequence length.
        token_type_ids: optional segment ids with the same shape as
            ``input_ids``; defaults to all zeros.

    Returns:
        The summed embeddings after layer normalisation and dropout.
    """
    seq_length = input_ids.shape[1]
    if token_type_ids is None:
        token_type_ids = zeros_like(input_ids)

    # 0 .. seq_length-1, repeated for every row of input_ids.
    positions = F.linspace(0, seq_length - 1, seq_length).astype(np.int32)
    positions = F.add_axis(positions, 0).broadcast(*input_ids.shape)

    summed = self.word_embeddings(input_ids)
    summed = summed + self.position_embeddings(positions)
    summed = summed + self.token_type_embeddings(token_type_ids)

    return self.dropout(self.LayerNorm(summed))
def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
    """Run the RCNN head: ROI-align, two FC layers, class/box prediction.

    Args:
        fpn_fms: FPN feature maps ordered by stride 64, 32, 16, 8, 4; the
            first one is dropped and the rest are reversed.
        rcnn_rois: proposals; columns 1:5 are used as box coordinates.
        labels, bbox_targets: training targets, passed through ``roi_pool``.

    Returns:
        In training mode a dict with ``loss_rcnn_cls`` and
        ``loss_rcnn_loc``. Otherwise a tensor with five columns per
        (roi, foreground class): the decoded box followed by its score.
    """
    # stride: 64,32,16,8,4 -> 4, 8, 16, 32
    level_fms = fpn_fms[1:][::-1]
    level_strides = [4, 8, 16, 32]
    pooled, rcnn_rois, labels, bbox_targets = roi_pool(
        level_fms, rcnn_rois, level_strides, (7, 7), 'roi_align',
        labels, bbox_targets)

    hidden = F.flatten(pooled, start_axis=1)
    hidden = F.relu(self.fc1(hidden))
    hidden = F.relu(self.fc2(hidden))
    pred_cls = self.pred_cls(hidden)
    pred_delta = self.pred_delta(hidden)

    if not self.training:
        # Drop the background column / the background's 4 deltas.
        scores = F.softmax(pred_cls)[:, 1:].reshape(-1, 1)
        deltas = pred_delta[:, 4:].reshape(-1, 4)
        tiled_shape = (rcnn_rois.shapeof()[0], config.num_classes - 1, 4)
        anchors = F.add_axis(rcnn_rois[:, 1:5], 1)
        anchors = anchors.broadcast(tiled_shape).reshape(-1, 4)
        boxes = restore_bbox(anchors, deltas, True)
        return F.concat([boxes, scores], axis=1)

    labels = labels.astype(np.int32).reshape(-1)

    # Regression loss: only positive ROIs contribute. Non-positive labels
    # are mapped to class 0 so they remain valid gather indices.
    positive = labels > 0
    per_class = pred_delta.reshape(-1, config.num_classes, 4)
    selector = (labels * positive).reshape(-1, 1)
    selector = selector.broadcast((labels.shapeof()[0], 4))
    chosen = F.indexing_one_hot(per_class, selector, 1)
    loc_loss = smooth_l1_loss(chosen, bbox_targets,
                              config.rcnn_smooth_l1_beta)
    loc_loss = loc_loss * positive

    # Classification loss: negative labels are excluded.
    valid = labels >= 0
    cls_loss = softmax_loss(pred_cls, labels)
    cls_loss = cls_loss * valid

    # Both losses are averaged over the number of valid ROIs.
    normalizer = 1.0 / (valid.sum())
    return {
        'loss_rcnn_cls': cls_loss.sum() * normalizer,
        'loss_rcnn_loc': loc_loss.sum() * normalizer,
    }
def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
    """Run the two-branch (EMD) RCNN head.

    Args:
        fpn_fms: FPN feature maps ordered by stride 64, 32, 16, 8, 4; the
            first one is dropped and the rest are reversed.
        rcnn_rois: proposals; columns 1:5 are used as box coordinates.
        labels, bbox_targets: training targets, passed through ``roi_pool``.

    Returns:
        In training mode a dict with ``loss_rcnn_emd``: per ROI the smaller
        of the two branch-to-target matchings, averaged over ROIs. Otherwise
        a tensor of five-column rows (box + score) in which the predictions
        of the two branches for the same ROI/class are adjacent.
    """
    # stride: 64,32,16,8,4 -> 4, 8, 16, 32
    level_fms = fpn_fms[1:][::-1]
    level_strides = [4, 8, 16, 32]
    pooled, rcnn_rois, labels, bbox_targets = roi_pool(
        level_fms, rcnn_rois, level_strides, (7, 7), 'roi_align',
        labels, bbox_targets)

    hidden = F.flatten(pooled, start_axis=1)
    hidden = F.relu(self.fc1(hidden))
    hidden = F.relu(self.fc2(hidden))

    cls_a = self.emd_pred_cls_0(hidden)
    delta_a = self.emd_pred_delta_0(hidden)
    cls_b = self.emd_pred_cls_1(hidden)
    delta_b = self.emd_pred_delta_1(hidden)

    if self.training:
        # Evaluate both orderings of the two branches against the targets
        # and keep, per ROI, whichever is cheaper.
        cost_ab = emd_loss(delta_a, cls_a, delta_b, cls_b,
                           bbox_targets, labels)
        cost_ba = emd_loss(delta_b, cls_b, delta_a, cls_a,
                           bbox_targets, labels)
        costs = F.concat([cost_ab, cost_ba], axis=1)
        cheapest = F.argmin(costs, axis=1)
        loss_emd = F.indexing_one_hot(costs, cheapest, 1)
        loss_emd = loss_emd.sum()/loss_emd.shapeof()[0]
        loss_dict = {}
        loss_dict['loss_rcnn_emd'] = loss_emd
        return loss_dict

    # Drop the background column / the background's 4 deltas.
    scores_a = F.softmax(cls_a)[:, 1:].reshape(-1, 1)
    scores_b = F.softmax(cls_b)[:, 1:].reshape(-1, 1)
    fg_delta_a = delta_a[:, 4:].reshape(-1, 4)
    fg_delta_b = delta_b[:, 4:].reshape(-1, 4)

    tiled_shape = (rcnn_rois.shapeof()[0], config.num_classes - 1, 4)
    anchors = F.add_axis(rcnn_rois[:, 1:5], 1)
    anchors = anchors.broadcast(tiled_shape).reshape(-1, 4)

    boxes_a = restore_bbox(anchors, fg_delta_a, True)
    boxes_b = restore_bbox(anchors, fg_delta_b, True)
    rows_a = F.concat([boxes_a, scores_a], axis=1)
    rows_b = F.concat([boxes_b, scores_b], axis=1)
    # Interleave: [{head0, pre1, tag1}, {head1, pre1, tag1}, {head0, pre1, tag2}, ...]
    return F.concat((rows_a, rows_b), axis=1).reshape(-1, 5)
def train_generator_batch(image, label, *, opt, netG, netloss):
    """Run one training step of the recurrent generator on a clip.

    Both the input clip and the label clip are split into a smooth part
    ("S": the clip down-sampled by 4 and resized back) and the residual
    ("D": clip minus its smooth part). The generator is then unrolled over
    the T frames, carrying its three hidden states from frame to frame.

    Fix over the previous version: for frames ``t >= 1`` the generator's
    ``img_S`` / ``img_D`` outputs were appended to the wrong lists
    (``HR_D`` received ``img_S`` and vice versa), so only frame 0 was
    compared against the matching ``label_D`` / ``label_S``. All frames now
    use the same pairing as frame 0.

    Args:
        image: low-resolution clip, shape (B, T, C, H, W). At least two
            frames are required because frame 0 borrows frame 1 as its
            neighbour.
        label: target clip with 4x the spatial size, (B, T, C, 4H, 4W).
        opt: optimizer; ``opt.backward(loss)`` is called.
        netG: generator; switched to train mode.
        netloss: callable
            ``netloss(HR_G, HR_D, HR_S, label, label_D, label_S)``.

    Returns:
        The loss returned by ``netloss``.
    """
    netG.train()
    B, T, _, H, W = image.shape

    def _split_bands(clip, height, width):
        # Smooth part = down-sample by 4 then resize back; residual = rest.
        flat = clip.reshape((B * T, -1, height, width))
        flat = F.interpolate(flat, scale_factor=[0.25, 0.25])
        flat = F.interpolate(flat, size=[height, width])
        smooth = flat.reshape((B, T, -1, height, width))
        return smooth, clip - smooth

    image_S, image_D = _split_bands(image, H, W)
    label_S, label_D = _split_bands(label, 4 * H, 4 * W)

    HR_G, HR_D, HR_S = [], [], []

    # Recurrent states start from zeros.
    pre_S_hat = mge.tensor(
        np.zeros((B, hidden_channels, H, W), dtype=np.float32))
    pre_D_hat = F.zeros_like(pre_S_hat)
    pre_SD = F.zeros_like(pre_S_hat)

    for t in range(T):
        # Frame 0 has no predecessor, so frame 1 stands in as its neighbour.
        ref = 1 if t == 0 else t - 1
        imgHR, pre_SD, pre_S_hat, pre_D_hat, img_S, img_D = netG(
            image[:, t, ...], image_S[:, t, ...], image_D[:, t, ...],
            image_S[:, ref, ...], image_D[:, ref, ...],
            pre_S_hat, pre_D_hat, pre_SD)
        HR_G.append(F.add_axis(imgHR, axis=1))
        HR_D.append(F.add_axis(img_D, axis=1))
        HR_S.append(F.add_axis(img_S, axis=1))

    # Stack along the time axis -> [B, T, C, H, W]
    HR_G = F.concat(HR_G, axis=1)
    HR_D = F.concat(HR_D, axis=1)
    HR_S = F.concat(HR_S, axis=1)

    loss = netloss(HR_G, HR_D, HR_S, label, label_D, label_S)
    opt.backward(loss)
    if dist.is_distributed():
        # do all reduce mean
        pass
    return loss
def forward(
    self,
    input_ids,
    token_type_ids=None,
    attention_mask=None,
    output_all_encoded_layers=True,
):
    """Encode a batch of token ids.

    Args:
        input_ids: token ids.
        token_type_ids: optional segment ids; defaults to all zeros.
        attention_mask: optional mask, 1 for positions to attend to and 0
            for masked positions; defaults to all ones.
        output_all_encoded_layers: when False only the last encoder layer
            is returned instead of the full list.

    Returns:
        ``(encoded_layers, pooled_output)`` where ``pooled_output`` is the
        pooler applied to the last encoder layer.
    """
    if attention_mask is None:
        attention_mask = ones_like(input_ids)
    if token_type_ids is None:
        token_type_ids = zeros_like(input_ids)

    # Turn the 2D mask into [batch_size, 1, 1, to_seq_length] so it
    # broadcasts to [batch_size, num_heads, from_seq_length, to_seq_length].
    # No causal (triangular) masking is involved; only the broadcast
    # dimensions need preparing.
    additive_mask = F.add_axis(attention_mask, (1, 2))

    # Match the parameter dtype (fp16 compatibility), then map 1 -> 0.0 and
    # 0 -> -10000.0. Added to the raw scores before the softmax, this is
    # effectively the same as removing the masked positions.
    param_dtype = next(self.parameters()).dtype
    additive_mask = additive_mask.astype(param_dtype)
    additive_mask = (1.0 - additive_mask) * -10000.0

    embedded = self.embeddings(input_ids, token_type_ids)
    encoded_layers = self.encoder(
        embedded,
        additive_mask,
        output_all_encoded_layers=output_all_encoded_layers,
    )
    last_layer = encoded_layers[-1]
    pooled_output = self.pooler(last_layer)
    if not output_all_encoded_layers:
        encoded_layers = last_layer
    return encoded_layers, pooled_output
def forward(self, now_LR, pre_h_SD):
    """Gate the previous hidden state with per-pixel predicted kernels.

    A K*K kernel is predicted for every pixel from ``now_LR`` and applied
    to ``pre_h_SD`` with ``F.local_conv2d``, one sample at a time. The
    sigmoid of the result multiplies ``pre_h_SD``.

    Args:
        now_LR: B,3,H,W (per the original notes)
        pre_h_SD: B,48,H,W (per the original notes)

    Returns:
        ``pre_h_SD * sigmoid(filtered)``, same shape as ``pre_h_SD``.
    """
    batch, C, H, W = pre_h_SD.shape
    kernels = self.conv(now_LR)  # [B, k*k, H, W]
    size = self.K

    def _filter_sample(idx):
        weight = F.dimshuffle(kernels[idx], (1, 2, 0))  # [H, W, k*k]
        weight = F.reshape(weight, (H, W, 1, size, size, 1))
        # The same spatial kernel is shared by all C channels.
        weight = F.broadcast_to(weight, (C, H, W, 1, size, size, 1))
        sample = F.add_axis(pre_h_SD[idx], 0)
        # [1, C, H, W]; original author's note: "some bug with padding"
        return F.local_conv2d(sample, weight, [1, 1], [1, 1], [1, 1])

    filtered = F.concat(
        [_filter_sample(idx) for idx in range(batch)], axis=0)  # [B, C, H, W]
    gate = F.sigmoid(filtered)
    return F.multiply(pre_h_SD, gate)
def forward(self, now_LR, pre_h_SD):
    """Gate the previous hidden state with per-pixel predicted kernels.

    A K*K kernel is predicted for every pixel from ``now_LR``. The padded
    hidden state is shifted to each of the K*K offsets, weighted by the
    matching kernel tap and accumulated. The sigmoid of the sum multiplies
    the (unpadded) hidden state.

    Args:
        now_LR: B,3,H,W (per the original notes)
        pre_h_SD: B,64,H,W (per the original notes)

    Returns:
        ``pre_h_SD * sigmoid(accumulated)``, same shape as ``pre_h_SD``.
    """
    size = self.K
    pad = size // 2
    batch, C, H, W = pre_h_SD.shape
    taps = self.conv(now_LR)  # [B, k*k, H, W]

    accum = F.zeros_like(pre_h_SD)
    # Pad the hidden state so every shifted window stays in bounds.
    padded = add_H_W_Padding(pre_h_SD, margin=pad)

    for tap_idx in range(size * size):
        dy, dx = divmod(tap_idx, size)
        weight = F.add_axis(taps[:, tap_idx, :, :], axis=1)  # [B, 1, H, W]
        weight = F.broadcast_to(weight, [batch, C, H, W])
        # Element-wise product with the window shifted by (dy, dx).
        window = padded[:, :, dy:(H + dy), dx:(W + dx)]
        accum = accum + weight * window  # [B, C, H, W]

    gate = F.sigmoid(accum)
    centre = padded[:, :, pad:(H + pad), pad:(W + pad)]
    return F.multiply(centre, gate)
def forward(self, x):
    """GoogLeNet forward pass.

    Change over the previous version: two leftover ``print(x.shape)`` debug
    statements, which wrote to stdout on every call, were removed.

    Args:
        x: input batch, channels on axis 1. The shape comments below are
            the original author's and assume a 3 x 224 x 224 input.

    Returns:
        The class logits. When the module is in training mode and
        ``self.aux_logits`` is set, returns
        ``_GoogLeNetOuputs(x, aux2, aux1)`` with the two auxiliary
        classifier outputs instead.
    """
    if self.transform_input:
        # Re-normalise each channel as x * (a / 0.5) + (b - 0.5) / 0.5
        # with per-channel constants (a, b).
        channel_consts = ((0.229, 0.485), (0.224, 0.456), (0.225, 0.406))
        rescaled = [
            F.add_axis(x[:, idx], 1) * (scale / 0.5) + (shift - 0.5) / 0.5
            for idx, (scale, shift) in enumerate(channel_consts)
        ]
        x = F.concat(rescaled, 1)

    use_aux = self.training and self.aux_logits

    # N x 3 x 224 x 224
    x = self.conv1(x)
    # N x 64 x 112 x 112
    x = self.maxpool1(x)
    # N x 64 x 56 x 56
    x = self.conv2(x)
    # N x 64 x 56 x 56
    x = self.conv3(x)
    # N x 192 x 56 x 56
    x = self.maxpool2(x)

    # N x 192 x 28 x 28
    x = self.inception3a(x)
    # N x 256 x 28 x 28
    x = self.inception3b(x)
    # N x 480 x 28 x 28
    x = self.maxpool3(x)
    # N x 480 x 14 x 14
    x = self.inception4a(x)
    # N x 512 x 14 x 14
    if use_aux:
        aux1 = self.aux1(x)

    x = self.inception4b(x)
    # N x 512 x 14 x 14
    x = self.inception4c(x)
    # N x 512 x 14 x 14
    x = self.inception4d(x)
    # N x 528 x 14 x 14
    if use_aux:
        aux2 = self.aux2(x)

    x = self.inception4e(x)
    # N x 832 x 14 x 14
    x = self.maxpool4(x)
    # N x 832 x 7 x 7
    x = self.inception5a(x)
    # N x 832 x 7 x 7
    x = self.inception5b(x)
    # N x 1024 x 7 x 7

    x = self.avgpool(x)
    # N x 1024 x 1 x 1
    x = x.reshape(x.shape[0], -1)
    # N x 1024
    x = self.dropout(x)
    x = self.fc(x)
    # N x 1000 (num_classes)
    if use_aux:
        return _GoogLeNetOuputs(x, aux2, aux1)
    return x
def train_generator_batch(image, label, *, opt, netG, netloss):
    """Run one training step of the recurrent generator on a clip.

    Every frame is reconstructed from a 5-frame window centred on it.
    Near the clip borders the missing neighbours are filled by mirroring
    around the border frame (e.g. frame 0 uses frames 2, 1, 0, 1, 2).
    The hidden state returned by the generator is fed to the next frame.

    Args:
        image: low-resolution clip, shape (B, T, C, H, W).
        label: target clip, handed to ``netloss`` unchanged.
        opt: optimizer; ``opt.backward(loss)`` is called.
        netG: generator ``netG(window, hidden) -> (frame, hidden)``;
            switched to train mode.
        netloss: callable ``netloss(HR_G, label)``.

    Returns:
        The loss returned by ``netloss``.
    """
    netG.train()
    B, T, _, H, W = image.shape

    def _frame(idx):
        # A single frame with its time axis kept: (B, 1, C, H, W)
        return F.add_axis(image[:, idx, ...], axis=1)

    def _windows():
        # frame 0: 2, 1, | 0, 1, 2
        yield F.concat([_frame(2), _frame(1), image[:, 0:3, ...]], axis=1)
        # frame 1: 1, | 0, 1, 2, 3
        yield F.concat([_frame(1), image[:, 0:4, ...]], axis=1)
        # interior frames need no padding
        for t in range(2, T - 2):
            yield image[:, t - 2:t + 3, ...]
        # frame T-2: T-4 .. T-1, | T-2
        yield F.concat([image[:, T - 4:T, ...], _frame(-2)], axis=1)
        # frame T-1: T-3 .. T-1, | T-2, T-3
        yield F.concat(
            [image[:, T - 3:T, ...], _frame(-2), _frame(-3)], axis=1)

    # The hidden state starts from zeros.
    pre_SD = mge.tensor(np.zeros((B, hidden_channels, H, W), dtype=np.float32))
    HR_G = []
    for LR in _windows():
        imgHR, pre_SD = netG(LR, pre_SD)
        HR_G.append(F.add_axis(imgHR, axis=1))

    HR_G = F.concat(HR_G, axis=1)  # [B, T, C, H, W]
    loss = netloss(HR_G, label)
    opt.backward(loss)
    if dist.is_distributed():
        # do all reduce mean
        pass
    return loss
def cascade_roi_target(rpn_rois, im_info, gt_boxes, pos_threshold=0.5, top_k=1):
    """Assign labels and box-regression targets to proposals, image by image.

    For every image index in ``range(config.batch_per_gpu)`` the image's
    ground-truth boxes are appended to its proposals, each ROI is matched to
    its ``top_k`` best-overlapping ground-truth boxes, and the matched
    label / regression target is produced. No ROI sampling is performed
    (the sampling code is disabled below), so every ROI is returned.

    Args:
        rpn_rois: proposals; column 0 is compared against the image index
            and columns 1:5 are used as box coordinates.
        im_info: per-image info; ``im_info[bid, 5]`` is used as the number
            of ground-truth boxes of image ``bid``.
        gt_boxes: ground-truth boxes indexed ``[bid, box, column]``;
            columns 0:4 are coordinates and column 4 is the label.
        pos_threshold: NOTE(review): never read in this function; the
            foreground test uses ``config.fg_threshold`` instead -- confirm
            whether per-stage thresholds were intended.
        top_k: number of best matches kept per ROI.

    Returns:
        ``(rois, labels, bbox_targets)``, each wrapped in ``F.zero_grad``.
        ``labels`` has ``top_k`` columns and ``bbox_targets`` has
        ``top_k * 4`` columns. With ``config.batch_per_gpu == 1`` the
        tensors of the single image are returned directly; otherwise the
        per-image results are concatenated along axis 0.
    """
    return_rois = []
    return_labels = []
    return_bbox_targets = []
    # get per image proposals and gt_boxes
    for bid in range(config.batch_per_gpu):
        # Keep only the valid ground-truth boxes of this image.
        gt_boxes_perimg = gt_boxes[bid, :im_info[bid, 5], :]
        batch_inds = mge.ones((gt_boxes_perimg.shapeof()[0], 1)) * bid
        # Ground-truth boxes are always appended as extra ROIs
        # (the original config switch is disabled):
        #if config.proposal_append_gt:
        gt_rois = F.concat([batch_inds, gt_boxes_perimg[:, :4]], axis=1)
        batch_roi_mask = rpn_rois[:, 0] == bid
        batch_roi_inds = mask_to_inds(batch_roi_mask)
        all_rois = F.concat([rpn_rois.ai[batch_roi_inds], gt_rois], axis=0)
        overlaps_normal, overlaps_ignore = box_overlap_ignore_opr(
            all_rois[:, 1:5], gt_boxes_perimg)
        # Sort each ROI's overlaps in descending order so that the first
        # top_k columns hold its best matches.
        overlaps_normal, overlaps_normal_indices = F.argsort(overlaps_normal, descending=True)
        overlaps_ignore, overlaps_ignore_indices = F.argsort(overlaps_ignore, descending=True)
        # gt max and indices, ignore max and indices
        max_overlaps_normal = overlaps_normal[:, :top_k].reshape(-1)
        gt_assignment_normal = overlaps_normal_indices[:, :top_k].reshape(-1)
        max_overlaps_ignore = overlaps_ignore[:, :top_k].reshape(-1)
        gt_assignment_ignore = overlaps_ignore_indices[:, :top_k].reshape(-1)
        # cons masks
        # Use the "ignore" match when the normal match is below the
        # foreground threshold and the ignore overlap is larger.
        ignore_assign_mask = (max_overlaps_normal < config.fg_threshold) * (
            max_overlaps_ignore > max_overlaps_normal)
        max_overlaps = max_overlaps_normal * (1 - ignore_assign_mask) + \
            max_overlaps_ignore * ignore_assign_mask
        gt_assignment = gt_assignment_normal * (1- ignore_assign_mask) + \
            gt_assignment_ignore * ignore_assign_mask
        gt_assignment = gt_assignment.astype(np.int32)
        labels = gt_boxes_perimg.ai[gt_assignment, 4]
        # Foreground: enough overlap and not matched to an ignore label.
        fg_mask = (max_overlaps >= config.fg_threshold) * (1 - F.equal(labels, config.ignore_label))
        # NOTE(review): bg_mask is only consumed by the disabled sampling
        # code below, so it currently has no effect on the result.
        bg_mask = (max_overlaps < config.bg_threshold_high) * (
            max_overlaps >= config.bg_threshold_low)
        fg_mask = fg_mask.reshape(-1, top_k)
        bg_mask = bg_mask.reshape(-1, top_k)
        # Disabled foreground/background sampling:
        #pos_max = config.num_rois * config.fg_ratio
        #fg_inds_mask = _bernoulli_sample_masks(fg_mask[:, 0], pos_max, 1)
        #neg_max = config.num_rois - fg_inds_mask.sum()
        #bg_inds_mask = _bernoulli_sample_masks(bg_mask[:, 0], neg_max, 1)
        # Everything that is not foreground gets label 0.
        labels = labels * fg_mask.reshape(-1)
        #keep_mask = fg_inds_mask + bg_inds_mask
        #keep_inds = mask_to_inds(keep_mask)
        #keep_inds = keep_inds[:F.minimum(config.num_rois, keep_inds.shapeof()[0])]
        # labels
        labels = labels.reshape(-1, top_k)
        gt_assignment = gt_assignment.reshape(-1, top_k).reshape(-1)
        target_boxes = gt_boxes_perimg.ai[gt_assignment, :4]
        #rois = all_rois.ai[keep_inds]
        # Repeat every ROI top_k times so it lines up with its matches.
        target_shape = (all_rois.shapeof()[0], top_k, all_rois.shapeof()[-1])
        target_rois = F.add_axis(all_rois, 1).broadcast(target_shape).reshape(-1, all_rois.shapeof()[-1])
        bbox_targets = bbox_transform_opr(target_rois[:, 1:5], target_boxes)
        if config.rcnn_bbox_normalize_targets:
            # Equivalent to (bbox_targets - mean) / std.
            std_opr = mge.tensor(config.bbox_normalize_stds[None, :])
            mean_opr = mge.tensor(config.bbox_normalize_means[None, :])
            minus_opr = mean_opr / std_opr
            bbox_targets = bbox_targets / std_opr - minus_opr
        bbox_targets = bbox_targets.reshape(-1, top_k * 4)
        return_rois.append(all_rois)
        return_labels.append(labels)
        return_bbox_targets.append(bbox_targets)
    if config.batch_per_gpu == 1:
        # Single image: the loop variables still hold its results.
        return F.zero_grad(all_rois), F.zero_grad(labels), F.zero_grad(bbox_targets)
    else:
        return_rois = F.concat(return_rois, axis=0)
        return_labels = F.concat(return_labels, axis=0)
        return_bbox_targets = F.concat(return_bbox_targets, axis=0)
        return F.zero_grad(return_rois), F.zero_grad(return_labels), F.zero_grad(return_bbox_targets)