def forward(self, It, S, D, pre_S, pre_D,
            pre_S_hat=None, pre_D_hat=None, pre_SD=None):
    """Run one recurrent step for the current frame.

    Args:
        It: current frame; only its shape ``(B, _, H, W)`` is read here.
        S, D: the two input components of the current frame.
        pre_S, pre_D: the same two components of the reference frame.
        pre_S_hat, pre_D_hat, pre_SD: recurrent state from the previous
            step. When ``pre_S_hat`` is ``None`` the other two must be
            ``None`` as well, and all three start as zero tensors of shape
            ``(B, self.hidden_channels, H, W)``.

    Returns:
        A 6-tuple ``(hr, pre_SD, S_hat, D_hat, s_out, d_out)``: the output
        of ``trans_HR``, the new ``pre_SD`` state, the two branch features
        after ``convS`` / ``convD``, and ``trans_S`` / ``trans_D`` applied
        to those features.
    """
    batch, _, height, width = It.shape

    if pre_S_hat is None:
        assert pre_D_hat is None and pre_SD is None
        zeros = np.zeros(
            (batch, self.hidden_channels, height, width), dtype=np.float32)
        pre_S_hat = megengine.tensor(zeros)
        pre_D_hat = F.zeros_like(pre_S_hat)
        pre_SD = F.zeros_like(pre_S_hat)

    # NOTE: gating pre_SD through self.hsa(It, pre_SD) is disabled here.
    s_feat = self.pre_SD_S(
        F.concat([pre_S, S, pre_S_hat, pre_SD], axis=1))
    d_feat = self.pre_SD_D(
        F.concat([pre_D, D, pre_D_hat, pre_SD], axis=1))

    # Both branches pass jointly through the first `blocknums` SD blocks.
    for idx in range(self.blocknums):
        s_feat, d_feat = self.SDBlocks[idx](s_feat, d_feat)

    # The shared state is taken before the per-branch output convolutions.
    next_SD = self.conv_SD(s_feat + d_feat)
    s_feat = self.convS(s_feat)
    d_feat = self.convD(d_feat)
    fused = self.convHR(F.concat([s_feat, d_feat], axis=1))

    return (
        self.trans_HR(fused),
        next_SD,
        s_feat,
        d_feat,
        self.trans_S(s_feat),
        self.trans_D(d_feat),
    )
def train_generator_batch(image, label, *, opt, netG, netloss):
    """Run one forward/backward pass of the generator on a clip.

    Each frame is split into a smooth component ``S`` (the frame
    down-sampled by 4 and re-sampled to full size) and a residual
    ``D = frame - S``. The frames are fed to ``netG`` one at a time with
    recurrent state, and the per-frame outputs are stacked along the time
    axis before the loss is computed.

    Args:
        image: low-resolution clip of shape ``(B, T, C, H, W)``.
        label: target clip of shape ``(B, T, C, 4*H, 4*W)``.
        opt: optimizer; only ``opt.backward(loss)`` is called here, no
            parameter update is performed in this function.
        netG: generator, called once per frame.
        netloss: callable taking
            ``(HR_G, HR_D, HR_S, label, label_D, label_S)``.

    Returns:
        The loss tensor returned by ``netloss``.
    """
    netG.train()
    B, T, _, H, W = image.shape

    def low_pass(frames, height, width):
        # Down-sample by 4 and re-sample back: keeps the smooth content only.
        flat = frames.reshape((B * T, -1, height, width))
        flat = F.interpolate(flat, scale_factor=[0.25, 0.25])
        flat = F.interpolate(flat, size=[height, width])
        return flat.reshape((B, T, -1, height, width))

    image_S = low_pass(image, H, W)
    image_D = image - image_S
    label_S = low_pass(label, 4 * H, 4 * W)
    label_D = label - label_S

    HR_G = []
    HR_D = []
    HR_S = []

    # Recurrent state starts from zeros.
    pre_S_hat = mge.tensor(
        np.zeros((B, hidden_channels, H, W), dtype=np.float32))
    pre_D_hat = F.zeros_like(pre_S_hat)
    pre_SD = F.zeros_like(pre_S_hat)

    for t in range(T):
        # Frame 0 has no predecessor, so frame 1 is used as its reference.
        ref = t - 1 if t > 0 else 1
        imgHR, pre_SD, pre_S_hat, pre_D_hat, img_S, img_D = netG(
            image[:, t, ...],
            image_S[:, t, ...],
            image_D[:, t, ...],
            image_S[:, ref, ...],
            image_D[:, ref, ...],
            pre_S_hat, pre_D_hat, pre_SD)
        # netloss pairs HR_D with label_D and HR_S with label_S, so every
        # frame must route the D output to HR_D and the S output to HR_S.
        HR_G.append(F.add_axis(imgHR, axis=1))
        HR_D.append(F.add_axis(img_D, axis=1))
        HR_S.append(F.add_axis(img_S, axis=1))

    # Stack per-frame outputs along the time axis: [B, T, C, H, W].
    HR_G = F.concat(HR_G, axis=1)
    HR_D = F.concat(HR_D, axis=1)
    HR_S = F.concat(HR_S, axis=1)

    loss = netloss(HR_G, HR_D, HR_S, label, label_D, label_S)
    opt.backward(loss)
    if dist.is_distributed():
        # TODO: all-reduce mean of the loss is not implemented yet.
        pass
    return loss
def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45):
    """Filter raw detections by confidence and apply NMS per image.

    Args:
        prediction: tensor indexed as ``[image, candidate, field]``.
            Fields 0-3 are read as a centre/size box, field 4 as an
            objectness score, and fields ``5 : 5 + num_classes`` as
            per-class scores. The first four fields are overwritten in
            place with corner coordinates.
        num_classes: number of class-score fields.
        conf_thre: minimum ``objectness * best class score`` to keep.
        nms_thre: threshold passed to ``F.vision.nms``.

    Returns:
        A list with one entry per image: ``None`` when nothing survives,
        otherwise a tensor whose columns are
        ``(x1, y1, x2, y2, objectness, class_conf, class_pred)``.
    """
    # Convert (cx, cy, w, h) to (x1, y1, x2, y2) in a scratch tensor first,
    # then write the result back into `prediction`.
    corners = F.zeros_like(prediction)
    center_x = prediction[:, :, 0]
    center_y = prediction[:, :, 1]
    half_w = prediction[:, :, 2] / 2
    half_h = prediction[:, :, 3] / 2
    corners[:, :, 0] = center_x - half_w
    corners[:, :, 1] = center_y - half_h
    corners[:, :, 2] = center_x + half_w
    corners[:, :, 3] = center_y + half_h
    prediction[:, :, :4] = corners[:, :, :4]

    output = [None] * len(prediction)
    for i, image_pred in enumerate(prediction):
        # Nothing to process for this image.
        if not image_pred.shape[0]:
            continue

        # Best class score and its index for every candidate.
        class_scores = image_pred[:, 5 : 5 + num_classes]
        class_conf = F.max(class_scores, 1, keepdims=True)
        class_pred = F.argmax(class_scores, 1, keepdims=True)

        keep = image_pred[:, 4] * F.squeeze(class_conf) >= conf_thre
        detections = F.concat((image_pred[:, :5], class_conf, class_pred), 1)
        detections = detections[keep]
        if not detections.shape[0]:
            continue

        nms_keep = F.vision.nms(
            detections[:, :4],
            detections[:, 4] * detections[:, 5],
            nms_thre,
        )
        detections = detections[nms_keep]

        output[i] = (
            detections
            if output[i] is None
            else F.concat((output[i], detections))
        )

    return output
def jvp(inp, expr):
    """Evaluate ``expr(inp)`` and differentiate through its backward pass.

    Two nested ``GradManager`` instances are used. The inner one
    back-propagates ``oup`` into ``inp.grad`` using ``oup_grad`` (all
    zeros) as the incoming gradient. ``oup_grad`` is attached to the outer
    manager, so the outer ``backward`` on ``inp.grad`` yields a gradient
    with respect to ``oup_grad``.

    NOTE(review): this looks like the double-backward construction of a
    Jacobian-vector product, as the name suggests; confirm the implied
    tangent against the default ``dy`` of ``GradManager.backward``.

    Args:
        inp: input tensor to differentiate with respect to.
        expr: callable applied to ``inp``.

    Returns:
        Tuple ``(oup, oup_grad.grad)``.
    """
    with GradManager() as outer_gm:
        with GradManager().attach([inp]) as inner_gm:
            oup = expr(inp)
            oup_grad = F.zeros_like(oup)
            # Attach before the inner backward so that pass is recorded.
            outer_gm.attach(oup_grad)
            inner_gm.backward(oup, oup_grad)
        outer_gm.backward(inp.grad)
    return oup, oup_grad.grad
def forward(self, input_ids, token_type_ids=None):
    """Sum word, position and token-type embeddings, then normalise.

    Args:
        input_ids: token ids; axis 1 is taken as the sequence axis.
        token_type_ids: optional tensor shaped like ``input_ids``;
            defaults to all zeros.

    Returns:
        The summed embeddings after ``LayerNorm`` and ``dropout``.
    """
    seq_length = input_ids.shape[1]
    if token_type_ids is None:
        token_type_ids = F.zeros_like(input_ids)

    # Positions 0 .. seq_length - 1, replicated to the shape of input_ids.
    positions = F.linspace(0, seq_length - 1, seq_length).astype(np.int32)
    positions = F.broadcast_to(F.expand_dims(positions, 0), input_ids.shape)

    summed = (
        self.word_embeddings(input_ids)
        + self.position_embeddings(positions)
        + self.token_type_embeddings(token_type_ids)
    )
    return self.dropout(self.LayerNorm(summed))
def forward(
    self,
    input_ids,
    token_type_ids=None,
    attention_mask=None,
    output_all_encoded_layers=True,
):
    """Embed the input, run the encoder and pool the last layer.

    Args:
        input_ids: token ids.
        token_type_ids: optional, defaults to zeros shaped like
            ``input_ids``.
        attention_mask: optional 2D mask, 1 for positions to attend to and
            0 for masked positions; defaults to all ones.
        output_all_encoded_layers: when false, only the last encoder
            output is returned in place of the full list.

    Returns:
        Tuple ``(encoded_layers, pooled_output)``.
    """
    if attention_mask is None:
        attention_mask = F.ones_like(input_ids)
    if token_type_ids is None:
        token_type_ids = F.zeros_like(input_ids)

    # Expand the 2D mask to [batch_size, 1, 1, to_seq_length] so that it
    # broadcasts over heads and query positions. Cast it to the parameter
    # dtype (fp16 compatibility), then turn it into an additive bias:
    # 0.0 where attention is allowed, -10000.0 where it is masked. Added to
    # the raw scores before the softmax, this effectively removes the
    # masked positions.
    param_dtype = next(self.parameters()).dtype
    additive_mask = F.expand_dims(attention_mask, (1, 2)).astype(param_dtype)
    additive_mask = (1.0 - additive_mask) * -10000.0

    embedded = self.embeddings(input_ids, token_type_ids)
    encoded_layers = self.encoder(
        embedded,
        additive_mask,
        output_all_encoded_layers=output_all_encoded_layers,
    )
    pooled_output = self.pooler(encoded_layers[-1])

    if output_all_encoded_layers:
        return encoded_layers, pooled_output
    return encoded_layers[-1], pooled_output
def sample_labels(labels, num_samples, label_value, ignore_label=-1):
    """Randomly keep at most ``num_samples`` entries equal to ``label_value``.

    When more than ``num_samples`` entries carry ``label_value``, the
    surplus ones are picked at random and overwritten with
    ``ignore_label``. ``labels`` is modified in place.

    Args:
        labels(Tensor): shape of label is (N,)
        num_samples(int): maximum number of entries to keep.
        label_value(int): the label being sub-sampled.
        ignore_label(int): value written into the dropped entries.

    Returns:
        label(Tensor): label after sampling
    """
    assert labels.ndim == 1, "Only tensor of dim 1 is supported."

    selected = labels == label_value
    num_selected = selected.sum()
    if num_selected <= num_samples:
        return labels

    # Random score on matching entries, zero everywhere else.
    scores = F.zeros_like(labels).astype("float32")
    scores[selected] = uniform(size=num_selected)

    # NOTE(review): `surplus` is negative on this path; this relies on
    # F.topk treating a negative k as "the |k| largest" -- confirm against
    # the MegEngine version in use.
    surplus = num_samples - num_selected
    _, drop_inds = F.topk(scores, k=surplus)

    labels[drop_inds] = ignore_label
    return labels
def forward(self, now_LR, pre_h_SD):
    """Gate ``pre_h_SD`` with a spatially varying K x K weighting.

    now_LR: B,3,H,W
    pre_h_SD: B,64,H,W

    ``self.conv(now_LR)`` predicts one weight map per position of the
    K x K window ([B, k*k, H, W]). Each map multiplies the matching shifted
    view of the padded ``pre_h_SD``. The sum over the window goes through a
    sigmoid and is multiplied with the centre crop of the padded tensor.
    """
    pad = self.K // 2
    batch, C, H, W = pre_h_SD.shape
    kernels = self.conv(now_LR)  # [B, k*k, H, W]

    similarity = F.zeros_like(pre_h_SD)
    # Pad pre_h_SD so every window offset has a full H x W view.
    padded = add_H_W_Padding(pre_h_SD, margin=pad)

    for idx in range(self.K * self.K):
        row, col = divmod(idx, self.K)
        weight = F.add_axis(kernels[:, idx, :, :], axis=1)  # [B, 1, H, W]
        weight = F.broadcast_to(weight, [batch, C, H, W])
        # Element-wise product with the shifted view, accumulated over
        # the window.
        shifted = padded[:, :, row:(H + row), col:(W + col)]
        similarity = similarity + weight * shifted  # [B, C, H, W]

    gate = F.sigmoid(similarity)
    return F.multiply(padded[:, :, pad:(H + pad), pad:(W + pad)], gate)
def forward(self, image, im_info, gt_boxes=None):
    """Compute training losses or single-image predictions.

    Args:
        image: input batch, passed through ``self.preprocess_image``.
        im_info: per-image metadata. Column 4 is forwarded to
            ``get_ground_truth`` as int32; columns 0-3 are used for
            rescaling and clipping at inference time.
            NOTE(review): presumably (resized_h, resized_w, original_h,
            original_w, num_gt_boxes) -- confirm against the data loader.
        gt_boxes: ground-truth boxes, only used when ``self.training``.

    Returns:
        In training mode, a dict with keys ``total_loss``, ``loss_cls``,
        ``loss_bbox`` and ``loss_ctr``. Otherwise a tuple
        ``(pred_score, clipped_boxes)`` for the single input image.
    """
    image = self.preprocess_image(image)
    features = self.backbone(image)
    features = [features[f] for f in self.in_features]

    box_logits, box_offsets, box_ctrness = self.head(features)
    batch_size = image.shape[0]

    def merge_levels(level_maps, last_dim):
        # Move axis 1 last, flatten the positions of each level, then join
        # all levels along axis 1 -> (batch, total_positions, last_dim).
        per_level = [
            m.transpose(0, 2, 3, 1).reshape(batch_size, -1, last_dim)
            for m in level_maps
        ]
        return F.concat(per_level, axis=1)

    all_level_box_logits = merge_levels(box_logits, self.cfg.num_classes)
    all_level_box_offsets = merge_levels(box_offsets, 4)
    all_level_box_ctrness = merge_levels(box_ctrness, 1)
    anchors_list = self.anchor_generator(features)

    if not self.training:
        # currently not support multi-batch testing
        assert image.shape[0] == 1

        all_level_anchors = F.concat(anchors_list, axis=0)
        pred_boxes = self.point_coder.decode(
            all_level_anchors, all_level_box_offsets[0])
        pred_boxes = pred_boxes.reshape(-1, 4)

        # Divide the boxes by the per-axis ratios taken from im_info.
        scale_w = im_info[0, 1] / im_info[0, 3]
        scale_h = im_info[0, 0] / im_info[0, 2]
        scales = F.concat([scale_w, scale_h, scale_w, scale_h], axis=0)
        pred_boxes = pred_boxes / scales
        clipped_boxes = layers.get_clipped_boxes(
            pred_boxes, im_info[0, 2:4]).reshape(-1, 4)

        # Final score: geometric mean of class and centerness sigmoids.
        pred_score = F.sqrt(
            F.sigmoid(all_level_box_logits)
            * F.sigmoid(all_level_box_ctrness)
        )[0]
        return pred_score, clipped_boxes

    gt_labels, gt_offsets, gt_ctrness = self.get_ground_truth(
        anchors_list,
        gt_boxes,
        im_info[:, 4].astype(np.int32),
    )

    # Collapse the batch and position axes for the loss computation.
    flat_logits = all_level_box_logits.reshape(-1, self.cfg.num_classes)
    flat_offsets = all_level_box_offsets.reshape(-1, 4)
    flat_ctrness = all_level_box_ctrness.flatten()
    gt_labels = gt_labels.flatten()
    gt_offsets = gt_offsets.reshape(-1, 4)
    gt_ctrness = gt_ctrness.flatten()

    # Negative labels are excluded from the classification loss; labels
    # greater than zero are foreground.
    valid_mask = gt_labels >= 0
    fg_mask = gt_labels > 0
    fg_ctrness = gt_ctrness[fg_mask]

    # add detach() to avoid syncing across ranks in backward
    num_fg = layers.all_reduce_mean(fg_mask.sum()).detach()
    sum_ctr = layers.all_reduce_mean(fg_ctrness.sum()).detach()

    # One-hot targets: foreground label k marks column k - 1.
    gt_targets = F.zeros_like(flat_logits)
    gt_targets[fg_mask, gt_labels[fg_mask] - 1] = 1

    cls_terms = layers.sigmoid_focal_loss(
        flat_logits[valid_mask],
        gt_targets[valid_mask],
        alpha=self.cfg.focal_loss_alpha,
        gamma=self.cfg.focal_loss_gamma,
    )
    loss_cls = cls_terms.sum() / F.maximum(num_fg, 1)

    # Box loss is weighted by the target centerness and normalised by its
    # sum over the foreground.
    iou_terms = layers.iou_loss(
        flat_offsets[fg_mask],
        gt_offsets[fg_mask],
        box_mode="ltrb",
        loss_type=self.cfg.iou_loss_type,
    )
    loss_bbox = (
        (iou_terms * fg_ctrness).sum()
        / F.maximum(sum_ctr, 1e-5)
        * self.cfg.loss_bbox_weight
    )

    ctr_terms = layers.binary_cross_entropy(
        flat_ctrness[fg_mask],
        fg_ctrness,
    )
    loss_ctr = ctr_terms.sum() / F.maximum(num_fg, 1)

    total = loss_cls + loss_bbox + loss_ctr
    loss_dict = {
        "total_loss": total,
        "loss_cls": loss_cls,
        "loss_bbox": loss_bbox,
        "loss_ctr": loss_ctr,
    }
    self.cfg.losses_keys = list(loss_dict.keys())
    return loss_dict
def forward(self, image, im_info, gt_boxes=None):
    """Compute training losses or single-image predictions.

    Args:
        image: input batch, passed through ``self.preprocess_image``.
        im_info: per-image metadata. Column 4 is forwarded to
            ``get_ground_truth`` as int32; columns 0-3 are used for
            rescaling and clipping at inference time.
            NOTE(review): presumably (resized_h, resized_w, original_h,
            original_w, num_gt_boxes) -- confirm against the data loader.
        gt_boxes: ground-truth boxes, only used when ``self.training``.

    Returns:
        In training mode, a dict with keys ``total_loss``, ``loss_cls``
        and ``loss_bbox``. Otherwise a tuple
        ``(pred_score, clipped_boxes)`` for the single input image.
    """
    image = self.preprocess_image(image)
    features = self.backbone(image)
    features = [features[f] for f in self.in_features]

    box_logits, box_offsets = self.head(features)
    batch_size = image.shape[0]

    def merge_levels(level_maps, last_dim):
        # Move axis 1 last, flatten the positions of each level, then join
        # all levels along axis 1 -> (batch, total_positions, last_dim).
        per_level = [
            m.transpose(0, 2, 3, 1).reshape(batch_size, -1, last_dim)
            for m in level_maps
        ]
        return F.concat(per_level, axis=1)

    all_level_box_logits = merge_levels(box_logits, self.cfg.num_classes)
    all_level_box_offsets = merge_levels(box_offsets, 4)
    anchors_list = self.anchor_generator(features)
    all_level_anchors = F.concat(anchors_list, axis=0)

    if not self.training:
        # currently not support multi-batch testing
        assert image.shape[0] == 1

        pred_boxes = self.box_coder.decode(
            all_level_anchors, all_level_box_offsets[0])
        pred_boxes = pred_boxes.reshape(-1, 4)

        # Divide the boxes by the per-axis ratios taken from im_info.
        scale_w = im_info[0, 1] / im_info[0, 3]
        scale_h = im_info[0, 0] / im_info[0, 2]
        scales = F.concat([scale_w, scale_h, scale_w, scale_h], axis=0)
        pred_boxes = pred_boxes / scales
        clipped_boxes = layers.get_clipped_boxes(
            pred_boxes, im_info[0, 2:4]).reshape(-1, 4)

        pred_score = F.sigmoid(all_level_box_logits)[0]
        return pred_score, clipped_boxes

    gt_labels, gt_offsets = self.get_ground_truth(
        all_level_anchors,
        gt_boxes,
        im_info[:, 4].astype(np.int32),
    )

    # Collapse the batch and position axes for the loss computation.
    flat_logits = all_level_box_logits.reshape(-1, self.cfg.num_classes)
    flat_offsets = all_level_box_offsets.reshape(-1, 4)
    gt_labels = gt_labels.flatten()
    gt_offsets = gt_offsets.reshape(-1, 4)

    # Negative labels are excluded from the classification loss; labels
    # greater than zero are foreground.
    valid_mask = gt_labels >= 0
    fg_mask = gt_labels > 0
    num_fg = fg_mask.sum()

    # One-hot targets: foreground label k marks column k - 1.
    gt_targets = F.zeros_like(flat_logits)
    gt_targets[fg_mask, gt_labels[fg_mask] - 1] = 1

    cls_terms = layers.sigmoid_focal_loss(
        flat_logits[valid_mask],
        gt_targets[valid_mask],
        alpha=self.cfg.focal_loss_alpha,
        gamma=self.cfg.focal_loss_gamma,
    )
    loss_cls = cls_terms.sum() / F.maximum(num_fg, 1)

    box_terms = layers.smooth_l1_loss(
        flat_offsets[fg_mask],
        gt_offsets[fg_mask],
        beta=self.cfg.smooth_l1_beta,
    )
    loss_bbox = (
        box_terms.sum() / F.maximum(num_fg, 1) * self.cfg.loss_bbox_weight
    )

    total = loss_cls + loss_bbox
    loss_dict = {
        "total_loss": total,
        "loss_cls": loss_cls,
        "loss_bbox": loss_bbox,
    }
    self.cfg.losses_keys = list(loss_dict.keys())
    return loss_dict