def layernorm(x):
    """Standardize every sample of ``x`` over all of its non-leading elements.

    The input is flattened to ``(x.shape[0], -1)``; each row is shifted by
    its mean and divided by its standard deviation, which is floored at
    1e-6 so constant rows do not divide by zero.

    Args:
        x: tensor whose first axis is kept as the per-sample axis.

    Returns:
        The normalized tensor, reshaped back to the shape of ``x``.
    """
    shape = x.shape
    flat = x.reshape(shape[0], -1)

    mu = F.mean(flat, axis=1, keepdims=True)
    centered = flat - mu
    variance = F.mean(centered**2, axis=1, keepdims=True)

    # Floor the standard deviation rather than adding an epsilon.
    std = F.maximum(F.sqrt(variance), 1e-6)
    return (centered / std).reshape(shape)
def smooth_grad_1st(flo, image, alpha):
    """First-order smoothness penalty on ``flo``, down-weighted at image edges.

    The gradients of ``flo`` are weighted by ``exp(-alpha * mean(|image
    gradient|))``, where the mean is taken over axis 1 of the image
    gradient, so large image gradients reduce the penalty.

    Args:
        flo: tensor whose gradients are penalized.
        image: tensor whose gradients produce the weights.
        alpha: scale applied to the image gradient magnitude inside the
            exponential.

    Returns:
        Scalar tensor: half the mean x-term plus half the mean y-term (each
        term is itself already halved element-wise, as in the original).
    """

    def _edge_weight(image_grad):
        # Mean absolute gradient over axis 1, turned into a weight in (0, 1].
        return F.exp(-F.mean(F.abs(image_grad), 1, keepdims=True) * alpha)

    # `gradient` is a helper defined elsewhere; it is expected to return
    # the pair (d/dx, d/dy) — TODO confirm against its definition.
    image_dx, image_dy = gradient(image)
    wx = _edge_weight(image_dx)
    wy = _edge_weight(image_dy)

    flow_dx, flow_dy = gradient(flo)
    term_x = wx * F.abs(flow_dx) / 2.0
    term_y = wy * F.abs(flow_dy) / 2.0
    return F.mean(term_x) / 2.0 + F.mean(term_y) / 2.0
def forward(self, x):
    """Residual block whose second convolution sees channel-gated features.

    ``conv1`` features are averaged over the two trailing axes, passed
    through ``self.linear`` and broadcast back as a per-channel multiplier.
    The raw and the re-weighted features are concatenated on axis 1, fed to
    ``conv2`` and added to the input before ``self.lrelu``.

    Args:
        x: input tensor (``[B, C, H, W]`` according to the shape comments).

    Returns:
        ``self.lrelu(x + conv2(...))``.
    """
    feat = self.conv1(x)  # [B, C, H, W]

    # Average over the last axis twice: [B, C, H, W] -> [B, C, H] -> [B, C].
    gate = F.mean(
        F.mean(feat, axis=-1, keepdims=False), axis=-1, keepdims=False)
    gate = self.linear(gate)
    # Restore two trailing singleton axes so the gate broadcasts: [B, C, 1, 1].
    gate = F.add_axis(F.add_axis(gate, axis=-1), axis=-1)

    stacked = F.concat((feat, F.multiply(feat, gate)), axis=1)  # [B, 2C, H, W]
    del gate  # drop the reference early, as the original did
    feat = self.conv2(stacked)  # [B, C, H, W]
    return self.lrelu(x + feat)
def mean(inputs: meg.Tensor, axis: Iterable[int], keepdims=False) -> meg.Tensor:
    """Average ``inputs`` over several axes by chaining single-axis ``F.mean``.

    Negative axes are resolved against the rank of ``inputs`` before any
    reduction happens, and an axis listed more than once is reduced only
    once. (Previously, with ``keepdims=False``, negative or repeated axes
    were shifted by the running count of removed axes and ended up
    reducing the wrong dimensions.)

    Args:
        inputs: tensor to reduce.
        axis: axes to average over; values in ``[-ndim, ndim)``.
        keepdims: when true, reduced axes are kept with size 1.

    Returns:
        The reduced tensor. An empty ``axis`` returns ``inputs`` unchanged.

    Raises:
        ValueError: if an axis lies outside ``[-ndim, ndim)``.
    """
    ndim = len(inputs.shape)

    resolved = []
    for ax in axis:
        if not -ndim <= ax < ndim:
            raise ValueError(
                "axis {} is out of range for a tensor with {} dimensions".format(
                    ax, ndim))
        resolved.append(ax % ndim)
    # Ordered de-duplication keeps the caller's order for the keepdims path.
    resolved = list(dict.fromkeys(resolved))

    out = inputs
    if keepdims:
        # Rank never changes, so axes can be reduced in the given order.
        for ax in resolved:
            out = F.mean(out, ax, keepdims=keepdims)
    else:
        # Each reduction removes one axis; reducing in ascending order means
        # every remaining target has moved left by the number already removed.
        for removed, ax in enumerate(sorted(resolved)):
            out = F.mean(out, ax - removed, keepdims=keepdims)
    return out
def forward(self, x):
    """Normalize ``x`` per group and optionally apply a channel affine.

    ``x`` is viewed as ``(x.shape[0], self.num_groups, -1)``; each group is
    normalized with its own mean and variance (variance computed as
    ``E[x^2] - E[x]^2``), then the result is reshaped back to ``x.shape``.

    Args:
        x: input tensor; the affine step reshapes weight and bias to
            ``(1, -1, 1, 1)``, so it presumably expects 4-D input — confirm.

    Returns:
        The normalized tensor, scaled by ``self.weight`` and shifted by
        ``self.bias`` when ``self.affine`` is true.
    """
    grouped = x.reshape(x.shape[0], self.num_groups, -1)

    mu = F.mean(grouped, axis=2, keepdims=True)
    mean_of_squares = F.mean(grouped**2, axis=2, keepdims=True)
    # NOTE(review): E[x^2] - E[x]^2 can round slightly below zero for
    # near-constant groups; self.eps is the only guard before the sqrt.
    variance = mean_of_squares - mu * mu

    normed = ((grouped - mu) / F.sqrt(variance + self.eps)).reshape(x.shape)
    if not self.affine:
        return normed

    scale = self.weight.reshape(1, -1, 1, 1)
    shift = self.bias.reshape(1, -1, 1, 1)
    return scale * normed + shift
def forward(self, x):
    """Fuse four convolution branches with a globally pooled branch.

    ``conv1`` .. ``conv4`` are applied to ``x`` in parallel. A fifth branch
    averages ``x`` over axes 2 and 3 (keeping dims), applies ``convgp`` and
    is resized to the spatial size of ``x``. All five are concatenated on
    axis 1 and passed through ``convout``.

    Args:
        x: input tensor.

    Returns:
        Output of ``self.convout`` on the concatenated branches.
    """
    branches = [
        conv(x) for conv in (self.conv1, self.conv2, self.conv3, self.conv4)
    ]

    # Global average: reduce axis 2, then axis 3, keeping both as size 1.
    pooled = F.mean(F.mean(x, 2, True), 3, True)
    pooled = self.convgp(pooled)
    pooled = F.interpolate(pooled, (x.shapeof(2), x.shapeof(3)))

    fused = F.concat(branches + [pooled], axis=1)
    return self.convout(fused)
def _L1(diff, occ_mask=None, if_mask_=False):
    """Mean absolute value of ``diff``, optionally restricted by a mask.

    Args:
        diff: tensor of residuals.
        occ_mask: weights multiplied into the loss; only read when
            ``if_mask_`` is true.
        if_mask_: when true, return the masked sum divided by
            ``sum(occ_mask) + 1e-6``; otherwise the plain mean.

    Returns:
        Scalar loss tensor.
    """
    abs_err = F.abs(diff)
    if if_mask_:
        # The 1e-6 keeps the division finite when the mask sums to zero.
        return F.sum(abs_err * occ_mask) / (F.sum(occ_mask) + 1e-6)
    return F.mean(abs_err)
def _charbonnier(diff, occ_mask=None, if_mask_=False):
    """Loss ``(diff^2 + 1e-6)^0.4``, averaged plainly or under a mask.

    Args:
        diff: tensor of residuals.
        occ_mask: weights multiplied into the loss; only read when
            ``if_mask_`` is true.
        if_mask_: when true, return the masked sum divided by
            ``sum(occ_mask) + 1e-6``; otherwise the plain mean.

    Returns:
        Scalar loss tensor.
    """
    per_elem = F.pow((diff**2 + 1e-6), 0.4)
    if if_mask_:
        # The 1e-6 keeps the division finite when the mask sums to zero.
        return F.sum(per_elem * occ_mask) / (F.sum(occ_mask) + 1e-6)
    return F.mean(per_elem)
def _abs_robust(diff, occ_mask=None, if_mask_=False):
    """Loss ``(|diff| + 0.01)^0.4``, averaged plainly or under a mask.

    Args:
        diff: tensor of residuals.
        occ_mask: weights multiplied into the loss; only read when
            ``if_mask_`` is true.
        if_mask_: when true, return the masked sum divided by
            ``sum(occ_mask) + 1e-6``; otherwise the plain mean.

    Returns:
        Scalar loss tensor.
    """
    per_elem = F.pow((F.abs(diff) + 0.01), 0.4)
    if if_mask_:
        # The 1e-6 keeps the division finite when the mask sums to zero.
        return F.sum(per_elem * occ_mask) / (F.sum(occ_mask) + 1e-6)
    return F.mean(per_elem)
def forward(self, a):
    """Reduce ``a`` along axis 2 according to ``self.mode``.

    Args:
        a: tensor to reduce.

    Returns:
        The sum for mode ``"sum"``, the mean for mode ``"mean"``, and the
        maximum for every other mode value.
    """
    mode = self.mode
    if mode == "sum":
        reduce_fn = F.sum
    elif mode == "mean":
        reduce_fn = F.mean
    else:
        # Any unrecognized mode falls back to max.
        reduce_fn = F.max
    return reduce_fn(a, axis=2)
def compute_cost_volume(features1, features2, max_displacement):
    """Correlate ``features1`` with shifted windows of ``features2``.

    ``features2`` is padded via ``add_H_W_Padding`` and every window of the
    size of ``features1`` at offsets ``0 .. 2 * max_displacement`` (rows and
    columns) is multiplied with ``features1`` and averaged over axis 1.

    Args:
        features1: tensor of shape [b, c, h, w].
        features2: tensor of shape [b, c, h, w].
        max_displacement: int, maximum displacement in each direction.

    Returns:
        A tuple ``(cost_volume, features2_padded)``. ``cost_volume`` has
        shape [b, (2 * max_displacement + 1) ** 2, h, w], with the row shift
        varying slowest; ``features2_padded`` is the padded ``features2``.
    """
    _batch, _channels, height, width = features1.shape
    # NOTE: max_displacement is not range-checked here.
    shifts = range(2 * max_displacement + 1)

    # Pad so that every shifted window stays inside the tensor
    # (add_H_W_Padding is defined elsewhere; presumably it pads H and W by
    # `margin` on each side — confirm).
    features2_padded = add_H_W_Padding(features2, margin=max_displacement)

    cost_list = [
        F.mean(
            features1
            * features2_padded[:, :, dy:(height + dy), dx:(width + dx)],
            axis=1,
            keepdims=True)  # [B, 1, H, W]
        for dy in shifts
        for dx in shifts
    ]
    return F.concat(cost_list, axis=1), features2_padded
def forward(self, x):
    """Re-weight ``x`` with separate attention maps along height and width.

    ``x`` is averaged over width and over height; the two descriptors are
    stacked on axis 2, passed through ``conv1`` and ``act``, split again at
    index ``H`` and turned into sigmoid gates by ``conv_h`` / ``conv_w``.

    Args:
        x: 4-D input tensor (unpacked as ``n, c, h, w``).

    Returns:
        ``x * a_w * a_h`` with both gates broadcast against ``x``.
    """
    _, _, height, _ = x.shape

    pooled_h = F.mean(x, axis=3, keepdims=True)  # [B,C,H,1]
    pooled_w = F.mean(x, axis=2, keepdims=True).transpose(0, 1, 3, 2)  # [B,C,W,1]

    mixed = F.concat([pooled_h, pooled_w], axis=2)  # [B,C,H+W,1]
    # No batch norm is applied between conv1 and the activation.
    mixed = self.act(self.conv1(mixed))  # [B, mip, H+W, 1]

    part_h = mixed[:, :, :height, :]  # [B,mip,H,1]
    part_w = mixed[:, :, height:, :].transpose(0, 1, 3, 2)  # [B,mip,1,W]

    gate_h = F.sigmoid(self.conv_h(part_h))
    gate_w = F.sigmoid(self.conv_w(part_w))
    return x * gate_w * gate_h
def forward(self, x):
    """Scale ``x`` by a gate computed from its mean over axis 3.

    The mean (dims kept) is passed through ``conv1``, ``activ``, ``conv2``
    and ``sigmoid`` in that order, and the result multiplies ``x``.

    Args:
        x: input tensor.

    Returns:
        ``x`` multiplied element-wise (with broadcasting) by the gate.
    """
    gate = F.mean(x, axis=3, keepdims=True)
    for layer in (self.conv1, self.activ, self.conv2, self.sigmoid):
        gate = layer(gate)
    return x * gate
def calc(self, X, Y, mask=None):
    """Compute ``sqrt((X - Y)^2 + eps)`` and reduce it to a scalar.

    Args:
        X: first tensor.
        Y: second tensor.
        mask: optional multiplier applied to the per-element error.

    Returns:
        The mean of the error when ``self.reduction == "mean"``, otherwise
        its sum. The mean is taken over all elements, not over the mask sum.
    """
    residual = X - Y
    error = F.sqrt(residual * residual + self.eps)
    if mask is not None:
        error = error * mask

    reduce_fn = F.mean if self.reduction == "mean" else F.sum
    return reduce_fn(error)
def flow_error_avg(pred_flow, gt_flow):
    """Average the ``euclidean`` error between predicted and reference flow.

    Args:
        pred_flow: 4-D predicted tensor.
        gt_flow: 4-D reference tensor with the same last two dimensions.

    Returns:
        ``F.mean(euclidean(pred_flow - gt_flow))``; ``euclidean`` is a
        helper defined elsewhere.

    Raises:
        AssertionError: if the last two dimensions of the inputs differ.
    """
    _, _, gt_h, gt_w = gt_flow.shape
    _, _, pred_h, pred_w = pred_flow.shape
    assert (gt_h, gt_w) == (pred_h, pred_w), (
        "inps shape is not the same: {} - {}".format(
            (gt_h, gt_w), (pred_h, pred_w)))

    return F.mean(euclidean(pred_flow - gt_flow))
def calculate_psnr(im1, im2, border=0):
    """PSNR between two images, computed as ``10 * log10(1.0 / mse)``.

    The formula uses 1.0 as the numerator, so pixel values are presumably
    expected in [0, 1] — confirm with callers.

    Args:
        im1: first image; its first two axes are treated as height/width.
        im2: second image, same shape as ``im1``.
        border: number of pixels cropped from every side before comparing.

    Returns:
        Python ``float('inf')`` when the mean squared error is zero,
        otherwise the PSNR as returned by the ``F`` operations.

    Raises:
        ValueError: if the two shapes differ.
    """
    if not (im1.shape == im2.shape):
        raise ValueError('Input images must have the same dimensions.')

    height, width = im1.shape[:2]
    rows = slice(border, height - border)
    cols = slice(border, width - border)

    mse = F.mean((im1[rows, cols] - im2[rows, cols])**2)
    if mse == 0:
        return float('inf')
    # Change of base: log10(v) = ln(v) / ln(10).
    return 10 * F.log(1.0 / mse) / F.log(10.)
def forward(self, x):
    """Fuse four convolution branches with a globally pooled branch.

    ``conv1`` .. ``conv4`` are applied to ``x`` in parallel. A fifth branch
    averages ``x`` over axes 2 and 3 (keeping dims), applies ``conv_gp`` and
    is resized to ``x.shape[2:]``. All five are concatenated on axis 1 and
    passed through ``conv_out``.

    Args:
        x: input tensor.

    Returns:
        Output of ``self.conv_out`` on the concatenated branches.
    """
    branches = [
        conv(x) for conv in (self.conv1, self.conv2, self.conv3, self.conv4)
    ]

    pooled = self.conv_gp(F.mean(x, [2, 3], True))
    pooled = F.nn.interpolate(pooled, x.shape[2:])

    fused = F.concat(branches + [pooled], axis=1)
    return self.conv_out(fused)
def forward(self, x):
    """Layer-normalize ``x`` over its last axis, then scale and shift.

    Args:
        x: input tensor.

    Returns:
        ``self.weight * normalized + self.bias`` where the normalization
        uses the mean and variance over the last axis and
        ``self.variance_epsilon`` inside the square root.
    """
    last_axis = len(x.shape) - 1

    mu = F.mean(x, last_axis, True)
    variance = F.mean((x - mu)**2, last_axis, True)
    normed = (x - mu) / ((variance + self.variance_epsilon)**0.5)
    return self.weight * normed + self.bias
def _mean(inp):
    """Return the mean of all elements of ``inp`` as a NumPy value.

    Args:
        inp: data accepted by ``mge.tensor``.

    Returns:
        The result of ``F.mean`` over the whole tensor, converted with
        ``.numpy()``.
    """
    tensor = mge.tensor(inp)
    averaged = F.mean(tensor)
    return averaged.numpy()
def forward(self, X, Y):
    """Mean of ``sqrt((X - Y)^2 + eps)`` over all elements.

    Args:
        X: first tensor.
        Y: second tensor.

    Returns:
        Scalar loss tensor.
    """
    delta = X - Y
    return F.mean(F.sqrt(delta * delta + self.eps))
def forward(self, features, label=None, mask=None):
    """Compute the supervised contrastive loss over multi-view features.

    If ``label`` and ``mask`` are both None, the loss degenerates to the
    SimCLR unsupervised loss.

    Reference:
        "A Simple Framework for Contrastive Learning of Visual
        Representations" <https://arxiv.org/pdf/2002.05709.pdf>
        "Supervised Contrastive Learning" <https://arxiv.org/abs/2004.11362>

    Args:
        features(tensor): The embedding feature, shape=[bs, n_views, ...].
            Trailing dimensions are flattened into one.
        label(tensor): The label of images, shape=[bs].
        mask(tensor): contrastive mask of shape [bsz, bsz], mask_{i,j}=1
            if sample j has the same class as sample i. Can be asymmetric.

    Returns:
        Scalar loss tensor.

    Raises:
        ValueError: if ``features`` has fewer than 3 dimensions, if both
            ``label`` and ``mask`` are given, or if ``self.contrast_mode``
            is neither "one" nor "all".
        RuntimeError: if the number of labels differs from ``bs``.
    """
    if len(features.shape) < 3:
        raise ValueError("Features need have 3 dimensions at least")
    bs, num_view = features.shape[:2]
    # Flatten everything after the view axis: [bs, num_view, dim].
    if len(features.shape) > 3:
        features = features.reshape(bs, num_view, -1)

    # label and mask are mutually exclusive ways to define positives.
    if (label is not None) and (mask is not None):
        raise ValueError("label and mask cannot provided at the same time")
    elif (label is None) and (mask is None):
        mask = F.eye(bs, dtype="float32")
    elif label is not None:
        label = label.reshape(-1, 1)
        if label.shape[0] != bs:
            raise RuntimeError(
                "Num of labels does not match num of features")
        # Cast so the mask can be multiplied and summed like the other
        # branches (the comparison itself yields a boolean tensor).
        mask = F.equal(label, label.T).astype("float32")
    else:
        mask = mask.astype("float32")

    contrast_count = features.shape[1]
    # Keep `features` intact: the "one" mode below still indexes it.
    views = F.split(features, contrast_count, axis=1)
    # Stack the views along the batch axis: [bs * n_views, dim].
    contrast_feature = F.squeeze(F.concat(views, axis=0), axis=1)
    if self.contrast_mode == "one":
        anchor_feature = features[:, 0]
        anchor_count = 1
    elif self.contrast_mode == "all":
        anchor_feature = contrast_feature
        anchor_count = contrast_count
    else:
        raise ValueError("Unknown mode:{}".format(self.contrast_mode))

    # compute logits
    anchor_dot_contrast = F.div(
        F.matmul(anchor_feature, contrast_feature.T), self.temperate)
    # for numerical stability
    logits_max = F.max(anchor_dot_contrast, axis=-1, keepdims=True)
    logits = anchor_dot_contrast - logits_max

    # Tile the [bs, bs] mask into an anchor_count x contrast_count grid of
    # blocks so entry (i, j) refers to samples (i % bs, j % bs), matching
    # the row/column order of `logits`. Stacking copies and reshaping does
    # not produce this layout.
    mask = F.concat(
        [F.concat([mask] * contrast_count, axis=1)] * anchor_count, axis=0)

    # mask-out self-contrast cases: zero the (i, i) entry of every anchor row
    num_anchor = int(bs * anchor_count)
    logits_mask = F.scatter(
        F.ones_like(mask), 1,
        F.arange(0, num_anchor, dtype="int32").reshape(-1, 1),
        # source dtype matches the float32 mask it is written into
        F.zeros(num_anchor, dtype="float32").reshape(-1, 1))
    mask = mask * logits_mask

    # compute log_prob
    exp_logits = F.exp(logits) * logits_mask
    log_prob = logits - F.log(F.sum(exp_logits, axis=1, keepdims=True))

    # mean log-likelihood over the positives of each anchor
    # NOTE(review): an anchor without any positive makes F.sum(mask, axis=1)
    # zero and yields NaN here; left unchanged.
    mean_log_prob_pos = F.sum(mask * log_prob, axis=1) / F.sum(mask, axis=1)

    # loss
    loss = -(self.temperate / self.base_temperate) * mean_log_prob_pos
    loss = F.mean(loss.reshape(anchor_count, bs))
    return loss
[(64, 512, 16, 16), (1, )], True, 1000, ), ( "reduce.max", lambda x: MF.max(x, 0), lambda x: torch.max(x, 0), [(100, 100)], [(64, 512, 16, 16)], True, 1000, ), ( "reduce.mean", lambda x: MF.mean(x, 0), lambda x: torch.mean(x, 0), [(100, 100)], [(64, 512, 16, 16)], True, 1000, ), ( "reduce.mean", lambda x: MF.mean(x, 0), lambda x: torch.mean(x, 0), [(100, 100)], [(64, 512, 16, 16)], True, 1000, ),
def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
    """Build per-anchor labels, box offsets and centerness for a batch.

    For every image, candidate anchors are picked per level with
    ``F.topk`` on the distance to each ground-truth center, an IoU
    threshold of ``mean + std`` over the candidates is computed per
    ground-truth box, and each anchor is matched to the box with the
    highest remaining IoU.

    Args:
        anchors_list: one anchor tensor per level, paired with
            ``self.cfg.stride``. Presumably (x, y) points, since they are
            turned into boxes by subtracting/adding half a scaled stride
            — TODO confirm.
        batched_gt_boxes: ground-truth tensor indexed as
            ``[image, box, field]``; fields 0..3 are used as box
            coordinates and field 4 as the class label.
        batched_num_gts: number of valid boxes per image.

    Returns:
        Tuple ``(labels, offsets, ctrness)``, each stacked over the batch
        axis and detached.
    """
    labels_list = []
    offsets_list = []
    ctrness_list = []
    all_level_anchors = F.concat(anchors_list, axis=0)
    for bid in range(batched_gt_boxes.shape[0]):
        # Keep only the valid boxes of this image.
        gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]
        ious = []
        candidate_idxs = []
        # Running offset that maps per-level indices into all-level indices.
        base = 0
        for stride, anchors_i in zip(self.cfg.stride, anchors_list):
            # IoU between the boxes and a square of side
            # stride * anchor_scale centered on each anchor.
            ious.append(
                layers.get_iou(
                    gt_boxes[:, :4],
                    F.concat([
                        anchors_i - stride * self.cfg.anchor_scale / 2,
                        anchors_i + stride * self.cfg.anchor_scale / 2,
                    ], axis=1)))
            gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2
            # Euclidean distance from every box center to every anchor.
            distances = F.sqrt(
                F.sum((F.expand_dims(gt_centers, axis=1) - anchors_i)**2,
                      axis=2))
            # Presumably the anchor_topk nearest anchors (depends on
            # F.topk's default ordering) — TODO confirm.
            _, topk_idxs = F.topk(distances, self.cfg.anchor_topk)
            candidate_idxs.append(base + topk_idxs)
            base += anchors_i.shape[0]
        ious = F.concat(ious, axis=1)
        candidate_idxs = F.concat(candidate_idxs, axis=1)
        candidate_ious = F.gather(ious, 1, candidate_idxs)
        # Per-box adaptive threshold: mean + std of the candidate IoUs.
        ious_thr = (F.mean(candidate_ious, axis=1, keepdims=True) +
                    F.std(candidate_ious, axis=1, keepdims=True))
        # Foreground = is a candidate AND reaches the threshold.
        is_foreground = F.scatter(
            F.zeros(ious.shape), 1, candidate_idxs,
            F.ones(candidate_idxs.shape)).astype(bool) & (ious >= ious_thr)
        # Presumably encode() yields the distances to the four box sides,
        # so a positive minimum means the anchor lies inside — TODO confirm.
        is_in_boxes = F.min(self.point_coder.encode(
            all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1)),
                            axis=2) > 0
        # -1 marks (box, anchor) pairs that must not be matched.
        ious[~is_foreground] = -1
        ious[~is_in_boxes] = -1
        # Best box for every anchor (argmax over the box axis).
        match_indices = F.argmax(ious, axis=0)
        gt_boxes_matched = gt_boxes[match_indices]
        anchor_max_iou = F.indexing_one_hot(ious, match_indices, axis=0)
        labels = gt_boxes_matched[:, 4].astype(np.int32)
        # Anchors whose best IoU is still -1 matched nothing; label 0.
        labels[anchor_max_iou == -1] = 0
        offsets = self.point_coder.encode(all_level_anchors,
                                          gt_boxes_matched[:, :4])
        left_right = offsets[:, [0, 2]]
        top_bottom = offsets[:, [1, 3]]
        # sqrt(min/max over columns 0,2  *  min/max over columns 1,3),
        # each ratio clipped at 0.
        ctrness = F.sqrt(
            F.clip(F.min(left_right, axis=1) / F.max(left_right, axis=1),
                   lower=0) *
            F.clip(F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1),
                   lower=0))
        labels_list.append(labels)
        offsets_list.append(offsets)
        ctrness_list.append(ctrness)
    return (
        F.stack(labels_list, axis=0).detach(),
        F.stack(offsets_list, axis=0).detach(),
        F.stack(ctrness_list, axis=0).detach(),
    )
def forward(self, x):
    """Run the feature extractor, average over axis 3 and apply the head.

    Args:
        x: input tensor for ``self.features``.

    Returns:
        ``self.output`` applied to the features after they are averaged
        over axis 3 and flattened to ``(batch, -1)``.
    """
    feat = self.features(x)
    pooled = F.mean(feat, axis=3, keepdims=True)
    flat = pooled.reshape(pooled.shape[0], -1)
    return self.output(flat)
def _anchor_double_target(gt_boxes, im_info, all_anchors):
    """Assign two ground-truth targets to every anchor.

    Per ground-truth box an IoU threshold is derived from the mean and
    standard deviation of the IoUs of the closest anchors on each level
    (at least 0.2). Anchors must also lie inside the region returned by
    ``_compute_pos_area``. For every anchor the two highest-ranked boxes
    are kept, so all outputs have ``2 * num_anchors`` rows.

    Args:
        gt_boxes: box tensor; columns 0..3 are coordinates, column 4 is
            the class tag (negative means invalid).
        im_info: image info vector; element 5 is used as the number of
            boxes to keep.
        all_anchors: anchor tensor; columns 0..3 are coordinates and
            column 4 is compared against 0..4 (presumably the pyramid
            level — TODO confirm).

    Returns:
        Tuple ``(labels, bbox_targets, labels_cat)``.
    """
    # Targets are constants for the optimizer: cut every input from the graph.
    gt_boxes, im_info = gt_boxes.detach(), im_info.detach()
    all_anchors = all_anchors.detach()
    gt_boxes = gt_boxes[:im_info[5].astype(np.int32), :]
    # Append one all -1 row so there is always at least one (invalid) box.
    dummy = -F.ones([1, gt_boxes.shape[1]]).to(gt_boxes.device)
    gt_boxes = F.concat([gt_boxes, dummy], axis=0)
    # 1.0 for boxes whose tag is >= 0, 0.0 otherwise (including the dummy).
    valid_mask = 1 - (gt_boxes[:, 4] < 0).astype(np.float32)
    anchor_centers = _compute_center(all_anchors)
    gtboxes_centers = _compute_center(gt_boxes)
    # Zero the centers of invalid boxes.
    # (unsqueeze-style equivalent: gtboxes_centers * valid_mask.unsqueeze(1))
    gtboxes_centers = gtboxes_centers * F.expand_dims(valid_mask, axis=1)
    N, K = all_anchors.shape[0], gt_boxes.shape[0]
    # Broadcasting replaces the explicit repeat of the unsqueeze/repeat form:
    #   an_centers = anchor_centers.unsqueeze(1).repeat(1, K, 1)
    #   gt_centers = gtboxes_centers.unsqueeze(0).repeat(N, 1, 1)
    an_centers = F.expand_dims(anchor_centers, axis=1)
    gt_centers = F.expand_dims(gtboxes_centers, axis=0)
    # Euclidean center distance for every (anchor, box) pair.
    distance = F.abs(an_centers - gt_centers)
    distance = F.sqrt(F.pow(distance, 2).sum(axis=2))
    # Range of values of all_anchors[:, 4] that is iterated below.
    start = 0
    end = 5
    overlaps = box_overlap_opr(all_anchors[:, :4], gt_boxes[:, :4])
    # Zero the overlaps with invalid boxes.
    overlaps *= F.expand_dims(valid_mask, axis=0)
    # Maximum number of closest anchors sampled per level and box.
    default_num = 16
    ious_list = []
    for l in range(start, end):
        # Indices of the anchors whose column 4 equals l.
        _, index = F.cond_take(all_anchors[:, 4] == l, all_anchors[:, 4])
        level_dist = distance[index, :].transpose(1, 0)
        ious = overlaps[index, :].transpose(1, 0)
        # Closest anchors first.
        sorted_index = F.argsort(level_dist, descending=False)
        n = min(sorted_index.shape[1], default_num)
        ious = F.gather(ious, 1, sorted_index[:, :n]).transpose(1, 0)
        ious_list.append(ious)
    ious = F.concat(ious_list, axis=0)
    # Per-box threshold: mean + std of the sampled IoUs, floored at 0.2.
    mean_var = F.mean(ious, axis=0)
    std_var = F.std(ious, 0)
    iou_thresh_per_gt = mean_var + std_var
    iou_thresh_per_gt = F.maximum(iou_thresh_per_gt, 0.2)
    # limits the anchor centers in the gtboxes
    N, K = all_anchors.shape[0], gt_boxes.shape[0]
    anchor_points = an_centers
    pos_area = _compute_pos_area(gt_boxes, 0.3)
    # (unsqueeze-style equivalent: pos_area.unsqueeze(0).repeat(N, 1, 1))
    pos_area = F.broadcast_to(F.expand_dims(pos_area, axis=0),
                              (N, K, pos_area.shape[-1]))
    # Signed distances from the anchor center to the four sides of pos_area.
    l = anchor_points[:, :, 0] - pos_area[:, :, 0]
    r = pos_area[:, :, 2] - anchor_points[:, :, 0]
    t = anchor_points[:, :, 1] - pos_area[:, :, 1]
    b = pos_area[:, :, 3] - anchor_points[:, :, 1]
    is_in_gt = F.stack([l, r, t, b], axis=2)
    # Inside only if every side distance exceeds 0.1.
    is_in_gt = is_in_gt.min(axis=2) > 0.1
    # From here on valid_mask is per (anchor, box): IoU reaches the box
    # threshold AND the anchor center is inside the positive area.
    valid_mask = (overlaps >= F.expand_dims(
        iou_thresh_per_gt, axis=0)) * is_in_gt.astype(np.float32)
    ious = overlaps * valid_mask
    # NOTE(review): the positional 1 is presumably argsort's `descending`
    # flag (largest IoU first), not an axis — TODO confirm.
    sorted_index = F.argsort(ious, 1)
    sorted_overlaps = F.gather(ious, 1, sorted_index)
    # Keep the first two ranked boxes of every anchor, flattened to 2 * N.
    max_overlaps = sorted_overlaps[:, :2].flatten()
    argmax_overlaps = sorted_index[:, :2].flatten()
    n, c = all_anchors.shape
    device = all_anchors.device
    labels = -F.ones(2 * n).to(device)
    positive_mask = (max_overlaps >= 0.2).to(device).astype(np.float32)
    negative_mask = (max_overlaps < 0.2).to(device).astype(np.float32)
    # NOTE(review): both masks use the same 0.2 cut, so they are
    # complementary and the -1 term is always multiplied by zero; as
    # written labels never equals -1 — confirm whether an ignore band was
    # intended.
    labels = positive_mask + labels * (1 - positive_mask) * (1 -
                                                             negative_mask)
    bbox_targets = gt_boxes[argmax_overlaps, :4]
    # Duplicate every anchor so it lines up with its two targets.
    all_anchors = F.broadcast_to(F.expand_dims(all_anchors, axis=1),
                                 (n, 2, c)).reshape(-1, c)
    bbox_targets = bbox_transform_opr(all_anchors[:, :4], bbox_targets)
    labels_cat = gt_boxes[argmax_overlaps, 4]
    # Class tag of the matched box, replaced by -1 wherever labels == -1.
    labels_cat = labels_cat * (1 - F.equal(labels, -1).astype(
        np.float32)) - F.equal(labels, -1).astype(np.float32)
    return labels, bbox_targets, labels_cat