def train_generator_batch(image, label, *, gm, netG, netloss):
    """Run one generator training step and return its loss.

    The clip is walked in both temporal directions at once: frame ``t`` and
    frame ``T - t - 1`` are concatenated along the batch axis, so a single
    call to ``netG`` advances both recurrent states.

    Args:
        image: input clip, unpacked as ``(B, T, C, h, w)``.
        label: target handed unchanged to ``netloss``.
        gm: context manager that also exposes ``backward(loss)``
            (presumably a MegEngine ``GradManager`` -- confirm with caller).
        netG: generator providing ``hidden_channels``, ``flownet``,
            ``do_upsample`` and a callable recurrent step.
        netloss: callable ``netloss(prediction, label)``.

    Returns:
        The loss; when running distributed it is the all-reduced sum divided
        by the world size.
    """
    B, T, _, h, w = image.shape
    biup = get_bilinear(image)
    netG.train()
    with gm:
        fwd_states = []
        bwd_states = []
        state = F.zeros((2 * B, netG.hidden_channels, h, w))
        for t in range(T):
            # First half of the batch: forward order; second half: reversed.
            cur = F.concat([image[:, t, ...], image[:, T - t - 1, ...]], axis=0)
            if t > 0:
                prev = F.concat(
                    [image[:, t - 1, ...], image[:, T - t, ...]], axis=0)
            else:
                # No previous frame yet: the flow is taken against itself.
                prev = cur
            flow = netG.flownet(cur, prev)
            state = netG(state, flow, cur)
            fwd_states.append(state[0:B, ...])
            bwd_states.append(state[B:2 * B, ...])
        # The backward state for frame i was produced at step T - i - 1.
        frames = [
            netG.do_upsample(fwd, bwd)
            for fwd, bwd in zip(fwd_states, reversed(bwd_states))
        ]
        res = F.stack(frames, axis=1)  # [B,T,3,H,W] per the original note
        loss = netloss(res + biup, label)
        gm.backward(loss)
        if dist.is_distributed():
            loss = dist.functional.all_reduce_sum(loss) / dist.get_world_size()
        return loss
def bbox_transform_inv_opr(bbox, deltas):
    """Transform the learned deltas to the final bbox coordinates.

    The four box coordinates are read from axis 1 of both inputs.

    Args:
        bbox: reference boxes; columns 0..3 are read as x1, y1, x2, y2.
        deltas: regression outputs; columns 0..3 are read as dx, dy, dw, dh.

    Returns:
        Tensor of predicted boxes stacked along axis 1 as
        ``[x1, y1, x2, y2]``.
    """
    # Upper bound applied to dw/dh before they are exponentiated.
    max_delta = math.log(1000.0 / 16)

    # Widths/heights use the inclusive-pixel (+1) convention.
    bbox_width = bbox[:, 2] - bbox[:, 0] + 1
    bbox_height = bbox[:, 3] - bbox[:, 1] + 1
    bbox_ctr_x = bbox[:, 0] + 0.5 * bbox_width
    bbox_ctr_y = bbox[:, 1] + 0.5 * bbox_height

    pred_ctr_x = bbox_ctr_x + deltas[:, 0] * bbox_width
    pred_ctr_y = bbox_ctr_y + deltas[:, 1] * bbox_height

    dw = F.minimum(deltas[:, 2], max_delta)
    dh = F.minimum(deltas[:, 3], max_delta)
    pred_width = bbox_width * F.exp(dw)
    pred_height = bbox_height * F.exp(dh)

    pred_x1 = pred_ctr_x - 0.5 * pred_width
    pred_y1 = pred_ctr_y - 0.5 * pred_height
    pred_x2 = pred_ctr_x + 0.5 * pred_width
    pred_y2 = pred_ctr_y + 0.5 * pred_height

    pred_boxes = F.stack([pred_x1, pred_y1, pred_x2, pred_y2], axis=1)
    return pred_boxes
def test_generator_batch(image, *, netG):
    """Run the generator in eval mode over a clip and return the output.

    Frame ``t`` and frame ``T - t - 1`` are concatenated along the batch
    axis so one call to ``netG`` advances both the forward and the
    time-reversed recurrent state.

    Args:
        image: input clip, unpacked as ``(B, T, C, h, w)``; the original
            note gives ``[1,100,3,180,320]`` as an example.
        netG: generator providing ``hidden_channels``, ``flownet``,
            ``do_upsample`` and a callable recurrent step.

    Returns:
        Stacked per-frame outputs plus the result of ``get_bilinear(image)``.
    """
    B, T, _, h, w = image.shape
    biup = get_bilinear(image)
    netG.eval()

    fwd_states = []
    bwd_states = []
    state = F.zeros((2 * B, netG.hidden_channels, h, w))
    for t in range(T):
        cur = F.concat([image[:, t, ...], image[:, T - t - 1, ...]], axis=0)
        if t > 0:
            prev = F.concat(
                [image[:, t - 1, ...], image[:, T - t, ...]], axis=0)
        else:
            # No previous frame yet: the flow is taken against itself.
            prev = cur
        flow = netG.flownet(cur, prev)
        state = netG(state, flow, cur)
        fwd_states.append(state[0:B, ...])
        bwd_states.append(state[B:2 * B, ...])

    # The backward state for frame i was produced at step T - i - 1.
    frames = [
        netG.do_upsample(fwd, bwd)
        for fwd, bwd in zip(fwd_states, reversed(bwd_states))
    ]
    res = F.stack(frames, axis=1)  # [B,T,3,H,W] per the original note
    return res + biup
def decode(self, anchors: Tensor, deltas: Tensor) -> Tensor:
    """Turn per-point side distances into box corners.

    Columns 0 and 1 of ``anchors`` are used as the point's x and y. Every
    group of four columns in ``deltas`` is read as (left, top, right,
    bottom) distances from that point.

    Returns:
        Tensor with the same shape as ``deltas`` holding x1, y1, x2, y2 for
        each group of four.
    """
    point_x = F.expand_dims(anchors[:, 0], axis=1)
    point_y = F.expand_dims(anchors[:, 1], axis=1)

    x1 = point_x - deltas[:, 0::4]
    y1 = point_y - deltas[:, 1::4]
    x2 = point_x + deltas[:, 2::4]
    y2 = point_y + deltas[:, 3::4]

    corners = F.stack([x1, y1, x2, y2], axis=2)
    return corners.reshape(deltas.shape)
def get_center_offsets(self, featmap, stride):
    """Build the (x, y, x, y) shift of every feature-map cell.

    Args:
        featmap: tensor whose ``shape[2:]`` unpacks to (height, width).
        stride: multiplier applied to the cell indices.

    Returns:
        Tensor with one row per cell (row-major order) and four columns
        ``[x, y, x, y]``.
    """
    rows, cols = featmap.shape[2:]
    xs = F.linspace(0, cols - 1, cols) * stride
    ys = F.linspace(0, rows - 1, rows) * stride

    # Expand the two 1-D ranges into a full (rows, cols) mesh, then flatten.
    grid_shape = (rows, cols)
    flat_x = F.broadcast_to(xs.reshape(1, -1), grid_shape).flatten()
    flat_y = F.broadcast_to(ys.reshape(-1, 1), grid_shape).flatten()

    return F.stack([flat_x, flat_y, flat_x, flat_y], axis=1)
def generate_anchors_opr(self,
                         fm_3x3,
                         fm_stride,
                         anchor_scales=(8, 16, 32, 64, 128),
                         anchor_ratios=(1, 2, 3),
                         base_size=4):
    """Generate all anchors for one feature map.

    Base anchors come from ``generate_anchors`` and are shifted to every
    cell of ``fm_3x3`` (cell index times ``fm_stride``).

    Returns:
        Detached tensor of anchors reshaped to ``(-1, c)`` where ``c`` is
        the column count of the base anchors.
    """
    base = generate_anchors(base_size=base_size,
                            ratios=np.array(anchor_ratios),
                            scales=np.array(anchor_scales))
    device = fm_3x3.device
    anchors = mge.tensor(base).to(device)

    height, width = fm_3x3.shape[2], fm_3x3.shape[3]
    grid_shape = (height, width)
    xs = F.linspace(0, width - 1, width).to(device) * fm_stride
    ys = F.linspace(0, height - 1, height).to(device) * fm_stride
    flat_x = F.broadcast_to(xs.reshape(1, -1), grid_shape).flatten()
    flat_y = F.broadcast_to(ys.reshape(-1, 1), grid_shape).flatten()
    shifts = F.stack([flat_x, flat_y, flat_x, flat_y], axis=1)

    n_cols = anchors.shape[1]
    # Broadcast-add: every shift is combined with every base anchor.
    shifted = F.expand_dims(anchors, axis=0) + F.expand_dims(shifts, axis=1)
    return shifted.reshape(-1, n_cols).detach()
def anchor_iou_target_opr(self, boxes, im_info, all_anchors, rpn_bbox_offsets):
    """Compute IoU regression targets for the two predictions per anchor.

    For every image the predicted offsets are decoded against the anchors,
    overlapped with the ground-truth boxes, and the best IoU is taken for
    the first prediction. The second prediction takes its best IoU after
    the ground truth chosen by the first has been zeroed out.

    Args:
        boxes: batched ground-truth boxes; column 4 is compared against
            ``config.anchor_ignore_label``.
        im_info: per-image info; column 5 is used as the number of valid
            ground-truth boxes.
        all_anchors: anchors shared by all images; the first four columns
            are used as box coordinates.
        rpn_bbox_offsets: batched predicted offsets, reshaped to
            ``(-1, 4)`` per image.

    Returns:
        Concatenation along axis 0 of one ``(1, num_anchors, 2)``-style
        tensor per image (shape inferred from the stack/expand -- confirm).
    """
    n = rpn_bbox_offsets.shape[0]

    # The duplicated anchors do not depend on the image, so build them once
    # instead of once per iteration.
    an, ac = all_anchors.shape[0], all_anchors.shape[1]
    anchors = F.broadcast_to(F.expand_dims(all_anchors, 1),
                             (an, 2, ac)).reshape(-1, ac)
    anchor_boxes = anchors[:, :4]

    res = []
    for i in range(n):
        gtboxes = boxes[i, :im_info[i, 5].astype(np.int32)]
        offsets = rpn_bbox_offsets[i].reshape(-1, 4).detach()

        dtboxes = bbox_transform_inv_opr(anchor_boxes, offsets[:, :4])
        overlaps = box_overlap_opr(dtboxes, gtboxes[:, :4])

        # Zero the overlap with ground truths carrying the ignore label.
        ignore_mask = 1 - F.equal(
            gtboxes[:, 4], config.anchor_ignore_label).astype(np.float32)
        ignore_mask = F.expand_dims(ignore_mask, axis=0)
        overlaps = overlaps * ignore_mask

        # Split the interleaved rows into the two predictions per anchor.
        overlaps = overlaps.reshape(-1, 2, overlaps.shape[1]).transpose(1, 0, 2)
        a, b = overlaps[0], overlaps[1]

        index = F.argmax(a, axis=1)
        a = F.nn.indexing_one_hot(a, index, 1)
        # Prevent the second prediction from matching the same ground truth.
        b = F.scatter(b, 1, index.reshape(-1, 1), F.zeros([b.shape[0], 1]))
        index = F.argmax(b, axis=1)
        b = F.nn.indexing_one_hot(b, index, 1)

        value = F.expand_dims(F.stack([a, b], axis=1), axis=0)
        res.append(value)

    result = F.concat(res, 0)
    return result
def forward(self, pred_cls_list, rpn_num_prob_list, pred_reg_list,
            anchors_list, rpn_iou_list, boxes, im_info):
    """Compute the RPN losses over all feature levels.

    Args:
        pred_cls_list: per-level classification outputs, concatenated on
            axis 1.
        rpn_num_prob_list: per-level "number of proposals per point"
            outputs, concatenated on axis 1.
        pred_reg_list: per-level box regression outputs, concatenated on
            axis 1.
        anchors_list: per-level anchors; a column holding the level index
            is appended to each before concatenation.
        rpn_iou_list: per-level IoU predictions, concatenated on axis 1.
        boxes: batched ground-truth boxes.
        im_info: per-image info passed through to the target operators.

    Returns:
        Dict with keys ``rpn_cls_loss``, ``rpn_bbox_loss``,
        ``rpn_iou_loss`` and ``rpn_num_loss``; the bbox and IoU losses are
        weighted by 2.
    """
    all_anchors_list = [
        F.concat([a, i * F.ones([a.shape[0], 1]).to(a.device)], axis=1)
        for i, a in enumerate(anchors_list)
    ]
    all_anchors_final = F.concat(all_anchors_list, axis=0)

    rpn_bbox_offset_final = F.concat(pred_reg_list, axis=1)
    rpn_cls_prob_final = F.concat(pred_cls_list, axis=1)
    rpn_iou_prob_final = F.concat(rpn_iou_list, axis=1)
    rpn_num_per_points_final = F.concat(rpn_num_prob_list, axis=1)

    rpn_labels, rpn_target_boxes = rpn_anchor_target_opr(
        boxes, im_info, all_anchors_final)
    ious_target = self.anchor_iou_target_opr(
        boxes, im_info, all_anchors_final, rpn_bbox_offset_final)

    n = rpn_labels.shape[0]
    target_boxes = rpn_target_boxes.reshape(n, -1, 4)
    rpn_cls_prob_final = rpn_cls_prob_final.reshape(n, -1, 1)
    offsets_final = rpn_bbox_offset_final.reshape(n, -1, 4)

    # Move the last axis to the front to separate the two label slots.
    rpn_labels = rpn_labels.transpose(2, 0, 1)
    a, b = rpn_labels[0], rpn_labels[1]
    # Second slot is decremented where both slots are 0.
    ignores = b - F.equal(a, 0).astype(np.float32) * F.equal(b, 0).astype(
        np.float32)
    labels = F.stack([a, ignores], axis=2).reshape(n, -1)

    cls_loss = sigmoid_cross_entropy_retina(rpn_cls_prob_final,
                                            labels,
                                            alpha=config.focal_loss_alpha,
                                            gamma=config.focal_loss_gamma)
    rpn_bbox_loss = smooth_l1_loss_retina(offsets_final, target_boxes, labels)

    rpn_labels = labels.reshape(n, -1, 2)
    rpn_iou_loss = iou_l1_loss(rpn_iou_prob_final, ious_target, rpn_labels)

    # whether one anchor produce one proposal or two.
    nlabels = ((labels.reshape(n, -1, 2) > 0).sum(2)).flatten() - 1
    c = rpn_num_per_points_final.shape[2]
    rpn_num_per_points_final = rpn_num_per_points_final.reshape(-1, c)
    nlabels = nlabels.reshape(-1)
    rpn_num_loss = softmax_loss(rpn_num_per_points_final, nlabels)

    loss_dict = {}
    loss_dict['rpn_cls_loss'] = cls_loss
    loss_dict['rpn_bbox_loss'] = 2 * rpn_bbox_loss
    loss_dict['rpn_iou_loss'] = 2 * rpn_iou_loss
    loss_dict['rpn_num_loss'] = rpn_num_loss
    return loss_dict
def mesh_grid(B, H, W):
    """Return a batched grid of x and y cell indices.

    Returns:
        Tensor stacking the x and y index planes on axis 1 (``B2HW`` per
        the original note).
    """
    # x varies along the last axis: tile the W-long range over (B, H).
    xs = F.tile(F.arange(0, W), (B, H, 1))
    # y is tiled as (B, W, H) and then swapped into (B, H, W).
    ys = F.tile(F.arange(0, H), (B, W, 1)).transpose(0, 2, 1)
    return F.stack([xs, ys], 1)
def _compute_pos_area(gtboxes, ratio=0.3):
    """Return a box around each ground-truth centre scaled by ``ratio``.

    Args:
        gtboxes: boxes whose columns 0..3 are read as x1, y1, x2, y2.
        ratio: fraction of the box width/height extended to each side of
            the centre returned by ``_compute_center``.

    Returns:
        Tensor stacked on axis 1 as ``[left, top, right, bottom]``.
    """
    height = gtboxes[:, 3] - gtboxes[:, 1]
    width = gtboxes[:, 2] - gtboxes[:, 0]
    centres = _compute_center(gtboxes)
    ctr_x, ctr_y = centres[:, 0], centres[:, 1]

    left = ctr_x - ratio * width
    right = ctr_x + ratio * width
    top = ctr_y - ratio * height
    bottom = ctr_y + ratio * height

    return F.stack([left, top, right, bottom], axis=1)
def compute_gemini_loss(self, prob, bbox_targets, labels):
    """Return the EMD loss taken over both orderings of the prediction pair.

    ``prob`` holds two interleaved predictions per sample. The loss is
    evaluated for both assignments (a, b) and (b, a), the smaller one is
    kept per sample, and the sum is normalised by the number of samples
    whose two labels are both greater than -1 (at least 1).
    """
    width = prob.shape[1]
    pair = prob.reshape(-1, 2, width).transpose(1, 0, 2)
    first, second = pair[0], pair[1]

    loss_ab = self.compute_emd_loss(first, second, bbox_targets, labels)
    loss_ba = self.compute_emd_loss(second, first, bbox_targets, labels)
    both = F.stack([loss_ab, loss_ba], axis=1)

    valid = (labels > -1).reshape(-1, 2).sum(axis=1) > 1
    normaliser = F.maximum(valid.sum(), 1)
    return both.min(axis=1).sum() / normaliser
def compute_gemini_loss_opr(self, prob, bbox_targets, labels):
    """Return the EMD loss taken over both orderings of the prediction pair.

    ``prob`` is reshaped to two predictions per sample. The loss is
    evaluated for both assignments (a, b) and (b, a) and the smaller one is
    kept per sample. The sum is divided by the number of samples (at
    least 1).

    Returns:
        Dict with the single key ``rcnn_emd_loss``.
    """
    prob = prob.reshape(prob.shape[0], 2, -1)
    prob = prob.transpose(1, 0, 2)
    a, b = prob[0], prob[1]

    loss0 = self.compute_emd_loss_opr(a, b, bbox_targets, labels)
    loss1 = self.compute_emd_loss_opr(b, a, bbox_targets, labels)
    # MegEngine's stack takes ``axis``; ``dim`` is not a valid keyword.
    loss = F.stack([loss0, loss1], axis=1)

    # Tensor.min(axis=...) already returns the reduced values, so indexing
    # with [0] would keep only the first sample's loss.
    emd_loss = loss.min(axis=1).sum() / F.maximum(loss.shape[0], 1)
    loss = {'rcnn_emd_loss': emd_loss}
    return loss
def generate_anchors_by_features(self, sizes, device):
    """Generate the anchors of every feature level.

    Args:
        sizes: one entry per feature level, forwarded to
            ``create_anchor_grid``; its length must equal
            ``self.num_features``.
        device: device forwarded to ``create_anchor_grid``.

    Returns:
        List with one ``(-1, 4)`` tensor per level: every grid position
        combined with every base anchor of that level.
    """
    assert len(sizes) == self.num_features, (
        "input features expected {}, got {}".format(self.num_features, len(sizes))
    )

    all_anchors = []
    levels = zip(sizes, self.strides, self.base_anchors)
    for size, stride, base_anchor in levels:
        grid_x, grid_y = create_anchor_grid(size, self.offset, stride, device)
        shifts = F.stack([grid_x, grid_y, grid_x, grid_y], axis=1)
        # Broadcast-add each shift with each base anchor.
        shifted = F.expand_dims(shifts, axis=1) + F.expand_dims(base_anchor, axis=0)
        all_anchors.append(shifted.reshape(-1, 4))
    return all_anchors
def mesh_grid_mge(B, H, W):
    """Return a batched grid of x indices, y indices and ones.

    Returns:
        Tensor stacking the x plane, the y plane and a plane of ones on
        axis 1 (``B3HW`` per the original note).
    """
    # x varies along the last axis: tile the W-long range over (B, H).
    xs = F.tile(F.arange(0, W), (B, H, 1))
    # y is tiled as (B, W, H) and then swapped into (B, H, W).
    ys = F.tile(F.arange(0, H), (B, W, 1)).transpose(0, 2, 1)
    unit = F.ones_like(xs)
    return F.stack([xs, ys, unit], 1)
def train_generator_batch(image, label, *, gm, netG, netloss):
    """Run one generator training step and return its loss.

    Per-frame features are extracted with ``netG.rgb`` and aggregated
    recurrently with ``netG.aggr`` once forward and once backward in time.
    Each frame is then upsampled from the concatenation of its forward and
    backward states.

    Args:
        image: input clip, unpacked as ``(B, T, C, h, w)`` and reshaped to
            3 channels per frame.
        label: target handed to ``netloss``.
        gm: context manager that also exposes ``backward(loss)``
            (presumably a MegEngine ``GradManager`` -- confirm with caller).
        netG: generator providing ``rgb``, ``aggr`` and ``upsample``.
        netloss: callable ``netloss(prediction, label, mask)``.

    Returns:
        The loss; when running distributed it is the all-reduced sum divided
        by the world size.
    """
    B, T, _, h, w = image.shape
    biup = get_bilinear(image)
    # An earlier, now disabled, experiment derived this mask from a 3x3
    # convolution over the channel-mean of the label (depthwise over T).
    # It is currently the constant 1.
    HR_mask = 1
    netG.train()
    with gm:
        # Extract features for all frames in a single batched call.
        feats = netG.rgb(image.reshape(B * T, 3, h, w)).reshape(B, T, -1, h, w)

        # Forward pass in time, seeded with the first frame's features.
        state = feats[:, 0, ...]
        forward_hiddens = [state]
        for t in range(1, T):
            state = netG.aggr(F.concat([state, feats[:, t, ...]], axis=1))
            forward_hiddens.append(state)

        # Backward pass in time, seeded with the last frame's features.
        state = feats[:, T - 1, ...]
        backward_hiddens = [state]
        for t in range(T - 2, -1, -1):
            state = netG.aggr(F.concat([state, feats[:, t, ...]], axis=1))
            backward_hiddens.append(state)

        # Upsample every frame; the backward list is in reverse time order.
        frames = [
            netG.upsample(F.concat([fwd, bwd], axis=1))
            for fwd, bwd in zip(forward_hiddens, reversed(backward_hiddens))
        ]
        res = F.stack(frames, axis=1)  # [B,T,3,H,W] per the original note
        res = res + biup
        # The original notes mention adding an edge loss here by detecting
        # the label's edge map.
        loss = netloss(res, label, HR_mask)
        gm.backward(loss)
        if dist.is_distributed():
            loss = dist.functional.all_reduce_sum(loss) / dist.get_world_size()
        return loss
def encode(self, bbox: Tensor, gt: Tensor) -> Tensor:
    """Encode ground-truth boxes as normalised deltas relative to ``bbox``.

    Both inputs are converted to (width, height, centre x, centre y) by
    ``self._box_ltrb_to_cs_opr``. The deltas ``[dx, dy, dw, dh]`` are
    stacked on axis 1, then shifted by ``self.reg_mean`` and scaled by
    ``self.reg_std``.
    """
    src_w, src_h, src_cx, src_cy = self._box_ltrb_to_cs_opr(bbox)
    dst_w, dst_h, dst_cx, dst_cy = self._box_ltrb_to_cs_opr(gt)

    # Centre offsets are relative to the source size; sizes are log-ratios.
    dx = (dst_cx - src_cx) / src_w
    dy = (dst_cy - src_cy) / src_h
    dw = F.log(dst_w / src_w)
    dh = F.log(dst_h / src_h)

    target = F.stack([dx, dy, dw, dh], axis=1)
    target -= self.reg_mean
    target /= self.reg_std
    return target
def get_ground_truth(self, anchors, batched_gt_boxes, batched_num_gts):
    """Build per-anchor class labels and regression targets for a batch.

    For each image the valid ground-truth boxes are matched to ``anchors``
    with ``self.matcher`` on their IoU. Anchors labelled 1 by the matcher
    receive the class stored in column 4 of their matched box.

    Returns:
        Tuple ``(labels, offsets)``, each stacked over the batch on axis 0
        and detached.
    """
    per_image_labels = []
    per_image_offsets = []
    batch_size = batched_gt_boxes.shape[0]
    for bid in range(batch_size):
        num_gt = batched_num_gts[bid]
        gt_boxes = batched_gt_boxes[bid, :num_gt]

        iou = layers.get_iou(gt_boxes[:, :4], anchors)
        match_indices, labels = self.matcher(iou)
        matched = gt_boxes[match_indices]

        # Replace the matcher's foreground flag with the real class id.
        foreground = labels == 1
        labels[foreground] = matched[foreground, 4].astype(np.int32)

        offsets = self.box_coder.encode(anchors, matched[:, :4])
        per_image_labels.append(labels)
        per_image_offsets.append(offsets)

    stacked_labels = F.stack(per_image_labels, axis=0).detach()
    stacked_offsets = F.stack(per_image_offsets, axis=0).detach()
    return (stacked_labels, stacked_offsets)
def generate_anchors_by_features(self, sizes, device):
    """Generate the point anchors of every feature level.

    Args:
        sizes: one entry per feature level, forwarded to
            ``create_anchor_grid``; its length must equal
            ``self.num_features``.
        device: device forwarded to ``create_anchor_grid``.

    Returns:
        List with one ``(-1, 2)`` tensor per level, where every grid point
        is repeated ``self.num_anchors`` times.
    """
    assert len(sizes) == self.num_features, (
        "input features expected {}, got {}".format(self.num_features, len(sizes))
    )

    all_anchors = []
    for size, stride in zip(sizes, self.strides):
        grid_x, grid_y = create_anchor_grid(size, self.offset, stride, device)
        points = F.stack([grid_x, grid_y], axis=1)
        # FIXME: need F.repeat
        repeated_shape = (points.shape[0], self.num_anchors, 2)
        repeated = F.broadcast_to(F.expand_dims(points, axis=1), repeated_shape)
        all_anchors.append(repeated.reshape(-1, 2))
    return all_anchors
def compute_emd_loss(self, a, b, bbox_targets, labels):
    """Return the summed classification and box loss of one pair ordering.

    ``a`` and ``b`` are interleaved row-wise. The last ``self.n`` columns of
    each row are the class scores and the remaining columns are box
    predictions. The per-row losses are summed over each pair.

    Args:
        a: first prediction of each pair.
        b: second prediction of each pair.
        bbox_targets: regression targets, reshaped to ``(-1, 4)``.
        labels: class labels, flattened.

    Returns:
        Tensor with one loss value per pair.
    """
    c = a.shape[1]
    prob = F.stack([a, b], axis=1).reshape(-1, c)
    pred_bbox, cls_scores = prob[:, :-self.n], prob[:, -self.n:]

    bbox_targets = bbox_targets.reshape(-1, 4)
    labels = labels.flatten()

    cls_loss = softmax_loss_opr(cls_scores, labels)
    pred_bbox = pred_bbox.reshape(-1, self.n, 4)
    rcnn_bbox_loss = smooth_l1_loss_rcnn_opr(
        pred_bbox, bbox_targets, labels, config.rcnn_smooth_l1_beta)

    loss = cls_loss + rcnn_bbox_loss
    # Rows alternate a, b: add the two members of each pair.
    loss = loss.reshape(-1, 2).sum(axis=1)
    return loss
def decode_outputs(self, outputs):
    """Convert raw grid-relative predictions to scaled values, in place.

    For every level in ``self.hw`` / ``self.strides`` a grid of cell
    indices and a matching stride tensor are built. The first two channels
    of ``outputs`` are offset by the grid and multiplied by the stride.
    Channels 2:4 are exponentiated and multiplied by the stride.

    Returns:
        The same ``outputs`` object after in-place modification.
    """
    level_grids = []
    level_strides = []
    for (hsize, wsize), stride in zip(self.hw, self.strides):
        xv, yv = meshgrid(F.arange(hsize), F.arange(wsize))
        cells = F.stack((xv, yv), 2).reshape(1, -1, 2)
        level_grids.append(cells)
        lead_shape = cells.shape[:2]
        level_strides.append(F.full((*lead_shape, 1), stride))

    all_grids = F.concat(level_grids, axis=1)
    all_strides = F.concat(level_strides, axis=1)

    outputs[..., :2] = (outputs[..., :2] + all_grids) * all_strides
    outputs[..., 2:4] = F.exp(outputs[..., 2:4]) * all_strides
    return outputs
def compute_emd_loss_opr(self, a, b, bbox_targets, labels):
    """Return the summed classification and box loss of one pair ordering.

    ``a`` and ``b`` are interleaved row-wise. The last ``self.n`` columns of
    each row are the class scores and the remaining columns are box
    offsets. Per-row losses are summed over each pair and zeroed for pairs
    whose two labels are both negative.
    """
    flat_labels = labels.flatten()
    width = a.shape[1]
    interleaved = F.stack([a, b], axis=1).reshape(-1, width)
    box_part = interleaved[:, :-self.n]
    score_part = interleaved[:, -self.n:]

    cls_loss = softmax_loss_opr(score_part, flat_labels)

    num_rows = box_part.shape[0]
    box_part = box_part.reshape(num_rows, -1, 4)
    flat_targets = bbox_targets.reshape(-1, 4)
    reg_loss = smooth_l1_loss_rcnn_opr(box_part, flat_targets, flat_labels,
                                       sigma=config.rcnn_smooth_l1_beta)

    # 0 for pairs where both labels are negative, 1 otherwise.
    pair_valid = 1 - ((flat_labels < 0).reshape(-1, 2).sum(axis=1) > 1)
    reg_weight = 1
    per_row = cls_loss + reg_weight * reg_loss
    return per_row.reshape(-1, 2).sum(axis=1) * pair_valid
def bbox_transform_opr(bbox, gt):
    """Transform the bounding box and ground truth to the loss targets.

    The four box coordinates are in axis 1 of both inputs.

    Returns:
        Tensor stacked on axis 1 as ``[dx, dy, dw, dh]``.
    """

    def _size_and_centre(box):
        # Width/height use the inclusive-pixel (+1) convention.
        width = box[:, 2] - box[:, 0] + 1
        height = box[:, 3] - box[:, 1] + 1
        ctr_x = box[:, 0] + 0.5 * width
        ctr_y = box[:, 1] + 0.5 * height
        return width, height, ctr_x, ctr_y

    src_w, src_h, src_cx, src_cy = _size_and_centre(bbox)
    dst_w, dst_h, dst_cx, dst_cy = _size_and_centre(gt)

    dx = (dst_cx - src_cx) / src_w
    dy = (dst_cy - src_cy) / src_h
    dw = F.log(dst_w / src_w)
    dh = F.log(dst_h / src_h)
    return F.stack([dx, dy, dw, dh], axis=1)
def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
    """Run the RCNN head: losses in training mode, boxes otherwise.

    Args:
        fpn_fms: list of FPN feature maps; the first entry is dropped and
            the rest are reversed before pooling.
        rcnn_rois: proposals; columns 1:5 are used as box coordinates.
        labels: training labels, forwarded to ``roi_pool``.
        bbox_targets: training regression targets, forwarded to
            ``roi_pool``.

    Returns:
        In training mode a dict with ``loss_rcnn_emd`` and, when
        ``self.refinement`` is set, ``final_rcnn_emd``. Otherwise the
        predicted boxes concatenated with their class probabilities on
        axis 2.
    """
    # stride: 64,32,16,8,4 -> 4, 8, 16, 32
    fpn_fms = fpn_fms[1:]
    fpn_fms.reverse()
    stride = [4, 8, 16, 32]
    poo5, rcnn_rois, labels, bbox_targets = roi_pool(
        fpn_fms, rcnn_rois, stride, (7, 7), 'roi_align', labels, bbox_targets)
    poo5 = F.flatten(poo5, start_axis=1)
    fc1 = F.relu(self.fc1(poo5))
    fc2 = F.relu(self.fc2(fc1))

    # Two predictions per RoI, interleaved row-wise.
    a = self.a(fc2)
    b = self.b(fc2)
    prob = F.stack([a, b], axis=1).reshape(-1, a.shape[1])

    if self.refinement:
        final_prob = self.refinement_module(prob, fc2)

    if self.training:
        emd_loss = self.compute_gemini_loss(prob, bbox_targets, labels)
        loss_dict = {}
        loss_dict['loss_rcnn_emd'] = emd_loss
        # Test the flag, not the bound method: ``self.refinement_module`` is
        # always truthy and ``final_prob`` only exists when the flag is set.
        if self.refinement:
            final_emd_loss = self.compute_gemini_loss(
                final_prob, bbox_targets, labels)
            loss_dict['final_rcnn_emd'] = final_emd_loss
        return loss_dict
    else:
        # NOTE(review): inference decodes ``prob`` even when refinement is
        # enabled -- confirm whether ``final_prob`` was intended here.
        offsets, cls_scores = prob[:, :-self.n], prob[:, -self.n:]
        pred_bbox = offsets.reshape(-1, self.n, 4)
        cls_prob = F.softmax(cls_scores, axis=1)
        n = rcnn_rois.shape[0]
        # Duplicate every RoI so it lines up with its two predictions.
        rois = F.broadcast_to(F.expand_dims(rcnn_rois[:, 1:5], axis=1),
                              (n, 2, 4)).reshape(-1, 4)
        normalized = config.rcnn_bbox_normalize_targets
        pred_boxes = restore_bbox(rois, pred_bbox, normalized, config)
        pred_bbox = F.concat(
            [pred_boxes, F.expand_dims(cls_prob, axis=2)], axis=2)
        return pred_bbox
def refinement_module(self, prob, fc2):
    """Refine the first-stage predictions using the shared ``fc2`` feature.

    The first-stage output is split into box offsets and class scores. The
    entry at index 1 along the class axis (offsets plus softmax score) is
    tiled six times and concatenated to a duplicated ``fc2``. The result
    goes through ``self.fc3`` and the two heads ``self.q`` / ``self.r``.

    Args:
        prob: first-stage predictions, reshaped to ``(-1, 5 * self.n)``.
        fc2: 2-D feature tensor with one row per RoI.

    Returns:
        Refined predictions reshaped to ``(-1, 10 * self.n)``.
    """
    m = prob.reshape(-1, 5 * self.n)
    offsets, scores = m[:, :-self.n], m[:, -self.n:]
    offsets = offsets.reshape(-1, self.n, 4)
    cls_scores = F.expand_dims(F.softmax(scores, axis=1), axis=2)

    # Keep only index 1 along the class axis.
    # NOTE(review): presumably the foreground class -- confirm.
    pred_boxes = F.concat([offsets, cls_scores], axis=2)[:, 1]
    n, c = pred_boxes.shape
    pred_boxes = F.broadcast_to(F.expand_dims(pred_boxes, axis=1),
                                (n, 6, c)).reshape(n, -1)

    # Duplicate each RoI feature so it lines up with its two predictions.
    n, c = fc2.shape
    fc3 = F.broadcast_to(F.expand_dims(fc2, axis=1), (n, 2, c)).reshape(-1, c)
    fc3 = F.concat([fc3, pred_boxes], axis=1)
    fc3 = self.relu(self.fc3(fc3))
    fc3 = fc3.reshape(n, 2, -1).transpose(1, 0, 2)

    a = self.q(fc3[0])
    b = self.r(fc3[1])
    prob = F.stack([a, b], axis=1).reshape(-1, 10 * self.n)
    return prob
def get_output_and_grid(self, output, k, stride, dtype):
    """Decode one level's raw output and return it with its cell grid.

    The grid cached in ``self.grids[k]`` is rebuilt (and re-cached) when
    its spatial shape differs from that of ``output``. The output is
    rearranged to one row per (anchor, cell). Channels 0:2 are offset by
    the grid and multiplied by ``stride``. Channels 2:4 are exponentiated
    and multiplied by ``stride``.

    Returns:
        Tuple ``(output, grid)`` with ``grid`` viewed as ``(1, -1, 2)``.
    """
    cached_grid = self.grids[k]

    batch_size = output.shape[0]
    channels = 5 + self.num_classes
    hsize, wsize = output.shape[-2:]

    if cached_grid.shape[2:4] != output.shape[2:4]:
        yv, xv = meshgrid([F.arange(hsize), F.arange(wsize)])
        cached_grid = F.stack((xv, yv), 2).reshape(1, 1, hsize, wsize, 2).type(dtype)
        self.grids[k] = cached_grid

    # Move the channel axis last, then flatten anchors and cells together.
    output = output.view(batch_size, self.n_anchors, channels, hsize, wsize)
    output = output.permute(0, 1, 3, 4, 2)
    output = output.reshape(batch_size, self.n_anchors * hsize * wsize, -1)

    flat_grid = cached_grid.view(1, -1, 2)
    output[..., :2] = (output[..., :2] + flat_grid) * stride
    output[..., 2:4] = F.exp(output[..., 2:4]) * stride
    return output, flat_grid
def decode(self, anchors: Tensor, deltas: Tensor) -> Tensor:
    """Decode normalised deltas into boxes relative to ``anchors``.

    The deltas are de-normalised with ``self.reg_std`` / ``self.reg_mean``.
    Every group of four columns is then read as (dx, dy, dw, dh) and
    applied to the anchor's centre and size.

    Args:
        anchors: reference boxes, converted to (width, height, centre x,
            centre y) by ``self._box_ltrb_to_cs_opr``.
        deltas: regression outputs; left unmodified by this call.

    Returns:
        Tensor of decoded boxes with ``x1, y1, x2, y2`` for each group of
        four columns, flattened back to two dimensions.
    """
    # Out-of-place on purpose: the augmented assignments used previously
    # modified the caller's tensor as a side effect.
    deltas = deltas * self.reg_std + self.reg_mean

    (
        anchor_width,
        anchor_height,
        anchor_ctr_x,
        anchor_ctr_y,
    ) = self._box_ltrb_to_cs_opr(anchors, 1)

    pred_ctr_x = anchor_ctr_x + deltas[:, 0::4] * anchor_width
    pred_ctr_y = anchor_ctr_y + deltas[:, 1::4] * anchor_height
    pred_width = anchor_width * F.exp(deltas[:, 2::4])
    pred_height = anchor_height * F.exp(deltas[:, 3::4])

    pred_x1 = pred_ctr_x - 0.5 * pred_width
    pred_y1 = pred_ctr_y - 0.5 * pred_height
    pred_x2 = pred_ctr_x + 0.5 * pred_width
    pred_y2 = pred_ctr_y + 0.5 * pred_height

    pred_box = F.stack([pred_x1, pred_y1, pred_x2, pred_y2], axis=2)
    pred_box = pred_box.reshape(pred_box.shape[0], -1)
    return pred_box
def test_generator_batch(image, *, netG):
    """Run the generator in eval mode over a clip and return the output.

    Per-frame features are extracted with ``netG.rgb`` and aggregated
    recurrently with ``netG.aggr`` once forward and once backward in time.
    Each frame is then upsampled from the concatenation of its forward and
    backward states. Progress is reported through ``tqdm``.

    Args:
        image: input clip, unpacked as ``(B, T, C, h, w)`` and reshaped to
            3 channels per frame.
        netG: generator providing ``rgb``, ``aggr`` and ``upsample``.

    Returns:
        Stacked per-frame outputs plus the result of ``get_bilinear(image)``.
    """
    B, T, _, h, w = image.shape
    biup = get_bilinear(image)
    netG.eval()

    # Extract features for all frames in a single batched call.
    feats = netG.rgb(image.reshape(B * T, 3, h, w)).reshape(B, T, -1, h, w)

    # Forward pass in time, seeded with the first frame's features.
    state = feats[:, 0, ...]
    forward_hiddens = [state]
    for t in tqdm(range(1, T)):
        state = netG.aggr(F.concat([state, feats[:, t, ...]], axis=1))
        forward_hiddens.append(state)

    # Backward pass in time, seeded with the last frame's features.
    state = feats[:, T - 1, ...]
    backward_hiddens = [state]
    for t in tqdm(range(T - 2, -1, -1)):
        state = netG.aggr(F.concat([state, feats[:, t, ...]], axis=1))
        backward_hiddens.append(state)

    # Upsample every frame; backward_hiddens is stored in reverse time order.
    frames = []
    for t in tqdm(range(T)):
        merged = F.concat(
            [forward_hiddens[t], backward_hiddens[T - t - 1]], axis=1)
        frames.append(netG.upsample(merged))

    res = F.stack(frames, axis=1)  # [B,T,3,H,W] per the original note
    return res + biup
def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
    """Build per-point labels, regression targets and centerness targets.

    For every image each point is assigned to the ground-truth box with the
    smallest area among those that (a) fall inside the size range of the
    point's level and (b) contain the point -- or, when
    ``cfg.center_sampling_radius > 0``, contain it within the radius-limited
    centre box. Points with no admissible box get label 0.

    Args:
        anchors_list: per-level point anchors.
        batched_gt_boxes: batched ground-truth boxes; columns 0..3 are the
            coordinates and column 4 the class.
        batched_num_gts: number of valid boxes for each image.

    Returns:
        Tuple ``(labels, offsets, ctrness)``, each stacked over the batch
        on axis 0 and detached.
    """
    labels_list = []
    offsets_list = []
    ctrness_list = []

    all_level_anchors = F.concat(anchors_list, axis=0)

    # The per-point size range depends only on the levels, not on the
    # image, so it is built once for the whole batch.
    object_sizes_of_interest = F.concat([
        F.broadcast_to(
            F.expand_dims(mge.tensor(size, dtype=np.float32), axis=0),
            (anchors_i.shape[0], 2))
        for anchors_i, size in zip(
            anchors_list, self.cfg.object_sizes_of_interest)
    ], axis=0)
    size_lower = F.expand_dims(object_sizes_of_interest[:, 0], axis=0)
    size_upper = F.expand_dims(object_sizes_of_interest[:, 1], axis=0)

    for bid in range(batched_gt_boxes.shape[0]):
        gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

        offsets = self.point_coder.encode(
            all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1))

        max_offsets = F.max(offsets, axis=2)
        is_cared_in_the_level = (
            (max_offsets >= size_lower) & (max_offsets <= size_upper))

        if self.cfg.center_sampling_radius > 0:
            gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2
            is_in_boxes = []
            for stride, anchors_i in zip(self.cfg.stride, anchors_list):
                radius = stride * self.cfg.center_sampling_radius
                # Centre box clipped so it never leaves the ground truth.
                center_boxes = F.concat([
                    F.maximum(gt_centers - radius, gt_boxes[:, :2]),
                    F.minimum(gt_centers + radius, gt_boxes[:, 2:4]),
                ], axis=1)
                center_offsets = self.point_coder.encode(
                    anchors_i, F.expand_dims(center_boxes, axis=1))
                is_in_boxes.append(F.min(center_offsets, axis=2) > 0)
            is_in_boxes = F.concat(is_in_boxes, axis=1)
        else:
            is_in_boxes = F.min(offsets, axis=2) > 0

        gt_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (
            gt_boxes[:, 3] - gt_boxes[:, 1])
        # FIXME: use repeat instead of broadcast_to
        areas = F.broadcast_to(F.expand_dims(gt_area, axis=1), offsets.shape[:2])
        # Inadmissible (box, point) pairs can never win the argmin.
        areas[~is_cared_in_the_level] = float("inf")
        areas[~is_in_boxes] = float("inf")

        match_indices = F.argmin(areas, axis=0)
        gt_boxes_matched = gt_boxes[match_indices]
        anchor_min_area = F.indexing_one_hot(areas, match_indices, axis=0)

        labels = gt_boxes_matched[:, 4].astype(np.int32)
        labels[anchor_min_area == float("inf")] = 0
        offsets = self.point_coder.encode(all_level_anchors,
                                          gt_boxes_matched[:, :4])

        left_right = offsets[:, [0, 2]]
        top_bottom = offsets[:, [1, 3]]
        ctrness = F.sqrt(
            F.maximum(
                F.min(left_right, axis=1) / F.max(left_right, axis=1), 0)
            * F.maximum(
                F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1), 0))

        labels_list.append(labels)
        offsets_list.append(offsets)
        ctrness_list.append(ctrness)

    return (
        F.stack(labels_list, axis=0).detach(),
        F.stack(offsets_list, axis=0).detach(),
        F.stack(ctrness_list, axis=0).detach(),
    )
def run(data1, data2):
    """Stack ``data1`` and ``data2`` along axis ``ai``.

    ``ai`` is not a parameter; it is resolved from the enclosing or module
    scope at call time.
    """
    stacked = F.stack([data1, data2], axis=ai)
    return stacked
def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
    """Build per-point labels, regression targets and centerness targets.

    For every image and level, the ``cfg.anchor_topk`` entries returned by
    ``F.topk`` on the centre distances are taken as candidates for each
    ground-truth box. A candidate is foreground when its IoU reaches the
    mean plus standard deviation of that box's candidate IoUs and the point
    lies inside the box. Each point is then matched to the box with the
    highest remaining IoU; points with none get label 0.

    Args:
        anchors_list: per-level point anchors.
        batched_gt_boxes: batched ground-truth boxes; columns 0..3 are the
            coordinates and column 4 the class.
        batched_num_gts: number of valid boxes for each image.

    Returns:
        Tuple ``(labels, offsets, ctrness)``, each stacked over the batch
        on axis 0 and detached.
    """
    labels_list = []
    offsets_list = []
    ctrness_list = []

    all_level_anchors = F.concat(anchors_list, axis=0)
    for bid in range(batched_gt_boxes.shape[0]):
        gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

        # The centres depend only on the image's boxes, so they are
        # computed once here rather than once per pyramid level.
        gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2
        gt_centers_expanded = F.expand_dims(gt_centers, axis=1)

        ious = []
        candidate_idxs = []
        base = 0
        for stride, anchors_i in zip(self.cfg.stride, anchors_list):
            # Each point is turned into a square box centred on it.
            ious.append(
                layers.get_iou(
                    gt_boxes[:, :4],
                    F.concat([
                        anchors_i - stride * self.cfg.anchor_scale / 2,
                        anchors_i + stride * self.cfg.anchor_scale / 2,
                    ], axis=1)))
            distances = F.sqrt(
                F.sum((gt_centers_expanded - anchors_i)**2, axis=2))
            _, topk_idxs = F.topk(distances, self.cfg.anchor_topk)
            # Offset the per-level indices into the concatenated space.
            candidate_idxs.append(base + topk_idxs)
            base += anchors_i.shape[0]
        ious = F.concat(ious, axis=1)
        candidate_idxs = F.concat(candidate_idxs, axis=1)

        candidate_ious = F.gather(ious, 1, candidate_idxs)
        ious_thr = (F.mean(candidate_ious, axis=1, keepdims=True)
                    + F.std(candidate_ious, axis=1, keepdims=True))
        is_foreground = F.scatter(
            F.zeros(ious.shape), 1, candidate_idxs,
            F.ones(candidate_idxs.shape)).astype(bool) & (ious >= ious_thr)

        is_in_boxes = F.min(self.point_coder.encode(
            all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1)),
            axis=2) > 0

        # Inadmissible (box, point) pairs get the sentinel IoU -1.
        ious[~is_foreground] = -1
        ious[~is_in_boxes] = -1

        match_indices = F.argmax(ious, axis=0)
        gt_boxes_matched = gt_boxes[match_indices]
        anchor_max_iou = F.indexing_one_hot(ious, match_indices, axis=0)

        labels = gt_boxes_matched[:, 4].astype(np.int32)
        labels[anchor_max_iou == -1] = 0
        offsets = self.point_coder.encode(all_level_anchors,
                                          gt_boxes_matched[:, :4])

        left_right = offsets[:, [0, 2]]
        top_bottom = offsets[:, [1, 3]]
        ctrness = F.sqrt(
            F.clip(F.min(left_right, axis=1) / F.max(left_right, axis=1),
                   lower=0)
            * F.clip(F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1),
                     lower=0))

        labels_list.append(labels)
        offsets_list.append(offsets)
        ctrness_list.append(ctrness)

    return (
        F.stack(labels_list, axis=0).detach(),
        F.stack(offsets_list, axis=0).detach(),
        F.stack(ctrness_list, axis=0).detach(),
    )