def _point_score(self, inputs, labels, lengths):
    batch_size, seq_len, n_labels = inputs.shape
    # Get the true label logit value
    flattened_inputs = inputs.reshape([-1])
    offsets = paddle.unsqueeze(
        self._get_batch_index(batch_size) * seq_len * n_labels, 1)
    offsets += paddle.unsqueeze(self._get_seq_index(seq_len) * n_labels, 0)
    flattened_tag_indices = paddle.reshape(offsets + labels, [-1])

    scores = paddle.gather(flattened_inputs, flattened_tag_indices).reshape(
        [batch_size, seq_len])

    mask = paddle.cast(
        sequence_mask(
            self._get_batch_seq_index(batch_size, seq_len), lengths),
        'float32')
    mask = mask[:, :seq_len]

    mask_scores = scores * mask
    score = paddle.sum(mask_scores, 1)
    return score
def outdegree(self, nodes=None):
    """Return the outdegree of the given nodes.

    Args:
        nodes: The nodes whose outdegree is queried. If ``nodes`` is None,
            return the outdegree of all nodes.

    Returns:
        A numpy.array or paddle.Tensor with the given nodes' outdegree.
    """
    if nodes is None:
        return self.adj_src_index.degree
    else:
        if self._is_tensor:
            return paddle.gather(self.adj_src_index.degree, nodes)
        else:
            return self.adj_src_index.degree[nodes]
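# A minimal usage sketch for outdegree() above, assuming a PGL-style graph in
# numpy (non-tensor) mode; the toy graph and printed values are illustrative only.
import numpy as np
import pgl

g = pgl.Graph(num_nodes=4, edges=[(0, 1), (0, 2), (1, 2), (3, 0)])
print(g.outdegree())                  # all nodes: [2 1 0 1]
print(g.outdegree(np.array([0, 3])))  # selected nodes: [2 1]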
def rpn_anchor_target(anchors,
                      gt_boxes,
                      rpn_batch_size_per_im,
                      rpn_positive_overlap,
                      rpn_negative_overlap,
                      rpn_fg_fraction,
                      use_random=True,
                      batch_size=1,
                      weights=[1., 1., 1., 1.]):
    tgt_labels = []
    tgt_bboxes = []
    tgt_deltas = []
    for i in range(batch_size):
        gt_bbox = gt_boxes[i]

        # Step1: match anchor and gt_bbox
        matches, match_labels = label_box(anchors, gt_bbox,
                                          rpn_positive_overlap,
                                          rpn_negative_overlap, True)
        # Step2: sample anchor
        fg_inds, bg_inds = subsample_labels(match_labels,
                                            rpn_batch_size_per_im,
                                            rpn_fg_fraction, 0, use_random)
        # Fill with the ignore label (-1), then set positive and negative labels
        labels = paddle.full(match_labels.shape, -1, dtype='int32')
        labels = paddle.scatter(labels, fg_inds, paddle.ones_like(fg_inds))
        labels = paddle.scatter(labels, bg_inds, paddle.zeros_like(bg_inds))
        # Step3: make output
        matched_gt_boxes = paddle.gather(gt_bbox, matches)

        tgt_delta = bbox2delta(anchors, matched_gt_boxes, weights)
        labels.stop_gradient = True
        matched_gt_boxes.stop_gradient = True
        tgt_delta.stop_gradient = True
        tgt_labels.append(labels)
        tgt_bboxes.append(matched_gt_boxes)
        tgt_deltas.append(tgt_delta)

    return tgt_labels, tgt_bboxes, tgt_deltas
def _trans_score(self, labels, lengths):
    batch_size, seq_len = labels.shape

    if self.with_start_stop_tag:
        # Add START and STOP on either side of the labels
        start_tensor, stop_tensor = self._get_start_stop_tensor(batch_size)
        labels_ext = paddle.concat(
            [start_tensor, labels, stop_tensor], axis=1)
        mask = paddle.cast(
            sequence_mask(
                self._get_batch_seq_index(batch_size, seq_len),
                lengths + 1), 'int64')
        pad_stop = paddle.full(
            (batch_size, seq_len + 2),
            dtype='int64',
            fill_value=self.stop_idx)
        labels_ext = (1 - mask) * pad_stop + mask * labels_ext
    else:
        mask = paddle.cast(
            sequence_mask(
                self._get_batch_seq_index(batch_size, seq_len), lengths),
            'int64')
        labels_ext = labels

    start_tag_indices = labels_ext[:, :-1]
    stop_tag_indices = labels_ext[:, 1:]

    # Encode the indices in a flattened representation.
    transition_indices = start_tag_indices * self.num_tags + stop_tag_indices
    flattened_transition_indices = transition_indices.reshape([-1])
    flattened_transition_params = paddle.flatten(self.transitions)
    scores = paddle.gather(
        flattened_transition_params,
        flattened_transition_indices).reshape([batch_size, -1])
    mask_scores = scores * mask[:, 1:]

    # Accumulate the transition score
    score = paddle.sum(mask_scores, 1)
    return score
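# A toy check of the flattened transition lookup used in _trans_score above:
# indexing a [num_tags, num_tags] matrix at (start, stop) is equivalent to
# gathering start * num_tags + stop from the flattened matrix.
import paddle

num_tags = 3
transitions = paddle.arange(9, dtype='float32').reshape([3, 3])  # toy matrix
start = paddle.to_tensor([0, 2])  # previous tags
stop = paddle.to_tensor([1, 0])   # next tags
flat_idx = start * num_tags + stop  # [1, 6]
scores = paddle.gather(paddle.flatten(transitions), flat_idx)
print(scores.numpy())  # [1. 6.] == transitions[0, 1], transitions[2, 0]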
def forward(self, graph, feature, norm=None):
    """
    Args:
        graph: `pgl.Graph` instance.
        feature: A tensor with shape (num_nodes, input_size).
        norm: (default None). If :code:`norm` is not None, then the feature
            will be normalized by the given norm. If :code:`norm` is None,
            then we use the Laplacian degree norm.

    Returns:
        A tensor with shape (num_nodes, output_size).
    """
    if self.self_loop:
        index = paddle.arange(start=0, end=graph.num_nodes, dtype="int64")
        self_loop_edges = paddle.transpose(
            paddle.stack((index, index)), [1, 0])

        mask = graph.edges[:, 0] != graph.edges[:, 1]
        mask_index = paddle.masked_select(
            paddle.arange(end=graph.num_edges), mask)
        edges = paddle.gather(graph.edges, mask_index)  # remove self loops

        edges = paddle.concat((self_loop_edges, edges), axis=0)
        graph = pgl.Graph(num_nodes=graph.num_nodes, edges=edges)

    if norm is None:
        norm = GF.degree_norm(graph)

    h0 = feature

    for _ in range(self.k_hop):
        feature = feature * norm
        feature = graph.send_recv(feature)
        feature = feature * norm
        feature = self.alpha * h0 + (1 - self.alpha) * feature

    return feature
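# A small standalone sketch of the self-loop removal pattern used above:
# mask the edge list on src != dst, then gather the surviving rows
# (toy edges only).
import paddle

edges = paddle.to_tensor([[0, 1], [1, 1], [2, 0]])
mask = edges[:, 0] != edges[:, 1]
keep = paddle.masked_select(paddle.arange(end=edges.shape[0]), mask)
print(paddle.gather(edges, keep).numpy())  # [[0 1] [2 0]]: (1, 1) removed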
def build_net(input_size, num_class, hidden_size, num_layers):
    num_nodes = F.data("num_nodes", shape=[1], dtype="int32")
    edges = F.data("edges", shape=[None, 2], dtype="int32")
    sample_index = F.data("sample_index", shape=[None], dtype="int32")
    index = F.data("index", shape=[None], dtype="int32")
    label = F.data("label", shape=[None], dtype="int64")
    label = paddle.reshape(label, [-1, 1])

    feat = F.data("feature", shape=[None, input_size], dtype="float32")

    model = GraphSage(
        input_size=input_size,
        num_class=num_class,
        hidden_size=hidden_size,
        num_layers=num_layers)

    # Build the graph once and run the model on it.
    g = pgl.Graph(num_nodes=num_nodes, edges=edges)
    pred = model(g, feat)
    pred = paddle.gather(pred, index)
    loss = paddle.nn.functional.cross_entropy(pred, label)
    acc = paddle.metric.accuracy(input=pred, label=label, k=1)
    return loss, acc
def forward_train(self, body_feats, rois, rois_num, inputs, targets,
                  bbox_feat):
    """
    Args:
        body_feats (list[Tensor]): Multi-level backbone features.
        rois (list[Tensor]): Proposals for each batch with shape [N, 4].
        rois_num (Tensor): The number of proposals for each batch.
        inputs (dict): Ground truth info.
    """
    tgt_labels, _, tgt_gt_inds = targets
    rois, rois_num, tgt_classes, tgt_masks, mask_index, tgt_weights = \
        self.mask_assigner(rois, tgt_labels, tgt_gt_inds, inputs)
    if self.share_bbox_feat:
        rois_feat = paddle.gather(bbox_feat, mask_index)
    else:
        rois_feat = self.roi_extractor(body_feats, rois, rois_num)

    mask_feat = self.head(rois_feat)
    mask_logits = self.mask_fcn_logits(mask_feat)
    loss_mask = self.get_loss(mask_logits, tgt_classes, tgt_masks,
                              tgt_weights)
    return {'loss_mask': loss_mask}
def read_rows(data, index):
    """Slice rows with the given index from a tensor or a dictionary of tensors.

    This function helps to slice data from a nested dictionary structure.

    Args:
        data: A dictionary of tensors or a tensor.
        index: A tensor of slicing indices.

    Returns:
        A dictionary of tensors or a tensor, sliced along axis 0.
    """
    if data is None:
        return None
    elif isinstance(data, dict):
        new_data = {}
        for key, value in data.items():
            new_data[key] = read_rows(value, index)
        return new_data
    else:
        return paddle.gather(data, index)
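# A usage sketch for read_rows with a nested dictionary (toy tensors only):
import paddle

data = {
    "feat": paddle.to_tensor([[1., 2.], [3., 4.], [5., 6.]]),
    "meta": {"ids": paddle.to_tensor([10, 20, 30])},
}
index = paddle.to_tensor([2, 0])
sliced = read_rows(data, index)
print(sliced["feat"].numpy())         # [[5. 6.] [1. 2.]]
print(sliced["meta"]["ids"].numpy())  # [30 10]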
def lovasz_hinge_flat(logits, labels):
    r"""
    Binary Lovasz hinge loss.

    Args:
        logits (Tensor): Shape is [P], logits at each prediction
            (between -\infty and +\infty).
        labels (Tensor): Shape is [P], binary ground truth labels (0 or 1).
    """
    if len(labels) == 0:
        # only void pixels, the gradients should be 0
        return logits.sum() * 0.
    signs = 2. * labels - 1.
    signs.stop_gradient = True
    errors = 1. - logits * signs
    errors_sorted, perm = paddle.fluid.core.ops.argsort(
        errors, 'axis', 0, 'descending', True)
    errors_sorted.stop_gradient = False
    gt_sorted = paddle.gather(labels, perm)
    grad = lovasz_grad(gt_sorted)
    grad.stop_gradient = True
    loss = paddle.sum(F.relu(errors_sorted) * grad)
    return loss
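# A minimal call sketch for lovasz_hinge_flat, assuming lovasz_grad and
# F (paddle.nn.functional) are available in this module as above:
import paddle

logits = paddle.to_tensor([2.0, -1.0, 0.5])
labels = paddle.to_tensor([1.0, 0.0, 1.0])
loss = lovasz_hinge_flat(logits, labels)
print(float(loss))  # a scalar hinge-style loss over the 3 predictions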
def forward_test(self,
                 body_feats,
                 rois,
                 rois_num,
                 scale_factor,
                 feat_func=None):
    """
    Args:
        body_feats (list[Tensor]): Multi-level backbone features.
        rois (Tensor): Prediction from bbox head with shape [N, 6].
        rois_num (Tensor): The number of predictions for each batch.
        scale_factor (Tensor): The scale factor from origin size to input size.
    """
    if rois.shape[0] == 0:
        mask_out = paddle.full([1, 1, 1, 1], -1)
    else:
        bbox = [rois[:, 2:]]
        labels = rois[:, 0].cast('int32')
        rois_feat = self.roi_extractor(body_feats, bbox, rois_num)
        if self.share_bbox_feat:
            assert feat_func is not None
            rois_feat = feat_func(rois_feat)

        mask_feat = self.head(rois_feat)
        mask_logit = self.mask_fcn_logits(mask_feat)
        mask_num_class = mask_logit.shape[1]
        if mask_num_class == 1:
            mask_out = F.sigmoid(mask_logit)
        else:
            num_masks = mask_logit.shape[0]
            mask_out = []
            # TODO: need to optimize gather
            for i in range(num_masks):
                pred_masks = paddle.unsqueeze(
                    mask_logit[i, :, :, :], axis=0)
                mask = paddle.gather(pred_masks, labels[i], axis=1)
                mask_out.append(mask)
            mask_out = F.sigmoid(paddle.concat(mask_out))
    return mask_out
def forward(self, inputs):
    token_ids = inputs['token_ids']
    type_ids = inputs['type_ids']
    pos_ids = inputs['pos_ids']
    attention_mask = inputs['attention_mask']
    label_pos = inputs["label_pos"]

    out, self_attn_mask = self.gen_input(token_ids, type_ids, pos_ids,
                                         attention_mask)
    # [-1, seq_len, hidden_size]
    enc_out = self.encoder(out, self_attn_mask)

    enc_out = paddle.reshape(enc_out, [-1, self.hidden_size])
    label_pos = paddle.cast(label_pos, 'int64')
    out = paddle.gather(enc_out, label_pos)
    pooled_out = self.fc1(out)
    pooled_out = self.tanh_layer(pooled_out)
    # [-1, 2]
    logits = self.fc2(pooled_out)
    probs = self.softmax(logits)
    return probs
def sample_logits(embedding, bias, labels, inputs, sampler):
    true_log_probs, samp_log_probs, neg_samples = sampler.sample(labels)
    n_sample = neg_samples.shape[0]
    b1, b2 = labels.shape[0], labels.shape[1]
    all_ids = paddle.concat([paddle.reshape(labels, shape=[-1]), neg_samples])
    all_w = embedding(all_ids)
    true_w = paddle.reshape(all_w[:-n_sample], shape=[b1, b2, -1])
    sample_w = paddle.reshape(all_w[-n_sample:], shape=[n_sample, -1])

    all_b = paddle.gather(bias, all_ids)
    true_b = paddle.reshape(all_b[:-n_sample], shape=[b1, b2])
    sample_b = all_b[-n_sample:]

    hit = paddle.cast(
        (labels.unsqueeze([2]) == neg_samples), dtype=global_dtype).detach()
    true_logits = paddle.sum(true_w * inputs,
                             axis=-1) + true_b - true_log_probs
    sample_logits = paddle.transpose(
        paddle.matmul(sample_w, paddle.transpose(inputs, [0, 2, 1])),
        [0, 2, 1]) + sample_b - samp_log_probs
    sample_logits = sample_logits - 1e30 * hit
    logits = paddle.concat([true_logits.unsqueeze([2]), sample_logits], -1)

    return logits
def __getitem__(self, idx):
    is_bool = False
    if self.dtype == paddle_dtypes.t_bool:
        self = self.cast("int32")
        is_bool = True

    if isinstance(idx, paddle.Tensor) and len(idx.shape) == 1:
        out = paddle.gather(self, idx)
        return out.cast("bool") if is_bool else out
    elif isinstance(idx, paddle.Tensor) and idx.dtype == paddle_dtypes.t_bool:
        idx = paddle.cast(idx, "int32")
        idx = paddle.nonzero(idx)
        out = paddle.gather_nd(self, idx)
        return out.cast("bool") if is_bool else out
    elif isinstance(idx, tuple):
        if is_condition_one(idx):
            first_idx = idx[0]
            first_idx = paddle.cast(first_idx, "int32")
            first_idx = paddle.nonzero(first_idx)
            out = paddle.gather_nd(self, first_idx)
            return out.cast("bool") if is_bool else out
        elif is_condition_two(idx):
            new_idx = list()
            for i in range(len(self.shape) - 1):
                new_idx.append(slice(None, None, None))
            new_idx.append(list(idx)[-1])
            out = self.tmp(tuple(new_idx))
            return out.cast("bool") if is_bool else out
        else:
            # TODO(syf): the index comes out as
            # (slice(None, None, None), slice(None, None, None), 0)
            out = self.tmp(idx)
            return out.cast("bool") if is_bool else out
    else:
        out = self.tmp(idx)
        if out.shape == [1]:
            return out.numpy()[0]
        else:
            return out
def __call__(self, feats, roi, rois_num):
    roi = paddle.concat(roi) if len(roi) > 1 else roi[0]
    if len(feats) == 1:
        rois_feat = ops.roi_align(
            feats[self.start_level],
            roi,
            self.resolution,
            self.spatial_scale[0],
            rois_num=rois_num,
            aligned=self.aligned)
    else:
        offset = 2
        k_min = self.start_level + offset
        k_max = self.end_level + offset
        rois_dist, restore_index, rois_num_dist = ops.distribute_fpn_proposals(
            roi,
            k_min,
            k_max,
            self.canconical_level,
            self.canonical_size,
            rois_num=rois_num)
        rois_feat_list = []
        for lvl in range(self.start_level, self.end_level + 1):
            roi_feat = ops.roi_align(
                feats[lvl],
                rois_dist[lvl],
                self.resolution,
                self.spatial_scale[lvl],
                sampling_ratio=self.sampling_ratio,
                rois_num=rois_num_dist[lvl],
                aligned=self.aligned)
            rois_feat_list.append(roi_feat)
        rois_feat_shuffle = paddle.concat(rois_feat_list)
        rois_feat = paddle.gather(rois_feat_shuffle, restore_index)

    return rois_feat
def forward(self, boxes, logits, gt_bbox, gt_class):
    r"""
    Args:
        boxes (Tensor): [b, query, 4]
        logits (Tensor): [b, query, num_classes]
        gt_bbox (List(Tensor)): list[[n, 4]]
        gt_class (List(Tensor)): list[[n, 1]]

    Returns:
        A list of size batch_size, containing tuples of (index_i, index_j) where:
            - index_i is the indices of the selected predictions (in order)
            - index_j is the indices of the corresponding selected targets (in order)
        For each batch element, it holds:
            len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
    """
    bs, num_queries = boxes.shape[:2]

    num_gts = sum(len(a) for a in gt_class)
    if num_gts == 0:
        return [(paddle.to_tensor(
            [], dtype=paddle.int64), paddle.to_tensor(
                [], dtype=paddle.int64)) for _ in range(bs)]

    # We flatten to compute the cost matrices in a batch
    # [batch_size * num_queries, num_classes]
    out_prob = F.sigmoid(logits.flatten(
        0, 1)) if self.use_focal_loss else F.softmax(logits.flatten(0, 1))
    # [batch_size * num_queries, 4]
    out_bbox = boxes.flatten(0, 1)

    # Also concat the target labels and boxes
    tgt_ids = paddle.concat(gt_class).flatten()
    tgt_bbox = paddle.concat(gt_bbox)

    # Compute the classification cost
    if self.use_focal_loss:
        neg_cost_class = (1 - self.alpha) * (out_prob**self.gamma) * (-(
            1 - out_prob + 1e-8).log())
        pos_cost_class = self.alpha * (
            (1 - out_prob)**self.gamma) * (-(out_prob + 1e-8).log())
        cost_class = paddle.gather(
            pos_cost_class, tgt_ids, axis=1) - paddle.gather(
                neg_cost_class, tgt_ids, axis=1)
    else:
        cost_class = -paddle.gather(out_prob, tgt_ids, axis=1)

    # Compute the L1 cost between boxes
    cost_bbox = (
        out_bbox.unsqueeze(1) - tgt_bbox.unsqueeze(0)).abs().sum(-1)

    # Compute the giou cost between boxes
    cost_giou = self.giou_loss(
        bbox_cxcywh_to_xyxy(out_bbox.unsqueeze(1)),
        bbox_cxcywh_to_xyxy(tgt_bbox.unsqueeze(0))).squeeze(-1)

    # Final cost matrix
    C = self.matcher_coeff['class'] * cost_class + \
        self.matcher_coeff['bbox'] * cost_bbox + \
        self.matcher_coeff['giou'] * cost_giou
    C = C.reshape([bs, num_queries, -1])
    C = [a.squeeze(0) for a in C.chunk(bs)]
    sizes = [a.shape[0] for a in gt_bbox]
    indices = [
        linear_sum_assignment(c.split(sizes, -1)[i].numpy())
        for i, c in enumerate(C)
    ]
    return [(paddle.to_tensor(
        i, dtype=paddle.int64), paddle.to_tensor(
            j, dtype=paddle.int64)) for i, j in indices]
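# A toy illustration of the final assignment step above: scipy's Hungarian
# solver picks the query-target pairing with minimum total cost.
import numpy as np
from scipy.optimize import linear_sum_assignment

cost = np.array([[0.9, 0.1],
                 [0.4, 0.8],
                 [0.3, 0.6]])  # 3 queries x 2 targets
row_ind, col_ind = linear_sum_assignment(cost)
print(row_ind, col_ind)  # [0 2] [1 0]: query 0 -> target 1, query 2 -> target 0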
def __call__(self, box_cls, box_pred, scale_factor_wh, img_whwh):
    """
    Args:
        box_cls (Tensor): tensor of shape (batch_size, num_proposals, K).
            The tensor predicts the classification probability for each proposal.
        box_pred (Tensor): tensors of shape (batch_size, num_proposals, 4).
            The tensor predicts 4-vector (x, y, w, h) box regression values
            for every proposal.
        scale_factor_wh (Tensor): tensors of shape [batch_size, 2],
            the scale factor of each image.
        img_whwh (Tensor): tensors of shape [batch_size, 4].

    Returns:
        bbox_pred (Tensor): tensors of shape [num_boxes, 6].
            Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax].
        bbox_num (Tensor): tensors of shape [batch_size],
            the number of RoIs in each image.
    """
    assert len(box_cls) == len(scale_factor_wh) == len(img_whwh)

    img_wh = img_whwh[:, :2]

    scores = F.sigmoid(box_cls)
    labels = paddle.arange(0, self.num_classes). \
        unsqueeze(0).tile([self.num_proposals, 1]).flatten(
            start_axis=0, stop_axis=1)

    classes_all = []
    scores_all = []
    boxes_all = []
    for i, (scores_per_image,
            box_pred_per_image) in enumerate(zip(scores, box_pred)):
        scores_per_image, topk_indices = scores_per_image.flatten(
            0, 1).topk(
                self.num_proposals, sorted=False)
        labels_per_image = paddle.gather(labels, topk_indices, axis=0)

        box_pred_per_image = box_pred_per_image.reshape([-1, 1, 4]).tile(
            [1, self.num_classes, 1]).reshape([-1, 4])
        box_pred_per_image = paddle.gather(
            box_pred_per_image, topk_indices, axis=0)

        classes_all.append(labels_per_image)
        scores_all.append(scores_per_image)
        boxes_all.append(box_pred_per_image)

    bbox_num = paddle.zeros([len(scale_factor_wh)], dtype="int32")
    boxes_final = []

    for i in range(len(scale_factor_wh)):
        classes = classes_all[i]
        boxes = boxes_all[i]
        scores = scores_all[i]

        boxes[:, 0::2] = paddle.clip(
            boxes[:, 0::2], min=0, max=img_wh[i][0]) / scale_factor_wh[i][0]
        boxes[:, 1::2] = paddle.clip(
            boxes[:, 1::2], min=0, max=img_wh[i][1]) / scale_factor_wh[i][1]
        boxes_w, boxes_h = (boxes[:, 2] - boxes[:, 0]).numpy(), (
            boxes[:, 3] - boxes[:, 1]).numpy()

        keep = (boxes_w > 1.) & (boxes_h > 1.)

        if (keep.sum() == 0):
            bboxes = paddle.zeros([1, 6]).astype("float32")
        else:
            boxes = paddle.to_tensor(boxes.numpy()[keep]).astype("float32")
            classes = paddle.to_tensor(classes.numpy()[keep]).astype(
                "float32").unsqueeze(-1)
            scores = paddle.to_tensor(scores.numpy()[keep]).astype(
                "float32").unsqueeze(-1)

            bboxes = paddle.concat([classes, scores, boxes], axis=-1)

        boxes_final.append(bboxes)
        bbox_num[i] = bboxes.shape[0]

    bbox_pred = paddle.concat(boxes_final)
    return bbox_pred, bbox_num
def __call__(self, seg_preds, seg_masks, cate_labels, cate_scores,
             sum_masks=None):
    # sort and keep top nms_pre
    sort_inds = self._sort_score(cate_scores, self.pre_nms_top_n)
    seg_masks = paddle.gather(seg_masks, index=sort_inds)
    seg_preds = paddle.gather(seg_preds, index=sort_inds)
    sum_masks = paddle.gather(sum_masks, index=sort_inds)
    cate_scores = paddle.gather(cate_scores, index=sort_inds)
    cate_labels = paddle.gather(cate_labels, index=sort_inds)

    seg_masks = paddle.flatten(seg_masks, start_axis=1, stop_axis=-1)
    # inter.
    inter_matrix = paddle.mm(seg_masks, paddle.transpose(seg_masks, [1, 0]))
    n_samples = paddle.shape(cate_labels)
    # union.
    sum_masks_x = paddle.expand(sum_masks, shape=[n_samples, n_samples])
    # iou.
    iou_matrix = (inter_matrix / (
        sum_masks_x + paddle.transpose(sum_masks_x, [1, 0]) - inter_matrix))
    iou_matrix = paddle.triu(iou_matrix, diagonal=1)
    # label_specific matrix.
    cate_labels_x = paddle.expand(cate_labels, shape=[n_samples, n_samples])
    label_matrix = paddle.cast(
        (cate_labels_x == paddle.transpose(cate_labels_x, [1, 0])),
        'float32')
    label_matrix = paddle.triu(label_matrix, diagonal=1)

    # IoU compensation
    compensate_iou = paddle.max((iou_matrix * label_matrix), axis=0)
    compensate_iou = paddle.expand(
        compensate_iou, shape=[n_samples, n_samples])
    compensate_iou = paddle.transpose(compensate_iou, [1, 0])

    # IoU decay
    decay_iou = iou_matrix * label_matrix

    # matrix nms
    if self.kernel == 'gaussian':
        decay_matrix = paddle.exp(-1 * self.sigma * (decay_iou**2))
        compensate_matrix = paddle.exp(-1 * self.sigma *
                                       (compensate_iou**2))
        decay_coefficient = paddle.min(decay_matrix / compensate_matrix,
                                       axis=0)
    elif self.kernel == 'linear':
        decay_matrix = (1 - decay_iou) / (1 - compensate_iou)
        decay_coefficient = paddle.min(decay_matrix, axis=0)
    else:
        raise NotImplementedError

    # update the score.
    cate_scores = cate_scores * decay_coefficient
    y = paddle.zeros(shape=paddle.shape(cate_scores), dtype='float32')
    keep = paddle.where(cate_scores >= self.update_threshold, cate_scores, y)
    keep = paddle.nonzero(keep)
    keep = paddle.squeeze(keep, axis=[1])
    # Prevent empty and increase fake data
    keep = paddle.concat(
        [keep, paddle.cast(paddle.shape(cate_scores)[0] - 1, 'int64')])

    seg_preds = paddle.gather(seg_preds, index=keep)
    cate_scores = paddle.gather(cate_scores, index=keep)
    cate_labels = paddle.gather(cate_labels, index=keep)

    # sort and keep top_k
    sort_inds = self._sort_score(cate_scores, self.post_nms_top_n)
    seg_preds = paddle.gather(seg_preds, index=sort_inds)
    cate_scores = paddle.gather(cate_scores, index=sort_inds)
    cate_labels = paddle.gather(cate_labels, index=sort_inds)
    return seg_preds, cate_scores, cate_labels
def compute_ref_grad_updates(self):
    ref_grad_updates = paddle.gather(
        paddle.to_tensor(self.dout_np), paddle.to_tensor(self.index_np))
    return ref_grad_updates
def forward(self, outputs, targets):
    """
    Performs the matching.

    Args:
        outputs: This is a dict that contains at least these entries:
            "pred_logits": Tensor of dim [batch_size, num_queries, num_classes]
                with the classification logits
            "pred_boxes": Tensor of dim [batch_size, num_queries, 4]
                with the predicted box coordinates
            eg. outputs = {"pred_logits": pred_logits, "pred_boxes": pred_boxes}

        targets: This is a list of targets (len(targets) = batch_size),
            where each target is a dict containing:
            "labels": Tensor of dim [num_target_boxes] (where num_target_boxes
                is the number of ground-truth objects in the target)
                containing the class labels
            "boxes": Tensor of dim [num_target_boxes, 4] containing the
                target box coordinates
            eg. targets = [{"labels": labels, "boxes": boxes}, ...,
                           {"labels": labels, "boxes": boxes}]

    Returns:
        A list of size batch_size, containing tuples of (index_i, index_j) where:
            - index_i is the indices of the selected predictions (in order)
            - index_j is the indices of the corresponding selected targets (in order)
        For each batch element, it holds:
            len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
    """
    bs, num_queries = outputs["pred_logits"].shape[:2]

    # We flatten to compute the cost matrices in a batch
    out_prob = F.sigmoid(outputs["pred_logits"].flatten(
        start_axis=0, stop_axis=1))
    out_bbox = outputs["pred_boxes"].flatten(start_axis=0, stop_axis=1)

    # Also concat the target labels and boxes
    tgt_ids = paddle.concat([v["labels"] for v in targets])
    assert (tgt_ids > -1).all()
    tgt_bbox = paddle.concat([v["boxes"] for v in targets])

    # Compute the classification cost. Contrary to the loss, we don't use
    # the NLL, but approximate it by 1 - proba[target class]. The 1 is a
    # constant that doesn't change the matching, so it can be omitted.
    alpha = self.focal_loss_alpha
    gamma = self.focal_loss_gamma
    neg_cost_class = (1 - alpha) * (out_prob**gamma) * (-(
        1 - out_prob + 1e-8).log())
    pos_cost_class = alpha * ((1 - out_prob)
                              **gamma) * (-(out_prob + 1e-8).log())
    cost_class = paddle.gather(
        pos_cost_class, tgt_ids, axis=1) - paddle.gather(
            neg_cost_class, tgt_ids, axis=1)

    # Compute the L1 cost between boxes
    image_size_out = paddle.concat(
        [v["img_whwh"].unsqueeze(0) for v in targets])
    image_size_out = image_size_out.unsqueeze(1).tile(
        [1, num_queries, 1]).flatten(
            start_axis=0, stop_axis=1)
    image_size_tgt = paddle.concat([v["img_whwh_tgt"] for v in targets])

    out_bbox_ = out_bbox / image_size_out
    tgt_bbox_ = tgt_bbox / image_size_tgt
    cost_bbox = F.l1_loss(
        out_bbox_.unsqueeze(-2), tgt_bbox_,
        reduction='none').sum(-1)  # [batch_size * num_queries, num_tgts]

    # Compute the giou cost between boxes
    cost_giou = -get_bboxes_giou(out_bbox, tgt_bbox)

    # Final cost matrix
    C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + \
        self.cost_giou * cost_giou
    C = C.reshape([bs, num_queries, -1])

    sizes = [len(v["boxes"]) for v in targets]

    indices = [
        linear_sum_assignment(c[i].numpy())
        for i, c in enumerate(C.split(sizes, -1))
    ]
    return [(paddle.to_tensor(
        i, dtype="int32"), paddle.to_tensor(
            j, dtype="int32")) for i, j in indices]
def find_top_rpn_proposals(is_train, rpn_bbox_offsets_list, rpn_cls_prob_list,
                           all_anchors_list, im_info):
    prev_nms_top_n = config.train_prev_nms_top_n \
        if is_train else config.test_prev_nms_top_n
    post_nms_top_n = config.train_post_nms_top_n \
        if is_train else config.test_post_nms_top_n
    batch_per_gpu = config.train_batch_per_gpu if is_train else 1
    nms_threshold = config.rpn_nms_threshold
    box_min_size = config.rpn_min_box_size
    bbox_normalize_targets = config.rpn_bbox_normalize_targets
    bbox_normalize_means = config.bbox_normalize_means
    bbox_normalize_stds = config.bbox_normalize_stds

    list_size = len(rpn_bbox_offsets_list)

    return_rois = []
    return_inds = []
    for bid in range(batch_per_gpu):
        batch_proposals_list = []
        batch_probs_list = []
        for l in range(list_size):
            # get proposals and probs
            offsets = rpn_bbox_offsets_list[l][bid] \
                .transpose((1, 2, 0)).reshape((-1, 4))
            if bbox_normalize_targets:
                std_opr = paddle.to_tensor(
                    bbox_normalize_stds[None, :]).cast('float32')
                mean_opr = paddle.to_tensor(
                    bbox_normalize_means[None, :]).cast('float32')
                offsets = offsets * std_opr
                offsets = offsets + mean_opr
            all_anchors = all_anchors_list[l]
            proposals = bbox_transform_inv_opr(all_anchors, offsets)
            if config.anchor_within_border:
                proposals = clip_boxes_opr(proposals, im_info[bid, :])
            probs = rpn_cls_prob_list[l][bid] \
                .transpose((1, 2, 0)).reshape((-1, 2))
            probs = F.softmax(probs, axis=-1)[:, 1]
            # gather the proposals and probs
            batch_proposals_list.append(proposals)
            batch_probs_list.append(probs)

        batch_proposals = paddle.concat(batch_proposals_list, axis=0)
        batch_probs = paddle.concat(batch_probs_list, axis=0)

        # filter the zero boxes.
        batch_keep_mask = filter_boxes_opr(batch_proposals,
                                           box_min_size * im_info[bid, 2])
        batch_keep_mask = paddle.nonzero(batch_keep_mask).flatten()
        batch_proposals = paddle.gather(batch_proposals, batch_keep_mask)
        batch_probs = paddle.gather(batch_probs, batch_keep_mask)

        # prev_nms_top_n: sort the probs in descending order and keep the
        # indices of the original order before slicing.
        num_proposals = min(prev_nms_top_n, batch_probs.shape[0])
        idx = batch_probs.argsort(descending=True)
        batch_probs = batch_probs.sort(descending=True)[:num_proposals]
        topk_idx = idx[:num_proposals].flatten()
        batch_proposals = paddle.gather(batch_proposals, topk_idx)

        # For each image, run an image-level NMS and keep the top-k results.
        keep = nms(bboxes=batch_proposals.unsqueeze(axis=0),
                   scores=batch_probs.unsqueeze(axis=0).unsqueeze(axis=0),
                   score_threshold=nms_threshold,
                   nms_top_k=post_nms_top_n,
                   keep_top_k=post_nms_top_n,
                   normalized=False)
        batch_proposals = keep[:, 2:]

        # cons the rois
        batch_inds = paddle.ones(
            (batch_proposals.shape[0], 1)).cast('float32') * bid
        batch_rois = paddle.concat([batch_inds, batch_proposals], axis=1)
        return_rois.append(batch_rois)

    if batch_per_gpu == 1:
        return batch_rois
    else:
        concated_rois = paddle.concat(return_rois, axis=0)
        return concated_rois
def forward(self,
            pred_scores,
            pred_bboxes,
            anchor_points,
            gt_labels,
            gt_bboxes,
            bg_index,
            gt_scores=None):
    r"""This code is based on
        https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py

    The assignment is done in the following steps:
    1. compute alignment metric between all bboxes (bboxes of all pyramid
       levels) and gt
    2. select top-k bboxes as candidates for each gt
    3. limit the positive sample's center in gt (because the anchor-free
       detector only can predict positive distance)
    4. if an anchor box is assigned to multiple gts, the one with the
       highest iou will be selected.

    Args:
        pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
        pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
        anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
        gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes, shape(B, n, 1)
        gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes, shape(B, n, 4)
        bg_index (int): background index
        gt_scores (Tensor|List[Tensor]|None, float32): Score of gt_bboxes,
            shape(B, n, 1). If None, it will be initialized with the one-hot label.

    Returns:
        assigned_labels (Tensor): (B, L)
        assigned_bboxes (Tensor): (B, L, 4)
        assigned_scores (Tensor): (B, L, C)
    """
    assert pred_scores.ndim == pred_bboxes.ndim
    gt_labels, gt_bboxes, pad_gt_scores, pad_gt_mask = pad_gt(
        gt_labels, gt_bboxes, gt_scores)
    assert gt_labels.ndim == gt_bboxes.ndim and \
        gt_bboxes.ndim == 3

    batch_size, num_anchors, num_classes = pred_scores.shape
    _, num_max_boxes, _ = gt_bboxes.shape

    # negative batch
    if num_max_boxes == 0:
        assigned_labels = paddle.full([batch_size, num_anchors], bg_index)
        assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
        assigned_scores = paddle.zeros(
            [batch_size, num_anchors, num_classes])
        return assigned_labels, assigned_bboxes, assigned_scores

    # compute iou between gt and pred bbox, [B, n, L]
    ious = iou_similarity(gt_bboxes, pred_bboxes)
    # gather pred bboxes class score
    pred_scores = pred_scores.transpose([0, 2, 1])
    batch_ind = paddle.arange(
        end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
    gt_labels_ind = paddle.stack(
        [batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
        axis=-1)
    bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
    # compute alignment metrics, [B, n, L]
    alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
        self.beta)

    # check the positive sample's center in gt, [B, n, L]
    is_in_gts = check_points_inside_bboxes(anchor_points, gt_bboxes)

    # select topk largest alignment metrics pred bbox as candidates
    # for each gt, [B, n, L]
    is_in_topk = gather_topk_anchors(
        alignment_metrics * is_in_gts,
        self.topk,
        topk_mask=pad_gt_mask.tile([1, 1, self.topk]).astype(paddle.bool))

    # select positive sample, [B, n, L]
    mask_positive = is_in_topk * is_in_gts * pad_gt_mask

    # if an anchor box is assigned to multiple gts,
    # the one with the highest iou will be selected, [B, n, L]
    mask_positive_sum = mask_positive.sum(axis=-2)
    if mask_positive_sum.max() > 1:
        mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
            [1, num_max_boxes, 1])
        is_max_iou = compute_max_iou_anchor(ious)
        mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
                                     mask_positive)
        mask_positive_sum = mask_positive.sum(axis=-2)
    assigned_gt_index = mask_positive.argmax(axis=-2)
    assert mask_positive_sum.max() == 1, \
        ("one anchor just assign one gt, but received not equals 1. "
         "Received: %f" % mask_positive_sum.max().item())

    # assigned target
    assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
    assigned_labels = paddle.gather(
        gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
    assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
    assigned_labels = paddle.where(
        mask_positive_sum > 0, assigned_labels,
        paddle.full_like(assigned_labels, bg_index))

    assigned_bboxes = paddle.gather(
        gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
    assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])

    assigned_scores = F.one_hot(assigned_labels, num_classes)
    # rescale alignment metrics
    alignment_metrics *= mask_positive
    max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
    max_ious_per_instance = (ious * mask_positive).max(axis=-1,
                                                       keepdim=True)
    alignment_metrics = alignment_metrics / (
        max_metrics_per_instance + self.eps) * max_ious_per_instance
    alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
    assigned_scores = assigned_scores * alignment_metrics

    return assigned_labels, assigned_bboxes, assigned_scores
def forward(self, feed_dict):
    g = feed_dict['graph']
    x = g.node_feat["feat"]
    edge_feat = g.edge_feat["feat"]

    h_list = [self.atom_encoder(x)]

    ### virtual node embeddings for graphs
    virtualnode_embedding = self.virtualnode_embedding.expand(
        [g.num_graph, self.virtualnode_embedding.shape[-1]])

    junc_feat = self.junc_embed(feed_dict['junc_graph'].node_feat['feat'])
    junc_feat = paddle.squeeze(junc_feat, axis=1)

    for layer in range(self.num_layers):
        ### add message from virtual nodes to graph nodes
        h_list[layer] = h_list[layer] + paddle.gather(
            virtualnode_embedding, g.graph_node_id)

        ### Message passing among graph nodes
        h = self.convs[layer](g, h_list[layer], edge_feat)
        h = self.batch_norms[layer](h)
        if layer == self.num_layers - 1:
            # remove relu for the last layer
            h = F.dropout(h, self.drop_ratio, training=self.training)
        else:
            h = F.dropout(
                F.swish(h), self.drop_ratio, training=self.training)

        if self.residual:
            h = h + h_list[layer]

        # junction tree aggregation
        atom_index = feed_dict['mol2junc'][:, 0]
        junc_index = feed_dict['mol2junc'][:, 1]

        gather_h = paddle.gather(h, atom_index)
        out_dim = gather_h.shape[-1]
        num = feed_dict['junc_graph'].num_nodes
        init_h = paddle.zeros(shape=[num, out_dim], dtype=gather_h.dtype)
        # node feature of junction tree
        junc_h = paddle.scatter(init_h, junc_index, gather_h, overwrite=False)
        junc_h = junc_feat + junc_h
        junc_h = self.junc_convs[layer](feed_dict['junc_graph'], junc_h)

        junc_h = paddle.gather(junc_h, junc_index)
        init_h = paddle.zeros(
            shape=[feed_dict['graph'].num_nodes, out_dim], dtype=h.dtype)
        sct_h = paddle.scatter(init_h, atom_index, junc_h, overwrite=False)

        h = h + sct_h
        h_list.append(h)

        ### update the virtual nodes
        if layer < self.num_layers - 1:
            ### add message from graph nodes to virtual nodes
            virtualnode_embedding_temp = self.pool(
                g, h_list[layer]) + virtualnode_embedding
            ### transform virtual nodes using MLP
            if self.residual:
                virtualnode_embedding = virtualnode_embedding + F.dropout(
                    self.mlp_virtualnode_list[layer](
                        virtualnode_embedding_temp),
                    self.drop_ratio,
                    training=self.training)
            else:
                virtualnode_embedding = F.dropout(
                    self.mlp_virtualnode_list[layer](
                        virtualnode_embedding_temp),
                    self.drop_ratio,
                    training=self.training)

    ### Different implementations of JK-concat
    if self.JK == "last":
        node_representation = h_list[-1]
    elif self.JK == "sum":
        node_representation = 0
        for layer in range(self.num_layers):
            node_representation += h_list[layer]

    return node_representation
def do_train(args):
    set_seed(args)
    tokenizer_class, eval_name, test_name = DATASET_INFO[args.dataset]
    tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)

    train_ds, eval_ds, test_ds = load_dataset(
        args.dataset, splits=["train", eval_name, test_name])

    num_classes = len(train_ds.label_list)
    no_entity_id = num_classes - 1

    paddle.set_device(args.device)
    trainer_num = paddle.distributed.get_world_size()
    if trainer_num > 1:
        paddle.distributed.init_parallel_env()
    rank = paddle.distributed.get_rank()
    if rank == 0:
        if os.path.exists(args.model_name_or_path):
            logger.info("init checkpoint from %s" % args.model_name_or_path)
    model = ErnieDocForTokenClassification.from_pretrained(
        args.model_name_or_path, num_classes=num_classes)
    model_config = model.ernie_doc.config
    if trainer_num > 1:
        model = paddle.DataParallel(model)

    train_ds_iter = SequenceLabelingIterator(
        train_ds,
        args.batch_size,
        tokenizer,
        trainer_num,
        trainer_id=rank,
        memory_len=model_config["memory_len"],
        max_seq_length=args.max_seq_length,
        random_seed=args.seed,
        no_entity_id=no_entity_id)
    eval_ds_iter = SequenceLabelingIterator(
        eval_ds,
        args.batch_size,
        tokenizer,
        trainer_num,
        trainer_id=rank,
        memory_len=model_config["memory_len"],
        max_seq_length=args.max_seq_length,
        mode="eval",
        no_entity_id=no_entity_id)
    test_ds_iter = SequenceLabelingIterator(
        test_ds,
        args.batch_size,
        tokenizer,
        trainer_num,
        trainer_id=rank,
        memory_len=model_config["memory_len"],
        max_seq_length=args.max_seq_length,
        mode="test",
        no_entity_id=no_entity_id)

    train_dataloader = paddle.io.DataLoader.from_generator(
        capacity=70, return_list=True)
    train_dataloader.set_batch_generator(train_ds_iter, paddle.get_device())
    eval_dataloader = paddle.io.DataLoader.from_generator(
        capacity=70, return_list=True)
    eval_dataloader.set_batch_generator(eval_ds_iter, paddle.get_device())
    test_dataloader = paddle.io.DataLoader.from_generator(
        capacity=70, return_list=True)
    test_dataloader.set_batch_generator(test_ds_iter, paddle.get_device())

    num_training_examples = train_ds_iter.get_num_examples()
    num_training_steps = args.epochs * num_training_examples \
        // args.batch_size // trainer_num
    logger.info("Device count: %d, trainer_id: %d" % (trainer_num, rank))
    logger.info("Num train examples: %d" % num_training_examples)
    logger.info("Max train steps: %d" % num_training_steps)
    logger.info("Num warmup steps: %d" %
                int(num_training_steps * args.warmup_proportion))

    lr_scheduler = LinearDecayWithWarmup(args.learning_rate,
                                         num_training_steps,
                                         args.warmup_proportion)

    # Generate parameter names needed to perform weight decay.
    # All bias and LayerNorm parameters are excluded.
    decay_params = [
        p.name for n, p in model.named_parameters()
        if not any(nd in n for nd in ["bias", "norm"])
    ]
    # Construct dict
    name_dict = dict()
    for n, p in model.named_parameters():
        name_dict[p.name] = n

    optimizer = AdamWDL(
        learning_rate=lr_scheduler,
        parameters=model.parameters(),
        weight_decay=args.weight_decay,
        apply_decay_param_fun=lambda x: x in decay_params,
        n_layers=model_config["num_hidden_layers"],
        layerwise_decay=args.layerwise_decay,
        name_dict=name_dict)

    criterion = paddle.nn.loss.CrossEntropyLoss()
    metric = ChunkEvaluator(label_list=train_ds.label_list)

    global_steps = 0

    create_memory = partial(init_memory, args.batch_size, args.memory_length,
                            model_config["hidden_size"],
                            model_config["num_hidden_layers"])
    # Copy the memory
    memories = create_memory()
    tic_train = time.time()
    best_f1 = 0
    for epoch in range(args.epochs):
        train_ds_iter.shuffle_sample()
        train_dataloader.set_batch_generator(train_ds_iter,
                                             paddle.get_device())
        for step, batch in enumerate(train_dataloader, start=1):
            global_steps += 1
            input_ids, position_ids, token_type_ids, attn_mask, labels, \
                lengths, qids, gather_idx, need_cal_loss = batch
            logits, memories = model(input_ids, memories, token_type_ids,
                                     position_ids, attn_mask)
            logits, labels = list(
                map(lambda x: paddle.gather(x, gather_idx), [logits, labels]))
            loss = criterion(logits, labels) * need_cal_loss

            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.clear_grad()

            if global_steps % args.logging_steps == 0:
                logger.info(
                    "train: global step %d, epoch: %d, loss: %f, lr: %f, speed: %.2f step/s"
                    % (global_steps, epoch, loss, lr_scheduler.get_lr(),
                       args.logging_steps / (time.time() - tic_train)))
                tic_train = time.time()

            if global_steps % args.save_steps == 0:
                # Evaluate
                logger.info("Eval:")
                precision, recall, f1_score = evaluate(
                    model, metric, eval_dataloader, create_memory())
                # Save
                if rank == 0:
                    output_dir = os.path.join(args.output_dir,
                                              "model_%d" % (global_steps))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    model_to_save = model._layers if isinstance(
                        model, paddle.DataParallel) else model
                    model_to_save.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)
                    if f1_score > best_f1:
                        logger.info("Save best model......")
                        best_f1 = f1_score
                        best_model_dir = os.path.join(args.output_dir,
                                                      "best_model")
                        if not os.path.exists(best_model_dir):
                            os.makedirs(best_model_dir)
                        model_to_save.save_pretrained(best_model_dir)
                        tokenizer.save_pretrained(best_model_dir)

            if args.max_steps > 0 and global_steps >= args.max_steps:
                return

    logger.info("Final test result:")
    eval_acc = evaluate(model, metric, test_dataloader, create_memory())
def forward(self, inputs, lengths):
    """
    Decode the highest scoring sequence of tags.

    Args:
        inputs (Tensor): The unary emission tensor. Its dtype is float32
            and has a shape of `[batch_size, sequence_length, num_tags]`.
        lengths (Tensor): The input length tensor storing the real length
            of each sequence for correctness. Its dtype is int64 and has
            a shape of `[batch_size]`.

    Returns:
        tuple: Returns tuple (scores, paths). The `scores` tensor contains
            the score for the Viterbi sequence. Its dtype is float32 and
            has a shape of `[batch_size]`. The `paths` tensor contains the
            highest scoring tag indices. Its dtype is int64 and has a
            shape of `[batch_size, sequence_length]`.
    """
    input_shape = paddle.shape(inputs)
    batch_size = input_shape[0]
    seq_len = input_shape[1]
    n_label = input_shape[2]

    inputs_t = inputs.transpose([1, 0, 2])
    trans_exp = self.transitions.unsqueeze(0).expand(
        [batch_size, n_label, n_label])

    historys = []
    left_length = lengths.clone()
    max_seq_len = left_length.max()
    # no need to expand the 'mask' in the following iteration
    left_length = left_length.unsqueeze(-1).expand([batch_size, n_label])

    if self.with_start_stop_tag:
        alpha = self._initialize_alpha(batch_size)
    else:
        alpha = paddle.zeros((batch_size, self.num_tags), dtype='float32')
    for i, logit in enumerate(inputs_t[:max_seq_len]):
        # If not with_start_stop_tag, the first label has no antecedent tag.
        if i == 0 and not self.with_start_stop_tag:
            alpha = logit
            left_length = left_length - 1
            continue
        alpha_exp = alpha.unsqueeze(2)
        # alpha_trn_sum: batch_size, n_labels, n_labels
        alpha_trn_sum = alpha_exp + trans_exp

        # alpha_max: batch_size, n_labels
        # We don't include the emission scores here because the max does
        # not depend on them (we add them in below).
        alpha_max = alpha_trn_sum.max(1)
        # If with_start_stop_tag, the first antecedent tag must be START;
        # otherwise the first label has no antecedent tag.
        # So we can record the path from i=1.
        if i >= 1:
            alpha_argmax = alpha_trn_sum.argmax(1)
            historys.append(alpha_argmax)
        # Now add the emission scores
        alpha_nxt = alpha_max + logit

        mask = paddle.cast((left_length > 0), dtype='float32')
        alpha = mask * alpha_nxt + (1 - mask) * alpha

        if self.with_start_stop_tag:
            mask = paddle.cast((left_length == 1), dtype='float32')
            alpha += mask * trans_exp[:, self.stop_idx]

        left_length = left_length - 1

    # last_ids: batch_size
    scores, last_ids = alpha.max(1), alpha.argmax(1)
    if max_seq_len == 1:
        return scores, last_ids.unsqueeze(1)

    # Trace back the best path
    # historys: seq_len, batch_size, n_labels
    historys = paddle.stack(historys)
    left_length = left_length[:, 0]
    tag_mask = paddle.cast((left_length >= 0), 'int64')
    last_ids_update = last_ids * tag_mask

    batch_path = [last_ids_update]
    batch_offset = self._get_batch_index(batch_size) * n_label
    historys = paddle.reverse(historys, [0])
    for hist in historys:
        # hist: batch_size, n_labels
        left_length = left_length + 1
        gather_idx = batch_offset + last_ids
        tag_mask = paddle.cast((left_length > 0), 'int64')
        last_ids_update = paddle.gather(hist.flatten(),
                                        gather_idx) * tag_mask
        zero_len_mask = paddle.cast((left_length == 0), 'int64')
        last_ids_update = last_ids_update * (
            1 - zero_len_mask) + last_ids * zero_len_mask
        batch_path.append(last_ids_update)
        tag_mask = paddle.cast((left_length >= 0), 'int64')
        last_ids = last_ids_update + last_ids * (1 - tag_mask)

    batch_path = paddle.reverse(paddle.stack(batch_path, 1), [1])
    return scores, batch_path
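# A hypothetical usage sketch, assuming `decoder` is an instance of the
# Viterbi-decoder-style module this forward() belongs to:
import paddle

emissions = paddle.rand([2, 6, 5])           # [batch, seq_len, num_tags]
lengths = paddle.to_tensor([6, 4], 'int64')  # real length of each sequence
scores, paths = decoder(emissions, lengths)  # scores: [2], paths: [2, 6]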
def __init__(self,
             feature,
             label,
             rank,
             world_size,
             num_classes,
             margin1=1.0,
             margin2=0.5,
             margin3=0.0,
             scale=64.0,
             sample_ratio=1.0,
             embedding_size=512,
             name=None):
    super(LargeScaleClassifier, self).__init__()
    self.num_classes: int = num_classes
    self.rank: int = rank
    self.world_size: int = world_size
    self.sample_ratio: float = sample_ratio
    self.embedding_size: int = embedding_size
    self.num_local: int = (num_classes + world_size - 1) // world_size
    if num_classes % world_size != 0 and rank == world_size - 1:
        self.num_local = num_classes % self.num_local
    self.num_sample: int = int(self.sample_ratio * self.num_local)
    self.margin1 = margin1
    self.margin2 = margin2
    self.margin3 = margin3
    self.logit_scale = scale
    self.input_dict = OrderedDict()
    self.input_dict['feature'] = feature
    self.input_dict['label'] = label
    self.output_dict = OrderedDict()

    if name is None:
        name = 'dist@fc@rank@%05d' % rank

    stddev = math.sqrt(2.0 / (self.embedding_size + self.num_local))
    param_attr = paddle.ParamAttr(
        initializer=paddle.nn.initializer.Normal(std=stddev))

    weight_dtype = 'float16' if feature.dtype == paddle.float16 else 'float32'
    weight = paddle.static.create_parameter(
        shape=[self.embedding_size, self.num_local],
        dtype=weight_dtype,
        name=name,
        attr=param_attr,
        is_bias=False)

    # avoid allreducing gradients for distributed parameters
    weight.is_distributed = True
    # avoid broadcasting distributed parameters in startup program
    paddle.static.default_startup_program().global_block().vars[
        weight.name].is_distributed = True

    if self.world_size > 1:
        feature_list = []
        paddle.distributed.all_gather(feature_list, feature)
        total_feature = paddle.concat(feature_list, axis=0)

        label_list = []
        paddle.distributed.all_gather(label_list, label)
        total_label = paddle.concat(label_list, axis=0)
        total_label.stop_gradient = True
    else:
        total_feature = feature
        total_label = label
        total_label.stop_gradient = True

    if self.sample_ratio < 1.0:
        # partial fc sample process
        total_label, sampled_class_index = \
            paddle.nn.functional.class_center_sample(
                total_label, self.num_local, self.num_sample)
        sampled_class_index.stop_gradient = True
        weight = paddle.gather(weight, sampled_class_index, axis=1)

    norm_feature = paddle.fluid.layers.l2_normalize(total_feature, axis=1)
    norm_weight = paddle.fluid.layers.l2_normalize(weight, axis=0)

    local_logit = paddle.matmul(norm_feature, norm_weight)

    loss = paddle.nn.functional.margin_cross_entropy(
        local_logit,
        total_label,
        margin1=self.margin1,
        margin2=self.margin2,
        margin3=self.margin3,
        scale=self.logit_scale,
        return_softmax=False,
        reduction=None)
    loss.desc.set_dtype(paddle.fluid.core.VarDesc.VarType.FP32)
    loss = paddle.mean(loss)

    self.output_dict['loss'] = loss
def get_loss(self, scores, deltas, targets, rois, bbox_weight):
    """
    Args:
        scores (Tensor): scores from bbox head outputs
        deltas (Tensor): deltas from bbox head outputs
        targets (list[List[Tensor]]): bbox targets containing tgt_labels,
            tgt_bboxes and tgt_gt_inds
        rois (List[Tensor]): RoIs generated in each batch
    """
    # TODO: better pass args
    tgt_labels, tgt_bboxes, tgt_gt_inds = targets
    tgt_labels = paddle.concat(tgt_labels) if len(
        tgt_labels) > 1 else tgt_labels[0]
    tgt_labels = tgt_labels.cast('int64')
    tgt_labels.stop_gradient = True
    loss_bbox_cls = F.cross_entropy(
        input=scores, label=tgt_labels, reduction='mean')

    # bbox reg
    cls_agnostic_bbox_reg = deltas.shape[1] == 4

    fg_inds = paddle.nonzero(
        paddle.logical_and(tgt_labels >= 0,
                           tgt_labels < self.num_classes)).flatten()

    cls_name = 'loss_bbox_cls'
    reg_name = 'loss_bbox_reg'
    loss_bbox = {}
    if fg_inds.numel() == 0:
        loss_bbox[cls_name] = paddle.to_tensor(0., dtype='float32')
        loss_bbox[reg_name] = paddle.to_tensor(0., dtype='float32')
        return loss_bbox

    if cls_agnostic_bbox_reg:
        reg_delta = paddle.gather(deltas, fg_inds)
    else:
        fg_gt_classes = paddle.gather(tgt_labels, fg_inds)

        reg_row_inds = paddle.arange(fg_gt_classes.shape[0]).unsqueeze(1)
        reg_row_inds = paddle.tile(reg_row_inds, [1, 4]).reshape([-1, 1])

        reg_col_inds = 4 * fg_gt_classes.unsqueeze(1) + paddle.arange(4)
        reg_col_inds = reg_col_inds.reshape([-1, 1])
        reg_inds = paddle.concat([reg_row_inds, reg_col_inds], axis=1)

        reg_delta = paddle.gather(deltas, fg_inds)
        reg_delta = paddle.gather_nd(reg_delta, reg_inds).reshape([-1, 4])

    rois = paddle.concat(rois) if len(rois) > 1 else rois[0]
    tgt_bboxes = paddle.concat(tgt_bboxes) if len(
        tgt_bboxes) > 1 else tgt_bboxes[0]

    reg_target = bbox2delta(rois, tgt_bboxes, bbox_weight)
    reg_target = paddle.gather(reg_target, fg_inds)
    reg_target.stop_gradient = True

    loss_bbox_reg = paddle.abs(reg_delta -
                               reg_target).sum() / tgt_labels.shape[0]

    loss_bbox[cls_name] = loss_bbox_cls
    loss_bbox[reg_name] = loss_bbox_reg

    return loss_bbox
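# A toy check of the class-specific column indexing above: foreground RoI k
# reads the 4 delta columns [4 * cls, 4 * cls + 4) of its gt class.
import paddle

fg_gt_classes = paddle.to_tensor([2, 0])  # gt classes of 2 foreground RoIs
rows = paddle.tile(paddle.arange(2).unsqueeze(1), [1, 4]).reshape([-1, 1])
cols = (4 * fg_gt_classes.unsqueeze(1) + paddle.arange(4)).reshape([-1, 1])
print(paddle.concat([rows, cols], axis=1).numpy().T)
# rows: [0 0 0 0 1 1 1 1], cols: [8 9 10 11 0 1 2 3]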
def get_loss(self, cate_preds, kernel_preds, ins_pred, ins_labels,
             cate_labels, grid_order_list, fg_num):
    """
    Get loss of network of SOLOv2.

    Args:
        cate_preds (list): Tensor list of category branch output.
        kernel_preds (list): Tensor list of kernel branch output.
        ins_pred (list): Tensor list of instance branch output.
        ins_labels (list): List of instance labels per batch.
        cate_labels (list): List of category labels per batch.
        grid_order_list (list): List of indices per grid.
        fg_num (int): Number of positive samples in a mini-batch.

    Returns:
        loss_ins (Tensor): The instance loss Tensor of SOLOv2 network.
        loss_cate (Tensor): The category loss Tensor of SOLOv2 network.
    """
    batch_size = paddle.shape(grid_order_list[0])[0]
    ins_pred_list = []
    for kernel_preds_level, grid_orders_level in zip(kernel_preds,
                                                     grid_order_list):
        if grid_orders_level.shape[1] == 0:
            ins_pred_list.append(None)
            continue
        grid_orders_level = paddle.reshape(grid_orders_level, [-1])
        reshape_pred = paddle.reshape(
            kernel_preds_level,
            shape=(paddle.shape(kernel_preds_level)[0],
                   paddle.shape(kernel_preds_level)[1], -1))
        reshape_pred = paddle.transpose(reshape_pred, [0, 2, 1])
        reshape_pred = paddle.reshape(
            reshape_pred, shape=(-1, paddle.shape(reshape_pred)[2]))
        gathered_pred = paddle.gather(reshape_pred, index=grid_orders_level)
        gathered_pred = paddle.reshape(
            gathered_pred,
            shape=[batch_size, -1, paddle.shape(gathered_pred)[1]])
        cur_ins_pred = ins_pred
        cur_ins_pred = paddle.reshape(
            cur_ins_pred,
            shape=(paddle.shape(cur_ins_pred)[0],
                   paddle.shape(cur_ins_pred)[1], -1))
        ins_pred_conv = paddle.matmul(gathered_pred, cur_ins_pred)
        cur_ins_pred = paddle.reshape(
            ins_pred_conv,
            shape=(-1, paddle.shape(ins_pred)[-2],
                   paddle.shape(ins_pred)[-1]))
        ins_pred_list.append(cur_ins_pred)

    num_ins = paddle.sum(fg_num)
    cate_preds = [
        paddle.reshape(
            paddle.transpose(cate_pred, [0, 2, 3, 1]),
            shape=(-1, self.cate_out_channels)) for cate_pred in cate_preds
    ]
    flatten_cate_preds = paddle.concat(cate_preds)
    new_cate_labels = []
    for cate_label in cate_labels:
        new_cate_labels.append(paddle.reshape(cate_label, shape=[-1]))
    cate_labels = paddle.concat(new_cate_labels)

    loss_ins, loss_cate = self.solov2_loss(
        ins_pred_list, ins_labels, flatten_cate_preds, cate_labels, num_ins)

    return {'loss_ins': loss_ins, 'loss_cate': loss_cate}
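# A shape sketch of the dynamic-convolution step above: the gathered kernels
# are applied to the flattened instance features with a batched matmul
# (toy sizes only).
import paddle

kernels = paddle.rand([2, 3, 8])       # [batch, num_inst, channels]
feats = paddle.rand([2, 8, 20])        # [batch, channels, H * W]
masks = paddle.matmul(kernels, feats)  # [2, 3, 20]: one mask per instance
print(masks.shape)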
def test_tensor_patch_method(self):
    paddle.disable_static()
    x_np = np.random.uniform(-1, 1, [2, 3]).astype(self.dtype)
    y_np = np.random.uniform(-1, 1, [2, 3]).astype(self.dtype)
    z_np = np.random.uniform(-1, 1, [6, 9]).astype(self.dtype)

    x = paddle.to_tensor(x_np)
    y = paddle.to_tensor(y_np)
    z = paddle.to_tensor(z_np)

    a = paddle.to_tensor([[1, 1], [2, 2], [3, 3]])
    b = paddle.to_tensor([[1, 1], [2, 2], [3, 3]])

    # 1. Unary operation for Tensor
    self.assertEqual(x.dim(), 2)
    self.assertEqual(x.ndimension(), 2)
    self.assertEqual(x.ndim, 2)
    self.assertEqual(x.size, 6)
    self.assertEqual(x.numel(), 6)
    self.assertTrue(np.array_equal(x.exp().numpy(), paddle.exp(x).numpy()))
    self.assertTrue(
        np.array_equal(x.tanh().numpy(), paddle.tanh(x).numpy()))
    self.assertTrue(
        np.array_equal(x.atan().numpy(), paddle.atan(x).numpy()))
    self.assertTrue(np.array_equal(x.abs().numpy(), paddle.abs(x).numpy()))
    m = x.abs()
    self.assertTrue(
        np.array_equal(m.sqrt().numpy(), paddle.sqrt(m).numpy()))
    self.assertTrue(
        np.array_equal(m.rsqrt().numpy(), paddle.rsqrt(m).numpy()))
    self.assertTrue(
        np.array_equal(x.ceil().numpy(), paddle.ceil(x).numpy()))
    self.assertTrue(
        np.array_equal(x.floor().numpy(), paddle.floor(x).numpy()))
    self.assertTrue(np.array_equal(x.cos().numpy(), paddle.cos(x).numpy()))
    self.assertTrue(
        np.array_equal(x.acos().numpy(), paddle.acos(x).numpy()))
    self.assertTrue(
        np.array_equal(x.asin().numpy(), paddle.asin(x).numpy()))
    self.assertTrue(np.array_equal(x.sin().numpy(), paddle.sin(x).numpy()))
    self.assertTrue(
        np.array_equal(x.sinh().numpy(), paddle.sinh(x).numpy()))
    self.assertTrue(
        np.array_equal(x.cosh().numpy(), paddle.cosh(x).numpy()))
    self.assertTrue(
        np.array_equal(x.round().numpy(), paddle.round(x).numpy()))
    self.assertTrue(
        np.array_equal(x.reciprocal().numpy(),
                       paddle.reciprocal(x).numpy()))
    self.assertTrue(
        np.array_equal(x.square().numpy(), paddle.square(x).numpy()))
    self.assertTrue(
        np.array_equal(x.rank().numpy(), paddle.rank(x).numpy()))
    self.assertTrue(
        np.array_equal(x[0].t().numpy(), paddle.t(x[0]).numpy()))
    self.assertTrue(
        np.array_equal(x.asinh().numpy(), paddle.asinh(x).numpy()))
    ### acosh(x) = nan, need to change input
    t_np = np.random.uniform(1, 2, [2, 3]).astype(self.dtype)
    t = paddle.to_tensor(t_np)
    self.assertTrue(
        np.array_equal(t.acosh().numpy(), paddle.acosh(t).numpy()))
    self.assertTrue(
        np.array_equal(x.atanh().numpy(), paddle.atanh(x).numpy()))
    d = paddle.to_tensor([[1.2285208, 1.3491015, 1.4899898],
                          [1.30058, 1.0688717, 1.4928783],
                          [1.0958099, 1.3724753, 1.8926544]])
    d = d.matmul(d.t())
    # ROCM not support cholesky
    if not fluid.core.is_compiled_with_rocm():
        self.assertTrue(
            np.array_equal(d.cholesky().numpy(),
                           paddle.cholesky(d).numpy()))

    self.assertTrue(
        np.array_equal(x.is_empty().numpy(), paddle.is_empty(x).numpy()))
    self.assertTrue(
        np.array_equal(x.isfinite().numpy(), paddle.isfinite(x).numpy()))
    self.assertTrue(
        np.array_equal(
            x.cast('int32').numpy(), paddle.cast(x, 'int32').numpy()))
    self.assertTrue(
        np.array_equal(
            x.expand([3, 2, 3]).numpy(),
            paddle.expand(x, [3, 2, 3]).numpy()))
    self.assertTrue(
        np.array_equal(
            x.tile([2, 2]).numpy(), paddle.tile(x, [2, 2]).numpy()))
    self.assertTrue(
        np.array_equal(x.flatten().numpy(), paddle.flatten(x).numpy()))
    index = paddle.to_tensor([0, 1])
    self.assertTrue(
        np.array_equal(
            x.gather(index).numpy(), paddle.gather(x, index).numpy()))
    index = paddle.to_tensor([[0, 1], [1, 2]])
    self.assertTrue(
        np.array_equal(
            x.gather_nd(index).numpy(),
            paddle.gather_nd(x, index).numpy()))
    self.assertTrue(
        np.array_equal(
            x.reverse([0, 1]).numpy(), paddle.reverse(x, [0, 1]).numpy()))
    self.assertTrue(
        np.array_equal(
            a.reshape([3, 2]).numpy(), paddle.reshape(a, [3, 2]).numpy()))
    self.assertTrue(
        np.array_equal(
            x.slice([0, 1], [0, 0], [1, 2]).numpy(),
            paddle.slice(x, [0, 1], [0, 0], [1, 2]).numpy()))
    self.assertTrue(
        np.array_equal(
            x.split(2)[0].numpy(), paddle.split(x, 2)[0].numpy()))
    m = paddle.to_tensor(
        np.random.uniform(-1, 1, [1, 6, 1, 1]).astype(self.dtype))
    self.assertTrue(
        np.array_equal(
            m.squeeze([]).numpy(), paddle.squeeze(m, []).numpy()))
    self.assertTrue(
        np.array_equal(
            m.squeeze([1, 2]).numpy(), paddle.squeeze(m, [1, 2]).numpy()))
    m = paddle.to_tensor([2, 3, 3, 1, 5, 3], 'float32')
    self.assertTrue(
        np.array_equal(m.unique()[0].numpy(),
                       paddle.unique(m)[0].numpy()))
    self.assertTrue(
        np.array_equal(
            m.unique(return_counts=True)[1],
            paddle.unique(m, return_counts=True)[1]))
    self.assertTrue(np.array_equal(x.flip([0]), paddle.flip(x, [0])))
    self.assertTrue(np.array_equal(x.unbind(0), paddle.unbind(x, 0)))
    self.assertTrue(np.array_equal(x.roll(1), paddle.roll(x, 1)))
    self.assertTrue(np.array_equal(x.cumsum(1), paddle.cumsum(x, 1)))
    m = paddle.to_tensor(1)
    self.assertTrue(np.array_equal(m.increment(), paddle.increment(m)))
    m = x.abs()
    self.assertTrue(np.array_equal(m.log(), paddle.log(m)))
    self.assertTrue(np.array_equal(x.pow(2), paddle.pow(x, 2)))
    self.assertTrue(np.array_equal(x.reciprocal(), paddle.reciprocal(x)))

    # 2. Binary operation
    self.assertTrue(
        np.array_equal(x.divide(y).numpy(), paddle.divide(x, y).numpy()))
    self.assertTrue(
        np.array_equal(
            x.matmul(y, True, False).numpy(),
            paddle.matmul(x, y, True, False).numpy()))
    self.assertTrue(
        np.array_equal(
            x.norm(p='fro', axis=[0, 1]).numpy(),
            paddle.norm(x, p='fro', axis=[0, 1]).numpy()))
    self.assertTrue(
        np.array_equal(x.dist(y).numpy(), paddle.dist(x, y).numpy()))
    self.assertTrue(
        np.array_equal(x.cross(y).numpy(), paddle.cross(x, y).numpy()))
    m = x.expand([2, 2, 3])
    n = y.expand([2, 2, 3]).transpose([0, 2, 1])
    self.assertTrue(
        np.array_equal(m.bmm(n).numpy(), paddle.bmm(m, n).numpy()))
    self.assertTrue(
        np.array_equal(
            x.histogram(5, -1, 1).numpy(),
            paddle.histogram(x, 5, -1, 1).numpy()))
    self.assertTrue(
        np.array_equal(x.equal(y).numpy(), paddle.equal(x, y).numpy()))
    self.assertTrue(
        np.array_equal(
            x.greater_equal(y).numpy(),
            paddle.greater_equal(x, y).numpy()))
    self.assertTrue(
        np.array_equal(
            x.greater_than(y).numpy(), paddle.greater_than(x, y).numpy()))
    self.assertTrue(
        np.array_equal(
            x.less_equal(y).numpy(), paddle.less_equal(x, y).numpy()))
    self.assertTrue(
        np.array_equal(
            x.less_than(y).numpy(), paddle.less_than(x, y).numpy()))
    self.assertTrue(
        np.array_equal(
            x.not_equal(y).numpy(), paddle.not_equal(x, y).numpy()))
    self.assertTrue(
        np.array_equal(
            x.equal_all(y).numpy(), paddle.equal_all(x, y).numpy()))
    self.assertTrue(
        np.array_equal(
            x.allclose(y).numpy(), paddle.allclose(x, y).numpy()))
    m = x.expand([2, 2, 3])
    self.assertTrue(
        np.array_equal(
            x.expand_as(m).numpy(), paddle.expand_as(x, m).numpy()))
    index = paddle.to_tensor([2, 1, 0])
    self.assertTrue(
        np.array_equal(
            a.scatter(index, b).numpy(),
            paddle.scatter(a, index, b).numpy()))

    # 3. Bool tensor operation
    x = paddle.to_tensor([[True, False], [True, False]])
    y = paddle.to_tensor([[False, False], [False, True]])
    self.assertTrue(
        np.array_equal(
            x.logical_and(y).numpy(), paddle.logical_and(x, y).numpy()))
    self.assertTrue(
        np.array_equal(
            x.logical_not(y).numpy(), paddle.logical_not(x, y).numpy()))
    self.assertTrue(
        np.array_equal(
            x.logical_or(y).numpy(), paddle.logical_or(x, y).numpy()))
    self.assertTrue(
        np.array_equal(
            x.logical_xor(y).numpy(), paddle.logical_xor(x, y).numpy()))
    a = paddle.to_tensor([[1, 2], [3, 4]])
    b = paddle.to_tensor([[4, 3], [2, 1]])
    self.assertTrue(
        np.array_equal(
            x.where(a, b).numpy(), paddle.where(x, a, b).numpy()))

    x_np = np.random.randn(3, 6, 9, 7)
    x = paddle.to_tensor(x_np)
    x_T = x.T
    self.assertEqual(x_T.shape, [7, 9, 6, 3])
    self.assertTrue(np.array_equal(x_T.numpy(), x_np.T))

    self.assertTrue(inspect.ismethod(a.dot))
    self.assertTrue(inspect.ismethod(a.logsumexp))
    self.assertTrue(inspect.ismethod(a.multiplex))
    self.assertTrue(inspect.ismethod(a.prod))
    self.assertTrue(inspect.ismethod(a.scale))
    self.assertTrue(inspect.ismethod(a.stanh))
    self.assertTrue(inspect.ismethod(a.add_n))
    self.assertTrue(inspect.ismethod(a.max))
    self.assertTrue(inspect.ismethod(a.maximum))
    self.assertTrue(inspect.ismethod(a.min))
    self.assertTrue(inspect.ismethod(a.minimum))
    self.assertTrue(inspect.ismethod(a.floor_divide))
    self.assertTrue(inspect.ismethod(a.remainder))
    self.assertTrue(inspect.ismethod(a.floor_mod))
    self.assertTrue(inspect.ismethod(a.multiply))
    self.assertTrue(inspect.ismethod(a.inverse))
    self.assertTrue(inspect.ismethod(a.log1p))
    self.assertTrue(inspect.ismethod(a.erf))
    self.assertTrue(inspect.ismethod(a.addmm))
    self.assertTrue(inspect.ismethod(a.clip))
    self.assertTrue(inspect.ismethod(a.trace))
    self.assertTrue(inspect.ismethod(a.kron))
    self.assertTrue(inspect.ismethod(a.isinf))
    self.assertTrue(inspect.ismethod(a.isnan))
    self.assertTrue(inspect.ismethod(a.concat))
    self.assertTrue(inspect.ismethod(a.broadcast_to))
    self.assertTrue(inspect.ismethod(a.scatter_nd_add))
    self.assertTrue(inspect.ismethod(a.scatter_nd))
    self.assertTrue(inspect.ismethod(a.shard_index))
    self.assertTrue(inspect.ismethod(a.chunk))
    self.assertTrue(inspect.ismethod(a.stack))
    self.assertTrue(inspect.ismethod(a.strided_slice))
    self.assertTrue(inspect.ismethod(a.unsqueeze))
    self.assertTrue(inspect.ismethod(a.unstack))
    self.assertTrue(inspect.ismethod(a.argmax))
    self.assertTrue(inspect.ismethod(a.argmin))
    self.assertTrue(inspect.ismethod(a.argsort))
    self.assertTrue(inspect.ismethod(a.masked_select))
    self.assertTrue(inspect.ismethod(a.topk))
    self.assertTrue(inspect.ismethod(a.index_select))
    self.assertTrue(inspect.ismethod(a.nonzero))
    self.assertTrue(inspect.ismethod(a.sort))
    self.assertTrue(inspect.ismethod(a.index_sample))
    self.assertTrue(inspect.ismethod(a.mean))
    self.assertTrue(inspect.ismethod(a.std))
    self.assertTrue(inspect.ismethod(a.numel))
def train(args):
    # Train on the GPU if one is available
    if paddle.is_compiled_with_cuda():
        paddle.set_device("gpu:0")
    # Create the multi-process game environments
    envs = MultipleEnvironments(args.game, args.num_processes)
    # Fix the seed so initialization is reproducible
    paddle.seed(123)
    # Create the model
    model = Model(envs.num_states, envs.num_actions)
    # Load a pretrained model if one is given
    if args.trained_model is not None:
        model.load_dict(paddle.load(args.trained_model))
    # Create the directory for saved models
    if not os.path.isdir(args.saved_path):
        os.makedirs(args.saved_path)
    paddle.save(model.state_dict(),
                "{}/model_{}.pdparams".format(args.saved_path, args.game))
    # Run game evaluation in a separate process
    mp = _mp.get_context("spawn")
    process = mp.Process(target=eval,
                         args=(args, envs.num_states, envs.num_actions))
    process.start()
    # Create the optimizer with gradient clipping
    clip_grad = paddle.nn.ClipGradByNorm(clip_norm=0.5)
    optimizer = paddle.optimizer.Adam(parameters=model.parameters(),
                                      learning_rate=args.lr,
                                      grad_clip=clip_grad)
    # Reset the game in every worker process at startup
    for agent_conn in envs.agent_conns:
        agent_conn.send(("reset", None))
    # Collect the initial game frames
    curr_states = [agent_conn.recv() for agent_conn in envs.agent_conns]
    curr_states = paddle.to_tensor(np.concatenate(curr_states, 0),
                                   dtype='float32')
    curr_episode = 0
    while True:
        curr_episode += 1
        old_log_policies, actions, values, states, rewards, dones = \
            [], [], [], [], [], []
        for _ in range(args.num_local_steps):
            states.append(curr_states)
            # Run the model forward
            logits, value = model(curr_states)
            # Turn the logits into per-action probabilities
            policy = F.softmax(logits)
            # Sample an action according to the policy distribution
            old_m = Categorical(policy)
            action = old_m.sample([1]).squeeze()
            # Record the rollout data
            actions.append(action)
            values.append(value.squeeze())
            # Log-probability of the sampled action
            old_log_policy = old_m.log_prob(paddle.unsqueeze(action, axis=1))
            old_log_policy = paddle.squeeze(old_log_policy)
            old_log_policies.append(old_log_policy)
            # Send each action to its game process
            for agent_conn, act in zip(envs.agent_conns, action):
                agent_conn.send(("step", int(act[0])))
            # Collect the step results from all processes
            state, reward, done, info = zip(
                *[agent_conn.recv() for agent_conn in envs.agent_conns])
            # Convert the results to tensors
            state = paddle.to_tensor(np.concatenate(state, 0),
                                     dtype='float32')
            reward = paddle.to_tensor(reward, dtype='float32')
            done = paddle.to_tensor(done, dtype='float32')
            rewards.append(reward)
            dones.append(done)
            curr_states = state
        # Bootstrap the value from the last observed frame
        _, next_value = model(curr_states)
        next_value = next_value.squeeze()
        old_log_policies = paddle.concat(old_log_policies).detach().squeeze()
        actions = paddle.concat(actions).squeeze()
        values = paddle.concat(values).squeeze().detach()
        states = paddle.concat(states).squeeze()
        # Compute GAE returns, walking the rollout backwards
        gae = 0.0
        R = []
        for value, reward, done in list(zip(values, rewards, dones))[::-1]:
            gae = gae * args.gamma * args.tau
            gae = gae + reward + args.gamma * next_value.detach() * (
                1.0 - done) - value.detach()
            next_value = value
            R.append(gae + value)
        R = R[::-1]
        R = paddle.concat(R).detach()
        advantages = R - values
        for i in range(args.num_epochs):
            indice = paddle.randperm(args.num_local_steps *
                                     args.num_processes)
            for j in range(args.batch_size):
                batch_indices = indice[
                    int(j * (args.num_local_steps * args.num_processes /
                             args.batch_size)):
                    int((j + 1) * (args.num_local_steps *
                                   args.num_processes / args.batch_size))]
                # Forward pass on the sampled mini-batch
                logits, value = model(paddle.gather(states, batch_indices))
                # Per-action probabilities under the new policy
                new_policy = F.softmax(logits)
                # Log-probability of the recorded actions under the new policy
                new_m = Categorical(new_policy)
                new_log_policy = new_m.log_prob(
                    paddle.unsqueeze(
                        paddle.gather(actions, batch_indices), axis=1))
                new_log_policy = paddle.squeeze(new_log_policy)
                # Actor loss: PPO clipped surrogate objective
                ratio = paddle.exp(new_log_policy - paddle.gather(
                    old_log_policies, batch_indices))
                advantage = paddle.gather(advantages, batch_indices)
                actor_loss = paddle.clip(ratio, 1.0 - args.epsilon,
                                         1.0 + args.epsilon) * advantage
                actor_loss = paddle.concat([
                    paddle.unsqueeze(ratio * advantage, axis=0),
                    paddle.unsqueeze(actor_loss, axis=0)
                ])
                actor_loss = -paddle.mean(paddle.min(actor_loss, axis=0))
                # Critic loss
                critic_loss = F.smooth_l1_loss(
                    paddle.gather(R, batch_indices), value.squeeze())
                entropy_loss = paddle.mean(new_m.entropy())
                # Total loss
                total_loss = actor_loss + critic_loss - \
                    args.beta * entropy_loss
                # Backpropagate and update the parameters
                total_loss.backward()
                optimizer.step()
                optimizer.clear_grad()
        paddle.save(model.state_dict(),
                    "{}/model_{}.pdparams".format(args.saved_path, args.game))
        print("Episode: {}. Total loss: {:.4f}".format(
            curr_episode, total_loss.numpy()[0]))
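# For reference, the clipped PPO surrogate used in the update above can be
# reproduced in isolation. A minimal sketch on dummy tensors (epsilon and the
# values below are illustrative, not taken from args):
import paddle

new_log_p = paddle.to_tensor([-0.9, -1.2, -0.4])
old_log_p = paddle.to_tensor([-1.0, -1.0, -1.0])
advantage = paddle.to_tensor([1.0, -0.5, 2.0])
epsilon = 0.2  # illustrative clip range

ratio = paddle.exp(new_log_p - old_log_p)
clipped = paddle.clip(ratio, 1.0 - epsilon, 1.0 + epsilon) * advantage
# PPO takes the elementwise minimum of the unclipped and clipped terms,
# then negates the mean to obtain a loss to minimize.
stacked = paddle.concat([
    paddle.unsqueeze(ratio * advantage, axis=0),
    paddle.unsqueeze(clipped, axis=0),
])
actor_loss = -paddle.mean(paddle.min(stacked, axis=0))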
def get_seg_single(self, cate_preds, seg_preds, kernel_preds, featmap_size,
                   im_shape, scale_factor):
    h = paddle.cast(im_shape[0], 'int32')[0]
    w = paddle.cast(im_shape[1], 'int32')[0]
    upsampled_size_out = [featmap_size[0] * 4, featmap_size[1] * 4]

    y = paddle.zeros(shape=paddle.shape(cate_preds), dtype='float32')
    inds = paddle.where(cate_preds > self.score_threshold, cate_preds, y)
    inds = paddle.nonzero(inds)
    if paddle.shape(inds)[0] == 0:
        out = paddle.full(shape=[1], fill_value=-1)
        return out, out, out
    cate_preds = paddle.reshape(cate_preds, shape=[-1])
    # Prevent empty and increase fake data
    ind_a = paddle.cast(paddle.shape(kernel_preds)[0], 'int64')
    ind_b = paddle.zeros(shape=[1], dtype='int64')
    inds_end = paddle.unsqueeze(paddle.concat([ind_a, ind_b]), 0)
    inds = paddle.concat([inds, inds_end])
    kernel_preds_end = paddle.ones(shape=[1, self.kernel_out_channels],
                                   dtype='float32')
    kernel_preds = paddle.concat([kernel_preds, kernel_preds_end])
    cate_preds = paddle.concat(
        [cate_preds, paddle.zeros(shape=[1], dtype='float32')])

    # cate_labels & kernel_preds
    cate_labels = inds[:, 1]
    kernel_preds = paddle.gather(kernel_preds, index=inds[:, 0])
    # Flatten (position, class) into a single index;
    # 80 is the number of category channels per position
    cate_score_idx = paddle.add(inds[:, 0] * 80, cate_labels)
    cate_scores = paddle.gather(cate_preds, index=cate_score_idx)

    size_trans = np.power(self.seg_num_grids, 2)
    strides = []
    for _ind in range(len(self.segm_strides)):
        strides.append(
            paddle.full(shape=[int(size_trans[_ind])],
                        fill_value=self.segm_strides[_ind],
                        dtype="int32"))
    strides = paddle.concat(strides)
    strides = paddle.gather(strides, index=inds[:, 0])

    # mask encoding.
    kernel_preds = paddle.unsqueeze(kernel_preds, [2, 3])
    seg_preds = F.conv2d(seg_preds, kernel_preds)
    seg_preds = F.sigmoid(paddle.squeeze(seg_preds, [0]))
    seg_masks = seg_preds > self.mask_threshold
    seg_masks = paddle.cast(seg_masks, 'float32')
    sum_masks = paddle.sum(seg_masks, axis=[1, 2])

    y = paddle.zeros(shape=paddle.shape(sum_masks), dtype='float32')
    keep = paddle.where(sum_masks > strides, sum_masks, y)
    keep = paddle.nonzero(keep)
    keep = paddle.squeeze(keep, axis=[1])
    # Prevent empty and increase fake data
    keep_other = paddle.concat(
        [keep, paddle.cast(paddle.shape(sum_masks)[0] - 1, 'int64')])
    keep_scores = paddle.concat(
        [keep, paddle.cast(paddle.shape(sum_masks)[0], 'int64')])
    cate_scores_end = paddle.zeros(shape=[1], dtype='float32')
    cate_scores = paddle.concat([cate_scores, cate_scores_end])

    seg_masks = paddle.gather(seg_masks, index=keep_other)
    seg_preds = paddle.gather(seg_preds, index=keep_other)
    sum_masks = paddle.gather(sum_masks, index=keep_other)
    cate_labels = paddle.gather(cate_labels, index=keep_other)
    cate_scores = paddle.gather(cate_scores, index=keep_scores)

    # mask scoring.
    seg_mul = paddle.cast(seg_preds * seg_masks, 'float32')
    seg_scores = paddle.sum(seg_mul, axis=[1, 2]) / sum_masks
    cate_scores *= seg_scores

    # Matrix NMS
    seg_preds, cate_scores, cate_labels = self.mask_nms(
        seg_preds, seg_masks, cate_labels, cate_scores, sum_masks=sum_masks)
    ori_shape = im_shape[:2] / scale_factor + 0.5
    ori_shape = paddle.cast(ori_shape, 'int32')
    seg_preds = F.interpolate(paddle.unsqueeze(seg_preds, 0),
                              size=upsampled_size_out,
                              mode='bilinear',
                              align_corners=False,
                              align_mode=0)
    seg_preds = paddle.slice(seg_preds, axes=[2, 3], starts=[0, 0],
                             ends=[h, w])
    seg_masks = paddle.squeeze(F.interpolate(seg_preds,
                                             size=ori_shape[:2],
                                             mode='bilinear',
                                             align_corners=False,
                                             align_mode=0),
                               axis=[0])
    # TODO: support bool type
    seg_masks = paddle.cast(seg_masks > self.mask_threshold, 'int32')
    return seg_masks, cate_labels, cate_scores
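# The "prevent empty and increase fake data" blocks above pad each candidate
# set with one dummy entry so that paddle.gather never receives an empty index
# tensor when nothing clears the threshold. A minimal sketch of the trick on
# dummy scores (threshold and values are illustrative):
import paddle

scores = paddle.to_tensor([0.10, 0.05, 0.02])  # all below the threshold
threshold = 0.30
keep = paddle.nonzero(
    paddle.where(scores > threshold, scores, paddle.zeros_like(scores)))
# Append one fake index pointing at an appended zero score; downstream
# gathers then always see at least one row, and the fake entry scores 0,
# so it is filtered out by later score-based selection.
keep = paddle.concat(
    [paddle.flatten(keep),
     paddle.full([1], scores.shape[0], dtype='int64')])
scores = paddle.concat([scores, paddle.zeros([1])])
picked = paddle.gather(scores, keep)  # never operates on an empty index set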