def ohem_single(self, score, gt_text, training_mask, ohem_ratio=3):
    """Build the hard-example selection mask for a single 2-D score map.

    Args:
        score: Predicted score map; indexed as a 2-D tensor here.
        gt_text: Ground-truth map; values > 0.5 count as positive pixels.
        training_mask: Map whose values > 0.5 mark pixels that may be used.
        ohem_ratio: Maximum number of negatives kept per positive pixel.

    Returns:
        A float32 tensor of shape ``[1, H, W]``. When there are no usable
        positives or no negatives, this is simply ``training_mask``
        reshaped; otherwise it is 1 for every usable pixel that is either
        positive or among the highest-scoring negatives.
    """

    def _as_float_batch(mask):
        # All three exits need the same [1, H, W] float32 layout.  The
        # original used a torch-style ``.view(1, H, W)`` on one branch;
        # ``reshape`` with a list shape is used consistently instead.
        return mask.reshape([1, mask.shape[0], mask.shape[1]]).astype(
            'float32')

    is_text = gt_text > 0.5
    ignored_text = paddle.logical_and(is_text, training_mask <= 0.5)
    # Positives that are not masked out of training.
    pos_num = int(paddle.sum(is_text.astype('float32'))) - int(
        paddle.sum(ignored_text.astype('float32')))
    if pos_num == 0:
        return _as_float_batch(training_mask)

    neg_num = int(paddle.sum((gt_text <= 0.5).astype('float32')))
    neg_num = int(min(pos_num * ohem_ratio, neg_num))
    if neg_num == 0:
        return _as_float_batch(training_mask)

    # Sorting the negated scores ascending ranks negatives from highest to
    # lowest score; the neg_num-th entry is the selection threshold.
    neg_score = paddle.masked_select(score, gt_text <= 0.5)
    neg_score_sorted = paddle.sort(-neg_score)
    threshold = -neg_score_sorted[neg_num - 1]

    selected_mask = paddle.logical_and(
        paddle.logical_or(score >= threshold, is_text),
        training_mask > 0.5)
    return _as_float_batch(selected_mask)
def forward(self, words, wp):
    """Score arcs and relations for a batch and return the argmax choices.

    Args:
        words: Token ids passed to ``self.embed``.
        wp: Second input forwarded unchanged to ``self.embed``.

    Returns:
        ``(arc_preds, rel_preds, s_arc, mask)`` where ``mask`` is an int32
        tensor that is 0 at pad, bos and eos positions.
    """
    words, x = self.embed(words, wp)

    not_pad = words != self.pad_index
    not_eos = words != self.eos_index
    score_mask = paddle.logical_and(not_pad, not_eos)

    # Four projections of the shared representation (call order kept).
    arc_h = self.mlp_arc_h(x)
    arc_d = self.mlp_arc_d(x)
    rel_h = self.mlp_rel_h(x)
    rel_d = self.mlp_rel_d(x)

    # Arc scores, shape: (batch_size, seq_len, seq_len)
    s_arc = self.arc_attn(arc_d, arc_h)
    # Relation scores, shape: (batch_size, seq_len, seq_len, n_rels)
    rel_scores = self.rel_attn(rel_d, rel_h)
    s_rel = paddle.transpose(rel_scores, perm=[0, 2, 3, 1])

    # With bias_after_scale=False the scale op yields 1e5 * (mask - 1):
    # 0 for kept positions and -1e5 for masked ones.
    s_arc_mask = paddle.unsqueeze(score_mask, 1)
    masked_offset = paddle.scale(
        paddle.cast(s_arc_mask, 'int32'),
        scale=1e5,
        bias=-1,
        bias_after_scale=False)
    s_arc = s_arc * s_arc_mask + masked_offset

    # The returned mask additionally drops the bos position.
    not_bos = words != self.bos_index
    mask = paddle.cast(
        paddle.logical_and(paddle.logical_and(not_pad, not_bos), not_eos),
        'int32')

    arc_preds = paddle.argmax(s_arc, axis=-1)
    rel_preds = paddle.argmax(s_rel, axis=-1)
    return arc_preds, rel_preds, s_arc, mask
def batch_predict(
        model,
        data_loader,
        rel_vocab,
        word_pad_index,
        word_bos_index,
        word_eos_index, ):
    """Run ``model`` over every batch and collect per-sentence predictions.

    Args:
        model: Callable returning ``(s_arc, s_rel, words)``; put in eval mode.
        data_loader: Callable that yields input batches.
        rel_vocab: Object whose ``to_tokens`` maps relation ids to tokens.
        word_pad_index: Id excluded from the prediction mask.
        word_bos_index: Id excluded from the prediction mask.
        word_eos_index: Id excluded from the prediction mask.

    Returns:
        ``(arcs, rels)``: a list of lists of head indices as strings, and a
        list of ``rel_vocab.to_tokens`` results, one entry per sentence.
    """
    model.eval()
    arc_chunks = []
    rel_chunks = []
    for inputs in data_loader():
        if args.encoding_model.startswith(
                "ernie") or args.encoding_model == "lstm-pe":
            words, feats = flat_words(inputs[0])
        else:
            words, feats = inputs
        s_arc, s_rel, words = model(words, feats)

        real_token = paddle.logical_and(words != word_pad_index,
                                        words != word_bos_index)
        mask = paddle.logical_and(real_token, words != word_eos_index)

        # Number of kept tokens per sentence, used to split the flat
        # masked_select output back into sentences.
        lens = paddle.sum(paddle.cast(mask, "int32"), axis=-1)
        sections = lens.numpy().tolist()

        arc_preds, rel_preds = decode(s_arc, s_rel, mask)
        arc_chunks.extend(
            paddle.split(paddle.masked_select(arc_preds, mask), sections))
        rel_chunks.extend(
            paddle.split(paddle.masked_select(rel_preds, mask), sections))

    arcs = []
    for seq in arc_chunks:
        arcs.append([str(s) for s in seq.numpy().tolist()])
    rels = []
    for seq in rel_chunks:
        rels.append(rel_vocab.to_tokens(seq.numpy().tolist()))
    return arcs, rels
def label_box(anchors,
              gt_boxes,
              positive_overlap,
              negative_overlap,
              allow_low_quality,
              ignore_thresh,
              is_crowd=None):
    """Match anchors to ground-truth boxes by IoU, with crowd handling.

    Args:
        anchors: Anchor boxes; ``anchors.shape[0]`` is the anchor count.
        gt_boxes: Ground-truth boxes; ``gt_boxes.shape[0]`` is their count.
        positive_overlap: Best IoU >= this value gives label 1.
        negative_overlap: Best IoU in (-1, this value) gives label 0.
        allow_low_quality: Also label as 1 every anchor that attains a
            ground-truth box's maximum (positive) IoU.
        ignore_thresh: When > 0, anchors whose IoU with a crowd box
            exceeds it get IoU -1 everywhere and keep label -1.
        is_crowd: Optional crowd indicator per ground-truth box
            (presumably shape [n_gt, 1] so it broadcasts against the
            anchors -- TODO confirm).

    Returns:
        ``(matches, match_labels)``: flattened index of the best
        ground-truth box per anchor, and int32 labels in {-1, 0, 1}.
    """
    iou = bbox_overlaps(gt_boxes, anchors)
    n_gt = gt_boxes.shape[0]

    n_gt_crowd = 0
    if n_gt != 0 and is_crowd is not None:
        n_gt_crowd = paddle.nonzero(is_crowd).shape[0]

    if iou.shape[0] == 0 or n_gt_crowd == n_gt:
        # No usable ground truth: everything becomes background.
        n_anchor = iou.shape[1]
        default_matches = paddle.full((n_anchor, ), 0, dtype='int64')
        default_match_labels = paddle.full((n_anchor, ), 0, dtype='int32')
        return default_matches, default_match_labels

    if n_gt_crowd > 0:
        crowd_mask = is_crowd * paddle.ones([anchors.shape[0]])
        if ignore_thresh > 0:
            # Anchors too close to any crowd box get IoU -1 everywhere.
            crowd_iou = iou * crowd_mask
            over_thresh = (crowd_iou > ignore_thresh).cast('int32')
            valid = (paddle.sum(over_thresh, axis=0) > 0).cast('float32')
            iou = iou * (1 - valid) - valid
        # Rows belonging to crowd boxes are forced to -1.
        iou = iou * (1 - crowd_mask) - crowd_mask

    matched_vals, matches = paddle.topk(iou, k=1, axis=0)
    match_labels = paddle.full(matches.shape, -1, dtype='int32')

    # matched_vals == -1 marks ignored anchors; they keep label -1.
    is_negative = paddle.logical_and(matched_vals > -1,
                                     matched_vals < negative_overlap)
    match_labels = paddle.where(is_negative,
                                paddle.zeros_like(match_labels),
                                match_labels)
    is_positive = matched_vals >= positive_overlap
    match_labels = paddle.where(is_positive,
                                paddle.ones_like(match_labels),
                                match_labels)

    if allow_low_quality:
        best_per_gt = iou.max(axis=1, keepdim=True)
        is_best = paddle.logical_and(iou > 0, iou == best_per_gt)
        best_count = is_best.cast('int32').sum(0, keepdim=True)
        match_labels = paddle.where(best_count > 0,
                                    paddle.ones_like(match_labels),
                                    match_labels)

    return matches.flatten(), match_labels.flatten()
def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds,
                         num_classes, resolution):
    """Build per-image mask training targets from foreground RoIs.

    For every image index ``k`` the RoIs whose label is neither -1 nor
    ``num_classes`` are selected, their polygons are rasterized inside the
    RoI box at ``resolution`` and the results are collected.

    Args:
        gt_segms: Per-image ground-truth segmentation tensors.
        rois: Per-image RoI tensors; ``len(rois)`` sets the image count.
        labels_int32: Per-image RoI labels.
        sampled_gt_inds: Per-image ground-truth index for each RoI.
        num_classes: Label value treated as non-foreground, as is -1.
        resolution: Forwarded to ``rasterize_polygons_within_box``.

    Returns:
        ``(mask_rois, mask_rois_num, tgt_classes, tgt_masks, mask_index,
        tgt_weights)``. ``mask_rois`` stays a per-image list; the others
        are concatenated across images.
    """
    mask_rois = []
    mask_rois_num = []
    tgt_masks = []
    tgt_classes = []
    mask_index = []
    tgt_weights = []
    for k in range(len(rois)):
        has_fg = True
        rois_per_im = rois[k]
        gt_segms_per_im = gt_segms[k]
        labels_per_im = labels_int32[k]
        # select rois labeled with foreground
        fg_inds = paddle.nonzero(
            paddle.logical_and(labels_per_im != -1, labels_per_im !=
                               num_classes))
        # generate fake roi if foreground is empty
        # (index 1 is used as the placeholder; its weight is zeroed below)
        if fg_inds.numel() == 0:
            has_fg = False
            fg_inds = paddle.ones([1], dtype='int32')
        inds_per_im = sampled_gt_inds[k]
        inds_per_im = paddle.gather(inds_per_im, fg_inds)

        gt_segms_per_im = paddle.gather(gt_segms_per_im, inds_per_im)

        fg_rois = paddle.gather(rois_per_im, fg_inds)
        fg_classes = paddle.gather(labels_per_im, fg_inds)
        # NOTE(review): gt_segms_per_im was already gathered with
        # inds_per_im above and is gathered again with fg_inds here;
        # confirm this double indexing is intended.
        fg_segms = paddle.gather(gt_segms_per_im, fg_inds)
        weight = paddle.ones([fg_rois.shape[0]], dtype='float32')
        if not has_fg:
            # The placeholder roi must not contribute: weight becomes 0.
            weight = weight - 1
        # remove padding
        gt_polys = fg_segms.numpy()
        boxes = fg_rois.numpy()
        new_gt_polys = _strip_pad(gt_polys)
        results = [
            rasterize_polygons_within_box(poly, box, resolution)
            for poly, box in zip(new_gt_polys, boxes)
        ]
        tgt_mask = paddle.stack(results)
        # Targets and rois are constants for the loss: no gradient.
        tgt_mask.stop_gradient = True
        fg_rois.stop_gradient = True

        mask_index.append(fg_inds)
        mask_rois.append(fg_rois)
        mask_rois_num.append(paddle.shape(fg_rois)[0])
        tgt_classes.append(fg_classes)
        tgt_masks.append(tgt_mask)
        tgt_weights.append(weight)

    mask_index = paddle.concat(mask_index)
    mask_rois_num = paddle.concat(mask_rois_num)
    tgt_classes = paddle.concat(tgt_classes, axis=0)
    tgt_masks = paddle.concat(tgt_masks, axis=0)
    tgt_weights = paddle.concat(tgt_weights, axis=0)

    return mask_rois, mask_rois_num, tgt_classes, tgt_masks, mask_index, tgt_weights
def forward(self, similarities_matrix, query_img_id, gallery_img_id,
            keep_mask):
    """Compute ``precision@k`` for every ``k`` in ``self.topk``.

    Args:
        similarities_matrix: Query-by-gallery similarity scores.
        query_img_id: Label per query, compared against ranked labels.
        gallery_img_id: Label per gallery item (transposed with [1, 0],
            so it is 2-D).
        keep_mask: Optional mask of gallery items allowed to count as a
            match for each query; ``None`` disables filtering.

    Returns:
        Dict mapping ``"precision@k"`` to the value at rank ``k``.
    """
    # Gallery items ordered from most to least similar for each query.
    ranking = paddle.argsort(similarities_matrix, axis=1, descending=True)

    gallery_row = paddle.transpose(gallery_img_id, [1, 0])
    gallery_rows = paddle.broadcast_to(
        gallery_row, shape=[ranking.shape[0], gallery_row.shape[1]])
    ranked_labels = paddle.index_sample(gallery_rows, ranking)

    hits = paddle.equal(ranked_labels, query_img_id)
    if keep_mask is not None:
        ranked_keep = paddle.index_sample(
            keep_mask.astype('float32'), ranking)
        hits = paddle.logical_and(hits, ranked_keep.astype('bool'))
    hits = paddle.cast(hits, 'float32')

    # Mean number of hits within the first k results, divided by k.
    ranks = paddle.arange(gallery_img_id.shape[0]) + 1
    hits_so_far = paddle.cumsum(hits, axis=1)
    precision_at_k = (paddle.mean(hits_so_far, axis=0) / ranks).numpy()

    return {
        "precision@{}".format(k): precision_at_k[k - 1]
        for k in self.topk
    }
def forward(self, body_feats=None, rois=None, rois_num=None, inputs=None):
    """
    Run every cascade stage and return losses (training) or predictions.

    Args:
        body_feats (list[Tensor]): Feature maps from backbone
        rois (Tensor): RoIs generated from RPN module
        rois_num (Tensor): The number of RoIs in each image
        inputs (dict{Tensor}): The ground-truth of image; ``im_shape`` is
            read from it for every stage after the first

    Returns:
        In training mode ``(loss, bbox_feat)``, where ``loss`` maps
        ``"<name>_stage<i>"`` to each stage loss divided by the number of
        stages and ``bbox_feat`` is the last stage's head feature.
        Otherwise ``((deltas, scores), self.head)``.
    """
    targets = []
    if self.training:
        # Stage-0 targets come from the assigner applied to the input RoIs.
        rois, rois_num, targets = self.bbox_assigner(rois, rois_num, inputs)
        targets_list = [targets]
        self.assigned_rois = (rois, rois_num)
        self.assigned_targets = targets

    pred_bbox = None
    head_out_list = []
    for i in range(self.num_cascade_stages):
        if i > 0:
            # Later stages start from the boxes the previous stage predicted.
            rois, rois_num = self._get_rois_from_boxes(pred_bbox,
                                                       inputs['im_shape'])
            if self.training:
                rois, rois_num, targets = self.bbox_assigner(
                    rois, rois_num, inputs, i, is_cascade=True)
                tgt_labels = targets[0]
                tgt_labels = paddle.concat(tgt_labels) if len(
                    tgt_labels) > 1 else tgt_labels[0]
                tgt_labels.stop_gradient = True
                fg_inds = paddle.nonzero(
                    paddle.logical_and(tgt_labels >= 0, tgt_labels <
                                       self.num_classes)).flatten()
                # With no foreground sample in this stage, the previous
                # stage's targets are reused for the loss.
                if fg_inds.numel() == 0:
                    targets_list.append(targets_list[-1])
                else:
                    targets_list.append(targets)

        rois_feat = self.roi_extractor(body_feats, rois, rois_num)
        bbox_feat = self.head(rois_feat, i)
        scores = self.bbox_score_list[i](bbox_feat)
        deltas = self.bbox_delta_list[i](bbox_feat)
        head_out_list.append([scores, deltas, rois])
        pred_bbox = self._get_pred_bbox(deltas, rois, self.bbox_weight[i])

    if self.training:
        loss = {}
        for stage, value in enumerate(zip(head_out_list, targets_list)):
            (scores, deltas, rois), targets = value
            loss_stage = self.get_loss(scores, deltas, targets, rois,
                                       self.bbox_weight[stage])
            # Each stage contributes an equal share of the total loss.
            for k, v in loss_stage.items():
                loss[k + "_stage{}".format(
                    stage)] = v / self.num_cascade_stages

        return loss, bbox_feat
    else:
        # The refined RoIs are kept on the instance for later use.
        scores, deltas, self.refined_rois = self.get_prediction(
            head_out_list)
        return (deltas, scores), self.head
def combine_mask(mask1, mask2):
    """Combine two mask with multiplication or logical and.

    Parameters
    -----------
    mask1 : Tensor
        The first mask. Its dtype selects the operation: boolean masks are
        combined with ``paddle.logical_and``, all others by multiplication.
    mask2 : Tensor
        The second mask with broadcastable shape with ``mask1``.

    Returns
    --------
    Tensor
        Combined mask.

    Notes
    ------
    It is mainly used to combine the padding mask and no future mask for
    transformer decoder. Padding mask is used to mask padding positions of
    the decoder inputs and no future mask is used to prevent the decoder to
    see future information.
    """
    # ``paddle.bool`` is the public alias of the boolean dtype; comparing
    # against it avoids reaching into the legacy ``paddle.fluid.core``
    # internals, which are not available in every Paddle release.
    if mask1.dtype == paddle.bool:
        return paddle.logical_and(mask1, mask2)
    return mask1 * mask2
def iou_single(a, b, mask, n_class):
    """Mean per-class IoU between ``a`` and ``b`` over pixels where mask == 1.

    Args:
        a: First label map.
        b: Second label map, same number of elements as ``a``.
        mask: Map whose entries equal to 1 mark the pixels to evaluate.
        n_class: Number of class ids (0 .. n_class - 1) to average over.

    Returns:
        A zero tensor of shape (1,) when no pixel is selected, otherwise
        the mean over classes of intersection / (union + EPS).
    """
    selected = paddle.cast(paddle.reshape(mask == 1, (-1, )), dtype="int32")
    index = where(selected == 1)
    if index.shape[0] == 0:
        return paddle.zeros((1, ))
    index = paddle.reshape(index, (1, -1))

    def _take_selected(tensor):
        # Flatten to one row, pick the selected positions, return 1-D.
        row = paddle.reshape(tensor, (1, -1))
        return paddle.reshape(paddle.index_sample(row, index), (-1, ))

    a = _take_selected(a)
    b = _take_selected(b)

    class_ious = []
    for cls in range(n_class):
        in_a = a == cls
        in_b = b == cls
        inter = paddle.cast(paddle.logical_and(in_a, in_b), dtype='float32')
        union = paddle.cast(paddle.logical_or(in_a, in_b), dtype='float32')
        class_ious.append(paddle.sum(inter) / (paddle.sum(union) + EPS))
    return sum(class_ious) / len(class_ious)
def label_box(anchors, gt_boxes, positive_overlap, negative_overlap,
              allow_low_quality):
    """Match each anchor to its best ground-truth box and label it by IoU.

    Args:
        anchors: Anchor boxes.
        gt_boxes: Ground-truth boxes.
        positive_overlap: Best IoU >= this value gives label 1.
        negative_overlap: Best IoU < this value gives label 0.
        allow_low_quality: Also label as 1 every anchor that attains a
            ground-truth box's maximum (positive) IoU.

    Returns:
        ``(matches, match_labels)``: flattened index of the best
        ground-truth box per anchor, and int32 labels in {-1, 0, 1}. With
        an empty IoU matrix all matches are 0 and all labels are -1.
    """
    iou = bbox_overlaps(gt_boxes, anchors)
    if iou.numel() == 0:
        n_anchor = iou.shape[1]
        default_matches = paddle.full((n_anchor, ), 0, dtype='int64')
        default_match_labels = paddle.full((n_anchor, ), -1, dtype='int32')
        return default_matches, default_match_labels

    matched_vals, matches = paddle.topk(iou, k=1, axis=0)

    # Start from "ignore" (-1), then overwrite negatives and positives.
    match_labels = paddle.full(matches.shape, -1, dtype='int32')
    is_negative = matched_vals < negative_overlap
    match_labels = paddle.where(is_negative,
                                paddle.zeros_like(match_labels),
                                match_labels)
    is_positive = matched_vals >= positive_overlap
    match_labels = paddle.where(is_positive,
                                paddle.ones_like(match_labels),
                                match_labels)

    if allow_low_quality:
        best_per_gt = iou.max(axis=1, keepdim=True)
        is_best = paddle.logical_and(iou > 0, iou == best_per_gt)
        best_count = is_best.cast('int32').sum(0, keepdim=True)
        match_labels = paddle.where(best_count > 0,
                                    paddle.ones_like(match_labels),
                                    match_labels)

    return matches.flatten(), match_labels.flatten()
def equal_logical_xor(name: str, x, y, z):
    """Build, run and save a static graph combining two equality tests.

    The graph computes ``equal(x, y)`` and ``equal(x, z)``, combines them
    with ``paddle.logical_and`` and casts the result to ``x.dtype``.

    NOTE(review): the function name says "xor" but the graph uses
    ``paddle.logical_and`` -- confirm which one is intended.

    Args:
        name: Model name forwarded to ``saveModel``.
        x: Array fed as ``'x'``.
        y: Array fed as ``'y'``.
        z: Array fed as ``'z'``.

    Returns:
        The first fetched output of the executed program.
    """
    import paddle
    paddle.enable_static()

    feed = {'x': x, 'y': y, 'z': z}
    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        nodes = {
            key: paddle.static.data(
                name=key, shape=value.shape, dtype='float32')
            for key, value in feed.items()
        }
        x_eq_y = paddle.equal(nodes['x'], nodes['y'])
        x_eq_z = paddle.equal(nodes['x'], nodes['z'])
        out = paddle.cast(paddle.logical_and(x_eq_y, x_eq_z), x.dtype)

        place = paddle.static.cpu_places(1)[0]
        exe = paddle.static.Executor(place)
        # startup program will call initializer to initialize the parameters.
        exe.run(paddle.static.default_startup_program())

        outs = exe.run(feed=feed, fetch_list=[out])
        # The output directory is taken from the first command-line argument.
        saveModel(
            name,
            exe,
            feedkeys=['x', 'y', 'z'],
            fetchlist=[out],
            inputs=[x, y, z],
            outputs=[outs[0]],
            target_dir=sys.argv[1])

    return outs[0]
def subsample_labels(labels,
                     num_samples,
                     fg_fraction,
                     bg_label=0,
                     use_random=True):
    """Pick foreground and background sample indices from ``labels``.

    Args:
        labels: Label tensor; -1 entries are neither fg nor bg.
        num_samples: Total number of samples wanted.
        fg_fraction: Fraction of ``num_samples`` reserved for foreground.
        bg_label: Label value that marks background.
        use_random: Sample through a random permutation when True,
            otherwise take the leading indices.

    Returns:
        ``(fg_inds, bg_inds)`` as int32 index tensors.
    """

    def _head(tensor, count):
        # First ``count`` entries along axis 0.
        return paddle.slice(tensor, axes=[0], starts=[0], ends=[count])

    is_fg = paddle.logical_and(labels != -1, labels != bg_label)
    positive = paddle.nonzero(is_fg).cast('int32').flatten()
    negative = paddle.nonzero(labels == bg_label).cast('int32').flatten()

    # Foreground is capped by what exists; background fills the rest.
    fg_num = min(positive.numel(), int(num_samples * fg_fraction))
    bg_num = min(negative.numel(), num_samples - fg_num)

    # randomly select positive and negative examples
    # (permutations are always drawn, even when use_random is False)
    fg_perm = _head(paddle.randperm(positive.numel(), dtype='int32'), fg_num)
    bg_perm = _head(paddle.randperm(negative.numel(), dtype='int32'), bg_num)

    if use_random:
        return paddle.gather(positive, fg_perm), paddle.gather(negative,
                                                               bg_perm)
    return _head(positive, fg_num), _head(negative, bg_num)
def filter_roi(rois, max_overlap):
    """Keep RoIs with positive width and height and overlap below 1.

    Args:
        rois: Tensor of boxes indexed as ``[:, 0..3]`` =
            (x1, y1, x2, y2).
        max_overlap: Maximum overlap per RoI (assumed to hold one value
            per RoI, shaped [N] or [1, N] -- TODO confirm with callers).

    Returns:
        The kept RoIs, or a single all-zero ``(1, 4)`` float32 box when
        nothing passes the filter.
    """
    ws = rois[:, 2] - rois[:, 0]
    hs = rois[:, 3] - rois[:, 1]
    # paddle.logical_and takes exactly two operands; its third positional
    # parameter is ``out``.  Passing ``max_overlap < 1`` there silently
    # dropped the overlap condition, so the calls are chained instead.
    has_area = paddle.logical_and(ws > 0, hs > 0)
    valid_mask = paddle.logical_and(has_area, max_overlap < 1)
    keep = paddle.nonzero(valid_mask)
    if keep.numel() > 0:
        # The RoI index is the last coordinate of each nonzero entry,
        # whether the mask is 1-D ([N]) or broadcast to [1, N].
        return rois[keep[:, -1]]
    return paddle.zeros((1, 4), dtype='float32')
def nonempty_bbox(boxes, min_size=0, return_mask=False):
    """Find boxes whose width and height both exceed ``min_size``.

    Args:
        boxes: Tensor of boxes indexed as ``[:, 0..3]`` =
            (x1, y1, x2, y2).
        min_size: Strict lower bound for both width and height.
        return_mask: Return the boolean mask instead of indices.

    Returns:
        The boolean mask over boxes when ``return_mask`` is True,
        otherwise the flattened indices of the boxes that pass.
    """
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    # Both dimensions must be checked; the original tested the width twice
    # and never looked at the height.
    mask = paddle.logical_and(w > min_size, h > min_size)
    if return_mask:
        return mask
    keep = paddle.nonzero(mask).flatten()
    return keep
def build_model(self):
    """Build a small static graph that exercises ``paddle.logical_and``.

    The first feed is compared with itself using ``less_than``, the result
    is and-ed with ``self.assign_bool`` and cast to float32. The name of
    the output variable is stored in ``self.fetch_list``.
    """
    feed_var = paddle.static.data(
        name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32')
    # Boolean left operand for logical_and.
    lhs = paddle.less_than(feed_var, feed_var)
    rhs = paddle.assign(self.assign_bool)
    combined = paddle.logical_and(lhs, rhs)
    out = paddle.cast(combined, 'float32')
    self.fetch_list = [out.name]
def get_loss(self, scores, deltas, targets, rois, bbox_weight):
    """
    Compute the classification and box-regression losses of one head.

    scores (Tensor): scores from bbox head outputs
    deltas (Tensor): deltas from bbox head outputs
    targets (list[List[Tensor]]): bbox targets containing tgt_labels,
        tgt_bboxes and tgt_gt_inds
    rois (List[Tensor]): RoIs generated in each batch
    bbox_weight: weights forwarded to ``bbox2delta``

    Returns a dict with keys ``loss_bbox_cls`` and ``loss_bbox_reg``.
    """

    def _merge(tensors):
        # Concatenate per-image tensors; a single entry is used as-is.
        return paddle.concat(tensors) if len(tensors) > 1 else tensors[0]

    tgt_labels, tgt_bboxes, _tgt_gt_inds = targets
    tgt_labels = _merge(tgt_labels).cast('int64')
    tgt_labels.stop_gradient = True
    loss_bbox_cls = F.cross_entropy(
        input=scores, label=tgt_labels, reduction='mean')

    # Foreground samples carry a label in [0, num_classes).
    is_fg = paddle.logical_and(tgt_labels >= 0,
                               tgt_labels < self.num_classes)
    fg_inds = paddle.nonzero(is_fg).flatten()

    reg_delta = paddle.gather(deltas, fg_inds)
    if deltas.shape[1] != 4:
        # Class-specific regression: pick the 4 deltas of each sample's
        # ground-truth class with (row, column) index pairs.
        fg_gt_classes = paddle.gather(tgt_labels, fg_inds)
        row_inds = paddle.arange(fg_gt_classes.shape[0]).unsqueeze(1)
        row_inds = paddle.tile(row_inds, [1, 4]).reshape([-1, 1])
        col_inds = 4 * fg_gt_classes.unsqueeze(1) + paddle.arange(4)
        col_inds = col_inds.reshape([-1, 1])
        reg_inds = paddle.concat([row_inds, col_inds], axis=1)
        reg_delta = paddle.gather_nd(reg_delta, reg_inds).reshape([-1, 4])

    rois = _merge(rois)
    tgt_bboxes = _merge(tgt_bboxes)
    reg_target = bbox2delta(rois, tgt_bboxes, bbox_weight)
    reg_target = paddle.gather(reg_target, fg_inds)
    reg_target.stop_gradient = True

    # L1 distance summed over foreground, normalised by all samples.
    l1_total = paddle.abs(reg_delta - reg_target).sum()
    loss_bbox_reg = l1_total / tgt_labels.shape[0]

    return {'loss_bbox_cls': loss_bbox_cls, 'loss_bbox_reg': loss_bbox_reg}
def ohem_single(score, gt_text, training_mask, ohem_ratio=3):
    """Build the hard-example selection mask for a single 2-D score map.

    Args:
        score: Predicted score map.
        gt_text: Ground-truth map; values > 0.5 count as positive pixels.
        training_mask: Map whose values > 0.5 mark pixels that may be used.
        ohem_ratio: Maximum number of negatives kept per positive pixel.
            Defaults to 3, the value that used to be hard-coded.

    Returns:
        A float32 tensor of shape ``(1, H, W)``. When there are no usable
        positives or no negatives, this is ``training_mask`` reshaped;
        otherwise it is 1 for every usable pixel that is either positive
        or among the highest-scoring negatives.
    """

    def _as_float_batch(mask):
        # Every exit returns the same (1, H, W) float32 layout.
        mask = paddle.reshape(mask, (1, mask.shape[0], mask.shape[1]))
        return paddle.cast(mask, dtype='float32')

    gt_part = paddle.cast(gt_text > 0.5, dtype='float32')
    gt_tr_part = paddle.cast(
        paddle.logical_and(gt_text > 0.5, training_mask <= 0.5),
        dtype='float32')
    # Positives that are not masked out of training.
    pos_num = int(paddle.sum(gt_part)) - int(paddle.sum(gt_tr_part))
    if pos_num == 0:
        return _as_float_batch(training_mask)

    neg_num = int(np.sum(gt_text.numpy() <= 0.5))
    neg_num = int(min(pos_num * ohem_ratio, neg_num))
    if neg_num == 0:
        return _as_float_batch(training_mask)

    # Gather the scores of all negative pixels.
    gt_text_flatten = paddle.reshape(gt_text, (-1, ))
    index = where(gt_text_flatten <= 0.5)
    index = paddle.reshape(index, (1, -1))
    score_flatten = paddle.reshape(score, (1, -1))
    neg_score = paddle.index_sample(score_flatten, index)
    neg_score = paddle.reshape(neg_score, (-1, ))

    # Sorting the negated scores ascending ranks negatives from highest to
    # lowest score; the neg_num-th entry is the selection threshold.
    neg_score_sorted = paddle.sort(-neg_score)
    threshold = -neg_score_sorted[neg_num - 1]

    hard_or_positive = paddle.logical_or(score >= threshold, gt_text > 0.5)
    selected_mask = paddle.logical_and(hard_or_positive,
                                       training_mask > 0.5)
    return _as_float_batch(selected_mask)
def update(self, arc_preds, rel_preds, arcs, rels, mask):
    """Accumulate arc and relation hit counts for one batch.

    Args:
        arc_preds: Predicted heads.
        rel_preds: Predicted relations.
        arcs: Gold heads.
        rels: Gold relations.
        mask: Positions to evaluate; only nonzero entries are counted.
    """
    positions = paddle.nonzero(mask)
    arc_hit = paddle.gather_nd(arc_preds == arcs, positions)
    rel_hit = paddle.gather_nd(rel_preds == rels, positions)
    # A relation only counts when its arc is correct as well.
    rel_and_arc_hit = paddle.logical_and(rel_hit, arc_hit)

    self.total += len(arc_hit)
    self.correct_arcs += np.sum(arc_hit.numpy()).item()
    self.correct_rels += np.sum(rel_and_arc_hit.numpy()).item()
def get_in_gt_and_in_center_info(self, flatten_center_and_stride, gt_bboxes):
    """Test prior centers against gt boxes and gt center regions.

    Args:
        flatten_center_and_stride: Per-prior rows read as
            (center_x, center_y, stride_x, stride_y).
        gt_bboxes: Ground-truth boxes read as (x1, y1, x2, y2).

    Returns:
        ``(is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds,
        is_in_gts_and_centers)``: per-prior flag "inside any gt box or any
        center region", the indices of those priors, and for those priors
        a per-gt flag "inside both the box and its center region".
    """
    num_gt = gt_bboxes.shape[0]

    def _per_gt(column):
        # One column of the priors, repeated for every gt: [n_center, n_gt]
        values = flatten_center_and_stride[:, column]
        return values.unsqueeze(1).tile([1, num_gt])

    flatten_x = _per_gt(0)
    flatten_y = _per_gt(1)
    flatten_stride_x = _per_gt(2)
    flatten_stride_y = _per_gt(3)

    gt_x1 = gt_bboxes[:, 0]
    gt_y1 = gt_bboxes[:, 1]
    gt_x2 = gt_bboxes[:, 2]
    gt_y2 = gt_bboxes[:, 3]

    # is prior centers in gt bboxes, shape: [n_center, n_gt]
    box_margins = paddle.stack(
        [
            flatten_x - gt_x1, flatten_y - gt_y1, gt_x2 - flatten_x,
            gt_y2 - flatten_y
        ],
        axis=1)
    is_in_gts = box_margins.min(axis=1) > 0
    is_in_gts_all = is_in_gts.sum(axis=1) > 0

    # is prior centers in gt centers
    gt_center_xs = (gt_x1 + gt_x2) / 2.0
    gt_center_ys = (gt_y1 + gt_y2) / 2.0
    radius_x = self.center_radius * flatten_stride_x
    radius_y = self.center_radius * flatten_stride_y
    ct_bound_l = gt_center_xs - radius_x
    ct_bound_t = gt_center_ys - radius_y
    ct_bound_r = gt_center_xs + radius_x
    ct_bound_b = gt_center_ys + radius_y
    center_margins = paddle.stack(
        [
            flatten_x - ct_bound_l, flatten_y - ct_bound_t,
            ct_bound_r - flatten_x, ct_bound_b - flatten_y
        ],
        axis=1)
    is_in_cts = center_margins.min(axis=1) > 0
    is_in_cts_all = is_in_cts.sum(axis=1) > 0

    # in any of gts or gt centers, shape: [n_center]
    is_in_gts_or_centers_all = paddle.logical_or(is_in_gts_all,
                                                 is_in_cts_all)
    is_in_gts_or_centers_all_inds = paddle.nonzero(
        is_in_gts_or_centers_all).squeeze(1)

    def _rows_of_candidates(flags):
        # gather is applied to an int copy, then cast back to bool.
        picked = paddle.gather(
            flags.cast('int'), is_in_gts_or_centers_all_inds, axis=0)
        return picked.cast('bool')

    # both in gts and gt centers, shape: [num_fg, num_gt]
    is_in_gts_and_centers = paddle.logical_and(
        _rows_of_candidates(is_in_gts), _rows_of_candidates(is_in_cts))
    return (is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds,
            is_in_gts_and_centers)
def calculate_area(pred, label, num_classes, ignore_index=255):
    """
    Count per-class pixels of the prediction, the label and their overlap.

    Args:
        pred (Tensor): The prediction by model. A 4-D input has axis 1
            squeezed.
        label (Tensor): The ground truth of image. A 4-D input has axis 1
            squeezed.
        num_classes (int): The unique number of target classes.
        ignore_index (int): Label value whose pixels are excluded from the
            prediction and intersection counts. Default: 255.

    Returns:
        Tensor: The intersection area of prediction and the ground on all
            class.
        Tensor: The prediction area on all class.
        Tensor: The ground truth area on all class

    Raises:
        ValueError: If ``pred`` and ``label`` differ in shape after
            squeezing.
    """
    if len(pred.shape) == 4:
        pred = paddle.squeeze(pred, axis=1)
    if len(label.shape) == 4:
        label = paddle.squeeze(label, axis=1)
    if pred.shape != label.shape:
        raise ValueError('Shape of `pred` and `label should be equal, '
                         'but there are {} and {}.'.format(pred.shape,
                                                           label.shape))

    def _pixel_count(flags):
        return paddle.sum(paddle.cast(flags, "int32"))

    not_ignored = label != ignore_index
    pred_area = []
    label_area = []
    intersect_area = []
    for cls in range(num_classes):
        pred_is_cls = paddle.logical_and(pred == cls, not_ignored)
        label_is_cls = label == cls
        both = paddle.logical_and(pred_is_cls, label_is_cls)
        pred_area.append(_pixel_count(pred_is_cls))
        label_area.append(_pixel_count(label_is_cls))
        intersect_area.append(_pixel_count(both))

    return (paddle.concat(intersect_area), paddle.concat(pred_area),
            paddle.concat(label_area))
def pem_reg_loss_func(self, pred_score, gt_iou_map, mask):
    """Weighted squared-error loss between ``pred_score`` and the IoU map.

    Entries are split by IoU into high (> 0.7), middle (0.3, 0.7] and low
    [0, 0.3] groups. All high entries are kept; middle and low entries are
    randomly sub-sampled with rates num_high / num_middle and
    num_high / num_low.

    Args:
        pred_score: Predicted scores.
        gt_iou_map: Ground-truth IoU map, multiplied by ``mask`` first.
        mask: Validity mask applied to the IoU map and the low group.

    Returns:
        0.5 * sum(weighted squared error) / sum(weights).
    """
    dtype = self.datatype
    gt_iou_map = paddle.multiply(gt_iou_map, mask)

    high = paddle.cast(x=gt_iou_map > 0.7, dtype=dtype)
    middle = paddle.cast(
        x=paddle.logical_and(gt_iou_map <= 0.7, gt_iou_map > 0.3),
        dtype=dtype)
    low = paddle.cast(
        x=paddle.logical_and(gt_iou_map <= 0.3, gt_iou_map >= 0.),
        dtype=dtype)
    # Masked-out entries became 0 above and must not count as "low".
    low = paddle.multiply(low, mask)

    num_h = paddle.cast(paddle.sum(high), dtype=dtype)
    num_m = paddle.cast(paddle.sum(middle), dtype=dtype)
    num_l = paddle.cast(paddle.sum(low), dtype=dtype)

    map_shape = [gt_iou_map.shape[1], gt_iou_map.shape[2]]

    def _subsample(group, group_count):
        # Keep an entry of ``group`` when its uniform draw exceeds
        # 1 - num_h / group_count.
        keep_rate = num_h / group_count
        draws = paddle.uniform(
            shape=map_shape, dtype=dtype, min=0.0, max=1.0)
        draws = paddle.multiply(group, draws)
        return paddle.cast(x=(draws > (1. - keep_rate)), dtype=dtype)

    # Draw order (middle first, then low) is kept.
    sampled_middle = _subsample(middle, num_m)
    sampled_low = _subsample(low, num_l)

    weights = high + sampled_middle + sampled_low
    weights.stop_gradient = True

    loss = F.square_error_cost(pred_score, gt_iou_map)
    loss = paddle.multiply(loss, weights)
    return 0.5 * paddle.sum(loss) / paddle.sum(weights)
def batch_evaluate(
        model,
        metric,
        criterion,
        data_loader,
        word_pad_index,
        word_bos_index,
        word_eos_index, ):
    """Evaluate ``model`` on ``data_loader``.

    The model is switched to eval mode for the pass and back to train
    mode afterwards; ``metric`` is reset before and after.

    Args:
        model: Callable returning ``(s_arc, s_rel, words)``.
        metric: Object with ``reset``, ``update`` and ``accumulate``.
        criterion: Loss callable ``(s_arc, s_rel, arcs, rels, mask)``.
        data_loader: Callable that yields batches.
        word_pad_index: Id excluded from the evaluation mask.
        word_bos_index: Id excluded from the evaluation mask.
        word_eos_index: Id excluded from the evaluation mask.

    Returns:
        ``(total_loss, uas, las)``: mean batch loss and the two values
        returned by ``metric.accumulate()``.
    """
    model.eval()
    metric.reset()
    batch_losses = []
    for batch in data_loader():
        if args.encoding_model.startswith(
                "ernie") or args.encoding_model == "lstm-pe":
            words, arcs, rels = batch
            words, feats = flat_words(words)
        else:
            words, feats, arcs, rels = batch
        s_arc, s_rel, words = model(words, feats)

        real_token = paddle.logical_and(words != word_pad_index,
                                        words != word_bos_index)
        mask = paddle.logical_and(real_token, words != word_eos_index)

        loss = criterion(s_arc, s_rel, arcs, rels, mask)
        batch_losses.append(loss.numpy().item())

        arc_preds, rel_preds = decode(s_arc, s_rel, mask)
        metric.update(arc_preds, rel_preds, arcs, rels, mask)

    uas, las = metric.accumulate()
    total_loss = np.mean(batch_losses)
    model.train()
    metric.reset()
    return total_loss, uas, las
def greedy_search(self, input_ids, logits_processors, max_length,
                  pad_token_id, eos_token_id, **model_kwargs):
    """Generate tokens by always picking the highest-probability one.

    Args:
        input_ids: Prompt token ids; unpacked as (batch_size, cur_len).
        logits_processors: Callable ``(input_ids, logits) -> logits``
            applied before the argmax.
        max_length: Generation stops once the sequence reaches this length.
        pad_token_id: Token appended for sequences that already finished.
        eos_token_id: Token that marks a sequence as finished; ``None``
            disables end-of-sequence handling.
        **model_kwargs: Extra model inputs, refreshed after every step.

    Returns:
        ``(generated_ids, scores)``: only the newly generated part of
        ``input_ids`` and the scores accumulated by
        ``update_scores_for_generation``.
    """
    batch_size, cur_len = input_ids.shape
    origin_len = cur_len
    # One flag per sequence: True while no eos has been produced.
    unfinished_flag = paddle.full([batch_size, 1], True, dtype='bool')
    scores = paddle.full(
        [batch_size, 1], 0.0, dtype=paddle.get_default_dtype())

    while cur_len < max_length:
        # prepare model inputs & get model output
        model_inputs = self.prepare_inputs_for_generation(input_ids,
                                                          **model_kwargs)
        outputs = self(**model_inputs)
        logits = outputs[0] if isinstance(outputs, tuple) else outputs
        # keep only the last position: [batch_size, vocab_size]
        logits = logits[:, -1, :]

        # pre-process distribution
        logits = self.adjust_logits_during_generation(logits)
        logits = logits_processors(input_ids, logits)

        # greedy: argmax over log-probabilities
        probs = F.softmax(logits)
        probs = paddle.log(probs)
        next_tokens = paddle.argmax(probs, axis=-1).unsqueeze(-1)
        next_scores = paddle.index_sample(probs, next_tokens)

        if eos_token_id is not None:
            # Finished sequences are padded instead of extended.
            next_tokens = paddle.where(unfinished_flag, next_tokens,
                                       paddle.full_like(next_tokens,
                                                        pad_token_id))

        scores = self.update_scores_for_generation(
            scores, next_scores, cur_len - origin_len, unfinished_flag)

        cur_len += 1
        input_ids = paddle.concat([input_ids, next_tokens], axis=1)

        if eos_token_id is not None:
            unfinished_flag = paddle.logical_and(
                unfinished_flag, next_tokens != eos_token_id)

        # Stop when there is a </s> in all sentences
        if not paddle.any(unfinished_flag):
            break

        model_kwargs = self.update_model_kwargs_for_generation(
            outputs, model_kwargs)
    return input_ids[:, origin_len:], scores
def quality_focal_loss(pred, target, beta=2.0, use_sigmoid=True):
    """
    Quality Focal Loss (QFL) is from `Generalized Focal Loss: Learning
    Qualified and Distributed Bounding Boxes for Dense Object Detection
    <https://arxiv.org/abs/2006.04388>`_.

    Args:
        pred (Tensor): Predicted joint representation of classification
            and quality (IoU) estimation with shape (N, C), C is the
            number of classes.
        target (tuple([Tensor])): Target category label with shape (N,)
            and target quality label with shape (N,).
        beta (float): The beta parameter for calculating the modulating
            factor. Defaults to 2.0.
        use_sigmoid (bool): Treat ``pred`` as logits (sigmoid applied and
            the with-logits BCE used) when True, as probabilities
            otherwise.

    Returns:
        Tensor: Loss tensor with shape (N,).
    """
    assert len(
        target
    ) == 2, """target for QFL must be a tuple of two elements, including category label and quality label, respectively"""
    # label denotes the category id, score denotes the quality score
    label, score = target

    bce = (F.binary_cross_entropy_with_logits
           if use_sigmoid else F.binary_cross_entropy)
    pred_sigmoid = F.sigmoid(pred) if use_sigmoid else pred

    # negatives are supervised by 0 quality score
    zerolabel = paddle.zeros(pred.shape, dtype='float32')
    loss = bce(pred, zerolabel, reduction='none') * pred_sigmoid.pow(beta)

    # FG cat_id: [0, num_classes -1], BG cat_id: num_classes
    bg_class_ind = pred.shape[1]
    is_fg = paddle.logical_and((label >= 0), (label < bg_class_ind))
    pos = is_fg.nonzero().squeeze(1)
    if pos.shape[0] == 0:
        return loss.sum(axis=1)

    # Boolean (N, C) mask with True at (sample, class) of every positive.
    pos_label = paddle.gather(label, pos, axis=0)
    pos_mask = np.zeros(pred.shape, dtype=np.int32)
    pos_mask[pos.numpy(), pos_label.numpy()] = 1
    pos_mask = paddle.to_tensor(pos_mask, dtype='bool')

    score = score.unsqueeze(-1).expand([-1, pred.shape[1]]).cast('float32')
    # positives are supervised by bbox quality (IoU) score
    modulator = (score - pred_sigmoid).abs().pow(beta)
    loss_pos = bce(pred, score, reduction='none') * modulator

    loss = loss * paddle.logical_not(pos_mask) + loss_pos * pos_mask
    return loss.sum(axis=1)
def filter_boxes(self, boxes, im_w, im_h, im_s, min_size):
    """Return the indices of boxes that are large enough and centred in-image.

    Args:
        boxes: Boxes split along axis 1 into (xmin, ymin, xmax, ymax).
        im_w: Upper bound for the box centre x.
        im_h: Upper bound for the box centre y.
        im_s: Scale the box extents are divided by before the size test.
        min_size: Minimum width and height; values below 1.0 are raised
            to 1.0.

    Returns:
        The result of ``paddle.nonzero`` on the validity flags.
    """
    min_size = max(min_size, 1.0)
    xmin, ymin, xmax, ymax = paddle.tensor.split(
        boxes, axis=1, num_or_sections=4)

    x_ctr = (xmax + xmin) / 2 + 0.5
    y_ctr = (ymax + ymin) / 2 + 0.5
    ws = (xmax - xmin) / im_s + 1
    hs = (ymax - ymin) / im_s + 1

    min_size = paddle.assign(np.asarray([min_size], dtype='float32'))

    conditions = [
        paddle.greater_equal(ws, min_size),
        paddle.greater_equal(hs, min_size),
        paddle.less_equal(x_ctr, im_w),
        paddle.less_equal(y_ctr, im_h),
    ]
    # A box is valid only when every condition holds.
    valid_flag = conditions[0]
    for condition in conditions[1:]:
        valid_flag = paddle.logical_and(valid_flag, condition)

    valid_flag = paddle.squeeze(valid_flag, axis=1)
    return paddle.nonzero(valid_flag)
def forward(self, similarities_matrix, query_img_id, gallery_img_id,
            keep_mask):
    """Compute mean average precision over queries with at least one match.

    Args:
        similarities_matrix: Query-by-gallery similarity scores.
        query_img_id: Label per query, compared against ranked labels.
        gallery_img_id: Label per gallery item (transposed with [1, 0],
            so it is 2-D).
        keep_mask: Optional mask of gallery items allowed to count as a
            match for each query; ``None`` disables filtering.

    Returns:
        Dict with the single key ``"mAP"``.
    """
    # Gallery items ordered from most to least similar for each query.
    ranking = paddle.argsort(similarities_matrix, axis=1, descending=True)

    gallery_row = paddle.transpose(gallery_img_id, [1, 0])
    gallery_rows = paddle.broadcast_to(
        gallery_row, shape=[ranking.shape[0], gallery_row.shape[1]])
    ranked_labels = paddle.index_sample(gallery_rows, ranking)

    hits = paddle.equal(ranked_labels, query_img_id)
    if keep_mask is not None:
        ranked_keep = paddle.index_sample(
            keep_mask.astype('float32'), ranking)
        hits = paddle.logical_and(hits, ranked_keep.astype('bool'))
    hits = paddle.cast(hits, 'float32')

    # Drop queries that have no relevant gallery item at all.
    has_match = paddle.greater_than(
        paddle.sum(hits, axis=1), paddle.to_tensor(0.))
    query_rows = paddle.nonzero(has_match.astype("int"))
    query_rows = paddle.reshape(query_rows, [query_rows.shape[0]])
    hits = paddle.index_select(hits, query_rows, axis=0)

    # Precision at every rank: hits so far divided by the 1-based rank.
    hits_so_far = paddle.cumsum(hits, axis=1)
    ranks = paddle.arange(hits_so_far.shape[1]).astype("float32") + 1
    precision = paddle.divide(hits_so_far, ranks)

    #calc map
    precision_at_hits = paddle.multiply(hits, precision)
    ap = paddle.sum(precision_at_hits, axis=1) / paddle.sum(hits, axis=1)
    return {"mAP": paddle.mean(ap).numpy()[0]}
def forward(self, similarities_matrix, query_img_id, gallery_img_id,
            keep_mask):
    """Compute the mean inverse negative penalty (mINP).

    For every query with at least one match, INP is the number of matches
    divided by the 1-based rank of the hardest (last-ranked) match.

    Args:
        similarities_matrix: Query-by-gallery similarity scores.
        query_img_id: Label per query, compared against ranked labels.
        gallery_img_id: Label per gallery item (transposed with [1, 0],
            so it is 2-D).
        keep_mask: Optional mask of gallery items allowed to count as a
            match for each query; ``None`` disables filtering.

    Returns:
        Dict with the single key ``"mINP"``.
    """
    metric_dict = dict()

    # Gallery items ordered from most to least similar for each query.
    choosen_indices = paddle.argsort(
        similarities_matrix, axis=1, descending=True)
    gallery_labels_transpose = paddle.transpose(gallery_img_id, [1, 0])
    gallery_labels_transpose = paddle.broadcast_to(
        gallery_labels_transpose,
        shape=[
            choosen_indices.shape[0], gallery_labels_transpose.shape[1]
        ])
    choosen_label = paddle.index_sample(gallery_labels_transpose,
                                        choosen_indices)
    equal_flag = paddle.equal(choosen_label, query_img_id)
    if keep_mask is not None:
        keep_mask = paddle.index_sample(
            keep_mask.astype('float32'), choosen_indices)
        equal_flag = paddle.logical_and(equal_flag,
                                        keep_mask.astype('bool'))
    equal_flag = paddle.cast(equal_flag, 'float32')

    # Drop queries that have no relevant gallery item at all.
    num_rel = paddle.sum(equal_flag, axis=1)
    num_rel = paddle.greater_than(num_rel, paddle.to_tensor(0.))
    num_rel_index = paddle.nonzero(num_rel.astype("int"))
    num_rel_index = paddle.reshape(num_rel_index, [num_rel_index.shape[0]])
    equal_flag = paddle.index_select(equal_flag, num_rel_index, axis=0)

    # flag * (1 - 1 / (i + 2)) grows with the position i of a match, so
    # its argmax is the 0-based position of the last-ranked match.
    div = paddle.arange(equal_flag.shape[1]).astype("float32") + 2
    minus = paddle.divide(equal_flag, div)
    auxilary = paddle.subtract(equal_flag, minus)
    hard_index = paddle.argmax(auxilary, axis=1).astype("float32")

    # argmax is 0-based; the rank in the INP formula is 1-based.  Without
    # the + 1 a query whose only match is ranked first divides by zero.
    hard_rank = hard_index + 1
    all_INP = paddle.divide(paddle.sum(equal_flag, axis=1), hard_rank)
    mINP = paddle.mean(all_INP)
    metric_dict["mINP"] = mINP.numpy()[0]
    return metric_dict
def check_points_inside_bboxes(points,
                               bboxes,
                               center_radius_tensor=None,
                               eps=1e-9):
    r"""Test which points fall inside which boxes.

    Args:
        points (Tensor, float32): shape[L, 2], "xy" format, L: num_anchors
        bboxes (Tensor, float32): shape[B, n, 4], "xmin, ymin, xmax, ymax"
            format
        center_radius_tensor (Tensor, float32): shape [L, 1]. Default: None.
        eps (float): A point is inside only when its smallest distance to
            the four edges exceeds this value. Default: 1e-9
    Returns:
        Without ``center_radius_tensor``: is_in_bboxes cast to
        ``bboxes.dtype``, shape[B, n, L], value=1. means selected.
        With it: a tuple of two boolean tensors,
        ``(in_box AND in_center, in_box OR in_center)``.
    """
    points = points.unsqueeze([0, 1])
    x, y = points.chunk(2, axis=-1)
    xmin, ymin, xmax, ymax = bboxes.unsqueeze(2).chunk(4, axis=-1)

    # check whether `points` is in `bboxes`
    box_margins = paddle.concat(
        [x - xmin, y - ymin, xmax - x, ymax - y], axis=-1)
    is_in_bboxes = box_margins.min(axis=-1) > eps

    if center_radius_tensor is None:
        return is_in_bboxes.astype(bboxes.dtype)

    # check whether `points` is in `center_radius`
    radius = center_radius_tensor.unsqueeze([0, 1])
    cx = (xmin + xmax) * 0.5
    cy = (ymin + ymax) * 0.5
    center_margins = paddle.concat(
        [
            x - (cx - radius), y - (cy - radius), (cx + radius) - x,
            (cy + radius) - y
        ],
        axis=-1)
    is_in_center = center_margins.min(axis=-1) > eps

    in_both = paddle.logical_and(is_in_bboxes, is_in_center)
    in_either = paddle.logical_or(is_in_bboxes, is_in_center)
    return (in_both, in_either)
def forward(self, similarities_matrix, query_img_id, gallery_img_id,
            keep_mask):
    """Compute recall@k for every k in ``self.topk``.

    Recall@k is the number of queries with a relevant gallery item within
    the first k ranks, divided by the number of queries that have at least
    one relevant item anywhere in the ranking.

    Args:
        similarities_matrix: query-to-gallery similarity scores. Each row is
            ranked in descending order along axis 1.
        query_img_id: label of each query, compared element-wise against the
            ranked gallery labels (assumed shape [num_query, 1] -- TODO
            confirm against the caller).
        gallery_img_id: label of each gallery item. It is transposed with
            perm [1, 0] and broadcast across the query rows.
        keep_mask: optional mask indexed like ``similarities_matrix``.
            Gallery entries whose mask value is false never count as
            relevant. ``None`` disables masking.

    Returns:
        dict: maps ``"recall<k>"`` to the recall at rank k, for each k in
        ``self.topk``.
    """
    # Rank the gallery for every query, best match first, and look up the
    # gallery label found at each rank.
    ranked_idx = paddle.argsort(similarities_matrix, axis=1, descending=True)
    gallery_row = paddle.transpose(gallery_img_id, [1, 0])
    gallery_rows = paddle.broadcast_to(
        gallery_row, shape=[ranked_idx.shape[0], gallery_row.shape[1]])
    ranked_labels = paddle.index_sample(gallery_rows, ranked_idx)

    hits = paddle.equal(ranked_labels, query_img_id)
    if keep_mask is not None:
        # Reorder the mask into ranking order before combining it.
        ranked_keep = paddle.index_sample(
            keep_mask.astype('float32'), ranked_idx)
        hits = paddle.logical_and(hits, ranked_keep.astype('bool'))
    hits = paddle.cast(hits, 'float32')

    # Number of queries that have at least one relevant gallery item.
    hits_per_query = paddle.sum(hits, axis=1)
    has_hit = paddle.greater_than(hits_per_query, paddle.to_tensor(0.))
    real_query_num = paddle.sum(has_hit.astype("float32"))

    # found_by_rank[q, r] is 1 once query q has seen a hit at rank <= r.
    running_hits = paddle.cumsum(hits, axis=1)
    found_by_rank = paddle.greater_than(
        running_hits, paddle.to_tensor(0.)).astype("float32")
    all_cmc = (paddle.sum(found_by_rank, axis=0) / real_query_num).numpy()

    # k is 1-based, so recall@k sits at index k - 1.
    return {"recall{}".format(k): all_cmc[k - 1] for k in self.topk}
def sample(self,
           input_ids,
           logits_processors,
           max_length,
           pad_token_id,
           eos_token_id,
           top_k=None,
           top_p=None,
           temperature=None,
           min_tokens_to_keep=1,
           **model_kwargs):
    """Generate tokens by multinomial sampling, one step at a time.

    Each step runs the model, takes the logits of the last position, applies
    the optional temperature, top-k and top-p filters, and draws the next
    token with ``paddle.multinomial``. The score of a drawn token is its
    log-softmax value computed before temperature and filtering.

    Args:
        input_ids: prompt token ids; its shape is unpacked as
            ``(batch_size, cur_len)``.
        logits_processors: callable invoked as
            ``logits_processors(input_ids, logits)`` on the last-step logits.
        max_length: generation stops once the sequence length reaches it.
        pad_token_id: written in place of sampled tokens for sequences that
            are already finished (only when ``eos_token_id`` is not None).
        eos_token_id: token that marks a sequence as finished. ``None``
            disables early stopping.
        top_k: keep only the k most probable tokens; skipped when None or 0.
        top_p: nucleus threshold; skipped when None or >= 1.0.
        temperature: divisor applied to the logits; skipped when None or 1.0.
        min_tokens_to_keep: lower bound on the tokens the filters keep.
        **model_kwargs: forwarded to ``prepare_inputs_for_generation`` and
            refreshed after each step by
            ``update_model_kwargs_for_generation``.

    Returns:
        tuple: ``(generated_ids, scores)`` where ``generated_ids`` holds only
        the newly generated columns of ``input_ids``.
    """

    def _keep_top_k(dist, k, min_keep):
        # Clamp k between min_keep and the vocabulary size.
        k = min(max(k, min_keep), dist.shape[-1])
        # Zero every token whose probability is below the k-th largest one.
        top_values, _ = paddle.topk(dist, k=k)
        return paddle.where(dist >= top_values[:, -1:], dist,
                            paddle.full_like(dist, 0.0))

    def _keep_top_p(dist, p, min_keep):
        ordered_probs = paddle.sort(dist, descending=True)
        order = paddle.argsort(dist, descending=True)
        running_total = paddle.cumsum(ordered_probs, axis=-1)

        # Mark tokens whose cumulative probability exceeds p, but keep at
        # least min_keep tokens.
        drop = running_total > p
        if min_keep > 1:
            # 'min_keep - 1' because the shift below keeps one more token.
            drop[:, :min_keep - 1] = 0
        # Shift the marks right by one so the first token crossing the
        # threshold is kept, and never drop the most probable token.
        drop = paddle.cast(drop, dtype='int64')
        drop[:, 1:] = (drop[:, :-1].clone())
        drop[:, 0] = 0

        # Map the marks from sorted order back to vocabulary order using
        # flat indices (row offset + column).
        row_offsets = paddle.arange(
            dist.shape[0]).unsqueeze(-1) * dist.shape[-1]
        flat_positions = order + row_offsets
        scattered = paddle.scatter(drop.flatten(),
                                   flat_positions.flatten(), drop.flatten())
        scattered = paddle.cast(scattered, 'bool').reshape(dist.shape)
        return paddle.where(scattered, paddle.full_like(dist, 0.0), dist)

    batch_size, cur_len = input_ids.shape
    origin_len = cur_len
    unfinished_flag = paddle.full([batch_size, 1], True, dtype='bool')
    scores = paddle.full(
        [batch_size, 1], 0.0, dtype=paddle.get_default_dtype())

    # These options do not change between steps.
    use_temperature = temperature is not None and temperature != 1.0
    use_top_k = top_k is not None and top_k != 0
    use_top_p = top_p is not None and top_p < 1.0
    track_eos = eos_token_id is not None

    while cur_len < max_length:
        # Prepare model inputs and run the model.
        model_inputs = self.prepare_inputs_for_generation(input_ids,
                                                          **model_kwargs)
        outputs = self(**model_inputs)
        if isinstance(outputs, tuple):
            logits = outputs[0]
        else:
            logits = outputs
        # [batch_size, vocab_size]
        logits = logits[:, -1, :]

        # Pre-process the distribution.
        logits = self.adjust_logits_during_generation(logits)
        logits = logits_processors(input_ids, logits)

        # Scores come from the distribution before temperature/filtering.
        origin_probs = paddle.log(F.softmax(logits))
        if use_temperature:
            logits = logits / temperature
        probs = F.softmax(logits)
        if use_top_k:
            probs = _keep_top_k(probs, top_k, min_tokens_to_keep)
        if use_top_p:
            probs = _keep_top_p(probs, top_p, min_tokens_to_keep)
        next_tokens = paddle.multinomial(probs)
        next_scores = paddle.index_sample(origin_probs, next_tokens)

        if track_eos:
            # Finished sequences only receive padding from now on.
            next_tokens = paddle.where(
                unfinished_flag, next_tokens,
                paddle.full_like(next_tokens, pad_token_id))

        scores = self.update_scores_for_generation(
            scores, next_scores, cur_len - origin_len, unfinished_flag)

        cur_len += 1
        input_ids = paddle.concat([input_ids, next_tokens], axis=1)

        if track_eos:
            unfinished_flag = paddle.logical_and(
                unfinished_flag, next_tokens != eos_token_id)

        # Stop when there is a </s> in all sentences
        if not paddle.any(unfinished_flag):
            break
        model_kwargs = self.update_model_kwargs_for_generation(outputs,
                                                               model_kwargs)
    return input_ids[:, origin_len:], scores