def rpn_anchor_target(anchors,
                      gt_boxes,
                      rpn_batch_size_per_im,
                      rpn_positive_overlap,
                      rpn_negative_overlap,
                      rpn_fg_fraction,
                      use_random=True,
                      batch_size=1,
                      weights=None):
    """Build per-image RPN training targets.

    For each of the first ``batch_size`` entries of ``gt_boxes`` the anchors
    are matched with ``label_box``, sampled with ``subsample_labels`` and the
    matched boxes are encoded with ``bbox2delta``.

    Args:
        anchors: Anchors shared by all images; forwarded unchanged to
            ``label_box`` and ``bbox2delta``.
        gt_boxes: Indexable container with one ground-truth box tensor per
            image.
        rpn_batch_size_per_im: Forwarded to ``subsample_labels`` (number of
            anchors to sample per image).
        rpn_positive_overlap: Positive threshold forwarded to ``label_box``.
        rpn_negative_overlap: Negative threshold forwarded to ``label_box``.
        rpn_fg_fraction: Foreground fraction forwarded to
            ``subsample_labels``.
        use_random: Forwarded to ``subsample_labels``.
        batch_size: Number of images to process.
        weights: Forwarded to ``bbox2delta``. ``None`` (the default) means
            ``[1., 1., 1., 1.]``; a fresh list is created on every call so
            no mutable default object is shared between calls.

    Returns:
        tuple: ``(tgt_labels, tgt_bboxes, tgt_deltas)``, three lists holding
        one tensor per image. Every returned tensor has
        ``stop_gradient = True``.
    """
    if weights is None:
        weights = [1., 1., 1., 1.]

    tgt_labels = []
    tgt_bboxes = []
    tgt_deltas = []
    for i in range(batch_size):
        gt_bbox = gt_boxes[i]

        # Step1: match anchor and gt_bbox
        matches, match_labels, _ = label_box(
            anchors, gt_bbox, rpn_positive_overlap, rpn_negative_overlap,
            True)

        # Step2: sample anchor
        fg_inds, bg_inds = subsample_labels(
            match_labels, rpn_batch_size_per_im, rpn_fg_fraction, 0,
            use_random)

        # Fill with the ignore label (-1), then set positive and negative
        # labels
        labels = paddle.full(match_labels.shape, -1, dtype='int32')
        labels = paddle.scatter(labels, fg_inds, paddle.ones_like(fg_inds))
        labels = paddle.scatter(labels, bg_inds, paddle.zeros_like(bg_inds))

        # Step3: make output
        matched_gt_boxes = paddle.gather(gt_bbox, matches)
        tgt_delta = bbox2delta(anchors, matched_gt_boxes, weights)

        # Targets are constants for the loss; block gradient flow.
        labels.stop_gradient = True
        matched_gt_boxes.stop_gradient = True
        tgt_delta.stop_gradient = True

        tgt_labels.append(labels)
        tgt_bboxes.append(matched_gt_boxes)
        tgt_deltas.append(tgt_delta)

    return tgt_labels, tgt_bboxes, tgt_deltas
def batch_distance2bbox(points, distance, max_shapes=None):
    """Turn batched point-to-side distances into corner-format boxes.

    Args:
        points (Tensor): [B, ..., 2], "xy" format
        distance (Tensor): [B, ..., 4], "ltrb" format
        max_shapes (Tensor, optional): [B, 2], "h,w" format, shape of the
            image. When given, the boxes are clipped to lie between 0 and
            the corresponding (w, h, w, h) limit.

    Returns:
        Tensor: Decoded bboxes, "x1y1x2y2" format.
    """
    left_top, right_bottom = paddle.split(distance, 2, -1)
    # Keep `points` as the second operand of each addition, as the original
    # author recommends for tensor + parameter sums.
    top_left = -left_top + points
    bottom_right = right_bottom + points
    boxes = paddle.concat([top_left, bottom_right], -1)

    if max_shapes is None:
        return boxes

    # (h, w) -> (w, h, w, h) so the limits line up with x1, y1, x2, y2.
    limits = max_shapes.flip(-1).tile([1, 2])
    # Insert singleton axes until the limits have as many dims as the boxes.
    missing_dims = boxes.ndim - limits.ndim
    for _ in range(missing_dims):
        limits.unsqueeze_(1)

    boxes = paddle.where(boxes < limits, boxes, limits)
    boxes = paddle.where(boxes > 0, boxes, paddle.zeros_like(boxes))
    return boxes
def aggr(batch, y, nxt_y, y0, alpha):
    """Aggregate predecessor labels for ``batch`` and store them in ``nxt_y``.

    Reads the module-level ``graph`` and ``indegree``. The rows of ``nxt_y``
    indexed by ``batch`` are overwritten in place; nothing is returned.

    Args:
        batch: Tensor of node ids (``.numpy()`` is called on it).
        y: Array of current labels, indexed by node id.
        nxt_y: Array that receives the updated labels.
        y0: Array of initial labels, indexed by node id.
        alpha: Mixing weight given to the initial labels.
    """
    batch_np = batch.numpy()
    predecessors = graph.predecessor(batch_np)
    own_label = paddle.to_tensor(y[batch_np])
    own_label0 = paddle.to_tensor(y0[batch_np])

    # For every predecessor, remember the position (within the batch) of the
    # node it points to.
    owner_rows = [
        np.ones(len(p)) * row for row, p in enumerate(predecessors)
        if len(p) > 0
    ]
    pred_cat = np.concatenate(predecessors)
    owner_rows_cat = paddle.to_tensor(np.concatenate(owner_rows), dtype="int64")
    pred_cat_pd = paddle.to_tensor(pred_cat)

    pred_label = paddle.to_tensor(y[pred_cat])
    # Gathered as in the original code, although the value is not read below.
    pred_norm = paddle.gather(indegree, pred_cat_pd)
    self_norm = paddle.gather(indegree, paddle.to_tensor(batch, dtype="int64"))

    neighbours = paddle.scatter(
        paddle.zeros_like(own_label), owner_rows_cat, pred_label)
    mixed = (1 - alpha) * (neighbours + own_label) * self_norm \
        + alpha * own_label0
    # Normalise each row so it sums to one.
    mixed = mixed / paddle.sum(mixed, -1, keepdim=True)
    nxt_y[batch] = mixed.numpy()
def __call__(self, bboxes, scores):
    """Run ``multiclass_nms`` and return (batch, class, box index) triples.

    Args:
        bboxes: Box tensor; ``shape[0]`` is treated as the batch size and
            ``shape[1]`` as the number of boxes per image.
        scores: Score tensor forwarded to ``multiclass_nms``.

    Returns:
        Tensor: int64 tensor whose columns are batch id, class id and the
        per-image box index of every kept detection.
    """
    output, _, index = multiclass_nms(
        bboxes,
        scores,
        background_label=-1,
        score_threshold=self.score_threshold,
        nms_top_k=-1,
        nms_threshold=self.nms_threshold,
        keep_top_k=self.keep_top_k,
        nms_eta=1.0,
        normalized=False,
        return_index=True)

    # Column 0 of the NMS output is read as the class id.
    class_ids = paddle.cast(
        paddle.slice(
            output, axes=[1], starts=[0], ends=[1]), dtype="int64")
    index = paddle.cast(index, dtype="int64")

    if bboxes.shape[0] != 1:
        # The returned index is flattened over the batch; split it back into
        # (image, box-within-image).
        boxes_per_image = bboxes.shape[1]
        batch_ids = paddle.divide(index, boxes_per_image)
        index = paddle.mod(index, boxes_per_image)
    else:
        batch_ids = paddle.zeros_like(class_ids, dtype="int64")

    return paddle.concat([batch_ids, class_ids, index], axis=1)
def forward(self, inputs):
    """Attention-pool ``keys`` with scores computed against ``querys``.

    Args:
        inputs: Triple ``(querys, keys, sess_length)``. ``sess_length`` is
            passed to ``sequence_mask`` to mark the valid key positions.

    Returns:
        Tensor: ``matmul(attention_scores, keys)``.
    """
    querys, keys, sess_length = inputs
    seq_len = keys.shape[1]
    key_masks = nn.functional.sequence_mask(sess_length, seq_len)

    # Repeat the query along the key axis so it can be combined element-wise.
    tiled_query = paddle.tile(querys.unsqueeze(1), [1, seq_len, 1])
    hidden = paddle.concat(
        [tiled_query, keys, tiled_query - keys, tiled_query * keys], axis=-1)

    for layer in self.layers:
        hidden = self.activation(layer(hidden))

    # Original author's shape notes: (N, 50, 1) -> (N, 1, 50).
    scores = paddle.transpose(self.dnn(hidden), [0, 2, 1])

    if self.weight_normalization:
        fill = paddle.ones_like(scores) * (-2**32 + 1)
    else:
        fill = paddle.zeros_like(scores)

    # unsqueeze the mask so its shape matches the scores
    scores = paddle.where(key_masks.unsqueeze(1) == 1, scores, fill)
    scores = self.soft(scores)
    return paddle.matmul(scores, keys)
def drop_head(x, drop_n_heads, training=True):
    """Drop n context vectors from multiple ones.

    Parameters
    ----------
    x : Tensor [shape=(batch_size, num_heads, time_steps, channels)]
        The input, multiple context vectors.

    drop_n_heads : int [0<= drop_n_heads <= num_heads]
        Number of vectors to drop.

    training : bool
        A flag indicating whether it is in training. If `False`, no dropout
        is applied.

    Returns
    -------
    Tensor
        The output. ``x`` itself when nothing is dropped, an all-zero tensor
        when every head is dropped, otherwise ``x`` with ``drop_n_heads``
        randomly chosen heads per sample zeroed and the remaining heads
        scaled by ``num_heads / (num_heads - drop_n_heads)``.
    """
    if not training or (drop_n_heads == 0):
        return x

    batch_size, num_heads, _, _ = x.shape
    # drop all heads
    if num_heads == drop_n_heads:
        return paddle.zeros_like(x)

    mask = np.ones([batch_size, num_heads])
    mask[:, :drop_n_heads] = 0
    # Shuffle each row independently so every sample drops its own heads.
    for subarray in mask:
        np.random.shuffle(subarray)
    scale = float(num_heads) / (num_heads - drop_n_heads)
    mask = scale * np.reshape(mask, [batch_size, num_heads, 1, 1])
    # The numpy mask is float64; create the tensor in x's dtype so the
    # product neither mixes dtypes nor changes the output dtype.
    out = x * paddle.to_tensor(mask, dtype=x.dtype)
    return out
def forward(self, src, dsts):
    """Score one positive and several negative destinations against ``src``.

    Args:
        src: Source ids, [b, 1] per the original author's note.
        dsts: Destination ids, [b, 1+neg]; column 0 is used as the positive
            sample and the remaining columns as negatives.

    Returns:
        The average of the positive and negative ``self.loss`` values.
    """
    center = self.emb(src)
    candidates = self.emb(dsts)
    positive = candidates[:, 0:1]
    negatives = candidates[:, 1:]

    # [batch_size, 1, 1]
    pos_logits = paddle.matmul(center, positive, transpose_y=True)
    # [batch_size, 1, neg_num]
    neg_logits = paddle.matmul(center, negatives, transpose_y=True)

    pos_loss = self.loss(pos_logits, paddle.ones_like(pos_logits))
    neg_loss = self.loss(neg_logits, paddle.zeros_like(neg_logits))
    return (pos_loss + neg_loss) / 2
def post_process(self, bboxes, bbox_num, im_shape, scale_factor):
    """
    Rescale the 8-value boxes from NMS and clip them to the original image.

    Note that ``bboxes`` is rescaled in place before clipping.

    Args:
        bboxes(Tensor): bboxes [N, 8], read as x1, y1, x2, y2, x3, y3, x4, y4
        bbox_num(Tensor): bbox_num, returned unchanged
        im_shape(Tensor): input image shape; indexed with [0] and [1] here
            (documented upstream as [1 2] - TODO confirm the expected rank)
        scale_factor(Tensor): scale factor; indexed with [0] and [1] here
    Returns:
        tuple: ``(bbox, bbox_num)`` where ``bbox`` is the clipped [N, 8]
            tensor in original-image coordinates.
    """
    origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
    origin_h, origin_w = origin_shape[0], origin_shape[1]

    # Even columns are divided by scale_factor[0], odd ones by
    # scale_factor[1].
    bboxes[:, 0::2] = bboxes[:, 0::2] / scale_factor[0]
    bboxes[:, 1::2] = bboxes[:, 1::2] / scale_factor[1]

    lower = paddle.zeros_like(origin_h)
    # Upper limit by column parity: x columns use w - 1, y columns h - 1.
    upper = (origin_w - 1, origin_h - 1)
    clipped = [
        paddle.maximum(paddle.minimum(bboxes[:, col], upper[col % 2]), lower)
        for col in range(8)
    ]
    bbox = paddle.stack(clipped, axis=-1)
    return bbox, bbox_num
def forward(self, input_ids, token_type_ids=None, position_ids=None):
    """Embed tokens, positions and (optionally) token types.

    Args:
        input_ids: Token id tensor.
        token_type_ids: Optional token type ids. Used only when
            ``self.type_vocab_size != 0``; in that case it defaults to zeros
            shaped like ``input_ids``. When ``self.type_vocab_size == 0`` a
            supplied value is ignored and a warning is logged.
        position_ids: Optional position ids. Defaults to ``0..L-1`` along
            the last axis of ``input_ids``.

    Returns:
        The summed embeddings after layer norm and dropout.
    """
    if position_ids is None:
        # maybe need use shape op to unify static graph and dynamic graph
        ones = paddle.ones_like(input_ids, dtype="int64")
        seq_length = paddle.cumsum(ones, axis=-1)
        # cumsum of ones counts from 1; subtract to start positions at 0.
        position_ids = seq_length - ones
        position_ids.stop_gradient = True

    input_embedings = self.word_embeddings(input_ids)
    position_embeddings = self.position_embeddings(position_ids)
    embeddings = input_embedings + position_embeddings

    if self.type_vocab_size != 0:
        if token_type_ids is None:
            token_type_ids = paddle.zeros_like(input_ids, dtype="int64")
        token_type_embeddings = self.token_type_embeddings(token_type_ids)
        embeddings += token_type_embeddings
    elif token_type_ids is not None:
        # The two literals are concatenated; the trailing space keeps the
        # sentences from running together in the log output.
        logger.warning(
            "There is no need to pass the token type ids to SKEP based on RoBERTa model. "
            "The input token type ids will be ignored.")

    embeddings = self.layer_norm(embeddings)
    embeddings = self.dropout(embeddings)
    return embeddings
def forward(self, input_ids, token_type_ids=None, position_ids=None):
    """Embed tokens, token types and positions.

    Args:
        input_ids: Token id tensor; axis 1 is read as the sequence length.
        token_type_ids: Optional token type ids; defaults to zeros shaped
            like ``input_ids``.
        position_ids: Optional position ids. When omitted, the first
            ``self.cls_num`` positions get id 0 and the remaining positions
            are numbered ``1..content_len``.

    Returns:
        The summed embeddings after layer norm and dropout.
    """
    if position_ids is None:
        # Number of tokens that follow the leading `cls_num` tokens.
        content_len = paddle.shape(input_ids)[1] - self.cls_num
        position_ids = paddle.concat([
            paddle.zeros(
                shape=[self.cls_num], dtype="int64"), paddle.linspace(
                    1, content_len, content_len, dtype="int64")
        ])
        position_ids.stop_gradient = True

    if token_type_ids is None:
        token_type_ids = paddle.zeros_like(input_ids, dtype="int64")

    input_embedings = self.word_embeddings(input_ids)
    position_embeddings = self.position_embeddings(position_ids)
    token_type_embeddings = self.token_type_embeddings(token_type_ids)

    embeddings = input_embedings + token_type_embeddings + position_embeddings
    embeddings = self.layer_norm(embeddings)
    embeddings = self.dropout(embeddings)
    return embeddings
def forward(self, features, num_voxels, coors):
    """Decorate per-pillar point features and run them through the PFN layers.

    Args:
        features: Point features; the first three channels of the last axis
            are used as x, y, z.
        num_voxels: Number of valid points per pillar.
        coors: Pillar coordinates; column 3 is combined with ``self.vx`` /
            ``self.x_offset`` and column 2 with ``self.vy`` /
            ``self.y_offset``.

    Returns:
        Output of the last PFN layer with all size-1 axes squeezed out.
    """
    xyz = features[:, :, :3]

    # Offset of every point from the mean of its pillar.
    counts = num_voxels.astype(features.dtype).reshape((-1, 1, 1))
    cluster_offset = xyz - xyz.sum(axis=1, keepdim=True) / counts

    # Offset of every point from the pillar centre in x and y.
    center_offset = paddle.zeros_like(features[:, :, :2])
    center_offset[:, :, 0] = features[:, :, 0] - (
        coors[:, 3].astype("float32").unsqueeze(1) * self.vx + self.x_offset)
    center_offset[:, :, 1] = features[:, :, 1] - (
        coors[:, 2].astype("float32").unsqueeze(1) * self.vy + self.y_offset)

    # Combine together feature decorations
    decorated = [features, cluster_offset, center_offset]
    if self._with_distance:
        decorated.append(paddle.norm(xyz, 2, 2, keepdim=True))
    features = paddle.concat(decorated, axis=-1)

    # The decorations were computed without regard to whether a pillar slot
    # was empty; mask so that empty slots stay zero.
    voxel_count = features.shape[1]
    mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
    mask = paddle.unsqueeze(mask, -1).astype(features.dtype)
    features *= mask

    # Forward pass through PFNLayers
    for pfn in self.pfn_layers:
        features = pfn(features)
    return features.squeeze()
def forward(self, inputs):
    """
    Return ``paddle.zeros_like(inputs)`` with default dtype and name.
    """
    return paddle.zeros_like(inputs, dtype=None, name=None)
def _criterion(loss_func, logit, is_real):
    """Apply ``loss_func`` to ``logit`` against an all-ones or all-zeros target.

    The target is all ones when ``is_real`` is truthy and all zeros
    otherwise; it is shaped like ``logit``.
    """
    make_target = paddle.ones_like if is_real else paddle.zeros_like
    return loss_func(logit, make_target(logit))
# Adversarial training loop: for every batch the discriminator is updated
# first, then the generator losses are computed.
for epoch in range(EPOCHS):
    for data in tqdm(data_loader_train):
        real_A, real_B = data

        optimizerD.clear_grad()
        # D([real_A, real_B]): real pairs are labelled with ones
        real_AB = paddle.concat((real_A, real_B), 1)
        d_real_predict = discriminator(real_AB)
        d_real_loss = bce_loss(d_real_predict, paddle.ones_like(d_real_predict))

        # D([real_A, fake_B]): generated pairs are labelled with zeros.
        # detach() keeps the discriminator loss from back-propagating into
        # the generator.
        fake_B = generator(real_A).detach()
        fake_AB = paddle.concat((real_A, fake_B), 1)
        d_fake_predict = discriminator(fake_AB)
        d_fake_loss = bce_loss(d_fake_predict, paddle.zeros_like(d_fake_predict))

        # train D on the average of the real and fake losses
        d_loss = (d_real_loss + d_fake_loss) / 2.
        d_loss.backward()
        optimizerD.step()

        optimizerG.clear_grad()
        # D([real_A, fake_B]): this time fake_B is not detached and the
        # target is ones, so the generator is rewarded for fooling D
        fake_B = generator(real_A)
        fake_AB = paddle.concat((real_A, fake_B), 1)
        g_fake_predict = discriminator(fake_AB)
        g_bce_loss = bce_loss(g_fake_predict, paddle.ones_like(g_fake_predict))
        # L1 reconstruction term, weighted by 100
        g_l1_loss = l1_loss(fake_B, real_B) * 100.
        # NOTE(review): no backward()/step() for g_loss is visible in this
        # excerpt - confirm the generator update follows this line.
        g_loss = g_bce_loss + g_l1_loss *1.
def int_seghead(self,
                ref_frame_embedding=None,
                ref_scribble_label=None,
                prev_round_label=None,
                normalize_nearest_neighbor_distances=True,
                global_map_tmp_dic=None,
                local_map_dics=None,
                interaction_num=None,
                seq_names=None,
                gt_ids=None,
                k_nearest_neighbors=1,
                frame_num=None,
                first_inter=True):
    """Run the interaction segmentation head on every sample of the batch.

    For each sample the scribble label (and, after the first interaction,
    the previous round's label) is resized to the embedding resolution,
    nearest-neighbour features are computed, the per-sequence global/local
    maps are updated in place, and ``self.inter_seghead`` is applied to the
    concatenation of the frame embedding, the scribble mask and the
    previous-round mask.

    Args:
        ref_frame_embedding: Frame embeddings, unpacked as (bs, c, h, w).
        ref_scribble_label: Scribble labels, resized to (h, w) with nearest
            interpolation.
        prev_round_label: Labels from the previous round; only read when
            ``first_inter`` is False.
        normalize_nearest_neighbor_distances: Not referenced in this body.
        global_map_tmp_dic: Dict keyed by sequence name; mutated in place.
        local_map_dics: ``None`` or a pair ``(local_map_tmp_dic,
            local_map_dist_dic)`` of dicts keyed by sequence name; mutated
            in place.
        interaction_num: Index (1-based, since ``interaction_num - 1`` is
            used) into the local distance map.
        seq_names: Sequence name per sample.
        gt_ids: Per-sample maximum object id; ids ``0..gt_ids[n]`` are used.
        k_nearest_neighbors: Not referenced in this body.
        frame_num: Frame index per sample, used to index the maps.
        first_inter: Whether this is the first interaction round.

    Returns:
        ``dic_tmp`` (sequence name -> prediction) when ``local_map_dics`` is
        None, otherwise the tuple ``(dic_tmp, local_map_dics)``.
    """
    dic_tmp = {}
    bs, c, h, w = ref_frame_embedding.shape
    # Bring the labels to the embedding resolution; nearest interpolation
    # keeps them as valid label ids.
    scale_ref_scribble_label = paddle.nn.functional.interpolate(
        float_(ref_scribble_label), size=(h, w), mode='nearest')
    scale_ref_scribble_label = int_(scale_ref_scribble_label)
    if not first_inter:
        scale_prev_round_label = paddle.nn.functional.interpolate(
            float_(prev_round_label), size=(h, w), mode='nearest')
        scale_prev_round_label = int_(scale_prev_round_label)
    n_chunks = 500  # not read anywhere below
    for n in range(bs):

        gt_id = paddle.arange(0, gt_ids[n] + 1)

        gt_id = int_(gt_id)

        seq_ref_frame_embedding = ref_frame_embedding[n]

        # ---- Local distance map ----
        # Move the leading axis last before the nearest-neighbour lookup.
        seq_ref_frame_embedding = paddle.transpose(seq_ref_frame_embedding,
                                                   [1, 2, 0])
        seq_ref_scribble_label = paddle.transpose(
            scale_ref_scribble_label[n], [1, 2, 0])
        # The reference frame is used both as "previous frame" and as query.
        nn_features_n = local_previous_frame_nearest_neighbor_features_per_object(
            prev_frame_embedding=seq_ref_frame_embedding,
            query_embedding=seq_ref_frame_embedding,
            prev_frame_labels=seq_ref_scribble_label,
            gt_ids=gt_id,
            max_distance=self.cfg['model_max_local_distance'])

        # ---- Global map update ----
        # First time a sequence is seen: allocate 1000 slots filled with
        # ones, indexed below by frame_num[n].
        if seq_names[n] not in global_map_tmp_dic:
            global_map_tmp_dic[seq_names[n]] = paddle.ones_like(
                nn_features_n).tile([1000, 1, 1, 1, 1])
        # Element-wise minimum of the new features and the stored map.
        nn_features_n_ = paddle.where(
            nn_features_n <=
            global_map_tmp_dic[seq_names[n]][frame_num[n]].unsqueeze(0),
            nn_features_n,
            global_map_tmp_dic[seq_names[n]][frame_num[n]].unsqueeze(0))

        # print('detach 3')
        # nn_features_n_ = nn_features_n_.detach()
        # Store the detached minimum back into the caller's dict.
        global_map_tmp_dic[seq_names[n]][
            frame_num[n]] = nn_features_n_.detach()[0]

        # ---- Local map update ----
        if local_map_dics is not None:
            local_map_tmp_dic, local_map_dist_dic = local_map_dics
            if seq_names[n] not in local_map_dist_dic:
                local_map_dist_dic[seq_names[n]] = paddle.zeros([1000, 9])
            if seq_names[n] not in local_map_tmp_dic:
                local_map_tmp_dic[seq_names[n]] = paddle.ones_like(
                    nn_features_n).unsqueeze(0).tile([1000, 9, 1, 1, 1, 1])
            # NOTE(review): chained indexing followed by assignment -
            # confirm this writes into the stored tensor rather than a copy.
            local_map_dist_dic[seq_names[n]][frame_num[n]][interaction_num -
                                                           1] = 0

            local_map_dics = (local_map_tmp_dic, local_map_dist_dic)

        # ---- Assemble the head input ----
        # One copy of the frame embedding per object id.
        to_cat_current_frame_embedding = ref_frame_embedding[n].unsqueeze(
            0).tile((gt_id.shape[0], 1, 1, 1))
        # Computed but not part of the concatenation below.
        to_cat_nn_feature_n = nn_features_n.squeeze(0).transpose(
            [2, 3, 0, 1])

        # Per-object mask of the scribble label (original author's note:
        # float comparison?).
        to_cat_scribble_mask_to_cat = (
            float_(seq_ref_scribble_label) == float_(gt_id)
        )
        to_cat_scribble_mask_to_cat = float_(
            to_cat_scribble_mask_to_cat.unsqueeze(-1).transpose(
                [2, 3, 0, 1]))
        if not first_inter:
            seq_prev_round_label = scale_prev_round_label[n].transpose(
                [1, 2, 0])

            # Per-object mask of the previous round's label (original
            # author's note: float comparison?).
            to_cat_prev_round_to_cat = (
                float_(seq_prev_round_label) == float_(gt_id)
            )
            to_cat_prev_round_to_cat = float_(
                to_cat_prev_round_to_cat.unsqueeze(-1).transpose(
                    [2, 3, 0, 1]))

        else:
            # No previous round yet: all zeros except index 0, which is set
            # to one.
            to_cat_prev_round_to_cat = paddle.zeros_like(
                to_cat_scribble_mask_to_cat)
            to_cat_prev_round_to_cat[0] = 1.

        to_cat = paddle.concat(
            (to_cat_current_frame_embedding, to_cat_scribble_mask_to_cat,
             to_cat_prev_round_to_cat), 1)

        pred_ = self.inter_seghead(to_cat)
        # Swap the first two axes of the head output.
        pred_ = pred_.transpose([1, 0, 2, 3])
        dic_tmp[seq_names[n]] = pred_
    if local_map_dics is None:
        return dic_tmp
    else:
        return dic_tmp, local_map_dics
def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
    """
    Rescale, clip and filter the bbox from the output of NMS to
    get final prediction.

    Images without any detection are given ``self.fake_bboxes`` /
    ``self.fake_bbox_num`` so that every image contributes at least one row.
    As a side effect the per-box original image shape is stored in
    ``self.origin_shape_list``.

    Notes:
        Documented upstream as currently only supporting bs = 1.

    Args:
        bboxes (Tensor): The output bboxes with shape [N, 6] after decode
            and NMS, including labels, scores and bboxes.
        bbox_num (Tensor): The number of prediction boxes of each image;
            its entries sum to N.
        im_shape (Tensor): The shape of the input image.
        scale_factor (Tensor): The scale factor of the input image, stored
            as (scale_y, scale_x) per image.
    Returns:
        pred_result (Tensor): The final prediction results with shape [N, 6]
            including labels, scores and bboxes. Boxes that are empty after
            clipping get label -1.
    """
    bboxes_list = []
    bbox_num_list = []
    id_start = 0
    # add fake bbox when output is empty for each batch
    for i in range(bbox_num.shape[0]):
        if bbox_num[i] == 0:
            # An image with zero detections owns no rows of `bboxes`, so the
            # read cursor must not move; advancing it would skip a real box
            # belonging to a later image.
            bboxes_i = self.fake_bboxes
            bbox_num_i = self.fake_bbox_num
        else:
            bboxes_i = bboxes[id_start:id_start + bbox_num[i], :]
            bbox_num_i = bbox_num[i]
            id_start += bbox_num[i]
        bboxes_list.append(bboxes_i)
        bbox_num_list.append(bbox_num_i)
    bboxes = paddle.concat(bboxes_list)
    bbox_num = paddle.concat(bbox_num_list)

    origin_shape = paddle.floor(im_shape / scale_factor + 0.5)

    origin_shape_list = []
    scale_factor_list = []
    # scale_factor: scale_y, scale_x
    for i in range(bbox_num.shape[0]):
        # Repeat the image-level shape and scale once per box of that image.
        expand_shape = paddle.expand(origin_shape[i:i + 1, :],
                                     [bbox_num[i], 2])
        scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
        # Order the scales as x, y, x, y to match x1, y1, x2, y2.
        scale = paddle.concat([scale_x, scale_y, scale_x, scale_y])
        expand_scale = paddle.expand(scale, [bbox_num[i], 4])
        origin_shape_list.append(expand_shape)
        scale_factor_list.append(expand_scale)

    self.origin_shape_list = paddle.concat(origin_shape_list)
    scale_factor_list = paddle.concat(scale_factor_list)

    # bboxes: [N, 6], label, score, bbox
    pred_label = bboxes[:, 0:1]
    pred_score = bboxes[:, 1:2]
    pred_bbox = bboxes[:, 2:]
    # rescale bbox to original image
    scaled_bbox = pred_bbox / scale_factor_list
    origin_h = self.origin_shape_list[:, 0]
    origin_w = self.origin_shape_list[:, 1]
    zeros = paddle.zeros_like(origin_h)
    # clip bbox to [0, original_size]
    x1 = paddle.maximum(paddle.minimum(scaled_bbox[:, 0], origin_w), zeros)
    y1 = paddle.maximum(paddle.minimum(scaled_bbox[:, 1], origin_h), zeros)
    x2 = paddle.maximum(paddle.minimum(scaled_bbox[:, 2], origin_w), zeros)
    y2 = paddle.maximum(paddle.minimum(scaled_bbox[:, 3], origin_h), zeros)
    pred_bbox = paddle.stack([x1, y1, x2, y2], axis=-1)
    # filter empty bbox
    keep_mask = nonempty_bbox(pred_bbox, return_mask=True)
    keep_mask = paddle.unsqueeze(keep_mask, [1])
    # Rows are kept; empty boxes are only marked with label -1.
    pred_label = paddle.where(keep_mask, pred_label,
                              paddle.ones_like(pred_label) * -1)
    pred_result = paddle.concat([pred_label, pred_score, pred_bbox], axis=1)
    return pred_result
def where(condition, x=None, y=None, name=None):
    r"""
    Return a tensor of elements selected from either $x$ or $y$, depending on $condition$.

    **Note**:
        ``paddle.where(condition)`` is identical to ``paddle.nonzero(condition, as_tuple=True)``.

    .. math::

      out_i =
      \begin{cases}
      x_i, \quad  \text{if}  \ condition_i \  is \ True \\
      y_i, \quad  \text{if}  \ condition_i \  is \ False \\
      \end{cases}


    Args:
        condition(Tensor): The condition to choose x or y. When True(nonzero), yield x, otherwise yield y.
        x(Tensor or Scalar, optional): x is a Tensor or Scalar with data type float32, float64, int32, int64. Either both or neither of x and y should be given.
        y(Tensor or Scalar, optional): y is a Tensor or Scalar with data type float32, float64, int32, int64. Either both or neither of x and y should be given.

        name(str, optional): The default value is None. Normally there is no need for
            user to set this property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor: A Tensor with the same data dtype as x.

    Raises:
        ValueError: If exactly one of ``x`` and ``y`` is given.

    Examples:
        .. code-block:: python

          import paddle

          x = paddle.to_tensor([0.9383, 0.1983, 3.2, 1.2])
          y = paddle.to_tensor([1.0, 1.0, 1.0, 1.0])
          out = paddle.where(x>1, x, y)

          print(out)
          #out: [1.0, 1.0, 3.2, 1.2]

          out = paddle.where(x>1)
          print(out)
          #out: (Tensor(shape=[2, 1], dtype=int64, place=CPUPlace, stop_gradient=True,
          #            [[2],
          #             [3]]),)
    """
    # Promote Python/numpy scalars to 1-element tensors whose dtype is the
    # one numpy infers for the scalar.
    if np.isscalar(x):
        x = paddle.full([1], x, np.array([x]).dtype.name)

    if np.isscalar(y):
        y = paddle.full([1], y, np.array([y]).dtype.name)

    # Single-argument form: behave like nonzero().
    if x is None and y is None:
        return nonzero(condition, as_tuple=True)

    if x is None or y is None:
        raise ValueError("either both or neither of x and y should be given")

    # Input dtype checks are only performed outside dynamic mode.
    if not paddle.in_dynamic_mode():
        check_variable_and_dtype(condition, 'condition', ['bool'], 'where')
        check_variable_and_dtype(x, 'x',
                                 ['float32', 'float64', 'int32', 'int64'],
                                 'where')
        check_variable_and_dtype(y, 'y',
                                 ['float32', 'float64', 'int32', 'int64'],
                                 'where')

    condition_shape = list(condition.shape)
    x_shape = list(x.shape)
    y_shape = list(y.shape)

    if x_shape == y_shape and condition_shape == x_shape:
        # All shapes already agree; nothing to broadcast.
        broadcast_condition = condition
        broadcast_x = x
        broadcast_y = y
    else:
        if core.is_compiled_with_xpu():
            # XPU builds select arithmetically: x * cond + y * (not cond),
            # relying on multiply/add to broadcast.
            cond_int = paddle.cast(condition, x.dtype)
            cond_not_int = paddle.cast(logical_not(condition), x.dtype)
            out1 = paddle.multiply(x, cond_int)
            out2 = paddle.multiply(y, cond_not_int)
            out = paddle.add(out1, out2)
            return out

        # Broadcast manually: summing zero tensors shaped like x, y and
        # condition gives a zero tensor of the common shape, and adding it
        # to each operand expands that operand to the common shape.
        zeros_like_x = paddle.zeros_like(x)
        zeros_like_y = paddle.zeros_like(y)
        zeros_like_condition = paddle.zeros_like(condition)
        zeros_like_condition = paddle.cast(zeros_like_condition, x.dtype)
        cast_cond = paddle.cast(condition, x.dtype)

        broadcast_zeros = paddle.add(zeros_like_x, zeros_like_y)
        broadcast_zeros = paddle.add(broadcast_zeros, zeros_like_condition)
        broadcast_x = paddle.add(x, broadcast_zeros)
        broadcast_y = paddle.add(y, broadcast_zeros)
        broadcast_condition = paddle.add(cast_cond, broadcast_zeros)
        # The condition went through x.dtype for the addition; turn it back
        # into a boolean mask.
        broadcast_condition = paddle.cast(broadcast_condition, 'bool')

    # Dispatch to the kernel matching the current execution mode.
    if in_dygraph_mode():
        return _C_ops.final_state_where(broadcast_condition, broadcast_x,
                                        broadcast_y)
    else:
        if _in_legacy_dygraph():
            return _C_ops.where(broadcast_condition, broadcast_x, broadcast_y)
        else:
            # NOTE: every local variable is forwarded to LayerHelper via
            # locals(), so local names in this function must not be changed
            # casually.
            helper = LayerHelper("where", **locals())
            out = helper.create_variable_for_type_inference(dtype=x.dtype)

            helper.append_op(type='where',
                             inputs={
                                 'Condition': broadcast_condition,
                                 'X': broadcast_x,
                                 'Y': broadcast_y
                             },
                             outputs={'Out': [out]})

            return out
def forward(self,
            src_ids,
            sent_ids=None,
            pos_ids=None,
            input_mask=None,
            attn_bias=None,
            past_cache=None,
            use_causal_mask=False):
    """
    Embed the input ids and run them through the encoder stack.

    Args:
        src_ids (`Variable` of shape `[batch_size, seq_len]`):
            Indices of input sequence tokens in the vocabulary.
        sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`):
            aka token_type_ids, segment token indices. If None, zeros shaped
            like `src_ids` are used.
        pos_ids (optional, `Variable` of shape `[batch_size, seq_len]`):
            Indices of positions of each input token in the position
            embeddings. If None, `0..seq_len-1` is used.
        input_mask (optional, `Variable` of shape `[batch_size, seq_len]`):
            Mask marking the non-padding tokens. Only used when `attn_bias`
            is None; if also None it is derived as `src_ids != 0`.
        attn_bias (optional, `Variable` of shape `[batch_size, seq_len, seq_len]`):
            3D version of `input_mask`; if set, overrides `input_mask`.
            Must be rank 3.
        past_cache (optional, tuple of two lists: cached key and cached value):
            Cached key/value tensors forwarded to the encoder stack. If
            set, `attn_bias` should not be None.
        use_causal_mask (bool):
            Only used when `attn_bias` is None: additionally masks the
            positions for which `(j + 1) / (i + 1) < 1`.

    Returns:
        encoded (`Variable` of shape `[batch_size, seq_len, hidden_size]`):
            output of the transformer stack
        additional_info (Dictionary):
            `'hiddens'`: hidden list returned by the encoder stack,
            `'caches'`: cache list returned by the encoder stack.
    """
    assert len(
        src_ids.shape
    ) == 2, 'expect src_ids.shape = [batch, sequecen], got %s' % (repr(
        src_ids.shape))
    assert (not past_cache) or attn_bias is not None, 'if `past_cache` is specified; attn_bias should not be None'

    seq_len = paddle.shape(src_ids)[1]
    if pos_ids is None:
        pos_ids = paddle.arange(
            0, seq_len, 1, dtype='int32').reshape([1, -1]).cast('int64')

    if attn_bias is not None:
        assert len(
            attn_bias.shape
        ) == 3, 'expect attn_bias tobe rank 3, got %r' % attn_bias.shape
    else:
        if input_mask is None:
            input_mask = paddle.cast(src_ids != 0, 'float32')
        assert len(input_mask.shape) == 2
        # Outer product of the mask with itself gives the pairwise mask.
        input_mask = input_mask.unsqueeze(-1)
        attn_bias = input_mask.matmul(input_mask, transpose_y=True)
        if use_causal_mask:
            steps = paddle.reshape(
                paddle.arange(
                    0, seq_len, 1, dtype='float32') + 1., [1, 1, -1, 1])
            causal_mask = (steps.matmul(
                1. / steps, transpose_y=True) >= 1.).cast('float32')
            attn_bias *= causal_mask

    # 1 -> 0 (attend), 0 -> -10000 (masked out).
    attn_bias = (1. - attn_bias) * -10000.0
    # Tile over the heads explicitly instead of relying on broadcasting.
    attn_bias = attn_bias.unsqueeze(1).tile([1, self.n_head, 1, 1])

    if sent_ids is None:
        sent_ids = paddle.zeros_like(src_ids)

    token_part = self.word_emb(src_ids)
    position_part = self.pos_emb(pos_ids)
    segment_part = self.sent_emb(sent_ids)
    embedded = self.dropout(self.ln(token_part + position_part + segment_part))

    encoded, hidden_list, cache_list = self.encoder_stack(
        embedded, attn_bias, past_cache=past_cache)
    return encoded, {
        'hiddens': hidden_list,
        'caches': cache_list,
    }
def scatter_add_prim2orig(op, index_t, x, y):
    """Lower a scatter-add primitive to ``paddle.scatter`` plus ``paddle.add``.

    ``y`` is accumulated (``overwrite=False``) into a zero tensor shaped like
    ``x`` at the rows given by ``index_t``; the result is added to ``x``.
    Only ``axis == 0`` is supported.
    """
    assert op.attr('axis') == 0, 'Only support axis==0 currently'
    accumulated = paddle.scatter(
        x=paddle.zeros_like(
            x=x, dtype=x.dtype),
        index=index_t,
        updates=y,
        overwrite=False)
    return paddle.add(x, accumulated)
boxmul = (boxmax - boxmin) / 2. boxplus = (boxmin + boxmax) / 2. # output = model(paddle.to_tensor(img, dtype="float32", place=paddle.CUDAPlace(0))) output = model(paddle.to_tensor(img, dtype="float32")) orig_label = np.argmax(output) print("orig_label={}".format(orig_label), type(orig_label)) #345 succ_flag = False for outer_step in range(binary_search_steps): print("o_bestl2={} confidence={}".format(o_bestl2, confidence)) #把原始图像转换成图像数据和扰动的形态 timg = paddle.to_tensor(np.arctanh((img - boxplus) / boxmul * 0.999999), dtype='float32') modifier = paddle.zeros_like(timg, dtype='float32') #print (type(modifier)) # modifier = paddle.to_tensor(modifier, dtype='float32', place=paddle.CUDAPlace(0)) #print (type(modifier)) #图像数据的扰动量梯度可以获取 import pdb pdb.set_trace() modifier.stop_gradient = False #设置为不保存梯度值 自然也无法修改 for param in model.parameters(): param.stop_gradient = True #定义优化器 仅优化modifier #optimizer = torch.optim.Adam([modifier], lr=learning_rate) optimizer = paddle.optimizer.Adam(learning_rate=learning_rate,
def forward(self, hidden, target, keep_order=False):
    """Compute the per-token negative log-likelihood with adaptive softmax.

    Args:
        hidden: Hidden states; axis 0 must have the same length as
            ``target``.
        target: Target token ids.
        keep_order: When True (or when ``self.keep_order`` is set) the
            result is written back at each token's original position;
            otherwise results are stored cluster by cluster in the order
            the clusters are processed.

    Returns:
        Tensor with one negative log-likelihood per target token.
    """
    assert (hidden.shape[0] == target.shape[0])

    if self.num_clusters == 0:
        logit = self._compute_logits(hidden, self.out_layers_weight[0],
                                     self.out_layers_bias[0],
                                     self.out_projs[0])
        # log_softmax avoids the underflow of log(softmax(.)) for very
        # unlikely classes.
        nll = -F.log_softmax(logit, axis=-1)
        # (row, target) pairs used to pick each token's own entry.
        idx = paddle.concat(
            [
                paddle.arange(0, nll.shape[0]).unsqueeze([1]),
                target.unsqueeze(1)
            ],
            axis=1)
        nll = paddle.gather_nd(nll, idx)
    else:
        # Collect the weight and bias of every cluster.
        weights, biases = [], []
        for i in range(len(self.cutoffs)):
            if self.div_val == 1:
                # One shared output layer, sliced per cluster.
                l_idx, r_idx = self.cutoff_ends[i], self.cutoff_ends[i + 1]
                weight_i = self.out_layers_weight[0][l_idx:r_idx]
                bias_i = self.out_layers_bias[0][l_idx:r_idx]
            else:
                weight_i = self.out_layers_weight[i]
                bias_i = self.out_layers_bias[i]

            if i == 0:
                # The head additionally predicts the cluster entries.
                weight_i = paddle.concat(
                    [weight_i, self.cluster_weight], axis=0)
                bias_i = paddle.concat([bias_i, self.cluster_bias], axis=0)

            weights.append(weight_i)
            biases.append(bias_i)

        head_weight, head_bias, head_proj = weights[0], biases[
            0], self.out_projs[0]

        head_logit = self._compute_logits(hidden, head_weight, head_bias,
                                          head_proj)
        head_logprob = F.log_softmax(head_logit, axis=-1)

        nll = paddle.zeros_like(target, dtype=hidden.dtype)

        offset = 0
        cutoff_values = [0] + self.cutoffs
        for i in range(len(cutoff_values) - 1):
            l_idx, r_idx = cutoff_values[i], cutoff_values[i + 1]

            # Tokens whose target falls into [l_idx, r_idx).
            mask_i = paddle.cast(
                target >= l_idx,
                dtype=paddle.get_default_dtype()) * paddle.cast(
                    target < r_idx, dtype="int64")
            indices_i = paddle.nonzero(mask_i).squeeze([1])

            if paddle.numel(indices_i) == 0:
                continue

            # Re-base the targets so they index into this cluster.
            target_i = paddle.gather(target, indices_i, axis=0) - l_idx
            head_logprob_i = paddle.gather(head_logprob, indices_i, axis=0)

            if i == 0:
                target_i_idx = paddle.concat(
                    [
                        paddle.arange(0, head_logprob_i.shape[0]).unsqueeze(
                            [1]), target_i.unsqueeze([1])
                    ],
                    axis=1)
                logprob_i = head_logprob_i.gather_nd(target_i_idx)
            else:
                # NOTE(review): the head passes `self.out_projs[0]` itself,
                # while the tails pass `.weight` - confirm this is intended.
                weight_i, bias_i, proj_i = weights[i], biases[
                    i], self.out_projs[i].weight if self.out_projs[
                        i] is not None else None

                hidden_i = paddle.gather(hidden, indices_i, axis=0)

                tail_logit_i = self._compute_logits(hidden_i, weight_i,
                                                    bias_i, proj_i)
                tail_logprob_i = F.log_softmax(tail_logit_i, axis=-1)

                target_i_idx = paddle.concat(
                    [
                        paddle.arange(0, tail_logprob_i.shape[0]).unsqueeze(
                            [1]), target_i.unsqueeze([1])
                    ],
                    axis=1)
                logprob_i = tail_logprob_i.gather_nd(target_i_idx)

                # log p(token) = log p(cluster) + log p(token | cluster);
                # the cluster entry for tail i is head column -i.
                logprob_i = head_logprob_i[:, -i] + logprob_i

            if self.keep_order or keep_order:
                nll = paddle.scatter(nll, indices_i, -logprob_i)
            else:
                index = paddle.arange(offset, offset + logprob_i.shape[0], 1)
                nll = paddle.scatter(nll, index, -logprob_i)

            offset += logprob_i.shape[0]

    return nll
def compute_fbank_matrix(sr: int,
                         n_fft: int,
                         n_mels: int=128,
                         f_min: float=0.0,
                         f_max: Optional[float]=None,
                         htk: bool=False,
                         norm: Union[str, float]='slaney',
                         dtype: str='float64') -> Tensor:
    """Build the triangular mel filter-bank matrix.

    Parameters:
        sr(int): the audio sample rate.
        n_fft(int): the number of fft bins.
        n_mels(int): the number of Mel bins.
        f_min(float): the lower cut-off frequency, below which the filter
            response is zero.
        f_max(float): the upper cut-off frequency, above which the filter
            response is zero. Defaults to ``sr / 2``.
        htk: whether to use htk formula.
        norm(str|float): ``'slaney'`` scales every filter by
            ``2 / (upper_edge - lower_edge)``; an int or float is used as
            the ``p`` of a p-norm normalisation along the last axis; any
            other value leaves the weights unscaled.
        dtype(str): the datatype of the returned fbank matrix.

    Returns:
        The fbank matrix of shape (n_mels, int(1+n_fft//2)).

    Shape:
        output: (n_mels, int(1+n_fft//2))

    Notes:
        Per the original documentation this function is consistent with
        librosa.filters.mel().

    Examples:

        .. code-block:: python

            import paddle
            import paddleaudio.functional as F
            m = F.compute_fbank_matrix(16000, 512)
            print(m.shape)
            >>[128, 257]

    """
    if f_max is None:
        f_max = float(sr) / 2

    n_freqs = int(1 + n_fft // 2)
    fbank = paddle.zeros((n_mels, n_freqs), dtype=dtype)

    # Centre frequency of each FFT bin.
    bin_freqs = fft_frequencies(sr=sr, n_fft=n_fft, dtype=dtype)

    # Band edges: n_mels + 2 points uniformly spaced on the mel scale.
    mel_edges = mel_frequencies(
        n_mels + 2, f_min=f_min, f_max=f_max, htk=htk, dtype=dtype)

    edge_gaps = mel_edges[1:] - mel_edges[:-1]  # np.diff(mel_edges)
    # Outer difference, i.e. np.subtract.outer(mel_edges, bin_freqs).
    ramps = mel_edges.unsqueeze(1) - bin_freqs.unsqueeze(0)

    for band in range(n_mels):
        # Rising and falling slopes of the triangle for all bins ...
        rising = -ramps[band] / edge_gaps[band]
        falling = ramps[band + 2] / edge_gaps[band + 1]
        # ... intersected with each other and with zero.
        fbank[band] = paddle.maximum(
            paddle.zeros_like(rising), paddle.minimum(rising, falling))

    if norm == 'slaney':
        # Slaney-style mel is scaled to be approx constant energy per
        # channel.
        band_scale = 2.0 / (mel_edges[2:n_mels + 2] - mel_edges[:n_mels])
        fbank *= band_scale.unsqueeze(1)
    elif isinstance(norm, (int, float)):
        fbank = paddle.nn.functional.normalize(fbank, p=norm, axis=-1)

    return fbank
def forward(self,
            anchor_bboxes,
            num_anchors_list,
            gt_labels,
            gt_bboxes,
            pad_gt_mask,
            bg_index,
            gt_scores=None,
            pred_bboxes=None):
    r"""Assign ground-truth boxes to anchors (ATSS-style assignment).

    This code is based on
        https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py

    The assignment is done in following steps
    1. compute iou between all bbox (bbox of all pyramid levels) and gt
    2. compute center distance between all bbox and gt
    3. on each pyramid level, for each gt, select k bbox whose center
       are closest to the gt center, so we total select k*l bbox as
       candidates for each gt
    4. get corresponding iou for these candidates, and compute the
       mean and std, set mean + std as the iou threshold
    5. select these candidates whose iou is strictly greater than the
       threshold as positive (the code compares with ``>``)
    6. limit the positive sample's center in gt
    7. if an anchor box is assigned to multiple gts, the one with the
       highest iou will be selected.
    8. if ``self.force_gt_matching`` is set, additionally apply the mask
       returned by ``compute_max_iou_gt`` so every gt matches an anchor.

    Args:
        anchor_bboxes (Tensor, float32): pre-defined anchors, shape(L, 4),
                "xmin, xmax, ymin, ymax" format
                NOTE(review): this coordinate order is carried over from
                the original docstring and is not verifiable from this
                method -- confirm against ``bbox_center``/``iou_similarity``.
        num_anchors_list (List): num of anchors in each level
        gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
        gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
        pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
        bg_index (int): background index
        gt_scores (Tensor|None, float32) Score of gt_bboxes,
                shape(B, n, 1). Only used when pred_bboxes is None; if both
                are None the scores stay one-hot labels.
        pred_bboxes (Tensor, float32, optional): predicted bounding boxes, shape(B, L, 4)
    Returns:
        assigned_labels (Tensor): (B, L)
        assigned_bboxes (Tensor): (B, L, 4)
        assigned_scores (Tensor): (B, L, C), if pred_bboxes is not None, then output ious
    """
    assert gt_labels.ndim == gt_bboxes.ndim and \
           gt_bboxes.ndim == 3

    num_anchors, _ = anchor_bboxes.shape
    batch_size, num_max_boxes, _ = gt_bboxes.shape

    # negative batch: no gt at all, so every anchor is background and
    # boxes/scores are all zero.
    if num_max_boxes == 0:
        assigned_labels = paddle.full(
            [batch_size, num_anchors], bg_index, dtype=gt_labels.dtype)
        assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
        assigned_scores = paddle.zeros(
            [batch_size, num_anchors, self.num_classes])
        return assigned_labels, assigned_bboxes, assigned_scores

    # 1. compute iou between gt and anchor bbox, [B, n, L]
    ious = iou_similarity(gt_bboxes.reshape([-1, 4]), anchor_bboxes)
    ious = ious.reshape([batch_size, -1, num_anchors])

    # 2. compute center distance between all anchors and gt, [B, n, L]
    gt_centers = bbox_center(gt_bboxes.reshape([-1, 4])).unsqueeze(1)
    anchor_centers = bbox_center(anchor_bboxes)
    gt2anchor_distances = (gt_centers - anchor_centers.unsqueeze(0)) \
        .norm(2, axis=-1).reshape([batch_size, -1, num_anchors])

    # 3. on each pyramid level, selecting topk closest candidates
    # based on the center distance, [B, n, L]
    is_in_topk, topk_idxs = self._gather_topk_pyramid(
        gt2anchor_distances, num_anchors_list, pad_gt_mask)

    # 4. get corresponding iou for these candidates, and compute the
    # mean and std, 5. set mean + std as the iou threshold
    iou_candidates = ious * is_in_topk
    iou_threshold = paddle.index_sample(
        iou_candidates.flatten(stop_axis=-2),
        topk_idxs.flatten(stop_axis=-2))
    iou_threshold = iou_threshold.reshape([batch_size, num_max_boxes, -1])
    iou_threshold = iou_threshold.mean(axis=-1, keepdim=True) + \
                    iou_threshold.std(axis=-1, keepdim=True)
    # Keep only candidates whose iou is strictly above the per-gt threshold.
    is_in_topk = paddle.where(
        iou_candidates > iou_threshold.tile([1, 1, num_anchors]),
        is_in_topk, paddle.zeros_like(is_in_topk))

    # 6. check the positive sample's center in gt, [B, n, L]
    is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)

    # select positive sample, [B, n, L]
    mask_positive = is_in_topk * is_in_gts * pad_gt_mask

    # 7. if an anchor box is assigned to multiple gts,
    # the one with the highest iou will be selected.
    mask_positive_sum = mask_positive.sum(axis=-2)
    if mask_positive_sum.max() > 1:
        mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
            [1, num_max_boxes, 1])
        is_max_iou = compute_max_iou_anchor(ious)
        mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
                                     mask_positive)
        mask_positive_sum = mask_positive.sum(axis=-2)
    # 8. make sure every gt_bbox matches the anchor
    if self.force_gt_matching:
        is_max_iou = compute_max_iou_gt(ious) * pad_gt_mask
        # Only override anchors for which exactly one gt has its mask set.
        mask_max_iou = (is_max_iou.sum(-2, keepdim=True) == 1).tile(
            [1, num_max_boxes, 1])
        mask_positive = paddle.where(mask_max_iou, is_max_iou,
                                     mask_positive)
        mask_positive_sum = mask_positive.sum(axis=-2)
    assigned_gt_index = mask_positive.argmax(axis=-2)

    # assigned target
    # Offset per-image gt indices so they address the batch-flattened
    # gt tensors used by the gathers below.
    batch_ind = paddle.arange(
        end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
    assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
    assigned_labels = paddle.gather(
        gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
    assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
    # Anchors without any positive match fall back to the background index.
    assigned_labels = paddle.where(
        mask_positive_sum > 0, assigned_labels,
        paddle.full_like(assigned_labels, bg_index))

    assigned_bboxes = paddle.gather(
        gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
    assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])

    # One-hot over num_classes + 1 slots, then drop the background column.
    assigned_scores = F.one_hot(assigned_labels, self.num_classes + 1)
    ind = list(range(self.num_classes + 1))
    ind.remove(bg_index)
    assigned_scores = paddle.index_select(
        assigned_scores, paddle.to_tensor(ind), axis=-1)
    if pred_bboxes is not None:
        # assigned iou
        ious = batch_iou_similarity(gt_bboxes, pred_bboxes) * mask_positive
        ious = ious.max(axis=-2).unsqueeze(-1)
        assigned_scores *= ious
    elif gt_scores is not None:
        gather_scores = paddle.gather(
            gt_scores.flatten(), assigned_gt_index.flatten(), axis=0)
        gather_scores = gather_scores.reshape([batch_size, num_anchors])
        gather_scores = paddle.where(mask_positive_sum > 0, gather_scores,
                                     paddle.zeros_like(gather_scores))
        assigned_scores *= gather_scores.unsqueeze(-1)

    return assigned_labels, assigned_bboxes, assigned_scores
def __init__(self,
             rank,
             local_rank,
             world_size,
             batch_size,
             resume,
             margin_softmax,
             num_classes,
             sample_rate=1.0,
             embedding_size=512,
             prefix="./"):
    """Set up this rank's shard of the softmax weight and its momentum.

    Args:
        rank: global rank of this process; selects the class shard.
        local_rank: rank on the local node (stored only).
        world_size: number of ranks the classes are split across.
        batch_size: stored only.
        resume: when truthy, try to load the weight and its momentum
            from ``prefix``; fall back to fresh values if loading raises
            FileNotFoundError, KeyError or IndexError.
        margin_softmax: stored only.
        num_classes: total number of classes over all ranks.
        sample_rate: fraction of the local classes to sample.
        embedding_size: width of each class weight row.
        prefix: directory holding the per-rank weight files.
    """
    super(PartialFC, self).__init__()
    self.num_classes: int = num_classes
    self.rank: int = rank
    self.local_rank: int = local_rank
    self.world_size: int = world_size
    self.batch_size: int = batch_size
    self.margin_softmax: callable = margin_softmax
    self.sample_rate: float = sample_rate
    self.embedding_size: int = embedding_size
    self.prefix: str = prefix

    # Split classes as evenly as possible: the first `leftover` ranks
    # own one extra class each.
    per_rank, leftover = divmod(num_classes, world_size)
    self.num_local: int = per_rank + int(rank < leftover)
    self.class_start: int = per_rank * rank + min(rank, leftover)
    self.num_sample: int = int(self.sample_rate * self.num_local)

    self.weight_name = os.path.join(
        self.prefix, "rank:{}_softmax_weight.pkl".format(self.rank))
    self.weight_mom_name = os.path.join(
        self.prefix, "rank:{}_softmax_weight_mom.pkl".format(self.rank))

    local_shape = (self.num_local, self.embedding_size)
    load_failures = (FileNotFoundError, KeyError, IndexError)

    if not resume:
        self.weight = paddle.normal(0, 0.01, local_shape)
        self.weight_mom: paddle.Tensor = paddle.zeros_like(self.weight)
        print("softmax weight init successfully!")
        print("softmax weight mom init successfully!")
    else:
        # Weight and momentum are restored independently; either one
        # may fall back to a fresh value on its own.
        try:
            self.weight: paddle.Tensor = paddle.load(self.weight_name)
            print("softmax weight resume successfully!")
        except load_failures:
            self.weight = paddle.normal(0, 0.01, local_shape)
            print("softmax weight resume fail!")

        try:
            self.weight_mom: paddle.Tensor = paddle.load(
                self.weight_mom_name)
            print("softmax weight mom resume successfully!")
        except load_failures:
            self.weight_mom: paddle.Tensor = paddle.zeros_like(self.weight)
            print("softmax weight mom resume fail!")

    self.index = None
    if int(self.sample_rate) != 1:
        # Sampling enabled: start from a (1, 1) placeholder parameter.
        placeholder = paddle.empty((1, 1))
        self.sub_weight = paddle.create_parameter(
            shape=[1, 1],
            dtype='float32',
            default_initializer=paddle.nn.initializer.Assign(placeholder))
    else:
        # No sampling: the trainable parameter is the full local weight
        # and `update` is a no-op.
        self.update = lambda: 0
        self.sub_weight = paddle.create_parameter(
            shape=self.weight.shape,
            dtype='float32',
            default_initializer=paddle.nn.initializer.Assign(self.weight))
        self.sub_weight_mom = self.weight_mom
def test_device_error4():
    """Call ``paddle.zeros_like`` with the dtype string "float" on a static-graph input.

    Nothing is asserted here; presumably the caller wraps this function
    and expects it to raise -- verify against the enclosing test.
    """
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        x = fluid.data(name="data", shape=[10], dtype="float32")
        paddle.zeros_like(x, dtype="float")
def zero_(tensor: Tensor):
    """Overwrite ``tensor`` in place with zeros of the same shape and dtype.

    Returns whatever ``tensor.set_value`` returns.
    """
    zeros = paddle.zeros_like(tensor)
    return tensor.set_value(zeros)
def _no_grad_zero_(tensor):
    """Fill ``tensor`` with zeros under ``paddle.no_grad()`` and return it."""
    with paddle.no_grad():
        zeros = paddle.zeros_like(tensor)
        tensor.set_value(zeros)
    return tensor
def reset(self):
    """Reset the step/epoch counters and replace every stored state with zeros."""
    self.step = 0
    self.epoch = 0
    # Snapshot the keys first; each entry is rebound to a fresh zero
    # tensor shaped like the value it replaces.
    for name in list(self.state_dict):
        current = self.state_dict[name]
        self.state_dict[name] = paddle.zeros_like(current)
def _get_band_mask(self, blocked_query_mask, blocked_key_mask, batch_size,
                   sequence_length):
    '''
    Build the band attention mask (front global + sliding window + back
    global key blocks) for every non-global query block.

    Return second mask: [B, 1, L-G, bs, G+W]
    NOTE(review): the code expands the head axis to H and the last axis
    is the concat of GF*bs, W*bs and GB*bs columns, i.e. the result is
    [B, H, L-G, bs, (GF+W+GB)*bs] -- the shape above looks like shorthand.

    blocked_query_mask / blocked_key_mask are sliced on axis 1 as block
    index; per the inline comments they are [B, L, bs].
    '''
    GB = self.num_global_blocks_back
    GF = self.num_global_blocks_front
    G = self.num_global_blocks
    R = self.num_rand_blocks  # not used in this method
    W = self.window_size
    bs = self.block_size
    T = sequence_length
    L = T // bs  # blocked length
    B = batch_size
    H = self.num_heads
    # G+W+R
    # query_mask: [B, L, bs]
    # key_mask: [B, L, bs]
    # [B, L-G, bs, 1] * [B, L-G, 1, G*bs] -> [B, L-G, bs, G*bs]
    # NOTE(review): the `GF:-GB` / `-GB:` slices assume GB > 0; with
    # GB == 0 `GF:-GB` would select nothing -- confirm with the caller.
    temp_query_mask = paddle.reshape(blocked_query_mask[:, GF:-GB],
                                     [B, L - G, bs, 1])
    temp_key_mask_front = paddle.reshape(blocked_key_mask[:, :GF],
                                         [B, 1, 1, GF * bs])
    global_block_mask_front = paddle.matmul(temp_query_mask,
                                            temp_key_mask_front)

    temp_key_mask_back = paddle.reshape(blocked_key_mask[:, -GB:],
                                        [B, 1, 1, GB * bs])
    global_block_mask_back = paddle.matmul(temp_query_mask,
                                           temp_key_mask_back)
    # create window block mask
    # Part 1: the first W // 2 non-global query blocks. Their window
    # would reach into the front global blocks, so the key mask starts
    # at GF and is padded on the right with zeros.
    key_mask_list = []
    for query_block_id in range(GF, GF + W // 2):
        left_block_id = query_block_id - W // 2  # unused in this loop
        right_block_id = query_block_id + W // 2
        # The slice only provides the shape of the zero padding.
        zero_key_mask = paddle.zeros_like(
            blocked_key_mask[:, -(W - (right_block_id + 1 - G)):-GB])
        temp_key_mask = paddle.concat(
            [blocked_key_mask[:, GF:(right_block_id + 1)], zero_key_mask],
            axis=1)
        temp_key_mask = paddle.unsqueeze(temp_key_mask, 1)
        key_mask_list.append(temp_key_mask)
    roll_key_mask1 = paddle.concat(key_mask_list, axis=1)
    roll_key_mask1 = paddle.reshape(roll_key_mask1, [0, 0, W * bs])
    key_mask_list = []

    # Part 2: the interior query blocks. Each of the W iterations takes
    # a band_length-long slice shifted by one block; the slices are
    # concatenated on the last axis.
    band_length = L - G - W // 2 * 2
    for query_block_id in range(GF + W // 2, GF + W // 2 + W):
        left_block_id = query_block_id - W // 2
        right_block_id = query_block_id + W // 2  # unused in this loop
        key_mask_list.append(
            blocked_key_mask[:, left_block_id:left_block_id + band_length])
    window_key_mask = paddle.concat(key_mask_list, axis=2)
    window_key_mask = paddle.reshape(window_key_mask, [0, 0, W * bs])

    # Part 3: the last W // 2 non-global query blocks, mirrored from
    # part 1: zero padding on the left, keys up to the back global blocks.
    key_mask_list = []
    for query_block_id in range((L - GB) - W // 2, L - GB):
        left_block_id = query_block_id - W // 2
        right_block_id = query_block_id + W // 2  # unused in this loop
        zero_key_mask = paddle.zeros_like(
            blocked_key_mask[:, GF:GF + W - (L - left_block_id - GB)])
        temp_key_mask = paddle.concat(
            [zero_key_mask, blocked_key_mask[:, left_block_id:-GB]], axis=1)
        temp_key_mask = paddle.unsqueeze(temp_key_mask, 1)
        key_mask_list.append(temp_key_mask)
    roll_key_mask2 = paddle.concat(key_mask_list, axis=1)
    roll_key_mask2 = paddle.reshape(roll_key_mask2, [0, 0, W * bs])

    # Stack the three parts along the query-block axis.
    window_key_mask = paddle.concat(
        [roll_key_mask1, window_key_mask, roll_key_mask2], axis=1)
    window_key_mask = paddle.unsqueeze(window_key_mask, axis=2)
    # [B, L-G, bs, 1] * [B, L-G, 1, W*bs] -> [B, L-G, bs, W*bs]
    window_block_mask = paddle.matmul(temp_query_mask, window_key_mask)
    band_mask = paddle.concat(
        [
            global_block_mask_front, window_block_mask,
            global_block_mask_back
        ],
        axis=3)
    band_mask = paddle.unsqueeze(band_mask, 1)  # for head
    band_mask = paddle.expand(band_mask, [B, H, L - G, bs, -1])
    return band_mask
def prop_seghead(
    self,
    ref_frame_embedding=None,
    previous_frame_embedding=None,
    current_frame_embedding=None,
    ref_scribble_label=None,
    previous_frame_mask=None,
    normalize_nearest_neighbor_distances=True,
    use_local_map=True,
    seq_names=None,
    gt_ids=None,
    k_nearest_neighbors=1,
    global_map_tmp_dic=None,
    local_map_dics=None,
    interaction_num=None,
    start_annotated_frame=None,
    frame_num=None,
    dynamic_seghead=None,
):
    """Run the dynamic segmentation head on every sample of the batch.

    For each sample a global matching map (current frame vs. reference
    frame/scribble) and a local matching map (current frame vs. previous
    frame/mask) are computed, optionally merged with the memories passed
    in, concatenated with the current embedding and the per-object
    previous-frame mask, and fed to ``dynamic_seghead``.

    Args:
        ref_frame_embedding: reference-frame embeddings, indexed by sample.
        previous_frame_embedding: previous-frame embeddings, indexed by
            sample.
        current_frame_embedding: current-frame embeddings; its shape is
            unpacked as (bs, c, h, w).
        ref_scribble_label: reference scribble labels. Resized to (h, w)
            with nearest interpolation unless ``cfg['test_mode']`` is
            truthy (presumably already at that size then -- TODO confirm).
        previous_frame_mask: previous-frame masks, always resized to
            (h, w) with nearest interpolation.
        normalize_nearest_neighbor_distances: if true, map the global
            distances through ``(sigmoid(x) - 0.5) * 2``.
        use_local_map: choose the local-window matcher (True) or the
            global nearest-neighbour matcher (False) for the previous
            frame.
        seq_names: per-sample sequence names; keys of the memories and of
            the returned dict.
        gt_ids: per-sample object ids passed to the matchers.
        k_nearest_neighbors: forwarded to the nearest-neighbour matcher.
        global_map_tmp_dic: optional dict ``seq_name -> tensor`` holding
            the element-wise minimum global map seen per frame. Updated
            in place.
        local_map_dics: optional tuple ``(local_map_tmp_dic,
            local_map_dist_dic)`` of per-sequence local-map memories.
            Updated in place.
        interaction_num: 1-based interaction round; selects the memory
            slot ``interaction_num - 1``.
        start_annotated_frame: index of the annotated frame; used for the
            inverse-distance weight stored in ``local_map_dist_dic``.
        frame_num: per-sample frame index into the memories.
        dynamic_seghead: callable applied to the concatenated features.

    Returns:
        ``dic_tmp`` (seq_name -> prediction) when ``global_map_tmp_dic``
        is None; ``(dic_tmp, global_map_tmp_dic)`` when only
        ``local_map_dics`` is None; otherwise
        ``(dic_tmp, global_map_tmp_dic, local_map_dics)``.
    """
    cfg = self.cfg
    dic_tmp = {}
    bs, c, h, w = current_frame_embedding.shape

    if cfg.get('test_mode'):
        scale_ref_scribble_label = float_(ref_scribble_label)
    else:
        scale_ref_scribble_label = paddle.nn.functional.interpolate(
            float_(ref_scribble_label), size=(h, w), mode='nearest')
    scale_ref_scribble_label = int_(scale_ref_scribble_label)

    scale_previous_frame_label = paddle.nn.functional.interpolate(
        float_(previous_frame_mask), size=(h, w), mode='nearest')
    scale_previous_frame_label = int_(scale_previous_frame_label)

    for n in range(bs):
        # Channel-first -> channel-last for the matching helpers.
        seq_current_frame_embedding = current_frame_embedding[n].transpose(
            [1, 2, 0])
        seq_ref_frame_embedding = ref_frame_embedding[n].transpose(
            [1, 2, 0])
        seq_prev_frame_embedding = previous_frame_embedding[n]
        seq_ref_scribble_label = scale_ref_scribble_label[n].transpose(
            [1, 2, 0])

        # ---------------- Global map ----------------
        nn_features_n, ref_obj_ids = nearest_neighbor_features_per_object(
            reference_embeddings=seq_ref_frame_embedding,
            query_embeddings=seq_current_frame_embedding,
            reference_labels=seq_ref_scribble_label,
            k_nearest_neighbors=k_nearest_neighbors,
            gt_ids=gt_ids[n],
            n_chunks=10)
        if normalize_nearest_neighbor_distances:
            nn_features_n = (paddle.nn.functional.sigmoid(nn_features_n) -
                             0.5) * 2

        if global_map_tmp_dic is not None:
            # When testing, use the global map memory: keep the
            # element-wise minimum of the stored and the new map.
            if seq_names[n] not in global_map_tmp_dic:
                global_map_tmp_dic[seq_names[n]] = paddle.ones_like(
                    nn_features_n).tile([1000, 1, 1, 1, 1])
            stored_global = global_map_tmp_dic[seq_names[n]][
                frame_num[n]].unsqueeze(0)
            nn_features_n = paddle.where(nn_features_n <= stored_global,
                                         nn_features_n, stored_global)
            global_map_tmp_dic[seq_names[n]][
                frame_num[n]] = nn_features_n.detach()[0]

        # ---------------- Local distance map ----------------
        seq_prev_frame_embedding = seq_prev_frame_embedding.transpose(
            [1, 2, 0])
        seq_previous_frame_label = scale_previous_frame_label[n].transpose(
            [1, 2, 0])

        if use_local_map:
            prev_frame_nn_features_n = local_previous_frame_nearest_neighbor_features_per_object(
                prev_frame_embedding=seq_prev_frame_embedding,
                query_embedding=seq_current_frame_embedding,
                prev_frame_labels=seq_previous_frame_label,
                gt_ids=ref_obj_ids,
                max_distance=cfg['model_max_local_distance'])
        else:
            prev_frame_nn_features_n, _ = nearest_neighbor_features_per_object(
                reference_embeddings=seq_prev_frame_embedding,
                query_embeddings=seq_current_frame_embedding,
                reference_labels=seq_previous_frame_label,
                k_nearest_neighbors=k_nearest_neighbors,
                gt_ids=gt_ids[n],
                n_chunks=20)
            prev_frame_nn_features_n = (
                paddle.nn.functional.sigmoid(prev_frame_nn_features_n) -
                0.5) * 2

        if local_map_dics is not None:
            # When testing, use the local map memory.
            local_map_tmp_dic, local_map_dist_dic = local_map_dics
            if seq_names[n] not in local_map_dist_dic:
                print(seq_names[n], 'not in local_map_dist_dic')
                # paddle.zeros takes the shape as one list argument; the
                # previous call `paddle.zeros(1000, 9)` passed 9 as dtype.
                local_map_dist_dic[seq_names[n]] = paddle.zeros([1000, 9])
            if seq_names[n] not in local_map_tmp_dic:
                print(seq_names[n], 'not in local_map_tmp_dic')
                local_map_tmp_dic[seq_names[n]] = paddle.zeros_like(
                    prev_frame_nn_features_n).unsqueeze(0).tile(
                        [1000, 9, 1, 1, 1, 1])

            # Store this round's weight (inverse distance to the annotated
            # frame) and local map in slot `interaction_num - 1`.
            # NOTE(review): divides by zero when frame_num[n] equals
            # start_annotated_frame -- presumably callers never propagate
            # onto the annotated frame itself; confirm.
            local_map_dist_dic[seq_names[n]][
                frame_num[n], interaction_num -
                1] = 1.0 / (abs(frame_num[n] - start_annotated_frame))
            local_map_tmp_dic[seq_names[n]][
                frame_num[n],
                interaction_num - 1] = prev_frame_nn_features_n.squeeze(
                    0).detach()

            # Use the current round's map on the first interaction or when
            # its weight beats the previous round's; otherwise reuse the
            # previous round's map.
            if interaction_num == 1:
                chosen_slot = interaction_num - 1
            elif local_map_dist_dic[seq_names[n]][frame_num[n]][interaction_num - 1] > \
                    local_map_dist_dic[seq_names[n]][frame_num[n]][interaction_num - 2]:
                chosen_slot = interaction_num - 1
            else:
                chosen_slot = interaction_num - 2
            prev_frame_nn_features_n = local_map_tmp_dic[seq_names[n]][
                frame_num[n]][chosen_slot]
            prev_frame_nn_features_n = prev_frame_nn_features_n.unsqueeze(0)

            local_map_dics = (local_map_tmp_dic, local_map_dist_dic)

        # Per-object binary mask of the previous frame. Labels and ids
        # are compared after conversion through float_.
        to_cat_previous_frame = (
            float_(seq_previous_frame_label) == float_(ref_obj_ids))

        # Repeat the current embedding once per object id.
        to_cat_current_frame_embedding = current_frame_embedding[
            n].unsqueeze(0).tile((ref_obj_ids.shape[0], 1, 1, 1))

        to_cat_nn_feature_n = nn_features_n.squeeze(0).transpose(
            [2, 3, 0, 1])
        to_cat_previous_frame = float_(
            to_cat_previous_frame.unsqueeze(-1).transpose([2, 3, 0, 1]))
        to_cat_prev_frame_nn_feature_n = prev_frame_nn_features_n.squeeze(
            0).transpose([2, 3, 0, 1])
        to_cat = paddle.concat(
            (to_cat_current_frame_embedding, to_cat_nn_feature_n,
             to_cat_prev_frame_nn_feature_n, to_cat_previous_frame), 1)

        pred_ = dynamic_seghead(to_cat)
        pred_ = pred_.transpose([1, 0, 2, 3])
        dic_tmp[seq_names[n]] = pred_

    if global_map_tmp_dic is None:
        return dic_tmp
    if local_map_dics is None:
        return dic_tmp, global_map_tmp_dic
    return dic_tmp, global_map_tmp_dic, local_map_dics