def run_static(self, use_gpu=False):
    x = paddle.fluid.data(name='input', shape=[10, 10], dtype='float32')
    x2 = paddle.fluid.data(name='input2', shape=[2], dtype='float32')
    result0 = F.normalize(x)
    result1 = F.normalize(x, p=1.5)
    result2 = F.normalize(x, axis=0)
    result3 = F.normalize(x, name='aaa')
    result4 = F.normalize(x2, axis=0)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    static_result = exe.run(
        feed={"input": self.input_np,
              "input2": self.input_np2},
        fetch_list=[result0, result1, result2, result4])

    self.assertTrue(np.allclose(static_result[0], self.expected0))
    self.assertTrue(np.allclose(static_result[1], self.expected1))
    self.assertTrue(np.allclose(static_result[2], self.expected2))
    self.assertTrue('aaa' in result3.name)
    self.assertTrue(np.allclose(static_result[3], self.expected3))
    self.assertRaises(ValueError, F.normalize, x2)

def forward(self, x):
    x_shape = paddle.shape(x)
    x = x.flatten(2)
    mu = paddle.tile(self.mu, [x_shape[0], 1, 1])

    with paddle.no_grad():
        for i in range(self.stage_num):
            x_t = paddle.transpose(x, [0, 2, 1])
            z = paddle.bmm(x_t, mu)
            z = F.softmax(z, axis=2)
            z_ = F.normalize(z, axis=1, p=1)
            mu = paddle.bmm(x, z_)
            mu = F.normalize(mu, axis=1, p=2)

    z_t = paddle.transpose(z, [0, 2, 1])
    x = paddle.matmul(mu, z_t)
    x = paddle.reshape(x, [0, self.c, x_shape[2], x_shape[3]])

    if self.training:
        mu = paddle.mean(mu, 0, keepdim=True)
        mu = F.normalize(mu, axis=1, p=2)
        mu = self.mu * (1 - self.momentum) + mu * self.momentum
        if paddle.distributed.get_world_size() > 1:
            mu = paddle.distributed.all_reduce(mu)
            mu /= paddle.distributed.get_world_size()
        self.mu = mu

    return x

def forward(self, x):
    b, c, h, w = x.shape
    x = paddle.reshape(x, [b, c, h * w])
    mu = paddle.tile(self.mu, [b, 1, 1])

    with paddle.no_grad():
        for i in range(self.stage_num):
            x_t = paddle.transpose(x, [0, 2, 1])
            z = paddle.bmm(x_t, mu)
            z = F.softmax(z, axis=2)
            z_ = F.normalize(z, axis=1, p=1)
            mu = paddle.bmm(x, z_)
            mu = F.normalize(mu, axis=1, p=2)

    z_t = paddle.transpose(z, [0, 2, 1])
    x = paddle.matmul(mu, z_t)
    x = paddle.reshape(x, [b, c, h, w])

    if self.training:
        mu = paddle.mean(mu, 0, keepdim=True)
        if paddle.distributed.get_world_size() > 1:
            paddle.distributed.reduce(
                mu / paddle.distributed.get_world_size(), 0)
        mu = F.normalize(mu, axis=1, p=2)
        self.mu = self.mu * (1 - self.momentum) + mu * self.momentum

    return x

def apply(layer, name, n_power_iterations, dim, eps):
    for k, hook in layer._forward_pre_hooks.items():
        if isinstance(hook, SpectralNorm) and hook.name == name:
            raise RuntimeError("Cannot register two spectral_norm hooks on "
                               "the same parameter {}".format(name))

    fn = SpectralNorm(name, n_power_iterations, dim, eps)

    weight = layer._parameters[name]
    with paddle.no_grad():
        weight_mat = fn.reshape_weight_to_matrix(weight)
        h, w = weight_mat.shape

        # randomly initialize u and v
        u = layer.create_parameter([h])
        u = normal_(u, 0., 1.)
        v = layer.create_parameter([w])
        v = normal_(v, 0., 1.)
        u = F.normalize(u, axis=0, epsilon=fn.eps)
        v = F.normalize(v, axis=0, epsilon=fn.eps)

    # delete fn.name from parameters, otherwise you cannot set the attribute
    del layer._parameters[fn.name]
    layer.add_parameter(fn.name + "_orig", weight)
    # Still need to assign weight back as fn.name because all sorts of
    # things may assume that it exists, e.g., when initializing weights.
    # However, we can't directly assign as it could be a Parameter and
    # would get added as a parameter. Instead, we register weight * 1.0
    # as a plain attribute.
    setattr(layer, fn.name, weight * 1.0)
    layer.register_buffer(fn.name + "_u", u)
    layer.register_buffer(fn.name + "_v", v)

    layer.register_forward_pre_hook(fn)
    return fn

def forward(self, x):
    x0 = self.linear0(x[0])
    x1 = self.linear1(x[1])
    bs = x1.shape[0]
    if self.dropout_input > 0:
        x0 = F.dropout(x0, p=self.dropout_input, training=self.training)
        x1 = F.dropout(x1, p=self.dropout_input, training=self.training)
    x0_chunks = paddle.split(x0, self.chunks, -1)
    x1_chunks = paddle.split(x1, self.chunks, -1)
    zs = []
    for x0_c, x1_c, m0, m1 in zip(x0_chunks, x1_chunks,
                                  self.merge_linears0, self.merge_linears1):
        m = m0(x0_c) * m1(x1_c)  # bs x split_size*rank
        m = m.reshape([bs, self.rank, -1])
        z = paddle.sum(m, 1)
        if self.pos_norm == 'before_cat':
            z = paddle.sqrt(F.relu(z)) - paddle.sqrt(F.relu(-z))
            z = F.normalize(z)
        zs.append(z)
    z = paddle.concat(zs, 1)
    if self.pos_norm == 'after_cat':
        z = paddle.sqrt(F.relu(z)) - paddle.sqrt(F.relu(-z))
        z = F.normalize(z)

    if self.dropout_pre_lin > 0:
        z = F.dropout(z, p=self.dropout_pre_lin, training=self.training)
    z = self.linear_out(z)
    if self.dropout_output > 0:
        z = F.dropout(z, p=self.dropout_output, training=self.training)
    return z

def forward(self, input, label):
    # lambda = max(lambda_min, base * (1 + gamma * iteration) ** (-power))
    self.iter += 1
    self.lamb = max(
        self.LambdaMin,
        self.base * (1 + self.gamma * self.iter)**(-1 * self.power))

    # --------------------------- cos(theta) & phi(theta) ---------------------------
    self.linear.weight.Tensor = F.normalize(self.linear.weight)
    x = F.normalize(input)
    cos_theta = self.linear(x)
    cos_theta = cos_theta.clip(min=-1, max=1)
    cos_m_theta = self.mlambda[self.m](cos_theta)
    theta = cos_theta.acos()
    k = paddle.floor(self.m * theta / 3.14159265)
    phi_theta = paddle.to_tensor(((-1.0)**k) * cos_m_theta - 2 * k)
    NormOfFeature = paddle.norm(input, p=2, axis=1)

    # --------------------------- convert label to one-hot ---------------------------
    one_hot = F.one_hot(label, num_classes=phi_theta.shape[1])
    one_hot = paddle.reshape(one_hot,
                             (phi_theta.shape[0], phi_theta.shape[1]))

    # --------------------------- Calculate output ---------------------------
    output = (one_hot * (phi_theta - cos_theta) /
              (1 + self.lamb)) + cos_theta
    output *= NormOfFeature.reshape((-1, 1))

    return output

def compute_weight(self, module, do_power_iteration):
    weight = getattr(module, self.name + '_orig')
    u = getattr(module, self.name + '_u')
    v = getattr(module, self.name + '_v')
    weight_mat = self.reshape_weight_to_matrix(weight)

    if do_power_iteration:
        with paddle.no_grad():
            for _ in range(self.n_power_iterations):
                v.set_value(
                    F.normalize(
                        paddle.matmul(
                            weight_mat, u, transpose_x=True,
                            transpose_y=False),
                        axis=0,
                        epsilon=self.eps,
                    ))
                u.set_value(
                    F.normalize(
                        paddle.matmul(weight_mat, v),
                        axis=0,
                        epsilon=self.eps,
                    ))
            if self.n_power_iterations > 0:
                u = u.clone()
                v = v.clone()

    sigma = paddle.dot(u, paddle.mv(weight_mat, v))
    weight = weight / sigma
    return weight

def forward(self, logits, label):
    logits = F.normalize(logits, p=2, axis=1, epsilon=self.eps)
    wn = F.normalize(self.w, p=2, axis=0, epsilon=self.eps)
    cosine = paddle.matmul(logits, wn)
    y = paddle.zeros((logits.shape[0], self.n_classes))
    for i in range(logits.shape[0]):
        y[i, label[i]] = self.margin
    pred = F.log_softmax((cosine - y) * self.scale, -1)
    return self.nll_loss(pred, label), pred

def forward(self, audio_sequences, face_sequences):
    # audio_sequences := (B, dim, T)
    face_embedding = self.face_encoder(face_sequences)
    audio_embedding = self.audio_encoder(audio_sequences)

    audio_embedding = audio_embedding.reshape(
        [audio_embedding.shape[0], -1])
    face_embedding = face_embedding.reshape([face_embedding.shape[0], -1])

    audio_embedding = F.normalize(audio_embedding, p=2, axis=1)
    face_embedding = F.normalize(face_embedding, p=2, axis=1)

    return audio_embedding, face_embedding

def forward(self, input, label):
    cosine = F.linear(F.normalize(input), F.normalize(self.weight))
    sine = paddle.sqrt(
        paddle.clip(1.0 - paddle.pow(cosine, 2), min=0, max=1))
    phi = cosine * self.cos_m - sine * self.sin_m
    if self.easy_margin:
        phi = paddle.where(cosine > 0, phi, cosine)
    else:
        phi = paddle.where(cosine > self.th, phi, cosine - self.mm)
    one_hot = paddle.nn.functional.one_hot(label, self.class_dim)
    output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
    output *= self.s
    return output

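# The `phi` term above applies the additive angular margin via the identity
# cos(theta + m) = cos(theta) * cos(m) - sin(theta) * sin(m); `self.cos_m` and
# `self.sin_m` are presumably precomputed from the margin m in the constructor
# (an assumption, since the constructor is not shown here). A minimal NumPy
# check of that identity, with hypothetical values for m and theta:

import numpy as np

m, theta = 0.5, 1.2                       # hypothetical margin and angle
cos_m, sin_m = np.cos(m), np.sin(m)
cosine, sine = np.cos(theta), np.sin(theta)

phi = cosine * cos_m - sine * sin_m       # same expression as in forward()
assert np.isclose(phi, np.cos(theta + m))  # equals cos(theta + m)
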
def forward(self,
            neck_feat,
            inputs,
            bboxes=None,
            bbox_inds=None,
            topk_clses=None):
    reid_feat = self.reid(neck_feat)
    if self.training:
        if self.num_classes == 1:
            loss = self.get_loss(reid_feat, inputs)
        else:
            loss = self.get_mc_loss(reid_feat, inputs)
        return loss
    else:
        assert bboxes is not None and bbox_inds is not None
        reid_feat = F.normalize(reid_feat)
        embedding = paddle.transpose(reid_feat, [0, 2, 3, 1])
        embedding = paddle.reshape(embedding, [-1, self.ch_emb])
        # embedding shape: [bs * h * w, ch_emb]
        if self.num_classes == 1:
            pred_dets = bboxes
            pred_embs = paddle.gather(embedding, bbox_inds)
        else:
            pred_dets, pred_embs = self.process_by_class(
                bboxes, embedding, bbox_inds, topk_clses)
        return pred_dets, pred_embs

def forward(self, node_feat, edge_feat):
    # get size
    num_tasks = node_feat.shape[0]
    num_data = node_feat.shape[1]

    # get eye matrix (batch_size x 2 x node_size x node_size)
    diag_mask = 1.0 - paddle.expand(
        paddle.eye(num_data),
        [num_tasks, self.edge_dim, num_data, num_data])

    # set diagonal as zero and normalize
    edge_feat = F.normalize(edge_feat * diag_mask, p=1, axis=-1)

    # compute attention and aggregate
    aggr_feat = paddle.bmm(
        paddle.concat(paddle.split(edge_feat, 2, 1),
                      self.edge_dim).squeeze(1), node_feat)

    node_feat = paddle.transpose(
        paddle.concat(
            [node_feat,
             paddle.concat(paddle.split(aggr_feat, 2, 1), -1)], -1),
        (0, 2, 1))

    # non-linear transform
    node_feat = paddle.transpose(self.network(node_feat.unsqueeze(-1)),
                                 (0, 2, 1, 3)).squeeze(-1)
    return node_feat

def forward(self, input, label):
    # --------------------------- cos(theta) & phi(theta) ---------------------------
    self.linear.weight.Tensor = F.normalize(self.linear.weight)
    x = F.normalize(input)
    cosine = self.linear(x)
    phi = cosine - self.m

    # --------------------------- convert label to one-hot ---------------------------
    label = label.astype(dtype='int64').flatten()
    one_hot = F.one_hot(label, num_classes=phi.shape[1])

    # ------------- torch.where(out_i = {x_i if condition_i else y_i}) -------------
    output = (one_hot * phi) + (
        (1.0 - one_hot) * cosine
    )  # you can use torch.where if your torch.__version__ is 0.4
    output *= self.s

    return output

def accumulate(self):
    logger.info("Computing pairwise similarity...")
    assert len(self.embedding) == len(self.id_labels)
    if len(self.embedding) < 1:
        return None
    embedding = paddle.stack(self.embedding, axis=0)
    emb = F.normalize(embedding, axis=1).numpy()
    pdist = np.matmul(emb, emb.T)

    id_labels = np.array(self.id_labels, dtype='int32').reshape(-1, 1)
    n = len(id_labels)
    id_lbl = np.tile(id_labels, n).T
    gt = id_lbl == id_lbl.T

    up_triangle = np.where(np.triu(pdist) - np.eye(n) * pdist != 0)
    pdist = pdist[up_triangle]
    gt = gt[up_triangle]

    # lazy import metrics here
    from sklearn import metrics
    far, tar, threshold = metrics.roc_curve(gt, pdist)
    interp = interpolate.interp1d(far, tar)
    tar_at_far = [interp(x) for x in self.far_levels]
    for f, fa in enumerate(self.far_levels):
        self.eval_results['TPR@FAR={:.7f}'.format(fa)] = ' {:.4f}'.format(
            tar_at_far[f])

def get_emb_and_gt_outs(self, ide_outs, targets):
    emb_and_gts = []
    for i, p_ide in enumerate(ide_outs):
        t_conf = targets['tconf{}'.format(i)]
        t_ide = targets['tide{}'.format(i)]

        p_ide = p_ide.transpose((0, 2, 3, 1))
        p_ide_flatten = paddle.reshape(p_ide, [-1, self.embedding_dim])

        mask = t_conf > 0
        mask = paddle.cast(mask, dtype="int64")
        emb_mask = mask.max(1).flatten()
        emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()
        if len(emb_mask_inds) > 0:
            t_ide_flatten = paddle.reshape(t_ide.max(1), [-1, 1])
            tids = paddle.gather(t_ide_flatten, emb_mask_inds)

            embedding = paddle.gather(p_ide_flatten, emb_mask_inds)
            embedding = self.emb_scale * F.normalize(embedding)
            emb_and_gt = paddle.concat([embedding, tids], axis=1)
            emb_and_gts.append(emb_and_gt)

    if len(emb_and_gts) > 0:
        return paddle.concat(emb_and_gts, axis=0)
    else:
        return paddle.zeros((1, self.embedding_dim + 1))

def forward(self, graph, feature, act=None):
    """
    Args:
        graph: `pgl.Graph` instance.
        feature: A tensor with shape (num_nodes, input_size).
        act: (default None) Activation applied to the outputs before
            normalization.

    Return:
        A tensor with shape (num_nodes, output_size).
    """

    def _send_func(src_feat, dst_feat, edge_feat):
        return {"msg": src_feat["h"]}

    def _recv_func(message):
        return getattr(message, self.aggr_func)(message["msg"])

    msg = graph.send(_send_func, src_feat={"h": feature})
    neigh_feature = graph.recv(reduce_func=_recv_func, msg=msg)

    self_feature = self.self_linear(feature)
    neigh_feature = self.neigh_linear(neigh_feature)
    output = self_feature + neigh_feature
    if act is not None:
        output = getattr(F, act)(output)

    output = F.normalize(output, axis=1)
    return output

def __init__(self,
             base_encoder,
             dim=128,
             queue_size=65536,
             momentum=0.999,
             scale=50,
             margin=0.3):
    super(DCQ, self).__init__()

    self.queue_size = queue_size
    self.momentum = momentum
    self.scale = scale
    self.margin = margin

    # create the encoders
    # num_classes is the output fc dimension
    self.encoder_q = base_encoder(num_classes=dim, name_prefix='q')
    self.encoder_k = base_encoder(num_classes=dim, name_prefix='k')

    for param_q, param_k in zip(
            self.encoder_q.parameters(include_sublayers=True),
            self.encoder_k.parameters(include_sublayers=True)):
        param_k.stop_gradient = True
        param_q.set_value(param_k)

    self.register_buffer("weight_queue", paddle.randn([dim, queue_size]))
    self.weight_queue = normalize(self.weight_queue, axis=0)
    self.register_buffer("label_queue", paddle.randn([1, queue_size]))
    self.register_buffer("queue_ptr", paddle.zeros([1], dtype='int64'))

def emb_loss(self, p_ide, t_conf, t_ide, emb_scale, classifier):
    emb_dim = p_ide.shape[1]
    p_ide = p_ide.transpose((0, 2, 3, 1))
    p_ide_flatten = paddle.reshape(p_ide, [-1, emb_dim])

    mask = t_conf > 0
    mask = paddle.cast(mask, dtype="int64")
    mask.stop_gradient = True
    emb_mask = mask.max(1).flatten()
    emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()
    emb_mask_inds.stop_gradient = True

    # use max(1) to decide the id, TODO: more reasonable strategy
    t_ide_flatten = t_ide.max(1).flatten()
    t_ide_flatten = paddle.cast(t_ide_flatten, dtype="int64")
    valid_inds = paddle.nonzero(t_ide_flatten != -1).flatten()

    if emb_mask_inds.numel() == 0 or valid_inds.numel() == 0:
        # loss_ide = paddle.to_tensor([0])
        # will be error in gradient backward
        loss_ide = self.phony * 0  # todo
    else:
        embedding = paddle.gather(p_ide_flatten, emb_mask_inds)
        embedding = emb_scale * F.normalize(embedding)
        logits = classifier(embedding)

        ide_target = paddle.gather(t_ide_flatten, emb_mask_inds)
        loss_ide = F.cross_entropy(
            logits, ide_target, ignore_index=-1, reduction='mean')
    loss_ide.stop_gradient = False

    return loss_ide

def run_imperative(self):
    x = paddle.to_tensor(self.input_np)
    y = F.normalize(x)
    self.assertTrue(np.allclose(y.numpy(), self.expected0))

    y = F.normalize(x, p=1.5)
    self.assertTrue(np.allclose(y.numpy(), self.expected1))

    y = F.normalize(x, axis=0)
    self.assertTrue(np.allclose(y.numpy(), self.expected2))

    x = paddle.to_tensor(self.input_np2)
    y = F.normalize(x, axis=0)
    self.assertTrue(np.allclose(y.numpy(), self.expected3))

    self.assertRaises(BaseException, F.normalize, x)

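# For reference, F.normalize(x, p, axis, epsilon) divides each vector along
# `axis` by the maximum of its p-norm and `epsilon`. The `expected*` fixtures
# the tests above compare against are set up elsewhere and are not shown here;
# a minimal NumPy sketch of that reference computation (an illustrative
# assumption, not the test suite's own helper) could look like this:

import numpy as np

def numpy_normalize(x, p=2, axis=1, epsilon=1e-12):
    # p-norm along `axis`, clipped from below by `epsilon` to avoid
    # division by zero.
    norm = np.linalg.norm(x, ord=p, axis=axis, keepdims=True)
    return x / np.maximum(norm, epsilon)

x = np.random.rand(10, 10).astype('float32')
print(numpy_normalize(x))          # analogue of F.normalize(x)
print(numpy_normalize(x, p=1.5))   # analogue of F.normalize(x, p=1.5)
print(numpy_normalize(x, axis=0))  # analogue of F.normalize(x, axis=0)
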
def prepare(self, label, optimizer):
    # label [64, 1]
    total_label = label.detach()
    self.sample(total_label)
    optimizer._parameter_list[0] = self.sub_weight
    norm_weight = normalize(self.sub_weight)
    return total_label, norm_weight

def forward(self, embedding, targets):
    if isinstance(embedding, dict):
        embedding = embedding['features']
    # Normalize embedding features
    embedding = F.normalize(embedding, axis=1)
    dist_mat = paddle.matmul(embedding, embedding, transpose_y=True)

    N = dist_mat.shape[0]
    is_pos = targets.reshape([N, 1]).expand([N, N]).equal(
        paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float')
    is_neg = targets.reshape([N, 1]).expand([N, N]).not_equal(
        paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float')

    # Mask scores related to itself
    is_pos = is_pos - paddle.eye(N, N)

    s_p = dist_mat * is_pos
    s_n = dist_mat * is_neg

    logit_p = -self.gamma * s_p + (-99999999.) * (1 - is_pos)
    logit_n = self.gamma * (s_n + self.margin) + (-99999999.) * (1 - is_neg)

    loss = F.softplus(
        paddle.logsumexp(logit_p, axis=1) +
        paddle.logsumexp(logit_n, axis=1)).mean()

    return {"PairwiseCosface": loss}

def get_pooled_embedding(self,
                         input_ids,
                         token_type_ids=None,
                         position_ids=None):
    src_mask = input_ids == self.bos_id
    src_mask = paddle.cast(src_mask, "float32")
    # [bs, 1, 1, max_len]
    src_mask = paddle.unsqueeze(src_mask, axis=[1, 2])
    src_mask.stop_gradient = True

    ones = paddle.ones_like(input_ids, dtype="int64")
    seq_length = paddle.cumsum(ones, axis=1)
    position_ids = seq_length - ones
    position_ids.stop_gradient = True

    embedding_output = self.ptm.embeddings(
        input_ids=input_ids,
        position_ids=position_ids,
        token_type_ids=token_type_ids)

    if self.use_fp16:
        embedding_output = paddle.cast(embedding_output, 'float16')

    sequence_output = self.ptm.encoder(embedding_output, src_mask)

    if self.use_fp16:
        sequence_output = paddle.cast(sequence_output, 'float32')

    cls_embedding = self.ptm.pooler(sequence_output)

    if self.output_emb_size > 0:
        cls_embedding = self.emb_reduce_linear(cls_embedding)

    cls_embedding = self.dropout(cls_embedding)
    cls_embedding = F.normalize(cls_embedding, p=2, axis=-1)

    return cls_embedding

def forward(self, student, teacher):
    # reshape for feature map distillation
    bs = student.shape[0]
    student = student.reshape([bs, -1])
    teacher = teacher.reshape([bs, -1])

    td = (teacher.unsqueeze(0) - teacher.unsqueeze(1))
    norm_td = F.normalize(td, p=2, axis=2)
    t_angle = paddle.bmm(norm_td, norm_td.transpose([0, 2, 1])).reshape(
        [-1, 1])

    sd = (student.unsqueeze(0) - student.unsqueeze(1))
    norm_sd = F.normalize(sd, p=2, axis=2)
    s_angle = paddle.bmm(norm_sd, norm_sd.transpose([0, 2, 1])).reshape(
        [-1, 1])

    loss = F.smooth_l1_loss(s_angle, t_angle, reduction='mean')
    return loss

def forward(self, feature, label):
    cos_theta = paddle.mm(F.normalize(feature, axis=1),
                          F.normalize(self.weight, axis=0))
    sin_theta = paddle.sqrt(
        paddle.clip(1.0 - paddle.pow(cos_theta, 2), min=0, max=1))
    cos_theta_m = cos_theta * self.cos_m - sin_theta * self.sin_m
    cos_theta_m = paddle.where(cos_theta > self.threshold, cos_theta_m,
                               cos_theta - self.mm)
    one_hot = paddle.nn.functional.one_hot(label, self.class_dim)
    output = (one_hot * cos_theta_m) + (paddle.abs(
        (1.0 - one_hot)) * cos_theta)
    output *= self.s
    # Simpler classification variant; the learning rate needs to be set to 0.1:
    # cosine = self.cosine_sim(feature, self.weight)
    # one_hot = paddle.nn.functional.one_hot(label, self.class_dim)
    # output = self.s * (cosine - one_hot * self.m)
    return output

def forward(self, feat, inputs):
    reid_feat = self.reid(feat)
    if self.training:
        loss = self.get_loss(reid_feat, inputs)
        return loss
    else:
        reid_feat = F.normalize(reid_feat)
        return reid_feat

def forward(self, logits, targets):
    logits = F.normalize(logits, p=2, axis=1, epsilon=1e-8)
    wn = F.normalize(self.w, p=2, axis=0, epsilon=1e-8)
    cosine = logits @ wn
    sine = paddle.sqrt(1.0 - paddle.square(cosine))
    phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta + m)
    if self.easy_margin:
        phi = paddle.where(cosine > 0, phi, cosine)
    else:
        phi = paddle.where(cosine > self.th, phi, cosine - self.mm)
    target_one_hot = F.one_hot(targets, self.n_classes)
    outputs = (target_one_hot * phi) + (
        (1.0 - target_one_hot) * cosine) - target_one_hot * self.margin2
    outputs = self.scale * outputs
    pred = F.log_softmax(outputs, axis=-1)
    return self.nll_loss(pred, targets), pred

def forward(self, graph, u_feat, i_feat):
    h = paddle.concat([u_feat, i_feat])
    embs = [h]
    for i in range(self.n_layers):
        h = self.ngcfs[i](graph, h)
        norm_h = F.normalize(h, p=2, axis=1)
        embs.append(norm_h)
    embs = paddle.concat(embs, axis=1)
    users, items = paddle.split(embs, [u_feat.shape[0], i_feat.shape[0]])
    return users, items

def __init__(self, c, k, stage_num=3, momentum=0.1):
    super(EMAU, self).__init__()
    assert stage_num >= 1
    self.stage_num = stage_num
    self.momentum = momentum

    tmp_mu = self.create_parameter(
        shape=[1, c, k],
        default_initializer=paddle.nn.initializer.KaimingNormal(k))
    self.mu = F.normalize(paddle.to_tensor(tmp_mu), axis=1, p=2)
    self.register_buffer('bases', self.mu)

def forward(self, analogy_a, analogy_b, analogy_c, true_word, all_label):
    emb_a = self.embedding(analogy_a)
    emb_b = self.embedding(analogy_b)
    emb_c = self.embedding(analogy_c)
    emb_all_label = self.embedding(all_label)

    target = emb_b - emb_a + emb_c
    emb_all_label_l2 = F.normalize(emb_all_label, axis=1)
    dist = paddle.matmul(x=target, y=emb_all_label_l2, transpose_y=True)
    values, pred_idx = paddle.topk(x=dist, k=4)
    return values, pred_idx

def get_mc_loss(self, feat, inputs):
    # feat.shape = [bs, ch_emb, h, w]
    assert 'cls_id_map' in inputs and 'cls_tr_ids' in inputs
    index = inputs['index']
    mask = inputs['index_mask']
    cls_id_map = inputs['cls_id_map']  # [bs, h, w]
    cls_tr_ids = inputs['cls_tr_ids']  # [bs, num_classes, h, w]

    feat = paddle.transpose(feat, perm=[0, 2, 3, 1])
    feat_n, feat_h, feat_w, feat_c = feat.shape
    feat = paddle.reshape(feat, shape=[feat_n, -1, feat_c])

    index = paddle.unsqueeze(index, 2)
    batch_inds = list()
    for i in range(feat_n):
        batch_ind = paddle.full(
            shape=[1, index.shape[1], 1], fill_value=i, dtype='int64')
        batch_inds.append(batch_ind)
    batch_inds = paddle.concat(batch_inds, axis=0)
    index = paddle.concat(x=[batch_inds, index], axis=2)
    feat = paddle.gather_nd(feat, index=index)

    mask = paddle.unsqueeze(mask, axis=2)
    mask = paddle.expand_as(mask, feat)
    mask.stop_gradient = True
    feat = paddle.masked_select(feat, mask > 0)
    feat = paddle.reshape(feat, shape=[-1, feat_c])

    reid_losses = 0
    for cls_id, id_num in self.num_identities_dict.items():
        # target
        cur_cls_tr_ids = paddle.reshape(
            cls_tr_ids[:, cls_id, :, :], shape=[feat_n, -1])  # [bs, h*w]
        cls_id_target = paddle.gather_nd(cur_cls_tr_ids, index=index)
        mask = inputs['index_mask']
        cls_id_target = paddle.masked_select(cls_id_target, mask > 0)
        cls_id_target.stop_gradient = True

        # feat
        cls_id_feat = self.emb_scale_dict[str(cls_id)] * F.normalize(feat)
        cls_id_pred = self.classifiers[str(cls_id)](cls_id_feat)

        loss = self.reid_loss(cls_id_pred, cls_id_target)
        valid = (cls_id_target != self.reid_loss.ignore_index)
        valid.stop_gradient = True
        count = paddle.sum(paddle.cast(valid, dtype=np.int32))
        count.stop_gradient = True
        if count > 0:
            loss = loss / count
        reid_losses += loss

    return reid_losses