def cdist(self, a, b):
    # Squared Euclidean distance via ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2.
    # Note: the original computed b_s but never added it; the b_s term below
    # completes the expansion.
    a_s = paddle.norm(a, p=2, axis=-1).pow(2)
    b_s = paddle.norm(b, p=2, axis=-1).pow(2)
    dist_score = (-2 * paddle.bmm(a, b.transpose([0, 2, 1]))
                  + a_s.unsqueeze(-1) + b_s.unsqueeze(-2))
    dist_score = paddle.sqrt(paddle.clip(dist_score, min=1e-30))
    return dist_score
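# Sanity sketch (added, not from the original source): the expansion
# ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2 used by cdist above should match a
# brute-force pairwise distance computed with numpy broadcasting.
def _check_cdist_sketch():
    import numpy as np
    import paddle
    a, b = paddle.rand([2, 5, 8]), paddle.rand([2, 7, 8])
    a_s = paddle.norm(a, p=2, axis=-1).pow(2)
    b_s = paddle.norm(b, p=2, axis=-1).pow(2)
    d2 = (-2 * paddle.bmm(a, b.transpose([0, 2, 1]))
          + a_s.unsqueeze(-1) + b_s.unsqueeze(-2))
    d = paddle.sqrt(paddle.clip(d2, min=1e-30))  # [2, 5, 7]
    ref = np.linalg.norm(
        a.numpy()[:, :, None, :] - b.numpy()[:, None, :, :], axis=-1)
    assert np.allclose(d.numpy(), ref, atol=1e-5)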
def run_graph(self, p, axis, shape_x, dtype):
    paddle.disable_static()
    shape = [2, 3, 4]
    np_input = np.arange(24).astype('float32') - 12
    np_input = np_input.reshape(shape)
    x = paddle.to_tensor(np_input)
    # [[[-12. -11. -10.  -9.] [ -8.  -7.  -6.  -5.] [ -4.  -3.  -2.  -1.]]
    #  [[  0.   1.   2.   3.] [  4.   5.   6.   7.] [  8.   9.  10.  11.]]]
    out_pnorm = paddle.norm(x, p=2, axis=-1)

    # compute frobenius norm along last two dimensions.
    out_fro = paddle.norm(x, p='fro')
    out_fro = paddle.norm(x, p='fro', axis=0)
    out_fro = paddle.norm(x, p='fro', axis=[0, 1])

    # compute 2-order norm along [0, 1] dimension.
    out_pnorm = paddle.norm(x, p=2, axis=[0, 1])
    # out_pnorm = [17.43559577 16.91153453 16.73320053 16.91153453]
    out_pnorm = paddle.norm(x, p=2)
    # out_pnorm = [34.]

    # compute inf-order norm
    out_pnorm = paddle.norm(x, p=np.inf)
    # out_pnorm = [12.]
    out_pnorm = paddle.norm(x, p=np.inf, axis=0)
    # out_pnorm = [[12. 11. 10.  9.] [ 8.  7.  6.  7.] [ 8.  9. 10. 11.]]

    # compute -inf-order norm
    out_pnorm = paddle.norm(x, p=-np.inf)
    # out_pnorm = [0.]
    out_pnorm = paddle.norm(x, p=-np.inf, axis=0)
    # out_pnorm = [[0. 1. 2. 3.] [4. 5. 6. 5.] [4. 3. 2. 1.]]
    paddle.enable_static()
def test_name(self):
    with fluid.program_guard(fluid.Program()):
        x = fluid.data(name="x", shape=[10, 10], dtype="float32")
        y_1 = paddle.norm(x, p='fro', name='frobenius_name')
        y_2 = paddle.norm(x, p=2, name='pnorm_name')
        self.assertEqual(('frobenius_name' in y_1.name), True)
        self.assertEqual(('pnorm_name' in y_2.name), True)
def forward(self, x, patch_embed_size):
    """
    Forward function.

    Args:
        x (Tensor): Input tensor of decoder.
        patch_embed_size (Tensor): The height and width of the patch embed tensor.

    Returns:
        list[Tensor]: Segmentation results.
    """
    x = self.proj_input(x)
    cls_token = self.cls_token.expand((paddle.shape(x)[0], -1, -1))
    x = paddle.concat([x, cls_token], axis=1)
    for block in self.blocks:
        x = block(x)
    x = self.decoder_norm(x)

    patches, masks = x[:, :-self.num_classes], x[:, -self.num_classes:]
    patches = self.proj_patch(patches)
    masks = self.proj_class(masks)
    # L2-normalize both so the product below is a cosine similarity
    patches = patches / paddle.norm(patches, axis=-1, keepdim=True)
    masks = masks / paddle.norm(masks, axis=-1, keepdim=True)

    masks = patches @ masks.transpose((0, 2, 1))
    masks = masks.reshape(
        (0, 0, self.num_classes))  # For export inference model
    masks = self.mask_norm(masks)

    # [b, (h w), c] -> [b, c, h, w]
    h, w = patch_embed_size[0], patch_embed_size[1]
    masks = masks.reshape((0, h, w, paddle.shape(masks)[-1]))
    masks = masks.transpose((0, 3, 1, 2))

    return [masks]
def similarity_matrix(self, embeds):
    # embeds: (N, M, C)
    speakers_per_batch, utterances_per_speaker, embed_dim = embeds.shape

    # Inclusive centroids (1 per speaker). Cloning is needed for reverse differentiation
    centroids_incl = paddle.mean(embeds, axis=1)
    centroids_incl_norm = paddle.norm(
        centroids_incl, p=2, axis=1, keepdim=True)
    normalized_centroids_incl = centroids_incl / centroids_incl_norm

    # Exclusive centroids (1 per utterance)
    centroids_excl = paddle.broadcast_to(
        paddle.sum(embeds, axis=1, keepdim=True), embeds.shape) - embeds
    centroids_excl /= (utterances_per_speaker - 1)
    centroids_excl_norm = paddle.norm(
        centroids_excl, p=2, axis=2, keepdim=True)
    normalized_centroids_excl = centroids_excl / centroids_excl_norm

    p1 = paddle.matmul(embeds.reshape([-1, embed_dim]),
                       normalized_centroids_incl,
                       transpose_y=True)  # (NM, N)
    p1 = p1.reshape([-1])
    # print("p1: ", p1.shape)
    p2 = paddle.bmm(embeds.reshape([-1, 1, embed_dim]),
                    normalized_centroids_excl.reshape(
                        [-1, embed_dim, 1]))  # (NM, 1, 1)
    p2 = p2.reshape([-1])  # (NM)

    # begin: alternative implementation for scatter
    with paddle.no_grad():
        index = paddle.arange(
            0, speakers_per_batch * utterances_per_speaker,
            dtype="int64").reshape(
                [speakers_per_batch, utterances_per_speaker])
        index = index * speakers_per_batch + paddle.arange(
            0, speakers_per_batch, dtype="int64").unsqueeze(-1)
        index = paddle.reshape(index, [-1])
    ones = paddle.ones(
        [speakers_per_batch * utterances_per_speaker * speakers_per_batch])
    zeros = paddle.zeros_like(index, dtype=ones.dtype)
    mask_p1 = paddle.scatter(ones, index, zeros)
    p = p1 * mask_p1 + (1 - mask_p1) * paddle.scatter(ones, index, p2)
    # end: alternative implementation for scatter
    # p = paddle.scatter(p1, index, p2)

    p = p * self.similarity_weight + self.similarity_bias  # neg

    p = p.reshape(
        [speakers_per_batch * utterances_per_speaker, speakers_per_batch])
    return p, p1, p2
def forward(self, x1, x2, target):
    similarity = paddle.fluid.layers.reduce_sum(x1 * x2, dim=-1) / (
        paddle.norm(x1, axis=-1) * paddle.norm(x2, axis=-1) + self.epsilon)
    one_list = paddle.full_like(target, fill_value=1)
    out = paddle.fluid.layers.reduce_mean(
        paddle.where(
            paddle.equal(target, one_list), 1. - similarity,
            paddle.maximum(paddle.zeros_like(similarity),
                           similarity - self.margin)))
    return out
def __init__(self,
             quaternion: paddle.Tensor,
             translation: paddle.Tensor,
             rotation=None,
             normalize=True):
    """Initialize from quaternion and translation.

    Args:
        quaternion: Rotation represented by a quaternion, to be applied
            before translation. Must be a unit quaternion unless
            normalize==True. Shape (batch, N_res, 4).
        translation: Translation represented as a vector, shape
            (batch, N_res, 3).
        rotation: Same rotation as the quaternion, represented as a
            (batch, N_res, 3, 3) tensor. If None, rotation will be
            calculated from the quaternion.
        normalize: If True, l2 normalize the quaternion on input.
    """
    if quaternion is not None:
        assert quaternion.shape[-1] == 4

    if normalize and quaternion is not None:
        q_length = paddle.norm(quaternion, axis=-1)
        quaternion = quaternion / q_length[..., None]

    if rotation is None:
        rotation = quat_to_rot(quaternion)

    self.quaternion = quaternion
    self.rotation = rotation
    self.translation = translation

    assert rotation.shape[-1] == 3 and rotation.shape[-2] == 3
    assert translation.shape[-1] == 3
def group_pixels(ctr, offsets):
    """
    Gives each pixel in the image an instance id.

    Args:
        ctr (Tensor): A Tensor of shape [K, 2] where K is the number of
            center points. The order of the second dim is (y, x).
        offsets (Tensor): A Tensor of shape [2, H, W] of raw offset output.
            For consistency, only batch size N=1 is supported. The order of
            the first dim is (offset_y, offset_x).

    Returns:
        Tensor: A Tensor of shape [1, H, W] with instance ids 1, 2, ...
    """
    height, width = offsets.shape[-2:]

    # build a coordinate map where each location holds its own (y, x)
    y_coord = paddle.arange(height, dtype=offsets.dtype).reshape([1, -1, 1])
    y_coord = paddle.concat([y_coord] * width, axis=2)
    x_coord = paddle.arange(width, dtype=offsets.dtype).reshape([1, 1, -1])
    x_coord = paddle.concat([x_coord] * height, axis=1)
    coord = paddle.concat([y_coord, x_coord], axis=0)

    ctr_loc = coord + offsets
    ctr_loc = ctr_loc.reshape((2, height * width)).transpose((1, 0))

    # ctr: [K, 2] -> [K, 1, 2]
    # ctr_loc: [H*W, 2] -> [1, H*W, 2]
    ctr = ctr.unsqueeze(1)
    ctr_loc = ctr_loc.unsqueeze(0)

    # distance: [K, H*W]
    distance = paddle.norm((ctr - ctr_loc).astype('float32'), axis=-1)

    # finds center with minimum distance at each location, offset by 1,
    # to reserve id=0 for stuff
    instance_id = paddle.argmin(distance, axis=0).reshape(
        (1, height, width)) + 1
    return instance_id
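# Usage sketch (added; shapes are illustrative assumptions): two centers on a
# 4x4 grid with zero offsets should partition pixels by the nearest center,
# yielding instance ids 1 and 2.
def _group_pixels_sketch():
    import paddle
    ctr = paddle.to_tensor([[0., 0.], [3., 3.]])        # [K=2, 2], (y, x) order
    offsets = paddle.zeros([2, 4, 4], dtype='float32')  # [2, H, W]
    ins_id = group_pixels(ctr, offsets)                 # [1, 4, 4]
    print(ins_id.numpy())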
def build_inv_delta_C_paddle(self, C):
    """ Return inv_delta_C which is needed to calculate T """
    F = self.F
    hat_eye = paddle.eye(F, dtype='float64')  # F x F
    hat_C = paddle.norm(
        C.reshape([1, F, 2]) - C.reshape([F, 1, 2]), axis=2) + hat_eye
    hat_C = (hat_C**2) * paddle.log(hat_C)
    delta_C = paddle.concat(  # F+3 x F+3
        [
            paddle.concat(
                [paddle.ones((F, 1), dtype='float64'), C, hat_C],
                axis=1),  # F x F+3
            paddle.concat(
                [
                    paddle.zeros((2, 3), dtype='float64'),
                    paddle.transpose(C, perm=[1, 0])
                ],
                axis=1),  # 2 x F+3
            paddle.concat(
                [
                    paddle.zeros((1, 3), dtype='float64'),
                    paddle.ones((1, F), dtype='float64')
                ],
                axis=1)  # 1 x F+3
        ],
        axis=0)
    inv_delta_C = paddle.inverse(delta_C)
    return inv_delta_C  # F+3 x F+3
def build_inv_delta_C_paddle(self, C):
    """ Return inv_delta_C which is needed to calculate T """
    F = self.F
    hat_C = paddle.zeros((F, F), dtype='float32')  # F x F
    for i in range(0, F):
        for j in range(i, F):
            if i == j:
                hat_C[i, j] = 1
            else:
                r = paddle.norm(C[i] - C[j])
                hat_C[i, j] = r
                hat_C[j, i] = r
    hat_C = (hat_C**2) * paddle.log(hat_C)
    delta_C = paddle.concat(  # F+3 x F+3
        [
            paddle.concat(
                [paddle.ones((F, 1)), C, hat_C], axis=1),  # F x F+3
            paddle.concat(
                [paddle.zeros((2, 3)), paddle.transpose(C, perm=[1, 0])],
                axis=1),  # 2 x F+3
            paddle.concat(
                [paddle.zeros((1, 3)), paddle.ones((1, F))],
                axis=1)  # 1 x F+3
        ],
        axis=0)
    inv_delta_C = paddle.inverse(delta_C)
    return inv_delta_C  # F+3 x F+3
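# Downstream use (a sketch of the standard thin-plate-spline step, not code
# from the original source): the transform parameters T solve
# delta_C @ T = [C'; 0; 0] for target control points C' of shape [F, 2], so T
# is recovered with a single matmul against the inverse built above.
def _solve_tps_T_sketch(inv_delta_C, C_prime):
    import paddle
    rhs = paddle.concat(
        [C_prime, paddle.zeros((3, 2), dtype=C_prime.dtype)],
        axis=0)  # [F+3, 2]
    return paddle.matmul(inv_delta_C, rhs)  # T: [F+3, 2]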
def forward(self, input, label):
    # lambda = max(lambda_min, base * (1 + gamma * iteration) ** (-power))
    self.iter += 1
    self.lamb = max(
        self.LambdaMin,
        self.base * (1 + self.gamma * self.iter)**(-1 * self.power))

    # --------------------------- cos(theta) & phi(theta) ---------------------------
    # Note: the original assigned to `weight.Tensor`, which is a no-op;
    # set_value actually writes the normalized weight back.
    self.linear.weight.set_value(F.normalize(self.linear.weight))
    x = F.normalize(input)
    cos_theta = self.linear(x)
    cos_theta = cos_theta.clip(min=-1, max=1)
    cos_m_theta = self.mlambda[self.m](cos_theta)
    theta = cos_theta.acos()
    k = paddle.floor(self.m * theta / 3.14159265)
    phi_theta = ((-1.0)**k) * cos_m_theta - 2 * k
    NormOfFeature = paddle.norm(input, p=2, axis=1)

    # --------------------------- convert label to one-hot ---------------------------
    one_hot = F.one_hot(label, num_classes=phi_theta.shape[1])
    one_hot = paddle.reshape(one_hot,
                             (phi_theta.shape[0], phi_theta.shape[1]))

    # --------------------------- Calculate output ---------------------------
    output = (one_hot * (phi_theta - cos_theta) /
              (1 + self.lamb)) + cos_theta
    output *= NormOfFeature.reshape((-1, 1))

    return output
def model(self, x, w, bias, opt):
    paddle.seed(0)
    place = paddle.CPUPlace()
    if paddle.device.is_compiled_with_cuda():
        place = paddle.CUDAPlace(0)
    exe = paddle.static.Executor(place)

    main = paddle.static.Program()
    startup = paddle.static.Program()
    with paddle.static.program_guard(main, startup):
        input_x = paddle.static.data('x', x.shape, dtype=x.dtype)
        input_x.stop_gradient = False
        params_w = paddle.static.create_parameter(shape=w.shape,
                                                  dtype=w.dtype,
                                                  is_bias=False)
        params_bias = paddle.static.create_parameter(shape=bias.shape,
                                                     dtype=bias.dtype,
                                                     is_bias=True)
        y = paddle.tanh(paddle.matmul(input_x, params_w) + params_bias)
        loss = paddle.norm(y, p=2)
        _, grads = opt.minimize(loss)
        if prim_enabled():
            prim2orig(main.block(0))

    exe.run(startup)
    grads = exe.run(main,
                    feed={'x': x, 'w': w, 'bias': bias},
                    fetch_list=grads)
    return grads
def _build_volume_2d3(self, feat_l, feat_r, maxdisp, disp, stride=1):
    """ output residual map, L1 distance-based cost """
    size = feat_l.shape
    disp = paddle.unsqueeze(disp, axis=1)
    batch_disp = paddle.expand(
        disp,
        shape=[disp.shape[0], maxdisp * 2 - 1, disp.shape[-3],
               disp.shape[-2], disp.shape[-1]])
    batch_disp = batch_disp.reshape(shape=[-1, 1, size[-2], size[-1]])
    batch_shift = paddle.arange(-maxdisp + 1, maxdisp, dtype="float32")
    batch_shift = paddle.expand(
        batch_shift,
        shape=[size[0], batch_shift.shape[0]]).reshape(
            shape=[-1]).unsqueeze(axis=[1, 2, 3]) * stride
    batch_disp = batch_disp - batch_shift
    batch_feat_l = paddle.unsqueeze(feat_l, axis=1).expand(
        shape=[size[0], maxdisp * 2 - 1, size[-3], size[-2],
               size[-1]]).reshape(shape=[-1, size[-3], size[-2], size[-1]])
    batch_feat_r = paddle.unsqueeze(feat_r, axis=1).expand(
        shape=[size[0], maxdisp * 2 - 1, size[-3], size[-2],
               size[-1]]).reshape(shape=[-1, size[-3], size[-2], size[-1]])
    cost = paddle.norm(
        batch_feat_l - self.warp(batch_feat_r, batch_disp), 1,
        1)  # output residual map
    cost = cost.reshape(shape=[size[0], -1, size[2], size[3]])
    return cost
def forward(self, input, target):
    """
    Args:
        input: dict with key "features", a feature matrix with shape
            (batch_size, feat_dim)
        target: ground truth labels with shape (batch_size,)
    """
    inputs = input["features"]

    if self.normalize_feature:
        inputs = 1. * inputs / (paddle.expand_as(
            paddle.norm(inputs, p=2, axis=-1, keepdim=True), inputs) + 1e-12)

    bs = inputs.shape[0]

    # compute distance
    dist = paddle.pow(inputs, 2).sum(axis=1, keepdim=True).expand([bs, bs])
    dist = dist + dist.t()
    dist = paddle.addmm(input=dist,
                        x=inputs,
                        y=inputs.t(),
                        alpha=-2.0,
                        beta=1.0)
    dist = paddle.clip(dist, min=1e-12).sqrt()

    # hard negative mining
    is_pos = paddle.expand(target, (bs, bs)).equal(
        paddle.expand(target, (bs, bs)).t())
    is_neg = paddle.expand(target, (bs, bs)).not_equal(
        paddle.expand(target, (bs, bs)).t())

    # `dist_ap` means distance(anchor, positive);
    # both `dist_ap` and `dist_an` have shape [N, 1]
    dist_ap = paddle.max(paddle.reshape(
        paddle.masked_select(dist, is_pos), (bs, -1)),
                         axis=1,
                         keepdim=True)
    # `dist_an` means distance(anchor, negative)
    dist_an = paddle.min(paddle.reshape(
        paddle.masked_select(dist, is_neg), (bs, -1)),
                         axis=1,
                         keepdim=True)

    # shape [N]
    dist_ap = paddle.squeeze(dist_ap, axis=1)
    dist_an = paddle.squeeze(dist_an, axis=1)

    # Compute ranking hinge loss
    y = paddle.ones_like(dist_an)
    loss = self.ranking_loss(dist_an, dist_ap, y)
    return {"TripletLossV2": loss}
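# Sanity sketch (added, not from the original source): the addmm-based
# pairwise distance above is the same ||x_i - x_j|| matrix that numpy
# broadcasting produces directly.
def _check_pairwise_dist_sketch():
    import numpy as np
    import paddle
    feats = paddle.rand([4, 16])
    bs = feats.shape[0]
    dist = paddle.pow(feats, 2).sum(axis=1, keepdim=True).expand([bs, bs])
    dist = dist + dist.t()
    dist = paddle.addmm(input=dist, x=feats, y=feats.t(),
                        alpha=-2.0, beta=1.0)
    dist = paddle.clip(dist, min=1e-12).sqrt()
    ref = np.linalg.norm(
        feats.numpy()[:, None, :] - feats.numpy()[None, :, :], axis=-1)
    assert np.allclose(dist.numpy(), ref, atol=1e-4)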
def forward(self, inputs, bc_index):
    inputs.stop_gradient = False
    outputs = self.net.nn_func(inputs)
    # eq_loss: norm of the Laplacian (sum of the diagonal Hessian entries)
    hes = Hessian(self.net.nn_func, inputs, is_batched=True)
    eq_loss = paddle.norm(hes[:, 0, 0] + hes[:, 1, 1], p=2)
    # bc_loss: network values at the boundary points
    bc_u = paddle.index_select(outputs, bc_index)
    return eq_loss, bc_u
def forward(self, inputs):
    """ forward """
    x = paddle.norm(inputs,
                    p=self.config["p"],
                    axis=self.config["axis"],
                    keepdim=self.config["keepdim"])
    return x
def run_pnorm(self, p, axis, shape_x, dtype):
    with fluid.program_guard(fluid.Program()):
        data = fluid.data(name="X", shape=shape_x, dtype=dtype)
        out = paddle.norm(input=data, p=p, axis=axis)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        np_input = (np.random.rand(*shape_x) + 1.0).astype(dtype)
        expected_result = p_norm(np_input, porder=p, axis=axis).astype(dtype)
        result, = exe.run(feed={"X": np_input}, fetch_list=[out])
        self.assertEqual((np.abs(result - expected_result) < 1e-6).all(),
                         True)
def _build_volume_2d(self, feat_l, feat_r, maxdisp, stride=1):
    """ output full disparity map, L1 distance-based cost """
    assert maxdisp % stride == 0
    cost = paddle.zeros(
        (feat_l.shape[0], maxdisp // stride, feat_l.shape[2],
         feat_l.shape[3]),
        dtype='float32')
    cost.stop_gradient = False
    for i in range(0, maxdisp, stride):
        if i > 0:
            # occlusion regions
            cost[:, i // stride, :, :i] = feat_l[:, :, :, :i].abs().sum(
                axis=1)
            cost[:, i // stride, :, i:] = paddle.norm(
                feat_l[:, :, :, i:] - feat_r[:, :, :, :-i], 1, 1)
        else:
            cost[:, i // stride, :, i:] = paddle.norm(
                feat_l[:, :, :, :] - feat_r[:, :, :, :], 1, 1)
    return cost
def run_out(self, p, axis, shape_x, shape_y, dtype):
    with fluid.program_guard(fluid.Program()):
        data1 = fluid.data(name="X", shape=shape_x, dtype=dtype)
        data2 = fluid.data(name="Y", shape=shape_y, dtype=dtype)
        out = paddle.norm(input=data1, p=p, axis=axis, out=data2)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        result = exe.run(feed={"X": np.random.rand(*shape_x).astype(dtype)},
                         fetch_list=[data2, out])
        self.assertEqual((result[0] == result[1]).all(), True)
def attack(self, epsilon=1., emb_name='emb'):
    # emb_name should be replaced with the name of the embedding
    # parameter in your model
    for name, param in self.model.named_parameters():
        if not param.stop_gradient and emb_name in name:  # trainable and in scope
            self.backup[name] = param.numpy()  # back up the original values
            grad_tensor = paddle.to_tensor(
                param.grad)  # param.grad is a numpy array
            norm = paddle.norm(grad_tensor)  # gradient norm
            if norm != 0:
                r_at = epsilon * grad_tensor / norm
                # add the gradient-direction perturbation to the embedding;
                # note: param.add() is not in-place, so write back explicitly
                param.set_value(param + r_at)
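# Typical driver loop for the attack above (a sketch; `restore`, the
# counterpart method that writes self.backup back into the parameters, is
# assumed here since only `attack` appears in this snippet).
def fgm_train_step_sketch(model, fgm, batch, optimizer):
    loss = model(batch)
    loss.backward()          # gradients on the clean batch
    fgm.attack()             # perturb embedding weights along the gradient
    loss_adv = model(batch)
    loss_adv.backward()      # accumulate adversarial gradients
    fgm.restore()            # roll embedding weights back
    optimizer.step()
    optimizer.clear_grad()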
def forward(self, x):
    x = self.conv1(x)
    x = F.relu(x)
    x = self.conv2(x)
    x = F.relu(x)
    x = self.conv3(x)
    x = F.relu(x)
    x = self.gloabl_pool(x)
    x = paddle.squeeze(x, axis=[2, 3])
    x = self.fc1(x)
    # L2-normalize the embedding
    x = x / paddle.norm(x, axis=1, keepdim=True)
    return x
def build_P_hat_paddle(self, C, P):
    F = self.F
    eps = self.eps
    n = P.shape[0]
    P_tile = paddle.tile(paddle.unsqueeze(P, axis=1), (1, F, 1))
    C_tile = paddle.unsqueeze(C, axis=0)
    P_diff = P_tile - C_tile
    rbf_norm = paddle.norm(P_diff, p=2, axis=2, keepdim=False)
    rbf = paddle.multiply(paddle.square(rbf_norm),
                          paddle.log(rbf_norm + eps))
    P_hat = paddle.concat([paddle.ones((n, 1)), P, rbf], axis=1)
    return P_hat
def make_program_serial():
    main_program = paddle.fluid.Program()
    start_program = paddle.fluid.Program()
    with paddle.static.program_guard(main_program, start_program):
        x = paddle.static.data(name='x', shape=[4, 5, 6], dtype='float32')
        x.stop_gradient = False
        auto.shard_tensor(x,
                          dist_attr={
                              "process_mesh": auto.ProcessMesh([0]),
                              "dims_mapping": [-1, -1, -1]
                          })
        tmp_0 = paddle.norm(x, p=2)
    return main_program, start_program, tmp_0
def __init__(self, in_features, out_features, m=0.35, s=30.0):
    super(Am_softmax, self).__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.m = m
    self.s = s
    weight_arr = paddle.ParamAttr(
        initializer=paddle.nn.initializer.XavierUniform())
    self.linear = paddle.nn.Linear(in_features,
                                   out_features,
                                   weight_attr=weight_arr)
    # NOTE: this line appears intended to renormalize the weight rows
    # (cf. torch's weight.renorm_(2, 1, 1e-5).mul_(1e5)); as written,
    # assigning to `weight.Tensor` does not modify the weight in place.
    self.linear.weight.Tensor = paddle.norm(
        self.linear.weight, p=2, axis=1).clip(max=1e-5) * 1e5
def get_score(self, head, rel, tail):
    re_head, im_head = paddle.chunk(head, chunks=2, axis=-1)
    re_tail, im_tail = paddle.chunk(tail, chunks=2, axis=-1)

    # map the relation embedding to a phase, i.e. a rotation in the
    # complex plane
    phase_rel = rel / (self.emb_init / np.pi)
    re_rel, im_rel = paddle.cos(phase_rel), paddle.sin(phase_rel)
    re_score = re_rel * re_tail + im_rel * im_tail
    im_score = re_rel * im_tail - im_rel * re_tail
    re_score = re_score - re_head
    im_score = im_score - im_head

    score = paddle.stack([re_score, im_score], axis=0)
    score = self.gamma - paddle.sum(paddle.norm(score, p=2, axis=0),
                                    axis=-1)
    return score
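# Sanity sketch (added): with phase 0 the relation is the identity rotation,
# so head == tail leaves a zero residual and get_score returns gamma exactly.
def _rotate_identity_sketch():
    import numpy as np
    h = np.array([0.6, 0.8, 0.0, 0.0])  # re = [0.6, 0.8], im = [0.0, 0.0]
    re_h, im_h = h[:2], h[2:]
    re_t, im_t = re_h.copy(), im_h.copy()
    phase = np.zeros(2)  # identity rotation
    re_r, im_r = np.cos(phase), np.sin(phase)
    re_score = re_r * re_t + im_r * im_t - re_h
    im_score = re_r * im_t - im_r * re_t - im_h
    residual = np.sqrt(re_score**2 + im_score**2).sum()
    assert np.isclose(residual, 0.0)  # score == gamma - 0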
def cal_gradient_penalty(netD,
                         real_data,
                         fake_data,
                         edge_data=None,
                         type='mixed',
                         constant=1.0,
                         lambda_gp=10.0):
    if lambda_gp > 0.0:
        # either use real images, fake images, or a linear interpolation of the two
        if type == 'real':
            interpolatesv = real_data
        elif type == 'fake':
            interpolatesv = fake_data
        elif type == 'mixed':
            alpha = paddle.rand((real_data.shape[0], 1))
            # note: the original expanded to [1, ...], which cannot broadcast
            # a [bs, 1] tensor; [bs, ...] matches the reshape below
            alpha = paddle.expand(alpha, [
                real_data.shape[0],
                np.prod(real_data.shape) // real_data.shape[0]
            ])
            alpha = paddle.reshape(alpha, real_data.shape)
            interpolatesv = alpha * real_data + ((1 - alpha) * fake_data)
        else:
            raise NotImplementedError('{} not implemented'.format(type))

        # interpolatesv.requires_grad_(True)
        interpolatesv.stop_gradient = False
        real_data.stop_gradient = True
        fake_AB = paddle.concat((real_data.detach(), interpolatesv), 1)
        disc_interpolates = netD(fake_AB)

        # FIXME: use paddle.ones
        outs = paddle.fill_constant(disc_interpolates.shape,
                                    disc_interpolates.dtype, 1.0)
        gradients = paddle.imperative.grad(
            outputs=disc_interpolates,
            inputs=fake_AB,
            grad_outputs=outs,  # paddle.ones(list(disc_interpolates.shape)),
            create_graph=True,
            retain_graph=True,
            only_inputs=True,
            # no_grad_vars=set(netD.parameters())
        )

        gradients = paddle.reshape(gradients[0],
                                   [real_data.shape[0], -1])  # flatten the data
        gradient_penalty = paddle.reduce_mean(
            (paddle.norm(gradients + 1e-16, 2, 1) - constant)**
            2) * lambda_gp  # added eps
        return gradient_penalty, gradients
    else:
        return 0.0, None
def run_fro(self, p, axis, shape_x, dtype, keep_dim, check_dim=False):
    with fluid.program_guard(fluid.Program()):
        data = fluid.data(name="X", shape=shape_x, dtype=dtype)
        out = paddle.norm(x=data, p=p, axis=axis, keepdim=keep_dim)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        np_input = (np.random.rand(*shape_x) + 1.0).astype(dtype)
        expected_result = frobenius_norm(np_input,
                                         axis=axis,
                                         keepdims=keep_dim)
        result, = exe.run(feed={"X": np_input}, fetch_list=[out])
        self.assertEqual((np.abs(result - expected_result) < 1e-6).all(),
                         True)
        if keep_dim and check_dim:
            self.assertEqual((np.abs(
                np.array(result.shape) -
                np.array(expected_result.shape)) < 1e-6).all(), True)
def build_P_hat_paddle(self, C, P):
    F = self.F
    eps = self.eps
    n = P.shape[0]  # n (= self.I_r_width x self.I_r_height)
    # P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
    P_tile = paddle.tile(paddle.unsqueeze(P, axis=1), (1, F, 1))
    C_tile = paddle.unsqueeze(C, axis=0)  # 1 x F x 2
    P_diff = P_tile - C_tile  # n x F x 2
    # rbf_norm: n x F
    rbf_norm = paddle.norm(P_diff, p=2, axis=2, keepdim=False)
    # rbf: n x F
    rbf = paddle.multiply(paddle.square(rbf_norm),
                          paddle.log(rbf_norm + eps))
    P_hat = paddle.concat([paddle.ones((n, 1)), P, rbf], axis=1)
    return P_hat  # n x F+3
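# Downstream use (a sketch of the standard TPS grid step, not from the
# original source): the sampling grid is simply P_hat @ T, pairing the
# n x (F+3) design matrix above with the (F+3) x 2 parameters T.
def _grid_from_P_hat_sketch(P_hat, T):
    import paddle
    return paddle.matmul(P_hat, T)  # [n, 2] sampling locations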
def forward(self, item_his_emb, seq_len):
    """forward

    Args:
        item_his_emb: [B, seqlen, dim]
        seq_len: [B, 1]
    """
    batch_size = item_his_emb.shape[0]
    seq_len_tile = paddle.tile(seq_len, [1, self.k_max])
    mask = self.sequence_mask(seq_len_tile, self.maxlen)
    pad = paddle.ones_like(mask, dtype="float32") * (-2**32 + 1)

    # S*e
    low_capsule_new = paddle.matmul(item_his_emb,
                                    self.bilinear_mapping_matrix)
    low_capsule_new_nograd = paddle.assign(low_capsule_new)
    low_capsule_new_nograd.stop_gradient = True

    B = paddle.tile(self.routing_logits,
                    [paddle.shape(item_his_emb)[0], 1, 1])

    for i in range(self.iters - 1):
        B_mask = paddle.where(mask, B, pad)
        # print(B_mask)
        W = F.softmax(B_mask, axis=1)
        high_capsule_tmp = paddle.matmul(W, low_capsule_new_nograd)
        high_capsule = self.squash(high_capsule_tmp)
        B_delta = paddle.matmul(high_capsule,
                                low_capsule_new_nograd,
                                transpose_y=True)
        B += B_delta / paddle.maximum(
            paddle.norm(B_delta, p=2, axis=-1, keepdim=True),
            paddle.ones_like(B_delta))

    B_mask = paddle.where(mask, B, pad)
    W = F.softmax(B_mask, axis=1)
    # paddle.static.Print(W)
    high_capsule_tmp = paddle.matmul(W, low_capsule_new)
    # high_capsule_tmp.stop_gradient = False
    high_capsule = self.squash(high_capsule_tmp)
    # high_capsule.stop_gradient = False

    return high_capsule, W, seq_len
def cal_gradient_penalty(self,
                         netD,
                         real_data,
                         fake_data,
                         edge_data=None,
                         type='mixed',
                         constant=1.0,
                         lambda_gp=10.0):
    if lambda_gp > 0.0:
        if type == 'real':
            interpolatesv = real_data
        elif type == 'fake':
            interpolatesv = fake_data
        elif type == 'mixed':
            alpha = paddle.rand((real_data.shape[0], 1))
            alpha = paddle.expand(alpha, [
                real_data.shape[0],
                np.prod(real_data.shape) // real_data.shape[0]
            ])
            alpha = paddle.reshape(alpha, real_data.shape)
            interpolatesv = alpha * real_data + ((1 - alpha) * fake_data)
        else:
            raise NotImplementedError('{} not implemented'.format(type))

        interpolatesv.stop_gradient = False
        real_data.stop_gradient = True
        fake_AB = paddle.concat((real_data.detach(), interpolatesv), 1)
        disc_interpolates = netD(fake_AB)

        outs = paddle.fluid.layers.fill_constant(disc_interpolates.shape,
                                                 disc_interpolates.dtype,
                                                 1.0)
        gradients = paddle.grad(outputs=disc_interpolates,
                                inputs=fake_AB,
                                grad_outputs=outs,
                                create_graph=True,
                                retain_graph=True,
                                only_inputs=True)

        gradients = paddle.reshape(gradients[0], [real_data.shape[0], -1])
        gradient_penalty = paddle.mean(
            (paddle.norm(gradients + 1e-16, 2, 1) - constant)**
            2) * lambda_gp  # added eps
        return gradient_penalty, gradients
    else:
        return 0.0, None
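# Minimal runnable sketch (added; the one-layer "discriminator" and feature
# dim are illustrative assumptions) exercising the mixed-interpolation
# gradient penalty logic above in free-standing form.
def _gradient_penalty_sketch():
    import paddle
    netD = paddle.nn.Linear(8, 1)  # input dim doubles after the concat below
    real, fake = paddle.rand([4, 4]), paddle.rand([4, 4])
    alpha = paddle.rand([4, 1]).expand([4, 4])
    interp = alpha * real + (1 - alpha) * fake
    interp.stop_gradient = False
    fake_AB = paddle.concat([real.detach(), interp], axis=1)
    d_out = netD(fake_AB)
    grads = paddle.grad(outputs=d_out,
                        inputs=fake_AB,
                        grad_outputs=paddle.ones_like(d_out),
                        create_graph=True,
                        retain_graph=True,
                        only_inputs=True)
    g = paddle.reshape(grads[0], [4, -1])
    return paddle.mean((paddle.norm(g + 1e-16, 2, 1) - 1.0)**2) * 10.0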