def _test(self, run_npu=True):
    main_prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    main_prog.random_seed = SEED
    startup_prog.random_seed = SEED
    np.random.seed(SEED)

    a_np = np.random.uniform(1, 2, [32, 32]).astype('float32')
    b_np = np.random.uniform(1, 2, [32, 32]).astype('float32')
    c_np = np.random.uniform(1, 2, [32, 32]).astype('float32')
    d_np = np.random.uniform(1, 2, [32, 32]).astype('float32')
    label_np = np.random.randint(2, size=(32, 1)).astype('int64')

    with paddle.static.program_guard(main_prog, startup_prog):
        a = paddle.static.data(name="a", shape=[32, 32], dtype='float32')
        b = paddle.static.data(name="b", shape=[32, 32], dtype='float32')
        c = paddle.static.data(name="c", shape=[32, 32], dtype='float32')
        d = paddle.static.data(name="d", shape=[32, 32], dtype='float32')
        label = paddle.static.data(
            name="label", shape=[32, 1], dtype='int64')

        e = paddle.multiply(a, b)
        f = paddle.multiply(c, d)
        f.stop_gradient = True
        g = fluid.layers.elementwise_div(e, f)

        fc_1 = fluid.layers.fc(input=g, size=128)
        prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')

        cost = fluid.layers.cross_entropy(input=prediction, label=label)
        loss = fluid.layers.reduce_mean(cost)
        sgd = fluid.optimizer.SGD(learning_rate=0.01)
        sgd.minimize(loss)

    if run_npu:
        place = paddle.NPUPlace(0)
    else:
        place = paddle.CPUPlace()

    exe = paddle.static.Executor(place)
    exe.run(startup_prog)

    print("Start run on {}".format(place))
    for epoch in range(100):
        pred_res, loss_res = exe.run(main_prog,
                                     feed={
                                         "a": a_np,
                                         "b": b_np,
                                         "c": c_np,
                                         "d": d_np,
                                         "label": label_np
                                     },
                                     fetch_list=[prediction, loss])
        if epoch % 10 == 0:
            print("Epoch {} | Prediction[0]: {}, Loss: {}".format(
                epoch, pred_res[0], loss_res))

    return pred_res, loss_res
def forward(self, x):
    x = self.conv1(x)
    if self.radix > 1:
        splited = paddle.split(x, num_or_sections=self.radix, axis=1)
        gap = paddle.add_n(splited)
    else:
        gap = x
    gap = self.avg_pool2d(gap)
    gap = self.conv2(gap)
    atten = self.conv3(gap)
    atten = self.rsoftmax(atten)
    if self.radix > 1:
        attens = paddle.split(atten, num_or_sections=self.radix, axis=1)
        y = paddle.add_n([
            paddle.multiply(split, att)
            for (att, split) in zip(attens, splited)
        ])
    else:
        y = paddle.multiply(x, atten)
    return y
def test_third_order(self):
    enable_prim()
    main = paddle.static.Program()
    startup = paddle.static.Program()
    with paddle.static.program_guard(main, startup):
        x = paddle.static.data(name='x', shape=[1], dtype='float32')
        x2 = paddle.multiply(x, x)
        x3 = paddle.multiply(x2, x)
        x4 = paddle.multiply(x3, x)

        grad1, = paddle.static.gradients([x4], [x])
        grad2, = paddle.static.gradients([grad1], [x])
        grad3, = paddle.static.gradients([grad2], [x])

        prim2orig(main.block(0))

    feed = {x.name: np.array([2.]).astype('float32')}
    fetch_list = [grad3.name]
    # d^3/dx^3 (x^4) = 24*x, so the expected value at x = 2 is 48.
    result = [np.array([48.])]

    place = paddle.CPUPlace()
    if paddle.device.is_compiled_with_cuda():
        place = paddle.CUDAPlace(0)
    exe = paddle.static.Executor(place)
    exe.run(startup)
    outs = exe.run(main, feed=feed, fetch_list=fetch_list)
    # A bare np.allclose discards its result; assert it so the test can fail.
    self.assertTrue(np.allclose(outs, result))
    disable_prim()
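# A hedged dynamic-graph sketch of the same check (assumption: plain Paddle
# 2.x imperative mode, no prim ops): for y = x^4 the third derivative is
# 24*x, so 48.0 at x = 2, matching `result` above.
import paddle

x = paddle.to_tensor([2.0], stop_gradient=False)
y = x * x * x * x                               # x^4, built from multiplies
g1 = paddle.grad(y, x, create_graph=True)[0]    # 4*x^3  -> 32
g2 = paddle.grad(g1, x, create_graph=True)[0]   # 12*x^2 -> 48
g3 = paddle.grad(g2, x)[0]                      # 24*x   -> 48
print(float(g3))  # 48.0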
def test_fourth_order(self):
    enable_prim()
    main = paddle.static.Program()
    startup = paddle.static.Program()
    with paddle.static.program_guard(main, startup):
        x = paddle.static.data(name='x', shape=[1], dtype='float32')
        x2 = paddle.multiply(x, x)
        x3 = paddle.multiply(x2, x)
        x4 = paddle.multiply(x3, x)
        x5 = paddle.multiply(x4, x)
        out = paddle.sqrt(x5 + x4)

        grad1, = paddle.static.gradients([out], [x])
        grad2, = paddle.static.gradients([grad1], [x])
        grad3, = paddle.static.gradients([grad2], [x])
        grad4, = paddle.static.gradients([grad3], [x])

        prim2orig(main.block(0))

    feed = {
        x.name: np.array([2.]).astype('float32'),
    }
    fetch_list = [grad4.name]
    # 4th derivative of sqrt(x^5 + x^4):
    # (3*(-5*x^2 - 16*x - 16)) / (16*(x + 1)^3.5), ~ -0.2726 at x = 2
    result = [np.array([-0.27263762711])]

    place = paddle.CPUPlace()
    if paddle.device.is_compiled_with_cuda():
        place = paddle.CUDAPlace(0)
    exe = paddle.static.Executor(place)
    exe.run(startup)
    outs = exe.run(main, feed=feed, fetch_list=fetch_list)
    # A bare np.allclose discards its result; assert it so the test can fail.
    self.assertTrue(np.allclose(outs, result))
    disable_prim()
def forward(self, input, mask=None):
    """
    Args:
        input (obj: `paddle.Tensor`) of shape (batch, seq_len, input_size):
            Tensor containing the features of the input sequence.
        mask (obj: `paddle.Tensor`, optional, defaults to `None`) of shape (batch, seq_len):
            Bool tensor, each element of which indicates whether the
            corresponding input token is a pad token.
    """
    forward_input, backward_input = paddle.chunk(input, chunks=2, axis=2)
    # Elementwise sum of forward_x and backward_x.
    # Shape: (batch_size, max_seq_len, hidden_size)
    h = paddle.add_n([forward_input, backward_input])
    # Shape: (batch_size, hidden_size, 1)
    att_weight = self.att_weight.tile(
        repeat_times=(paddle.shape(h)[0], 1, 1))
    # Shape: (batch_size, max_seq_len, 1)
    att_score = paddle.bmm(paddle.tanh(h), att_weight)
    if mask is not None:
        # Mask out the 'PAD' positions.
        mask = paddle.cast(mask, dtype='float32')
        mask = mask.unsqueeze(axis=-1)
        inf_tensor = paddle.full(
            shape=mask.shape, dtype='float32', fill_value=-INF)
        att_score = paddle.multiply(att_score, mask) + paddle.multiply(
            inf_tensor, (1 - mask))
    # Shape: (batch_size, max_seq_len, 1)
    att_weight = F.softmax(att_score, axis=1)
    # Shape: (batch_size, lstm_hidden_size)
    reps = paddle.bmm(h.transpose(perm=(0, 2, 1)),
                      att_weight).squeeze(axis=-1)
    reps = paddle.tanh(reps)

    return reps, att_weight
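# Hedged numeric sketch of the masking trick above: padded positions are
# replaced by a large negative value so softmax assigns them ~0 weight.
# INF here is a stand-in for the module-level constant (assumed ~1e6).
import paddle
import paddle.nn.functional as F

INF = 1e6
att_score = paddle.to_tensor([[[2.0], [1.0], [3.0]]])     # (1, seq_len=3, 1)
mask = paddle.to_tensor([[1.0, 1.0, 0.0]]).unsqueeze(-1)  # last token is PAD
inf_tensor = paddle.full(shape=mask.shape, dtype='float32', fill_value=-INF)
masked = paddle.multiply(att_score, mask) + paddle.multiply(
    inf_tensor, 1 - mask)
print(F.softmax(masked, axis=1).numpy().squeeze())  # ~[0.73, 0.27, 0.00]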
def channel_attention(self, *channel_embeddings):
    """
    channel_embeddings_i: (num_user, emb_size)
    attention_mat: (emb_size, emb_size)
    attention: (1, emb_size)
    """
    weights = []
    for embedding in channel_embeddings:
        # (num_user, emb_size) @ (emb_size, emb_size) = (num_user, emb_size),
        # then elementwise * (1, emb_size) and sum over axis 1 -> (num_user,)
        weights.append(
            paddle.sum(
                paddle.multiply(
                    paddle.matmul(embedding, self.weights["attention_mat"]),
                    self.weights["attention"]),
                1))
    t = paddle.stack(weights)
    # (num_user, channel_num)
    score = F.softmax(paddle.transpose(t, perm=[1, 0]))
    mixed_embeddings = 0.0
    for i in range(len(weights)):
        # (emb_size, num_user) * (num_user,) -> (emb_size, num_user),
        # transposed back to (num_user, emb_size)
        mixed_embeddings += paddle.transpose(
            paddle.multiply(
                paddle.transpose(channel_embeddings[i], perm=[1, 0]),
                paddle.transpose(score, perm=[1, 0])[i]),
            perm=[1, 0])
    return mixed_embeddings, score
def forward(self, input, mask=None):
    """
    Args:
        input (obj: `paddle.Tensor`) of shape (batch, seq_len, input_size):
            Tensor containing the features of the input sequence.
        mask (obj: `paddle.Tensor`, optional, defaults to `None`) of shape (batch, seq_len):
            Bool tensor, each element of which indicates whether the
            corresponding input token is a pad token.
    """
    weight = self.input_weight.tile(
        repeat_times=(paddle.shape(input)[0], 1, 1))
    bias = self.bias.tile(repeat_times=(paddle.shape(input)[0], 1, 1))
    # Shape: (batch_size, max_seq_len, hidden_size)
    word_squish = paddle.bmm(input, weight) + bias

    att_context_vector = self.att_context_vector.tile(
        repeat_times=(paddle.shape(input)[0], 1, 1))
    # Shape: (batch_size, max_seq_len, 1)
    att_score = paddle.bmm(word_squish, att_context_vector)
    if mask is not None:
        # Mask out the 'PAD' positions.
        mask = paddle.cast(mask, dtype='float32')
        mask = mask.unsqueeze(axis=-1)
        inf_tensor = paddle.full(
            shape=paddle.shape(mask), dtype='float32', fill_value=-INF)
        att_score = paddle.multiply(att_score, mask) + paddle.multiply(
            inf_tensor, (1 - mask))
    att_weight = F.softmax(att_score, axis=1)

    # Shape: (batch_size, hidden_size)
    reps = paddle.bmm(input.transpose(perm=(0, 2, 1)),
                      att_weight).squeeze(-1)

    return reps, att_weight
def forward(self, sparse_inputs, dense_inputs):
    # -------------------- first order term --------------------
    sparse_inputs_concat = paddle.concat(sparse_inputs, axis=1)
    sparse_emb_one = self.embedding_one(sparse_inputs_concat)

    dense_emb_one = paddle.multiply(dense_inputs, self.dense_w_one)
    dense_emb_one = paddle.unsqueeze(dense_emb_one, axis=2)

    y_first_order = paddle.sum(sparse_emb_one, 1) + paddle.sum(
        dense_emb_one, 1)

    # -------------------- second order term --------------------
    sparse_embeddings = self.embedding(sparse_inputs_concat)
    dense_inputs_re = paddle.unsqueeze(dense_inputs, axis=2)
    dense_embeddings = paddle.multiply(dense_inputs_re, self.dense_w)
    feat_embeddings = paddle.concat([sparse_embeddings, dense_embeddings],
                                    1)

    # sum_square part
    summed_features_emb = paddle.sum(feat_embeddings,
                                     1)  # None * embedding_size
    summed_features_emb_square = paddle.square(
        summed_features_emb)  # None * embedding_size

    # square_sum part
    squared_features_emb = paddle.square(
        feat_embeddings)  # None * num_field * embedding_size
    squared_sum_features_emb = paddle.sum(squared_features_emb,
                                          1)  # None * embedding_size

    y_second_order = 0.5 * paddle.sum(
        summed_features_emb_square - squared_sum_features_emb,
        1,
        keepdim=True)  # None * 1

    return y_first_order, y_second_order, feat_embeddings
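# Sketch verifying the O(n*k) FM identity used above (shapes assumed):
# sum_{i<j} <v_i, v_j> == 0.5 * ((sum_i v_i)^2 - sum_i v_i^2), summed over k.
import itertools
import paddle

emb = paddle.rand([2, 5, 4])  # (batch, num_field, embedding_size)
summed_sq = paddle.square(paddle.sum(emb, 1))
sq_summed = paddle.sum(paddle.square(emb), 1)
y_fast = 0.5 * paddle.sum(summed_sq - sq_summed, 1, keepdim=True)

y_naive = 0
for i, j in itertools.combinations(range(5), 2):
    y_naive += paddle.sum(emb[:, i] * emb[:, j], 1, keepdim=True)
print(paddle.allclose(y_fast, y_naive))  # True (up to float error)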
def forward(self, input, label=None):
    input_norm = paddle.sqrt(
        paddle.sum(paddle.square(input), axis=1, keepdim=True))
    input = paddle.divide(input, input_norm)

    weight_norm = paddle.sqrt(
        paddle.sum(paddle.square(self.weight), axis=0, keepdim=True))
    weight = paddle.divide(self.weight, weight_norm)

    cos = paddle.matmul(input, weight)
    if not self.training or label is None:
        return cos

    sin = paddle.sqrt(1.0 - paddle.square(cos) + 1e-6)
    cos_m = math.cos(self.margin)
    sin_m = math.sin(self.margin)
    phi = cos * cos_m - sin * sin_m

    th = math.cos(self.margin) * (-1)
    mm = math.sin(self.margin) * self.margin
    if self.easy_margin:
        phi = self._paddle_where_more_than(cos, 0, phi, cos)
    else:
        phi = self._paddle_where_more_than(cos, th, phi, cos - mm)

    one_hot = paddle.nn.functional.one_hot(label, self.class_num)
    one_hot = paddle.squeeze(one_hot, axis=[1])
    output = paddle.multiply(one_hot, phi) + paddle.multiply(
        (1.0 - one_hot), cos)
    output = output * self.scale
    return output
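# Minimal sketch of the one-hot blend at the end of the margin head above:
# only the logit of the ground-truth class is swapped from cos to phi.
# The 0.2 offset is a hypothetical stand-in for cos(theta + margin).
import paddle
import paddle.nn.functional as F

cos = paddle.to_tensor([[0.9, 0.1, 0.3]])
phi = cos - 0.2                          # stand-in for the margin-shifted cos
label = paddle.to_tensor([0])
one_hot = F.one_hot(label, num_classes=3)
out = paddle.multiply(one_hot, phi) + paddle.multiply(1.0 - one_hot, cos)
print(out.numpy())  # [[0.7, 0.1, 0.3]]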
def forward(self, inputs):
    # Deal with features of different lengths:
    # 1. pad them to the same length and build a tensor
    # 2. build a mask tensor of the same shape, with 1 for real data
    # 3. compute the output using the mask tensor, so the output is
    #    unaffected by the padding
    assert (len(inputs) == self.feature_num
            ), "Input tensor does not contain {} features".format(
                self.feature_num)
    att_outs = []
    for i in range(len(inputs)):
        ### 1. fc
        m = getattr(self, "fc_feature{}".format(i))
        output_fc = m(inputs[i][0])
        output_fc = paddle.tanh(output_fc)

        ### 2. bi_lstm
        m = getattr(self, "bi_lstm{}".format(i))
        lstm_out, _ = m(inputs=output_fc, sequence_length=inputs[i][1])

        lstm_dropout = self.dropout(lstm_out)

        ### 3. att_fc
        m = getattr(self, "att_fc{}".format(i))
        lstm_weight = m(lstm_dropout)

        ### 4. softmax replacement starts here; softmax itself cannot be
        ### used directly because the normalization runs over time steps
        lstm_exp = paddle.exp(lstm_weight)
        lstm_mask = paddle.mean(inputs[i][2], axis=2)
        lstm_exp_with_mask = paddle.multiply(
            x=lstm_exp, y=lstm_mask, axis=0)
        lstm_sum_with_mask = paddle.sum(lstm_exp_with_mask, axis=1)
        exponent = -1
        lstm_denominator = paddle.pow(lstm_sum_with_mask, exponent)
        lstm_softmax = paddle.multiply(
            x=lstm_exp, y=lstm_denominator, axis=0)
        lstm_weight = lstm_softmax
        ### softmax replacement ends here

        lstm_scale = paddle.multiply(x=lstm_dropout, y=lstm_weight, axis=0)

        ### 5. sequence_pool replacement starts here; sequence_pool itself
        ### cannot be used directly because the sum runs over time steps
        lstm_scale_with_mask = paddle.multiply(
            x=lstm_scale, y=lstm_mask, axis=0)
        fea_lens = inputs[i][1]
        fea_len = int(fea_lens[0])
        lstm_pool = paddle.sum(lstm_scale_with_mask, axis=1)
        ### sequence_pool replacement ends here
        att_outs.append(lstm_pool)
    att_out = paddle.concat(att_outs, axis=1)
    fc_out1 = self.fc_out1(att_out)
    fc_out1_act = self.relu(fc_out1)
    fc_out2 = self.fc_out2(fc_out1_act)
    fc_out2_act = paddle.tanh(fc_out2)
    fc_logit = self.fc_logit(fc_out2_act)
    output = self.sigmoid(fc_logit)
    return fc_logit, output
def __measure_parameterized(self, state, which_qubits, result_desired,
                            theta):
    r"""Perform a parameterized measurement.

    Args:
        state (Tensor): input quantum state
        which_qubits (list): indices of the qubits the measurement acts on
        result_desired (str): desired measurement outcome
        theta (Tensor): parameters of the measurement operation

    Returns:
        Tensor: the collapsed quantum state after measurement
        Tensor: the probability of obtaining the desired outcome
        str: the measurement outcome
    """
    n = self.get_qubit_number()
    assert len(which_qubits) == len(result_desired), \
        "the number of qubits to measure must match the length of result_desired"
    op_list = [paddle.to_tensor(np.eye(2, dtype=np.complex128))] * n
    for idx in range(0, len(which_qubits)):
        i = which_qubits[idx]
        ele = result_desired[idx]
        if int(ele) == 0:
            basis0 = paddle.to_tensor(
                np.array([[1, 0], [0, 0]], dtype=np.complex128))
            basis1 = paddle.to_tensor(
                np.array([[0, 0], [0, 1]], dtype=np.complex128))
            rho0 = multiply(basis0, cos(theta[idx]))
            rho1 = multiply(basis1, sin(theta[idx]))
            rho = add(rho0, rho1)
            op_list[i] = rho
        elif int(ele) == 1:
            # rho = diag(concat([cos(theta[idx]), sin(theta[idx])]))
            # rho = paddle.to_tensor(rho, zeros((2, 2), dtype="float64"))
            basis0 = paddle.to_tensor(
                np.array([[1, 0], [0, 0]], dtype=np.complex128))
            basis1 = paddle.to_tensor(
                np.array([[0, 0], [0, 1]], dtype=np.complex128))
            rho0 = multiply(basis0, sin(theta[idx]))
            rho1 = multiply(basis1, cos(theta[idx]))
            rho = add(rho0, rho1)
            op_list[i] = rho
        else:
            print("cannot recognize the result_desired.")
            # rho = paddle.to_tensor(ones((2, 2), dtype="float64"), zeros((2, 2), dtype="float64"))
    measure_operator = paddle.to_tensor(op_list[0])
    if n > 1:
        for idx in range(1, len(op_list)):
            measure_operator = kron(measure_operator, op_list[idx])
    state_measured = matmul(matmul(measure_operator, state),
                            dagger(measure_operator))
    prob = real(
        trace(
            matmul(matmul(dagger(measure_operator), measure_operator),
                   state)))
    state_measured = divide(state_measured, prob)
    return state_measured, prob, result_desired
def forward(self, inputs):
    fields_wise_embeds_list = inputs

    # MF module
    field_wise_vectors = paddle.concat(
        [
            paddle.sum(fields_i_vectors, axis=1, keepdim=True)
            for fields_i_vectors in fields_wise_embeds_list
        ], 1)
    left = []
    right = []
    for i, j in itertools.combinations(list(range(self.num_fields)), 2):
        left.append(i)
        right.append(j)
    left = paddle.to_tensor(left)
    right = paddle.to_tensor(right)
    embeddings_left = paddle.gather(field_wise_vectors, index=left, axis=1)
    embeddings_right = paddle.gather(
        field_wise_vectors, index=right, axis=1)
    embeddings_prod = paddle.multiply(embeddings_left, embeddings_right)
    field_weighted_embedding = paddle.multiply(embeddings_prod,
                                               self.kernel_mf)
    h_mf = paddle.sum(field_weighted_embedding, axis=1)
    if self.use_bias:
        h_mf = h_mf + self.bias_mf

    # FM module
    square_of_sum_list = [
        paddle.square(paddle.sum(field_i_vectors, axis=1, keepdim=True))
        for field_i_vectors in fields_wise_embeds_list
    ]
    sum_of_square_list = [
        paddle.sum(paddle.multiply(field_i_vectors, field_i_vectors),
                   axis=1,
                   keepdim=True)
        for field_i_vectors in fields_wise_embeds_list
    ]
    field_fm = paddle.concat([
        square_of_sum - sum_of_square
        for square_of_sum, sum_of_square in zip(square_of_sum_list,
                                                sum_of_square_list)
    ], 1)
    h_fm = paddle.sum(paddle.multiply(field_fm, self.kernel_fm), axis=1)
    if self.use_bias:
        h_fm = h_fm + self.bias_fm

    # NOTE: only the MF half is returned here; h_fm is computed but unused.
    return h_mf
def _cross_layer(self, input_0, input_x):
    input_w = paddle.multiply(input_x, self.layer_w)
    input_w1 = paddle.sum(input_w, axis=1, keepdim=True)
    input_ww = paddle.multiply(input_0, input_w1)
    input_layer_0 = paddle.add(input_ww, self.layer_b)
    input_layer = paddle.add(input_layer_0, input_x)
    return input_layer, input_w
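# Shape sketch for _cross_layer, assuming input_0/input_x are (batch, dim)
# and layer_w/layer_b are (dim,): the step computes the DCN-style update
# x_{l+1} = x_0 * <x_l, w> + b + x_l, with <x_l, w> a per-sample scalar.
import paddle

x0 = paddle.rand([3, 4])   # hypothetical stand-ins for inputs and parameters
x = paddle.rand([3, 4])
w = paddle.rand([4])
b = paddle.rand([4])

xw = paddle.sum(paddle.multiply(x, w), axis=1, keepdim=True)   # (3, 1)
out = paddle.add(paddle.add(paddle.multiply(x0, xw), b), x)    # (3, 4)
ref = paddle.stack(
    [x0[i] * float(paddle.sum(x[i] * w)) + b + x[i] for i in range(3)])
print(paddle.allclose(out, ref))  # True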
def create_loss(self, outputs):
    user_emb, pos_item_emb, neg_item_emb, ss_loss = outputs
    score = paddle.sum(paddle.multiply(user_emb, pos_item_emb),
                       1) - paddle.sum(
                           paddle.multiply(user_emb, neg_item_emb), 1)
    rec_loss = -paddle.sum(paddle.log(F.sigmoid(score) + 10e-8))
    ss_loss = ss_loss * 0.01
    loss = rec_loss + ss_loss
    return loss, rec_loss, ss_loss
def bpr_loss(self, users, pos, neg):
    (users_emb, pos_emb, neg_emb, userEmb0, posEmb0,
     negEmb0) = self.getEmbedding(users.astype('int32'),
                                  pos.astype('int32'),
                                  neg.astype('int32'))
    reg_loss = (1 / 2) * (userEmb0.norm(2).pow(2) + posEmb0.norm(2).pow(2)
                          + negEmb0.norm(2).pow(2)) / float(len(users))
    pos_scores = paddle.multiply(users_emb, pos_emb)
    pos_scores = paddle.sum(pos_scores, axis=1)
    neg_scores = paddle.multiply(users_emb, neg_emb)
    neg_scores = paddle.sum(neg_scores, axis=1)

    loss = paddle.mean(
        paddle.nn.functional.softplus(neg_scores - pos_scores))

    return loss, reg_loss
def _send_func(self, src_feat, dst_feat, edge_feat=None):
    pairwise_analysis = self.lin1(
        paddle.multiply(src_feat["src"], dst_feat["dst"]))
    pairwise_analysis = self.activation(pairwise_analysis)
    pairwise_analysis = self.lin2(pairwise_analysis)

    if edge_feat is not None:
        edge_feat_ = paddle.reshape(edge_feat["e_attr"], [-1, 1])
        interaction_analysis = paddle.multiply(pairwise_analysis,
                                               edge_feat_)
    else:
        interaction_analysis = pairwise_analysis

    return {"msg": interaction_analysis}
def _dice_loss(self, input, target):
    input = fluid.layers.reshape(
        input, shape=(fluid.layers.shape(input)[0], -1))
    target = fluid.layers.reshape(
        target, shape=(fluid.layers.shape(target)[0], -1))
    target = fluid.layers.cast(target, 'float32')
    a = fluid.layers.reduce_sum(paddle.multiply(input, target), dim=1)
    b = fluid.layers.reduce_sum(paddle.multiply(input, input), dim=1) + 0.001
    c = fluid.layers.reduce_sum(paddle.multiply(target, target), dim=1) + 0.001
    d = paddle.divide((2 * a), paddle.add(b, c))
    return 1 - d
def bpr_loss(self, users, pos, neg):
    users_emb, pos_emb, neg_emb = self.getEmbedding(
        users.astype('int32'), pos.astype('int32'), neg.astype('int32'))
    reg_loss = (1 / 2) * (users_emb.norm(2).pow(2) + pos_emb.norm(2).pow(2)
                          + neg_emb.norm(2).pow(2)) / float(len(users))
    pos_scores = paddle.multiply(users_emb, pos_emb)
    pos_scores = paddle.sum(pos_scores, axis=1)
    neg_scores = paddle.multiply(users_emb, neg_emb)
    neg_scores = paddle.sum(neg_scores, axis=1)

    loss = nn.LogSigmoid()(pos_scores - neg_scores)
    loss = -1 * paddle.mean(loss)

    return loss, reg_loss
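# Numeric side note, sketched: this loss and the softplus variant of
# bpr_loss earlier in this file are the same function, since
# -log(sigmoid(d)) == softplus(-d).
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

d = paddle.to_tensor([1.5, -0.3, 0.0])   # pos_scores - neg_scores
a = -1 * paddle.mean(nn.LogSigmoid()(d))
b = paddle.mean(F.softplus(-d))
print(paddle.allclose(a, b))  # True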
def forward(self, input_data):
    expert_outputs = []
    # task-specific expert part
    for i in range(0, self.task_num):
        for j in range(0, self.exp_per_task):
            # task i owns the flat slots [i * exp_per_task, (i+1) * exp_per_task),
            # matching the gating slice below
            linear_out = self._param_expert[i * self.exp_per_task + j](
                input_data[i])
            expert_output = F.relu(linear_out)
            expert_outputs.append(expert_output)
    # shared expert part
    for i in range(0, self.shared_num):
        linear_out = self._param_expert[self.exp_per_task * self.task_num
                                        + i](input_data[-1])
        expert_output = F.relu(linear_out)
        expert_outputs.append(expert_output)

    # task gate part
    outputs = []
    for i in range(0, self.task_num):
        cur_expert_num = self.exp_per_task + self.shared_num
        linear_out = self._param_gate[i](input_data[i])
        cur_gate = F.softmax(linear_out)
        cur_gate = paddle.reshape(cur_gate, [-1, cur_expert_num, 1])
        # f^{k}(x) = sum_{i=1}^{n}(g^{k}(x)_{i} * f_{i}(x))
        cur_experts = expert_outputs[i * self.exp_per_task:(
            i + 1) * self.exp_per_task] + expert_outputs[-int(
                self.shared_num):]
        expert_concat = paddle.concat(x=cur_experts, axis=1)
        expert_concat = paddle.reshape(
            expert_concat, [-1, cur_expert_num, self.expert_size])
        cur_gate_expert = paddle.multiply(x=expert_concat, y=cur_gate)
        cur_gate_expert = paddle.sum(x=cur_gate_expert, axis=1)
        outputs.append(cur_gate_expert)

    # shared gate
    if not self.if_last:
        cur_expert_num = self.task_num * self.exp_per_task + self.shared_num
        linear_out = self._param_gate_shared(input_data[-1])
        cur_gate = F.softmax(linear_out)
        cur_gate = paddle.reshape(cur_gate, [-1, cur_expert_num, 1])
        cur_experts = expert_outputs
        expert_concat = paddle.concat(x=cur_experts, axis=1)
        expert_concat = paddle.reshape(
            expert_concat, [-1, cur_expert_num, self.expert_size])
        cur_gate_expert = paddle.multiply(x=expert_concat, y=cur_gate)
        cur_gate_expert = paddle.sum(x=cur_gate_expert, axis=1)
        outputs.append(cur_gate_expert)

    return outputs
def forward(self, inputs):
    outputs = self.avg_pool(inputs)
    outputs = self.conv1(outputs)
    outputs = F.relu(outputs)
    outputs = self.conv2(outputs)
    outputs = F.hardsigmoid(outputs, slope=0.2, offset=0.5)
    return paddle.multiply(x=inputs, y=outputs)
def sequence_mask(seq_hidden, mask, mode='zero'):
    """
    Args:
        seq_hidden (Tensor): sequence features of shape (batch, seq_len, ...)
        mask (Tensor): 1 for un-mask tokens, and 0 for mask tokens.
        mode (str): zero/-inf/+inf

    Returns:
        Tensor: seq_hidden with masked positions set to 0, ~-inf or ~+inf.

    Raises:
        ValueError: if mode is not one of zero/-inf/+inf.
    """
    dtype = seq_hidden.dtype
    while len(mask.shape) < len(seq_hidden.shape):
        mask = mask.unsqueeze([-1])
    mask = mask.cast(dtype=dtype)

    masked = paddle.multiply(seq_hidden, mask)
    if mode == 'zero':
        return masked

    if mode == '-inf':
        scale_size = +1e5
    elif mode == '+inf':
        scale_size = -1e5
    else:
        raise ValueError(
            f'mask mode setting error. expect zero/-inf/+inf, but got {mode}')

    add_mask = paddle.scale(mask - 1, scale=scale_size)
    masked = paddle.add(masked, add_mask)
    return masked
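# Hypothetical usage sketch for sequence_mask above (tensor shapes assumed):
# zero out, or push toward -inf, the hidden states of padded positions.
import paddle

hidden = paddle.ones([2, 3, 4])                   # (batch, seq_len, size)
mask = paddle.to_tensor([[1, 1, 0], [1, 0, 0]])   # 1 = real token, 0 = PAD

zeroed = sequence_mask(hidden, mask, mode='zero')    # PAD rows become 0
neg_inf = sequence_mask(hidden, mask, mode='-inf')   # PAD rows ~ -1e5
print(zeroed[0, 2].numpy())   # [0. 0. 0. 0.]
print(neg_inf[0, 2].numpy())  # [-100000. -100000. -100000. -100000.]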
def forward(self, center_words, target_words, label=None):
    """Defines the forward computation of the network.

    center_words is a tensor (mini-batch) holding the center words.
    target_words is a tensor (mini-batch) holding the target words.
    label is a tensor (mini-batch) indicating whether each word is a
    positive or negative sample (0 or 1); during training it is also used
    to probe synonyms of the words in this tensor, as a way to observe how
    well the model is training.
    """
    # First, map the words in the mini-batch to word vectors through the
    # embedding parameters. center_words and eval_words_emb look up the
    # same embedding table, while target_words_emb uses a separate one.
    center_words_emb = self.embedding(center_words)
    target_words_emb = self.embedding_out(target_words)

    # Compute the center-to-target score with a dot product, then estimate
    # the probability of being a positive sample with a sigmoid.
    word_sim = paddle.multiply(center_words_emb,
                               target_words_emb)  # elementwise product
    word_sim = paddle.sum(word_sim, axis=-1)
    word_sim = paddle.reshape(word_sim, shape=[-1])

    if label is not None:
        # Define the loss from the estimated probability. Note that we use
        # binary_cross_entropy_with_logits: fusing the sigmoid with the
        # cross entropy optimizes better, so we pass word_sim (the logits)
        # rather than the sigmoid output.
        loss = F.binary_cross_entropy_with_logits(word_sim, label)
        loss = paddle.mean(loss)
        return loss
    else:
        return F.sigmoid(word_sim)
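# Numeric sketch of the fused-loss remark above: BCE-with-logits on word_sim
# equals binary cross entropy applied to sigmoid(word_sim).
import paddle
import paddle.nn.functional as F

logits = paddle.to_tensor([1.2, -0.7])
label = paddle.to_tensor([1.0, 0.0])
fused = F.binary_cross_entropy_with_logits(logits, label)
manual = F.binary_cross_entropy(F.sigmoid(logits), label)
print(paddle.allclose(fused, manual))  # True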
def forward(self, inputs):
    outputs = self.avg_pool(inputs)
    outputs = self.conv1(outputs)
    outputs = F.relu(outputs)
    outputs = self.conv2(outputs)
    outputs = F.hard_sigmoid(outputs)
    return paddle.multiply(x=inputs, y=outputs, axis=0)
def forward(self, inputs):
    emb = []
    # input feature data
    for data in inputs:
        feat_emb = self.embedding(data)
        # sum pooling
        feat_emb = paddle.sum(feat_emb, axis=1)
        emb.append(feat_emb)
    concat_emb = paddle.concat(x=emb, axis=1)

    ctr_output = concat_emb
    for n_layer in self._ctr_mlp_layers:
        ctr_output = n_layer(ctr_output)
    ctr_out = F.softmax(ctr_output)

    cvr_output = concat_emb
    for n_layer in self._cvr_mlp_layers:
        cvr_output = n_layer(cvr_output)
    cvr_out = F.softmax(cvr_output)

    ctr_prop_one = paddle.slice(ctr_out, axes=[1], starts=[1], ends=[2])
    cvr_prop_one = paddle.slice(cvr_out, axes=[1], starts=[1], ends=[2])

    ctcvr_prop_one = paddle.multiply(x=ctr_prop_one, y=cvr_prop_one)
    ctcvr_prop = paddle.concat(
        x=[1 - ctcvr_prop_one, ctcvr_prop_one], axis=1)

    return ctr_out, ctr_prop_one, cvr_out, cvr_prop_one, ctcvr_prop, ctcvr_prop_one
def _layer_mul(inputs, node):
    """
    layer_mul, input(-1, emb_size), node(-1, n, emb_size)
    """
    input_re = paddle.unsqueeze(inputs, axis=[1])
    mul_res = paddle.multiply(input_re, node)
    return mul_res
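# Shape sketch for _layer_mul above: unsqueezing inputs to (-1, 1, emb_size)
# lets paddle.multiply broadcast one input row against all n node vectors.
# Assumes _layer_mul is callable directly (e.g. a module-level or static
# function, as its signature suggests).
import paddle

inputs = paddle.rand([2, 8])    # (batch, emb_size)
node = paddle.rand([2, 5, 8])   # (batch, n, emb_size)
print(_layer_mul(inputs, node).shape)  # [2, 5, 8]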
def forward(self, inputs):
    with paddle.no_grad():
        eps_in = self._scale_noise(self.epsilon_input.shape)
        eps_out = self._scale_noise(self.epsilon_output.shape)
        noise_v = paddle.multiply(eps_in, eps_out).detach()
    return F.linear(inputs,
                    self.weight + self.sigma_weight * noise_v.t(),
                    self.bias + self.sigma_bias * eps_out.squeeze().t())
def forward(self, inputs):
    input_emb = self.embedding(inputs[0])
    true_emb_w = self.embedding_w(inputs[1])
    true_emb_b = self.embedding_b(inputs[1])
    input_emb = paddle.squeeze(x=input_emb, axis=[1])
    true_emb_w = paddle.squeeze(x=true_emb_w, axis=[1])
    true_emb_b = paddle.squeeze(x=true_emb_b, axis=[1])

    neg_emb_w = self.embedding_w(inputs[2])
    neg_emb_b = self.embedding_b(inputs[2])
    neg_emb_b_vec = paddle.reshape(neg_emb_b, shape=[-1, self.neg_num])

    true_logits = paddle.add(
        x=paddle.sum(
            x=paddle.multiply(x=input_emb, y=true_emb_w),
            axis=1,
            keepdim=True),
        y=true_emb_b)

    input_emb_re = paddle.reshape(input_emb, shape=[-1, 1, self.emb_dim])
    neg_matmul = paddle.matmul(input_emb_re, neg_emb_w, transpose_y=True)
    neg_matmul_re = paddle.reshape(neg_matmul, shape=[-1, self.neg_num])
    neg_logits = paddle.add(x=neg_matmul_re, y=neg_emb_b_vec)

    return true_logits, neg_logits
def forward(self, sparse_inputs, dense_inputs):
    sparse_inputs_concat = paddle.concat(sparse_inputs, axis=1)
    sparse_emb_one = self.embedding_one(sparse_inputs_concat)

    dense_emb_one = paddle.multiply(dense_inputs, self.dense_w_one)
    dense_emb_one = paddle.unsqueeze(dense_emb_one, axis=2)

    y_linear = paddle.sum(sparse_emb_one, 1) + paddle.sum(dense_emb_one, 1)

    sparse_embeddings = self.embedding(sparse_inputs_concat)
    dense_inputs_re = paddle.unsqueeze(dense_inputs, axis=2)
    dense_embeddings = paddle.multiply(dense_inputs_re, self.dense_w)
    feat_embeddings = paddle.concat([sparse_embeddings, dense_embeddings],
                                    1)

    return y_linear, feat_embeddings
def forward(self, input_data):
    expert_outputs = []
    for i in range(0, self.expert_num):
        linear_out = self._param_expert[i](input_data)
        expert_output = F.relu(linear_out)
        expert_outputs.append(expert_output)
    expert_concat = paddle.concat(x=expert_outputs, axis=1)
    expert_concat = paddle.reshape(
        expert_concat, [-1, self.expert_num, self.expert_size])

    output_layers = []
    for i in range(0, self.gate_num):
        cur_gate_linear = self._param_gate[i](input_data)
        cur_gate = F.softmax(cur_gate_linear)
        cur_gate = paddle.reshape(cur_gate, [-1, self.expert_num, 1])
        cur_gate_expert = paddle.multiply(x=expert_concat, y=cur_gate)
        cur_gate_expert = paddle.sum(x=cur_gate_expert, axis=1)
        cur_tower = self._param_tower[i](cur_gate_expert)
        cur_tower = F.relu(cur_tower)
        out = self._param_tower_out[i](cur_tower)
        out = F.softmax(out)
        out = paddle.clip(out, min=1e-15, max=1.0 - 1e-15)
        output_layers.append(out)

    return output_layers
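# Sketch of the gate-weighted mixture inside the loop above (shapes assumed):
# multiply-then-sum over the expert axis is a batched weighted average of
# expert outputs, the same thing a batched matmul against the gate computes.
import paddle

expert_concat = paddle.rand([2, 4, 8])   # (batch, expert_num, expert_size)
cur_gate = paddle.rand([2, 4, 1])        # (batch, expert_num, 1)

mixed = paddle.sum(paddle.multiply(expert_concat, cur_gate), axis=1)
ref = paddle.matmul(expert_concat, cur_gate, transpose_x=True).squeeze(-1)
print(paddle.allclose(mixed, ref))  # True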
def test_with_input_lengths(self):
    mp = self.mp.clone()
    sp = self.sp
    rnn1 = self.rnn1
    rnn2 = self.rnn2
    exe = self.executor
    scope = self.scope

    x = np.random.randn(12, 4, 16)
    if not self.time_major:
        x = np.transpose(x, [1, 0, 2])
    sequence_length = np.array([12, 10, 9, 8], dtype=np.int64)

    y1, (h1, c1) = rnn1(x, sequence_length=sequence_length)

    with paddle.fluid.unique_name.guard():
        with paddle.static.program_guard(mp, sp):
            x_data = paddle.data(
                "input", [-1, -1, 16],
                dtype=paddle.framework.get_default_dtype())
            seq_len = paddle.data("seq_len", [-1], dtype="int64")
            mask = sequence_mask(seq_len, dtype=paddle.get_default_dtype())
            if self.time_major:
                mask = paddle.transpose(mask, [1, 0])
            y, (h, c) = rnn2(x_data, sequence_length=seq_len)
            y = paddle.multiply(y, mask, axis=0)

    feed_dict = {x_data.name: x, seq_len.name: sequence_length}

    with paddle.static.scope_guard(scope):
        y2, h2, c2 = exe.run(mp, feed=feed_dict, fetch_list=[y, h, c])

    np.testing.assert_allclose(y1, y2, atol=1e-8, rtol=1e-5)
    np.testing.assert_allclose(h1, h2, atol=1e-8, rtol=1e-5)
    np.testing.assert_allclose(c1, c2, atol=1e-8, rtol=1e-5)