def test_dynamic_api(self):
    paddle.disable_static()
    x = paddle.to_tensor(self.input_x)
    y = paddle.to_tensor(self.input_y)
    z = paddle.to_tensor(self.input_z)
    a = paddle.to_tensor(self.input_a)
    b = paddle.to_tensor(self.input_b)
    c = paddle.to_tensor(self.input_c)

    res = paddle.subtract(x, y)
    res = res.numpy()
    self.assertTrue(np.allclose(res, self.np_expected1))

    # test broadcast
    res = paddle.subtract(x, z)
    res = res.numpy()
    self.assertTrue(np.allclose(res, self.np_expected2))

    res = paddle.subtract(a, c)
    res = res.numpy()
    self.assertTrue(np.allclose(res, self.np_expected3))

    res = paddle.subtract(b, c)
    res = res.numpy()
    self.assertTrue(np.allclose(res, self.np_expected4))
def build_program():
    main_program = paddle.static.Program()
    startup_program = paddle.static.Program()
    with paddle.static.program_guard(main_program, startup_program):
        with paddle.static.device_guard('cpu'):
            data = paddle.ones([4, 64], dtype='float32', name='data')

        # data -> [memcpy_h2d] -> data' -> [matmul] -> out -> [add] -> add_out
        with paddle.static.device_guard('gpu'):
            weight = paddle.randn([64, 64], name='weight')  # gpu
            matmul_out = paddle.matmul(data, weight, name='matmul_out')  # gpu
            bias = paddle.ones([4, 64], dtype='float32', name='bias')
            add_out = paddle.add(matmul_out, bias, name='add_out')

        # add_out -> [memcpy_d2h] -> add_out' -> [sub] -> sub_out -> [tanh] -> tanh_out
        with paddle.static.device_guard('cpu'):
            sub_out = paddle.subtract(add_out, data, name='sub_out')
            tanh_out = paddle.tanh(sub_out, name='tanh_out')

        with paddle.static.device_guard('gpu'):
            bias_1 = paddle.add(bias, sub_out, name='bias_1')
            out_before = paddle.tanh(bias_1, name='out_before')
            out_last = paddle.subtract(tanh_out, data, name='out_last')
            out = paddle.add(out_before, out_last, name='out')
            mean = paddle.mean(out, name='mean_out')

    return main_program, startup_program, [mean]
def test_static_api(self):
    paddle.enable_static()
    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        data_x = paddle.static.data("x", shape=[10, 15], dtype="float32")
        data_y = paddle.static.data("y", shape=[10, 15], dtype="float32")
        result_sub = paddle.subtract(data_x, data_y)
        exe = paddle.static.Executor(self.place)
        res, = exe.run(feed={"x": self.input_x,
                             "y": self.input_y},
                       fetch_list=[result_sub])
        self.assertTrue(np.allclose(res, self.np_expected1))

    # test broadcast
    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        data_x = paddle.static.data("x", shape=[10, 15], dtype="float32")
        data_z = paddle.static.data("z", shape=[15], dtype="float32")
        result_sub = paddle.subtract(data_x, data_z)
        exe = paddle.static.Executor(self.place)
        res, = exe.run(feed={"x": self.input_x,
                             "z": self.input_z},
                       fetch_list=[result_sub])
        self.assertTrue(np.allclose(res, self.np_expected2))

    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        data_a = paddle.static.data("a", shape=[3], dtype="int64")
        data_c = paddle.static.data("c", shape=[3], dtype="int64")
        result_sub = paddle.subtract(data_a, data_c)
        exe = paddle.static.Executor(self.place)
        res, = exe.run(feed={"a": self.input_a,
                             "c": self.input_c},
                       fetch_list=[result_sub])
        self.assertTrue(np.allclose(res, self.np_expected3))

    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        data_b = paddle.static.data("b", shape=[3], dtype="int64")
        data_c = paddle.static.data("c", shape=[3], dtype="int64")
        result_sub = paddle.subtract(data_b, data_c)
        exe = paddle.static.Executor(self.place)
        res, = exe.run(feed={"b": self.input_b,
                             "c": self.input_c},
                       fetch_list=[result_sub])
        self.assertTrue(np.allclose(res, self.np_expected4))
def test_name(self):
    with paddle.static.program_guard(paddle.static.Program()):
        x = paddle.static.data(name="x", shape=[2, 3], dtype="float32")
        y = paddle.static.data(name='y', shape=[2, 3], dtype='float32')
        y_1 = paddle.subtract(x, y, name='add_res')
        self.assertEqual(('add_res' in y_1.name), True)
def net(self, inputs, is_infer=False):
    pyramid_model = MatchPyramidLayer(
        self.emb_path, self.vocab_size, self.emb_size, self.kernel_num,
        self.conv_filter, self.conv_act, self.hidden_size, self.out_size,
        self.pool_size, self.pool_stride, self.pool_padding, self.pool_type,
        self.hidden_act)
    prediction = pyramid_model(inputs)
    if is_infer:
        self._infer_results["prediction"] = prediction
        return

    pos = paddle.slice(
        prediction, axes=[0, 1], starts=[0, 0], ends=[64, 1])
    neg = paddle.slice(
        prediction, axes=[0, 1], starts=[64, 0], ends=[128, 1])
    loss_part1 = paddle.subtract(
        paddle.full(shape=[64, 1], fill_value=1.0, dtype='float32'), pos)
    loss_part2 = paddle.add(loss_part1, neg)
    loss_part3 = paddle.maximum(
        paddle.full(shape=[64, 1], fill_value=0.0, dtype='float32'),
        loss_part2)
    avg_cost = paddle.mean(loss_part3)
    self._cost = avg_cost
def test_static(self):
    with paddle.static.program_guard(paddle.static.Program()):
        x_np = np.array([2, 3, 4]).astype('float32')
        y_np = np.array([1, 5, 2]).astype('float32')

        x = paddle.static.data(name="x", shape=[3], dtype='float32')
        y = paddle.static.data(name="y", shape=[3], dtype='float32')

        x_reshape = paddle.reshape(x, [3, 1])
        y_reshape = paddle.reshape(y, [3, 1])
        z = paddle.subtract(x_reshape, y_reshape)
        z = paddle.reshape(z, shape=[3])

        place = paddle.NPUPlace(0)
        exe = paddle.static.Executor(place)
        x_value, y_value, z_value = exe.run(feed={"x": x_np,
                                                  "y": y_np},
                                            fetch_list=[x, y, z])

        z_expected = np.array([1., -2., 2.])
        self.assertEqual(
            (x_value == x_np).all(),
            True,
            msg="x_value = {}, but expected {}".format(x_value, x_np))
        self.assertEqual(
            (y_value == y_np).all(),
            True,
            msg="y_value = {}, but expected {}".format(y_value, y_np))
        self.assertEqual(
            (z_value == z_expected).all(),
            True,
            msg="z_value = {}, but expected {}".format(z_value, z_expected))
def forward(self, x, y):
    if in_dygraph_mode():
        sub = _C_ops.elementwise_sub(x, y)
        return _C_ops.final_state_p_norm(sub, self.p, 1, self.epsilon,
                                         self.keepdim, False)

    if _in_legacy_dygraph():
        sub = _C_ops.elementwise_sub(x, y)
        return _C_ops.p_norm(sub, 'axis', 1, 'porder', self.p, 'keepdim',
                             self.keepdim, 'epsilon', self.epsilon)

    check_variable_and_dtype(x, 'x', ['float32', 'float64'],
                             'PairwiseDistance')
    check_variable_and_dtype(y, 'y', ['float32', 'float64'],
                             'PairwiseDistance')
    sub = paddle.subtract(x, y)

    helper = LayerHelper("PairwiseDistance", name=self.name)
    attrs = {
        'axis': 1,
        'porder': self.p,
        'keepdim': self.keepdim,
        'epsilon': self.epsilon,
    }
    out = helper.create_variable_for_type_inference(dtype=x.dtype)
    helper.append_op(type='p_norm',
                     inputs={'X': sub},
                     outputs={'Out': out},
                     attrs=attrs)
    return out
def net(self, input, is_infer=False):
    pyramid_model = MatchPyramidLayer(
        self.emb_path, self.vocab_size, self.emb_size, self.kernel_num,
        self.conv_filter, self.conv_act, self.hidden_size, self.out_size,
        self.pool_size, self.pool_stride, self.pool_padding, self.pool_type,
        self.hidden_act)
    prediction = pyramid_model(input)
    if is_infer:
        fetch_dict = {'prediction': prediction}
        return fetch_dict

    # calculate hinge loss
    pos = paddle.slice(prediction, axes=[0, 1], starts=[0, 0], ends=[64, 1])
    neg = paddle.slice(prediction, axes=[0, 1], starts=[64, 0], ends=[128, 1])
    loss_part1 = paddle.subtract(
        paddle.full(shape=[64, 1], fill_value=1.0, dtype='float32'), pos)
    loss_part2 = paddle.add(loss_part1, neg)
    loss_part3 = paddle.maximum(
        paddle.full(shape=[64, 1], fill_value=0.0, dtype='float32'),
        loss_part2)
    avg_cost = paddle.mean(loss_part3)

    self.inference_target_var = avg_cost
    self._cost = avg_cost
    fetch_dict = {'cost': avg_cost}
    return fetch_dict
def _layer_sub(inputs, node):
    """
    layer_sub, input(-1, emb_size), node(-1, n, emb_size)
    """
    input_re = paddle.unsqueeze(inputs, axis=[1])
    sub_res = paddle.subtract(input_re, node)
    return sub_res
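# Hedged usage sketch (not part of the original source): illustrates the
# broadcast that _layer_sub above relies on. After unsqueeze, `inputs` has
# shape (-1, 1, emb_size), and subtracting a (-1, n, emb_size) `node`
# broadcasts along the second axis. The tensor values below are made up.
import numpy as np
import paddle

inputs = paddle.to_tensor(np.random.rand(2, 8).astype('float32'))   # (-1, emb_size)
node = paddle.to_tensor(np.random.rand(2, 3, 8).astype('float32'))  # (-1, n, emb_size)
sub_res = paddle.subtract(paddle.unsqueeze(inputs, axis=[1]), node)
print(sub_res.shape)  # expected: [2, 3, 8]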
def softmax_with_cross_entropy(self, shard_logit, shard_one_hot):
    shard_max = paddle.max(shard_logit, axis=1, keepdim=True)
    global_max = shard_max
    paddle.distributed.all_reduce(global_max,
                                  op=paddle.distributed.ReduceOp.MAX)
    shard_logit_new = paddle.subtract(shard_logit, global_max)

    shard_exp = paddle.exp(shard_logit_new)
    shard_demon = paddle.sum(shard_exp, axis=1, keepdim=True)
    global_demon = shard_demon
    paddle.distributed.all_reduce(global_demon,
                                  op=paddle.distributed.ReduceOp.SUM)

    global_log_demon = paddle.log(global_demon)
    shard_log_prob = shard_logit_new - global_log_demon
    shard_prob = paddle.exp(shard_log_prob)

    target_log_prob = paddle.min(shard_log_prob * shard_one_hot,
                                 axis=1,
                                 keepdim=True)
    shard_loss = paddle.scale(target_log_prob, scale=-1.0)
    # TODO: paddle.distributed.reducescatter not found
    global_loss = paddle.fluid.layers.collective._c_reducescatter(
        shard_loss, nranks=self.nranks, use_calc_stream=True)
    return global_loss, shard_prob
def forward(self):
    input = self.input('Input', 0)
    im_info = self.input('ImInfo', 0)
    im_info = paddle.reshape(im_info, shape=[3])
    h, w, s = paddle.tensor.split(im_info, axis=0, num_or_sections=3)
    tensor_one = paddle.full(shape=[1], dtype='float32', fill_value=1.0)
    tensor_zero = paddle.full(shape=[1], dtype='float32', fill_value=0.0)
    h = paddle.subtract(h, tensor_one)
    w = paddle.subtract(w, tensor_one)
    xmin, ymin, xmax, ymax = paddle.tensor.split(
        input, axis=-1, num_or_sections=4)
    xmin = paddle.maximum(paddle.minimum(xmin, w), tensor_zero)
    ymin = paddle.maximum(paddle.minimum(ymin, h), tensor_zero)
    xmax = paddle.maximum(paddle.minimum(xmax, w), tensor_zero)
    ymax = paddle.maximum(paddle.minimum(ymax, h), tensor_zero)
    clipped_box = paddle.concat([xmin, ymin, xmax, ymax], axis=-1)
    return {'Output': [clipped_box]}
def create_loss(batch_size, margin, cos_pos, cos_neg):
    loss_part1 = paddle.subtract(
        paddle.full(shape=[batch_size, 1], fill_value=margin,
                    dtype='float32'), cos_pos)
    loss_part2 = paddle.add(loss_part1, cos_neg)
    loss_part3 = paddle.maximum(
        paddle.full(shape=[batch_size, 1], fill_value=0.0, dtype='float32'),
        loss_part2)
    avg_cost = paddle.mean(loss_part3)
    return avg_cost
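# Hedged usage sketch (not part of the original source): shows the pairwise
# hinge loss computed by create_loss above, mean(max(0, margin - cos_pos +
# cos_neg)), on a toy batch. The tensor values are illustrative only.
import paddle

cos_pos = paddle.to_tensor([[0.9], [0.2]], dtype='float32')
cos_neg = paddle.to_tensor([[0.1], [0.4]], dtype='float32')
loss = create_loss(batch_size=2, margin=0.5, cos_pos=cos_pos, cos_neg=cos_neg)
print(loss.numpy())  # mean of [0.0, 0.7] -> 0.35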
def create_loss(prediction):
    pos = paddle.slice(prediction, axes=[0, 1], starts=[0, 0], ends=[64, 1])
    neg = paddle.slice(prediction, axes=[0, 1], starts=[64, 0], ends=[128, 1])
    loss_part1 = paddle.subtract(
        paddle.full(shape=[64, 1], fill_value=1.0, dtype='float32'), pos)
    loss_part2 = paddle.add(loss_part1, neg)
    loss_part3 = paddle.maximum(
        paddle.full(shape=[64, 1], fill_value=0.0, dtype='float32'),
        loss_part2)
    avg_cost = paddle.mean(loss_part3)
    return avg_cost
def _test(self, run_npu=True):
    main_prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    main_prog.random_seed = SEED
    startup_prog.random_seed = SEED
    np.random.seed(SEED)

    a_np = np.random.random(size=(32, 32)).astype('float32')
    b_np = np.random.random(size=(32, 32)).astype('float32')
    label_np = np.random.randint(2, size=(32, 1)).astype('int64')

    with paddle.static.program_guard(main_prog, startup_prog):
        a = paddle.static.data(name="a", shape=[32, 32], dtype='float32')
        b = paddle.static.data(name="b", shape=[32, 32], dtype='float32')
        label = paddle.static.data(name="label", shape=[32, 1], dtype='int64')

        sum = paddle.add(a, b)
        c = paddle.assign(b)
        z = paddle.subtract(sum, c)

        fc_1 = fluid.layers.fc(input=z, size=128)
        prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')

        cost = fluid.layers.cross_entropy(input=prediction, label=label)
        loss = fluid.layers.reduce_mean(cost)
        sgd = fluid.optimizer.SGD(learning_rate=0.01)
        sgd.minimize(loss)

    if run_npu:
        place = paddle.NPUPlace(0)
    else:
        place = paddle.CPUPlace()

    exe = paddle.static.Executor(place)
    exe.run(startup_prog)

    for epoch in range(100):
        pred_res, loss_res = exe.run(main_prog,
                                     feed={"a": a_np,
                                           "b": b_np,
                                           "label": label_np},
                                     fetch_list=[prediction, loss])
        if epoch % 10 == 0:
            print("Epoch {} | Prediction[0]: {}, Loss: {}".format(
                epoch, pred_res[0], loss_res))

    return pred_res, loss_res
def _margin_softmax(input, label, out_dim, param_attr, margin1, margin2,
                    margin3, scale, sample_ratio):
    input_norm = paddle.sqrt(
        paddle.sum(paddle.square(input), axis=1, keepdim=True))
    input = paddle.divide(input, input_norm)

    if param_attr is None:
        param_attr = paddle.ParamAttr(
            initializer=paddle.nn.initializer.XavierNormal(fan_in=0.0))
    weight = paddle.static.create_parameter(
        shape=[input.shape[1], out_dim],
        dtype='float32',
        name=unique_name.generate('final_fc_w'),
        attr=param_attr)

    if sample_ratio < 1.0:
        # partial fc sample process
        label, sampled_class_index = class_center_sample(
            label, out_dim, ratio=sample_ratio, ignore_label=-1)
        sampled_class_index.stop_gradient = True
        weight = paddle.gather(weight, sampled_class_index, axis=1)
        out_dim = paddle.shape(sampled_class_index)

    weight_norm = paddle.sqrt(
        paddle.sum(paddle.square(weight), axis=0, keepdim=True))
    weight = paddle.divide(weight, weight_norm)
    cos = paddle.matmul(input, weight)

    theta = paddle.acos(cos)
    if margin1 != 1.0:
        theta = margin1 * theta
    if margin2 != 0.0:
        theta = theta + margin2
    margin_cos = paddle.cos(theta)
    if margin3 != 0.0:
        margin_cos = margin_cos - margin3

    one_hot = paddle.nn.functional.one_hot(label, num_classes=out_dim)
    diff = paddle.multiply(paddle.subtract(margin_cos, cos), one_hot)
    target_cos = paddle.add(cos, diff)
    logit = paddle.scale(target_cos, scale=scale)

    loss, prob = paddle.nn.functional.softmax_with_cross_entropy(
        logits=logit,
        label=paddle.reshape(label, (-1, 1)),
        return_softmax=True)
    avg_loss = paddle.mean(x=loss)

    one_hot.stop_gradient = True

    return avg_loss, prob
def forward(self, predicts, batch):
    if isinstance(predicts, (list, tuple)):
        predicts = predicts[-1]
    predicts = predicts.transpose((1, 0, 2))
    N, B, _ = predicts.shape
    preds_lengths = paddle.to_tensor([N] * B, dtype='int64')
    labels = batch[1].astype("int32")
    label_lengths = batch[2].astype('int64')
    loss = self.loss_func(predicts, labels, preds_lengths, label_lengths)
    if self.use_focal_loss:
        weight = paddle.exp(-loss)
        weight = paddle.subtract(paddle.to_tensor([1.0]), weight)
        weight = paddle.square(weight)
        loss = paddle.multiply(loss, weight)
    loss = loss.mean()
    return {'loss': loss}
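# Hedged illustration (not part of the original source): the focal weighting
# used above scales each per-sample CTC loss by (1 - exp(-loss))**2, so easy
# samples (small loss) are down-weighted. Values below are made up.
import paddle

loss = paddle.to_tensor([0.1, 2.0])
weight = paddle.square(paddle.subtract(paddle.to_tensor([1.0]),
                                       paddle.exp(-loss)))
print(paddle.multiply(loss, weight).numpy())  # approx. [0.0009, 1.495]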
def forward(self,
            query_input_ids,
            title_input_ids,
            query_token_type_ids=None,
            query_position_ids=None,
            query_attention_mask=None,
            title_token_type_ids=None,
            title_position_ids=None,
            title_attention_mask=None):
    query_token_embedding, _ = self.ptm(query_input_ids, query_token_type_ids,
                                        query_position_ids,
                                        query_attention_mask)
    query_token_embedding = self.dropout(query_token_embedding)
    query_attention_mask = paddle.unsqueeze(
        (query_input_ids != self.ptm.pad_token_id).astype(
            self.ptm.pooler.dense.weight.dtype),
        axis=2)
    # Set token embeddings to 0 for padding tokens
    query_token_embedding = query_token_embedding * query_attention_mask
    query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
    query_sum_mask = paddle.sum(query_attention_mask, axis=1)
    query_mean = query_sum_embedding / query_sum_mask

    title_token_embedding, _ = self.ptm(title_input_ids, title_token_type_ids,
                                        title_position_ids,
                                        title_attention_mask)
    title_token_embedding = self.dropout(title_token_embedding)
    title_attention_mask = paddle.unsqueeze(
        (title_input_ids != self.ptm.pad_token_id).astype(
            self.ptm.pooler.dense.weight.dtype),
        axis=2)
    # Set token embeddings to 0 for padding tokens
    title_token_embedding = title_token_embedding * title_attention_mask
    title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
    title_sum_mask = paddle.sum(title_attention_mask, axis=1)
    title_mean = title_sum_embedding / title_sum_mask

    sub = paddle.abs(paddle.subtract(query_mean, title_mean))
    projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
    logits = self.classifier(projection)
    probs = F.softmax(logits)

    return probs
def __call__(self, predicts, batch):
    if isinstance(predicts, (list, tuple)):
        predicts = predicts[-1]

    B, N = predicts.shape[:2]
    div = paddle.to_tensor([N]).astype('float32')

    predicts = nn.functional.softmax(predicts, axis=-1)
    aggregation_preds = paddle.sum(predicts, axis=1)
    aggregation_preds = paddle.divide(aggregation_preds, div)

    length = batch[2].astype("float32")
    batch = batch[3].astype("float32")
    batch[:, 0] = paddle.subtract(div, length)
    batch = paddle.divide(batch, div)

    loss = self.loss_func(aggregation_preds, batch)
    return {"loss_ace": loss}
def net(self, input, is_infer=False): """ network""" if is_infer: self.batch_size = envs.get_global_env( "dataset.inferdata.batch_size") else: self.batch_size = envs.get_global_env( "dataset.sample_1.batch_size") tagspace_model = TagspaceLayer(self.vocab_text_size, self.vocab_tag_size, self.emb_dim, self.hid_dim, self.win_size, self.margin, self.neg_size, self.text_len) cos_pos, cos_neg = tagspace_model(input) # calculate hinge loss loss_part1 = paddle.subtract( paddle.full(shape=[self.batch_size, 1], fill_value=self.margin, dtype='float32'), cos_pos) loss_part2 = paddle.add(loss_part1, cos_neg) loss_part3 = paddle.maximum( paddle.full(shape=[self.batch_size, 1], fill_value=0.0, dtype='float32'), loss_part2) avg_cost = paddle.mean(loss_part3) less = paddle.cast(paddle.less_than(cos_neg, cos_pos), dtype='float32') label_ones = paddle.full(dtype='float32', shape=[self.batch_size, 1], fill_value=1.0) correct = paddle.sum(less) total = paddle.sum(label_ones) acc = paddle.divide(correct, total) self._cost = avg_cost if is_infer: self._infer_results["acc"] = acc self._infer_results["loss"] = self._cost else: self._metrics["acc"] = acc self._metrics["loss"] = self._cost
def net(self, input, is_infer=False):
    if is_infer:
        self.batch_size = self.config.get("runner.infer_batch_size")
    else:
        self.batch_size = self.config.get("runner.train_batch_size")

    tagspace_model = TagspaceLayer(self.vocab_text_size, self.vocab_tag_size,
                                   self.emb_dim, self.hid_dim, self.win_size,
                                   self.margin, self.neg_size, self.text_len)
    cos_pos, cos_neg = tagspace_model(input)

    # calculate hinge loss
    loss_part1 = paddle.subtract(
        paddle.full(shape=[self.batch_size, 1],
                    fill_value=self.margin,
                    dtype='float32'), cos_pos)
    loss_part2 = paddle.add(loss_part1, cos_neg)
    loss_part3 = paddle.maximum(
        paddle.full(shape=[self.batch_size, 1],
                    fill_value=0.0,
                    dtype='float32'), loss_part2)
    avg_cost = paddle.mean(loss_part3)

    less = paddle.cast(paddle.less_than(cos_neg, cos_pos), dtype='float32')
    label_ones = paddle.full(dtype='float32',
                             shape=[self.batch_size, 1],
                             fill_value=1.0)
    correct = paddle.sum(less)
    total = paddle.sum(label_ones)
    acc = paddle.divide(correct, total)

    self.inference_target_var = acc
    if is_infer:
        fetch_dict = {'ACC': acc}
        return fetch_dict

    self._cost = avg_cost
    fetch_dict = {'cost': avg_cost, 'ACC': acc}
    return fetch_dict
def net(self, input, is_infer=False):
    self.q_slots = self._sparse_data_var[0:1]
    self.pt_slots = self._sparse_data_var[1:2]

    if not is_infer:
        self.batch_size = envs.get_global_env(
            "dataset.dataset_train.batch_size")
        self.nt_slots = self._sparse_data_var[2:3]
        inputs = [self.q_slots, self.pt_slots, self.nt_slots]
    else:
        self.batch_size = envs.get_global_env(
            "dataset.dataset_infer.batch_size")
        inputs = [self.q_slots, self.pt_slots]

    simnet_model = MultiviewSimnetLayer(
        self.query_encoder, self.title_encoder, self.query_encode_dim,
        self.title_encode_dim, self.emb_size, self.emb_dim, self.hidden_size,
        self.margin, self.query_len, self.pos_len, self.neg_len)
    cos_pos, cos_neg = simnet_model(inputs, is_infer)

    if is_infer:
        self._infer_results['query_pt_sim'] = cos_pos
        return

    # pairwise hinge_loss
    loss_part1 = paddle.subtract(
        paddle.full(shape=[self.batch_size, 1],
                    fill_value=self.margin,
                    dtype='float32'), cos_pos)
    loss_part2 = paddle.add(loss_part1, cos_neg)
    loss_part3 = paddle.maximum(
        paddle.full(shape=[self.batch_size, 1],
                    fill_value=0.0,
                    dtype='float32'), loss_part2)
    self._cost = paddle.mean(loss_part3)

    self.acc = self.get_acc(cos_neg, cos_pos)
    self._metrics["loss"] = self._cost
    self._metrics["acc"] = self.acc
def forward(self, similarities_matrix, query_img_id, gallery_img_id,
            keep_mask):
    metric_dict = dict()

    chosen_indices = paddle.argsort(similarities_matrix,
                                    axis=1,
                                    descending=True)
    gallery_labels_transpose = paddle.transpose(gallery_img_id, [1, 0])
    gallery_labels_transpose = paddle.broadcast_to(
        gallery_labels_transpose,
        shape=[
            chosen_indices.shape[0], gallery_labels_transpose.shape[1]
        ])
    chosen_label = paddle.index_sample(gallery_labels_transpose,
                                       chosen_indices)
    equal_flag = paddle.equal(chosen_label, query_img_id)
    if keep_mask is not None:
        keep_mask = paddle.index_sample(keep_mask.astype('float32'),
                                        chosen_indices)
        equal_flag = paddle.logical_and(equal_flag, keep_mask.astype('bool'))
    equal_flag = paddle.cast(equal_flag, 'float32')

    num_rel = paddle.sum(equal_flag, axis=1)
    num_rel = paddle.greater_than(num_rel, paddle.to_tensor(0.))
    num_rel_index = paddle.nonzero(num_rel.astype("int"))
    num_rel_index = paddle.reshape(num_rel_index, [num_rel_index.shape[0]])
    equal_flag = paddle.index_select(equal_flag, num_rel_index, axis=0)

    # do accumulative sum
    div = paddle.arange(equal_flag.shape[1]).astype("float32") + 2
    minus = paddle.divide(equal_flag, div)
    auxiliary = paddle.subtract(equal_flag, minus)
    hard_index = paddle.argmax(auxiliary, axis=1).astype("float32")
    all_INP = paddle.divide(paddle.sum(equal_flag, axis=1), hard_index)
    mINP = paddle.mean(all_INP)
    metric_dict["mINP"] = mINP.numpy()[0]
    return metric_dict
def net(self, input, is_infer=False):
    self.q_slots = [input[0]]
    self.pt_slots = [input[1]]

    if not is_infer:
        self.batch_size = self.config.get("runner.train_batch_size")
        self.nt_slots = [input[2]]
        inputs = [self.q_slots, self.pt_slots, self.nt_slots]
    else:
        self.batch_size = self.config.get("runner.infer_batch_size")
        inputs = [self.q_slots, self.pt_slots]

    simnet_model = MultiviewSimnetLayer(
        self.query_encoder, self.title_encoder, self.query_encode_dim,
        self.title_encode_dim, self.emb_size, self.emb_dim, self.hidden_size,
        self.margin, self.query_len, self.pos_len, self.neg_len)
    cos_pos, cos_neg = simnet_model(inputs, is_infer)

    self.inference_target_var = cos_pos
    if is_infer:
        fetch_dict = {'query_pt_sim': cos_pos}
        return fetch_dict

    loss_part1 = paddle.subtract(
        paddle.full(shape=[self.batch_size, 1],
                    fill_value=self.margin,
                    dtype='float32'), cos_pos)
    loss_part2 = paddle.add(loss_part1, cos_neg)
    loss_part3 = paddle.maximum(
        paddle.full(shape=[self.batch_size, 1],
                    fill_value=0.0,
                    dtype='float32'), loss_part2)
    avg_cost = paddle.mean(loss_part3)

    self._cost = avg_cost
    self.acc = self.get_acc(cos_neg, cos_pos)
    fetch_dict = {'Acc': self.acc, 'Loss': avg_cost}
    return fetch_dict
def forward(self,
            input_ids=None,
            token_type_ids=None,
            position_ids=None,
            attention_mask=None,
            query_input_ids=None,
            query_token_type_ids=None,
            query_position_ids=None,
            query_attention_mask=None,
            title_input_ids=None,
            title_token_type_ids=None,
            title_position_ids=None,
            title_attention_mask=None,
            seq_lengths=None,
            labels=None):

    if self.task != 'text-matching':
        result = self.model(input_ids, token_type_ids, position_ids,
                            attention_mask)
    else:
        query_result = self.model(query_input_ids, query_token_type_ids,
                                  query_position_ids, query_attention_mask)
        title_result = self.model(title_input_ids, title_token_type_ids,
                                  title_position_ids, title_attention_mask)

    if self.task == 'seq-cls':
        logits = result
        probs = F.softmax(logits, axis=1)
        if labels is not None:
            loss = self.criterion(logits, labels)
            correct = self.metric.compute(probs, labels)
            acc = self.metric.update(correct)
            return probs, loss, {'acc': acc}
        return probs
    elif self.task == 'token-cls':
        logits = result
        token_level_probs = F.softmax(logits, axis=-1)
        preds = token_level_probs.argmax(axis=-1)
        if labels is not None:
            loss = self.criterion(logits, labels.unsqueeze(-1))
            num_infer_chunks, num_label_chunks, num_correct_chunks = \
                self.metric.compute(None, seq_lengths, preds, labels)
            self.metric.update(num_infer_chunks.numpy(),
                               num_label_chunks.numpy(),
                               num_correct_chunks.numpy())
            _, _, f1_score = map(float, self.metric.accumulate())
            return token_level_probs, loss, {'f1_score': f1_score}
        return token_level_probs
    elif self.task == 'text-matching':
        query_token_embedding = query_result
        query_token_embedding = self.dropout(query_token_embedding)
        query_attention_mask = paddle.unsqueeze(
            (query_input_ids != self.model.pad_token_id).astype(
                query_token_embedding.dtype),
            axis=2)
        # Set token embeddings to 0 for padding tokens
        query_token_embedding = query_token_embedding * query_attention_mask
        query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
        query_sum_mask = paddle.sum(query_attention_mask, axis=1)
        query_mean = query_sum_embedding / query_sum_mask

        title_token_embedding = title_result
        title_token_embedding = self.dropout(title_token_embedding)
        title_attention_mask = paddle.unsqueeze(
            (title_input_ids != self.model.pad_token_id).astype(
                title_token_embedding.dtype),
            axis=2)
        # Set token embeddings to 0 for padding tokens
        title_token_embedding = title_token_embedding * title_attention_mask
        title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
        title_sum_mask = paddle.sum(title_attention_mask, axis=1)
        title_mean = title_sum_embedding / title_sum_mask

        sub = paddle.abs(paddle.subtract(query_mean, title_mean))
        projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
        logits = self.classifier(projection)
        probs = F.softmax(logits)
        if labels is not None:
            loss = self.criterion(logits, labels)
            correct = self.metric.compute(probs, labels)
            acc = self.metric.update(correct)
            return probs, loss, {'acc': acc}
        return probs
    else:
        sequence_output, pooled_output = result
        return sequence_output, pooled_output
def test_quant_subtract(self):
    out_1 = paddle.subtract(self.x, self.y)
    out_2 = paddle.nn.quant.subtract()(self.x, self.y)
    self.check(out_1, out_2)
def sub_prim2orig(op, x, y):
    return paddle.subtract(x, y)
def add_subtract(inputs):
    return paddle.subtract(paddle.add(inputs, inputs), inputs)
def subtract_wrapper(self, x):
    return paddle.subtract(x[0], x[1])
def _executed_api(self, x, y, name=None):
    return paddle.subtract(x, y, name)
def forward(self, x, y, name=None):
    return paddle.subtract(x, y, name)
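# Hedged usage sketch (not part of the original source): the thin wrappers
# above simply forward to paddle.subtract, which performs elementwise
# subtraction with NumPy-style broadcasting. Values below are made up.
import paddle

x = paddle.to_tensor([[1., 2., 3.], [4., 5., 6.]])
y = paddle.to_tensor([1., 1., 1.])
print(paddle.subtract(x, y).numpy())  # [[0. 1. 2.], [3. 4. 5.]]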