def get_metrics(self, inputs, outputs): """Get metrics.""" metrics = {} tgt_logits = self._calc_logits(outputs["enc_out"], inputs["tgt_idx"]) lm_loss = layers.softmax_with_cross_entropy(logits=tgt_logits, label=inputs["tgt_label"]) need_cal = layers.not_equal( inputs["tgt_label"], layers.fill_constant(shape=[1], dtype="int64", value=1) ) need_cal = layers.cast(need_cal, self.dtype) mean_lm_loss = layers.reduce_sum(lm_loss * need_cal) / (layers.reduce_sum(need_cal) + 1e-10) pooled_out = self._get_pooled_output(outputs["enc_out"], inputs["label_idx"]) nsp_logits = self._get_classifier_output(pooled_out, name="next_sent") nsp_loss, nsp_softmax = layers.softmax_with_cross_entropy( logits=nsp_logits, label=inputs["label"], return_softmax=True) nsp_acc = layers.accuracy(nsp_softmax, inputs["label"]) mean_nsp_loss = layers.mean(nsp_loss) loss = mean_nsp_loss if self.use_mlm: loss = loss + mean_lm_loss metrics["token_lm_loss"] = mean_lm_loss metrics["loss"] = loss metrics["nsp_loss"] = mean_nsp_loss metrics["nsp_acc"] = nsp_acc return metrics
def forward(self, *args, **kwargs):
    """
    Args:
        start_pos (optional, `Variable` of shape [batch_size]):
            token index of the start of the answer span in `context`
        end_pos (optional, `Variable` of shape [batch_size]):
            token index of the end of the answer span in `context`
    Returns:
        loss (`Variable` of shape []):
            cross entropy loss averaged over the batch;
            returns None if `start_pos`/`end_pos` are not given
        start_logits (`Variable` of shape [batch_size, seq_len]):
            output logits of the start position; use argmax(start_logits) to get the start index
        end_logits (`Variable` of shape [batch_size, seq_len]):
            output logits of the end position; use argmax(end_logits) to get the end index
    """
    start_pos = kwargs.pop('start_pos', None)
    end_pos = kwargs.pop('end_pos', None)
    pooled, encoded = super(ErnieModelForQuestionAnswering, self).forward(*args, **kwargs)
    encoded = self.dropout(encoded)
    encoded = self.classifier(encoded)
    start_logits, end_logits = L.unstack(encoded, axis=-1)

    if start_pos is not None and end_pos is not None:
        if len(start_pos.shape) == 1:
            start_pos = L.unsqueeze(start_pos, axes=[-1])
        if len(end_pos.shape) == 1:
            end_pos = L.unsqueeze(end_pos, axes=[-1])
        start_loss = L.softmax_with_cross_entropy(start_logits, start_pos)
        end_loss = L.softmax_with_cross_entropy(end_logits, end_pos)
        loss = (L.reduce_mean(start_loss) + L.reduce_mean(end_loss)) / 2.
    else:
        loss = None
    return loss, start_logits, end_logits
def _get_metrics(self, inputs, outputs):
    metrics = {}
    fc_out = self._calc_logits(enc_out=outputs["enc_out"], seq_pos=inputs["tgt_pos"])
    # fc_out = self._calc_logits(outputs["enc_out"], outputs["checkpoints"], inputs["tgt_pos"])
    lm_loss = layers.softmax_with_cross_entropy(logits=fc_out, label=inputs["tgt_label"])
    need_cal = layers.not_equal(
        inputs["tgt_label"],
        layers.fill_constant(shape=[1], dtype="int64", value=1))
    need_cal = layers.cast(need_cal, self.dtype)
    mean_lm_loss = layers.reduce_sum(
        lm_loss * need_cal) / (layers.reduce_sum(need_cal) + 1e-10)

    pooled_out = self._get_pooled_output(outputs["enc_out"], inputs["label_pos"])
    nsp_fc_out = layers.fc(
        input=pooled_out,
        size=2,
        param_attr=fluid.ParamAttr(
            name="next_sent_fc.w_0", initializer=self.param_initializer),
        bias_attr="next_sent_fc.b_0")
    nsp_loss, nsp_softmax = layers.softmax_with_cross_entropy(
        logits=nsp_fc_out, label=inputs["label"], return_softmax=True)

    nsp_acc = layers.accuracy(nsp_softmax, inputs["label"])
    mean_nsp_loss = layers.mean(nsp_loss)

    metrics["loss"] = mean_lm_loss + mean_nsp_loss
    metrics["lm_loss"] = mean_lm_loss
    metrics["nsp_loss"] = mean_nsp_loss
    metrics["nsp_acc"] = nsp_acc
    return metrics
def test_softmax_with_cross_entropy(self):
    program = Program()
    with program_guard(program):
        x = layers.data(name='x', shape=[16], dtype='float32')
        y = layers.data(name='label', shape=[1], dtype='int64')
        loss, softmax = layers.softmax_with_cross_entropy(
            x, y, return_softmax=True)
        self.assertIsNotNone(loss)
        self.assertIsNotNone(softmax)

        loss = layers.softmax_with_cross_entropy(x, y)
        self.assertIsNotNone(loss)
        print(str(program))
def dynamic(train_data, use_cuda=False, use_parallel_exe=False):
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        fluid.default_startup_program().random_seed = SEED
        fluid.default_main_program().random_seed = SEED
        dy_layer = DygraphLayer()
        adam = fluid.optimizer.Adam(
            learning_rate=LR, parameter_list=dy_layer.parameters())
        sgd = fluid.optimizer.SGD(
            learning_rate=LR, parameter_list=dy_layer.parameters())

        for epoch in range(EPOCH_NUM):
            image_data, label = train_data[epoch]
            var_input = fluid.dygraph.to_variable(image_data)
            var_label = fluid.dygraph.to_variable(label)
            hidden, prediction = dy_layer(var_input)

            if epoch % 2 == 0:
                cross_entropy_loss = layers.cross_entropy(prediction, var_label)
                loss = layers.mean(cross_entropy_loss)
                loss.backward()
                adam.minimize(loss)
            else:
                softmax_loss = layers.softmax_with_cross_entropy(prediction, var_label)
                loss = layers.mean(softmax_loss)
                loss.backward()
                sgd.minimize(loss)

            dy_layer.clear_gradients()
        return hidden.numpy(), prediction.numpy(), loss.numpy()
def fn_2(opt, avg_loss=None, pred=None, label=None):
    if avg_loss is None:
        loss = layers.softmax_with_cross_entropy(logits=pred, label=label)
        avg_loss = layers.mean(loss, name='mean_softmax_loss')
    opt.minimize(avg_loss)
    return avg_loss
def infer(self, inputs, outputs):
    """Run model inference.

    Only support generation now.
    """
    if self.do_generation:
        return self.generator.inference(self, inputs, outputs)
    else:
        tgt_logits = self._calc_logits(outputs["enc_out"], inputs["tgt_idx"])
        tgt_lm_loss = layers.softmax_with_cross_entropy(
            logits=tgt_logits, label=inputs["tgt_label"])
        lm_loss = layers.fill_constant_batch_size_like(
            outputs["enc_out"], [-1], self.dtype, 0)
        lm_loss = layers.scatter(
            lm_loss, inputs["tgt_idx"][:, 0], tgt_lm_loss[:, 0], overwrite=False)
        tokens_num = layers.fill_constant_batch_size_like(
            outputs["enc_out"], [-1], self.dtype, 0)
        tgt_tokens_num = layers.fill_constant_batch_size_like(
            tgt_lm_loss, [-1], self.dtype, 1)
        tokens_num = layers.scatter(
            tokens_num, inputs["tgt_idx"][:, 0], tgt_tokens_num, overwrite=False)
        predictions = {
            "lm_loss": lm_loss,
            "tokens_num": tokens_num,
            "data_id": inputs["data_id"]
        }
        return predictions
def loss(ground_true, prediction):
    # ground_true: [batch_size, seq_len]
    # prediction: [batch_size, seq_len, vocab_size]
    ground_true = layers.unsqueeze(ground_true, axes=[2])
    ground_true.stop_gradient = True
    loss = layers.softmax_with_cross_entropy(prediction, ground_true, axis=-1)
    return loss
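# Hedged NumPy sketch (not part of the snippet above): the hard-label case of
# softmax_with_cross_entropy, i.e. loss[b, t] = -log(softmax(prediction[b, t])[label[b, t]]).
# The label tensor needs a trailing dimension of size 1, which is why the
# function above unsqueezes ground_true. Names and shapes are illustrative.
import numpy as np

batch, seq_len, vocab = 2, 3, 7
prediction = np.random.randn(batch, seq_len, vocab).astype("float32")
ground_true = np.random.randint(0, vocab, size=(batch, seq_len))

# numerically stable log-softmax over the vocab axis
shifted = prediction - prediction.max(axis=-1, keepdims=True)
log_probs = shifted - np.log(np.exp(shifted).sum(axis=-1, keepdims=True))
# pick the log-probability of the true token; result shape [batch, seq_len, 1]
per_token_loss = -np.take_along_axis(log_probs, ground_true[..., None], axis=-1)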
def get_metrics(self, inputs, outputs): """Get metrics.""" metrics = {} pooled_out = self._get_pooled_output(outputs["enc_out"]) cls_logits = self._get_classifier_output(pooled_out, num_classes=self.num_classes, name="cls") cls_loss, cls_softmax = layers.softmax_with_cross_entropy( logits=cls_logits, label=inputs["label"], return_softmax=True) cls_acc = layers.accuracy(cls_softmax, inputs["label"]) mean_cls_loss = layers.mean(cls_loss) metrics["loss"] = mean_cls_loss metrics["cls_loss"] = mean_cls_loss metrics["cls_acc"] = cls_acc # statistics for recall & precision & f1 if self.num_classes == 2: pred = layers.argmax(cls_softmax, axis=1) label = layers.squeeze(inputs["label"], axes=[1]) metrics["stat_tp"] = layers.reduce_sum( layers.logical_and(pred == 1, label == 1).astype("float32")) metrics["stat_fp"] = layers.reduce_sum( layers.logical_and(pred == 1, label == 0).astype("float32")) metrics["stat_tn"] = layers.reduce_sum( layers.logical_and(pred == 0, label == 0).astype("float32")) metrics["stat_fn"] = layers.reduce_sum( layers.logical_and(pred == 0, label == 1).astype("float32")) return metrics
def forward(self, *args, **kwargs):
    """
    Args:
        labels (optional, `Variable` of shape [batch_size, seq_len]):
            ground truth label id for each token
    Returns:
        loss (`Variable` of shape []):
            cross entropy loss averaged over batch and time;
            returns None if `labels` is not given
        logits (`Variable` of shape [batch_size, seq_len, num_labels]):
            output logits of the classifier
    """
    labels = kwargs.pop('labels', None)
    pooled, encoded = super(ErnieModelForTokenClassification, self).forward(*args, **kwargs)
    hidden = self.dropout(encoded)  # maybe not?
    logits = self.classifier(hidden)

    if labels is not None:
        if len(labels.shape) == 2:
            labels = L.unsqueeze(labels, axes=[-1])
        loss = L.softmax_with_cross_entropy(logits, labels)
        loss = L.reduce_mean(loss)
    else:
        loss = None
    return loss, logits
def loss(self, predictions, labels):
    labels = L.softmax(labels)
    loss = L.softmax_with_cross_entropy(predictions, labels, soft_label=True)
    loss = L.mean(loss)
    return loss
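# Hedged NumPy sketch (illustrative, not from the snippet above): what
# softmax_with_cross_entropy computes with soft_label=True, i.e.
# loss_i = -sum_j q_ij * log(softmax(logits_i)_j), where q is the soft target
# distribution (here obtained by softmax-ing the raw labels, as in loss() above).
# Shapes and names are assumptions for the example only.
import numpy as np

def soft_label_ce(logits, soft_targets):
    # stable log-softmax over the class axis
    shifted = logits - logits.max(axis=-1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=-1, keepdims=True))
    return -(soft_targets * log_probs).sum(axis=-1, keepdims=True)

logits = np.random.randn(4, 10).astype("float32")      # [batch, num_classes]
raw_labels = np.random.randn(4, 10).astype("float32")  # same shape as logits
soft_targets = np.exp(raw_labels) / np.exp(raw_labels).sum(axis=-1, keepdims=True)
mean_loss = soft_label_ce(logits, soft_targets).mean()  # scalar, like L.mean above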
def transformer(
        src_vocab_size,
        trg_vocab_size,
        max_length,
        n_layer,
        n_head,
        d_key,
        d_value,
        d_model,
        d_inner_hid,
        dropout_rate,
        label_smooth_eps):
    enc_inputs = make_all_inputs(encoder_data_input_fields +
                                 encoder_util_input_fields)
    enc_output = wrap_encoder(
        src_vocab_size,
        max_length,
        n_layer,
        n_head,
        d_key,
        d_value,
        d_model,
        d_inner_hid,
        dropout_rate,
        enc_inputs)

    dec_inputs = make_all_inputs(decoder_data_input_fields[:-1] +
                                 decoder_util_input_fields)
    predict = wrap_decoder(
        trg_vocab_size,
        max_length,
        n_layer,
        n_head,
        d_key,
        d_value,
        d_model,
        d_inner_hid,
        dropout_rate,
        dec_inputs,
        enc_output)

    # Padding indices do not contribute to the total loss. The weights are used
    # to cancel out padding indices when calculating the loss.
    label, weights = make_all_inputs(label_data_input_fields)
    if label_smooth_eps:
        label = layers.label_smooth(
            label=layers.one_hot(
                input=label, depth=trg_vocab_size),
            epsilon=label_smooth_eps)
    cost = layers.softmax_with_cross_entropy(
        logits=predict,
        label=label,
        soft_label=True if label_smooth_eps else False)
    # cost = layers.softmax_with_cross_entropy(logits=predict, label=gold)
    weighted_cost = cost * weights
    sum_cost = layers.reduce_sum(weighted_cost)
    token_num = layers.reduce_sum(weights)
    avg_cost = sum_cost / token_num
    return sum_cost, avg_cost, predict, token_num
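# Hedged NumPy sketch (illustrative only): what layers.label_smooth does with
# the default uniform prior. The smoothed targets are
# (1 - eps) * one_hot + eps / num_classes, which is why the transformer above
# then calls softmax_with_cross_entropy with soft_label=True. All names and
# sizes here are example assumptions.
import numpy as np

eps = 0.1
num_classes = 5
label_ids = np.array([2, 0, 4])                       # [batch]
one_hot = np.eye(num_classes)[label_ids]              # [batch, num_classes]
smoothed = (1.0 - eps) * one_hot + eps / num_classes  # soft targets, each row sums to 1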
def forward(self): """forward""" features_list = [self.gw.node_feat["attr"]] for i in range(self.num_layers): h = gin(self.gw, features_list[i], hidden_size=self.hidden_size, activation="relu", name="gin_%s" % (i), init_eps=0.0, train_eps=self.train_eps) h = fl.batch_norm(h) h = fl.relu(h) features_list.append(h) output = 0 for i, h in enumerate(features_list): pooled_h = pgl.layers.graph_pooling(self.gw, h, self.pool_type) drop_h = fl.dropout(pooled_h, self.dropout_prob, dropout_implementation="upscale_in_train") output += fl.fc(drop_h, size=self.num_class, act=None, param_attr=fluid.ParamAttr(name="final_fc_%s" % (i))) # calculate loss self.loss = fl.softmax_with_cross_entropy(output, self.labels) self.loss = fl.reduce_mean(self.loss) self.acc = fl.accuracy(fl.softmax(output), self.labels)
def forward(self, *args, **kwargs):
    """
    Args:
        labels (optional, `Variable` of shape [batch_size]):
            ground truth label id for each sentence
    Returns:
        loss (`Variable` of shape []):
            cross entropy loss averaged over the batch;
            returns None if `labels` is not given
        logits (`Variable` of shape [batch_size, num_labels]):
            output logits of the classifier
    """
    labels = kwargs.pop('labels', None)
    pooled, encoded = super(ErnieModelForSequenceClassification, self).forward(*args, **kwargs)
    hidden = self.dropout(pooled)
    logits = self.classifier(hidden)

    if labels is not None:
        if len(labels.shape) == 1:
            labels = L.reshape(labels, [-1, 1])
        loss = L.softmax_with_cross_entropy(logits, labels)
        loss = L.reduce_mean(loss)
    else:
        loss = None
    return loss, logits
def forward(self, enc_inputs, dec_inputs, label, weights):
    """forward

    :param enc_inputs:
    :param dec_inputs:
    :param label:
    :param weights:
    :return:
    """
    enc_output = self._wrap_encoder_layer(enc_inputs)
    predict = self._wrap_decoder_layer(dec_inputs, enc_output)
    if self._label_smooth_eps:
        label_out = layers.label_smooth(
            label=layers.one_hot(input=label, depth=self._trg_vocab_size),
            epsilon=self._label_smooth_eps)
    else:
        label_out = label

    cost = layers.softmax_with_cross_entropy(
        logits=predict,
        label=label_out,
        soft_label=True if self._label_smooth_eps else False)
    weighted_cost = cost * weights
    sum_cost = layers.reduce_sum(weighted_cost)
    token_num = layers.reduce_sum(weights)
    token_num.stop_gradient = True
    avg_cost = sum_cost / token_num
    return sum_cost, avg_cost, predict, token_num
def forward(self, is_test=False):
    """
    Build the network.
    """
    graph_wrapper = GraphWrapper(
        name="graph",
        node_feat=[
            ('atom_type', [None, 1], "int64"),
            ('chirality_tag', [None, 1], "int64")],
        edge_feat=[
            ('bond_type', [None, 1], "int64"),
            ('bond_direction', [None, 1], "int64")])

    masked_node_indice = layers.data(
        name="masked_node_indice", shape=[-1, 1], dtype="int64")
    masked_node_label = layers.data(
        name="masked_node_label", shape=[-1, 1], dtype="int64")

    node_repr = self.gnn_model.forward(graph_wrapper, is_test=is_test)
    masked_node_repr = layers.gather(node_repr, masked_node_indice)
    logits = layers.fc(masked_node_repr,
                       size=len(CompoundConstants.atom_num_list),
                       name="masked_node_logits")

    loss, pred = layers.softmax_with_cross_entropy(
        logits, masked_node_label, return_softmax=True)
    loss = layers.reduce_mean(loss)
    acc = layers.accuracy(pred, masked_node_label)

    self.graph_wrapper = graph_wrapper
    self.loss = loss
def loss(self, predictions, labels):
    ce_loss, probs = L.softmax_with_cross_entropy(
        logits=predictions, label=labels, return_softmax=True)
    # L.Print(ce_loss, message='per_example_loss')
    loss = L.mean(x=ce_loss)
    return loss
def forward(self, src_ids, *args, **kwargs):
    tgt_labels = kwargs.pop('tgt_labels', None)
    tgt_pos = kwargs.pop('tgt_pos', None)
    encode_only = kwargs.pop('encode_only', False)
    _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs)
    # log.debug('hidden_-1 %r' % L.reduce_mean(info['hiddens'][0]).numpy())
    # log.debug('hidden_0 %r' % L.reduce_mean(info['hiddens'][1]).numpy())
    if encode_only:
        return None, None, info
    elif tgt_labels is None:
        encoded = self.mlm(encoded)
        encoded = self.mlm_ln(encoded)
        logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias
        output_ids = L.argmax(logits, -1)
        return output_ids, logits, info
    else:
        encoded_2d = L.gather_nd(encoded, tgt_pos)
        # log.debug('input shape %s' % repr(src_ids.shape))
        # log.debug(L.gather_nd(src_ids, tgt_pos).numpy())
        encoded_2d = self.mlm(encoded_2d)
        encoded_2d = self.mlm_ln(encoded_2d)
        logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
        if len(tgt_labels.shape) == 1:
            tgt_labels = L.reshape(tgt_labels, [-1, 1])
        loss = L.reduce_mean(
            L.softmax_with_cross_entropy(
                logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1)))
        return loss, logits_2d, info
def train_program(self):
    label = F.data(name="label", shape=[None, 1], dtype="int64")
    train_idx = F.data(name='train_idx', shape=[None], dtype="int64")
    prediction = L.gather(self.out_feat, train_idx, overwrite=False)
    label = L.gather(label, train_idx, overwrite=False)
    cost = L.softmax_with_cross_entropy(logits=prediction, label=label)
    avg_cost = L.mean(cost)
    self.avg_cost = avg_cost
def test_softmax_with_cross_entropy(self):
    program = Program()
    with program_guard(program):
        x = layers.data(name='x', shape=[16], dtype='float32')
        y = layers.data(name='label', shape=[1], dtype='int64')
        loss = layers.softmax_with_cross_entropy(x, y)
        self.assertIsNotNone(loss)
        print(str(program))
def forward(self, *inputs, **kwargs):
    labels = kwargs.pop('labels', None)
    logits = super(MoCo, self).forward(*inputs, **kwargs)
    if len(labels.shape) == 1:
        labels = L.reshape(labels, [-1, 1])
    loss = L.softmax_with_cross_entropy(logits, labels)
    loss = L.reduce_mean(loss)
    return loss, logits
def matrixwise_loss(self):
    """listwise model"""
    self.logits = L.matmul(
        self.query_repr, self.poi_repr, transpose_y=True)
    self.score = L.softmax(self.logits)
    self.loss = L.softmax_with_cross_entropy(self.logits, self.labels)
    self.loss = L.reduce_mean(self.loss)
    self.acc = L.accuracy(L.softmax(self.logits), self.labels)
    self.metrics = [self.loss, self.acc]
def loss(self, predictions, labels):
    logits, input_seqlen = predictions
    logits = L.flatten(logits, axis=2)
    labels = L.flatten(labels, axis=2)
    ce_loss, probs = L.softmax_with_cross_entropy(
        logits=logits, label=labels, return_softmax=True)
    loss = L.mean(x=ce_loss)
    return loss
def create_model(self, decoding=False):
    """create model for training"""
    if decoding:
        return self.fast_decode()

    if self.task_type == "dialog":
        emb_num = 4
    else:
        emb_num = 3
    input_shapes = [[-1, self.max_seq_len, 1]] * emb_num + \
                   [[-1, self.max_seq_len, self.max_seq_len]]
    input_dtypes = ['int64'] * emb_num + ['float32']
    input_lod_levels = [0] * emb_num + [0]

    shapes = input_shapes + [[-1, 1], [-1, 1]]
    dtypes = input_dtypes + ['int64', 'int64']
    lod_levels = input_lod_levels + [0, 0]

    inputs = self.to_tensor(shapes, dtypes, lod_levels)
    pyreader = fluid.io.DataLoader.from_generator(
        feed_list=inputs, capacity=70, iterable=False)

    emb_ids = {}
    for key, value in zip(self.emb_keys, inputs[:emb_num]):
        emb_ids[key] = value

    # for embeddings
    # src_ids, sent_ids, pos_ids = inputs[:emb_num]
    input_mask = inputs[emb_num]
    tgt_labels, tgt_pos = inputs[-2:]

    unimo = UNIMOModel(
        emb_ids=emb_ids,
        input_mask=input_mask,
        config=self.gene_config,
        task_type=self.task_type)

    enc_out = unimo.get_sequence_output()
    fc_out = self.cal_logit(enc_out, tgt_pos)

    if self.label_smooth:
        out_size = self.gene_config['vocab_size']
        labels = fluid.layers.label_smooth(
            label=fluid.layers.one_hot(
                input=tgt_labels, depth=out_size),
            epsilon=self.label_smooth)
        ce_loss = layers.softmax_with_cross_entropy(
            logits=fc_out, label=labels, soft_label=True)
    else:
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=fc_out, label=tgt_labels, return_softmax=True)

    loss = fluid.layers.mean(x=ce_loss)
    graph_vars = {"loss": loss}
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
def _get_metrics(self, inputs, outputs):
    metrics = {}
    fc_out = self._calc_logits(outputs["enc_out"], outputs["checkpoints"],
                               inputs["tgt_pos"])
    tgt_lm_loss = layers.softmax_with_cross_entropy(
        logits=fc_out, label=inputs["tgt_label"])
    mean_tgt_lm_loss = layers.mean(tgt_lm_loss)
    loss = mean_tgt_lm_loss
    metrics["token_lm_loss"] = mean_tgt_lm_loss
    metrics["loss"] = loss
    return metrics
def factory(cls, config):
    loss_type = config.loss_type
    if loss_type == "hinge":
        return HingeLoss(config)
    elif loss_type == "global_hinge":
        return GlobalHingeLoss(config)
    elif loss_type == "softmax_with_cross_entropy":
        return lambda logits, label: L.reduce_mean(
            L.softmax_with_cross_entropy(logits, label))
    else:
        raise ValueError
def forward(self, outputs, labels):
    predict, (trg_length, label) = outputs[0], labels
    # for target padding mask
    mask = layers.sequence_mask(
        trg_length, maxlen=layers.shape(predict)[1], dtype=predict.dtype)

    cost = layers.softmax_with_cross_entropy(
        logits=predict, label=label, soft_label=False)
    masked_cost = layers.elementwise_mul(cost, mask, axis=0)
    batch_mean_cost = layers.reduce_mean(masked_cost, dim=[0])
    seq_cost = layers.reduce_sum(batch_mean_cost)
    return seq_cost
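# Hedged NumPy sketch (illustrative only): how the padding mask above zeroes
# out per-token losses before reduction. sequence_mask(trg_length, maxlen=T)
# yields mask[i, t] = 1 if t < trg_length[i] else 0. Shapes are assumptions.
import numpy as np

per_token_loss = np.random.rand(2, 5).astype("float32")  # [batch, max_len], e.g. CE per token
trg_length = np.array([3, 5])                             # true target lengths per example
mask = (np.arange(5)[None, :] < trg_length[:, None]).astype("float32")
masked = per_token_loss * mask                            # padded positions contribute 0
seq_cost = masked.mean(axis=0).sum()                      # mean over batch, sum over time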
def get_metrics(self, inputs, outputs): """Get metrics.""" metrics = {} tgt_logits = self._calc_logits(outputs["enc_out"], inputs["tgt_idx"]) tgt_lm_loss = layers.softmax_with_cross_entropy( logits=tgt_logits, label=inputs["tgt_label"]) mean_tgt_lm_loss = layers.mean(tgt_lm_loss) metrics["token_lm_loss"] = mean_tgt_lm_loss loss = mean_tgt_lm_loss metrics["loss"] = loss return metrics
def def_seq2seq_model(num_layers, hidden_size, dropout_prob, src_vocab_size,
                      trg_vocab_size):
    "vanilla seq2seq model"
    # data
    source = fluid.data(name="src", shape=[None, None], dtype="int64")
    source_length = fluid.data(
        name="src_sequence_length", shape=[None], dtype="int64")
    target = fluid.data(name="trg", shape=[None, None], dtype="int64")
    target_length = fluid.data(
        name="trg_sequence_length", shape=[None], dtype="int64")
    label = fluid.data(name="label", shape=[None, None, 1], dtype="int64")

    # embedding
    src_emb = fluid.embedding(source, (src_vocab_size, hidden_size))
    tar_emb = fluid.embedding(target, (trg_vocab_size, hidden_size))

    # encoder
    enc_cell = EncoderCell(num_layers, hidden_size, dropout_prob)
    enc_output, enc_final_state = dynamic_rnn(
        cell=enc_cell, inputs=src_emb, sequence_length=source_length)

    # decoder
    dec_cell = DecoderCell(num_layers, hidden_size, dropout_prob)
    dec_output, dec_final_state = dynamic_rnn(
        cell=dec_cell, inputs=tar_emb, initial_states=enc_final_state)
    logits = layers.fc(dec_output,
                       size=trg_vocab_size,
                       num_flatten_dims=len(dec_output.shape) - 1,
                       bias_attr=False)

    # loss
    loss = layers.softmax_with_cross_entropy(
        logits=logits, label=label, soft_label=False)
    loss = layers.squeeze(loss, axes=[2])
    max_tar_seq_len = layers.shape(target)[1]
    tar_mask = layers.sequence_mask(
        target_length, maxlen=max_tar_seq_len, dtype="float32")
    loss = loss * tar_mask
    loss = layers.reduce_mean(loss, dim=[0])
    loss = layers.reduce_sum(loss)

    # optimizer
    optimizer = fluid.optimizer.Adam(0.001)
    optimizer.minimize(loss)
    return loss
def _compute_loss(self, dec_output):
    loss = layers.softmax_with_cross_entropy(
        logits=dec_output, label=self.label, soft_label=False)
    loss = layers.squeeze(loss, axes=[2])
    max_tar_seq_len = layers.shape(self.tar)[1]
    tar_mask = layers.sequence_mask(
        self.tar_sequence_length, maxlen=max_tar_seq_len, dtype='float32')
    loss = loss * tar_mask
    loss = layers.reduce_mean(loss, dim=[0])
    loss = layers.reduce_sum(loss)
    return loss
def _compute_loss(self, pred):
    no_grad_set = []
    label = layers.cast(self.label, dtype="int64")
    label = layers.reshape(label, [-1, 1])
    pred = layers.reshape(pred, [-1, 2])
    no_grad_set.append(label.name)
    loss = layers.softmax_with_cross_entropy(pred, label)
    loss = layers.reshape(loss, shape=[self.batch_size, -1])
    loss = layers.reduce_mean(loss)
    return loss, no_grad_set