Example No. 1
    def get_metrics(self, inputs, outputs):
        """Get metrics."""
        metrics = {}
        tgt_logits = self._calc_logits(outputs["enc_out"], inputs["tgt_idx"])
        lm_loss = layers.softmax_with_cross_entropy(logits=tgt_logits, label=inputs["tgt_label"])
        need_cal = layers.not_equal(
            inputs["tgt_label"], layers.fill_constant(shape=[1], dtype="int64", value=1)
        )
        need_cal = layers.cast(need_cal, self.dtype)
        mean_lm_loss = layers.reduce_sum(lm_loss * need_cal) / (layers.reduce_sum(need_cal) + 1e-10)

        pooled_out = self._get_pooled_output(outputs["enc_out"], inputs["label_idx"])
        nsp_logits = self._get_classifier_output(pooled_out, name="next_sent")
        nsp_loss, nsp_softmax = layers.softmax_with_cross_entropy(
            logits=nsp_logits, label=inputs["label"], return_softmax=True)

        nsp_acc = layers.accuracy(nsp_softmax, inputs["label"])
        mean_nsp_loss = layers.mean(nsp_loss)

        loss = mean_nsp_loss
        if self.use_mlm:
            loss = loss + mean_lm_loss
            metrics["token_lm_loss"] = mean_lm_loss
        metrics["loss"] = loss
        metrics["nsp_loss"] = mean_nsp_loss
        metrics["nsp_acc"] = nsp_acc
        return metrics
Example No. 2
    def forward(self, *args, **kwargs):
        """
        Args:
            start_pos (optional, `Variable` of shape [batch_size]): 
                token index of start of answer span in `context`
            end_pos (optional, `Variable` of shape [batch_size]): 
                token index of end of answer span in `context`
        Returns:
            loss (`Variable` of shape []):
                Cross-entropy loss averaged over batch and time; positions where label == -100 are ignored.
                Returns None if start_pos/end_pos are not given.
            start_logits (`Variable` of shape [batch_size, seq_len]):
                output logits of the start position; use argmax(start_logits) to get the start index
            end_logits (`Variable` of shape [batch_size, seq_len]):
                output logits of the end position; use argmax(end_logits) to get the end index
        """

        start_pos = kwargs.pop('start_pos', None)
        end_pos = kwargs.pop('end_pos', None)
        pooled, encoded = super(ErnieModelForQuestionAnswering,
                                self).forward(*args, **kwargs)
        encoded = self.dropout(encoded)
        encoded = self.classifier(encoded)
        start_logit, end_logits = L.unstack(encoded, axis=-1)
        if start_pos is not None and end_pos is not None:
            if len(start_pos.shape) == 1:
                start_pos = L.unsqueeze(start_pos, axes=[-1])
            if len(end_pos.shape) == 1:
                end_pos = L.unsqueeze(end_pos, axes=[-1])
            start_loss = L.softmax_with_cross_entropy(start_logit, start_pos)
            end_loss = L.softmax_with_cross_entropy(end_logits, end_pos)
            loss = (L.reduce_mean(start_loss) + L.reduce_mean(end_loss)) / 2.
        else:
            loss = None
        return loss, start_logit, end_logits
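The docstring above says to take the argmax over the logits to recover the answer span. A minimal, self-contained sketch of that decoding step with random stand-in logits (the shapes are the documented ones; the data and sizes are made up, not from the source):

import numpy as np

# Stand-in logits with batch_size=2, seq_len=8; in real use these come from the
# forward pass above: `loss, start_logits, end_logits = model(...)`.
start_logits = np.random.rand(2, 8).astype('float32')
end_logits = np.random.rand(2, 8).astype('float32')

start = np.argmax(start_logits, axis=-1)  # [batch_size] predicted start token indices
end = np.argmax(end_logits, axis=-1)      # [batch_size] predicted end token indices
spans = list(zip(start.tolist(), end.tolist()))  # (start, end) index pairs per example
print(spans)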
Example No. 3
    def _get_metrics(self, inputs, outputs):
        metrics = {}
        fc_out = self._calc_logits(enc_out=outputs["enc_out"],
                                   seq_pos=inputs["tgt_pos"])
        #fc_out = self._calc_logits(outputs["enc_out"], outputs["checkpoints"], inputs["tgt_pos"])
        lm_loss = layers.softmax_with_cross_entropy(logits=fc_out,
                                                    label=inputs["tgt_label"])
        need_cal = layers.not_equal(
            inputs["tgt_label"],
            layers.fill_constant(shape=[1], dtype="int64", value=1))
        need_cal = layers.cast(need_cal, self.dtype)
        mean_lm_loss = layers.reduce_sum(
            lm_loss * need_cal) / (layers.reduce_sum(need_cal) + 1e-10)

        pooled_out = self._get_pooled_output(outputs["enc_out"],
                                             inputs["label_pos"])
        nsp_fc_out = layers.fc(input=pooled_out,
                               size=2,
                               param_attr=fluid.ParamAttr(
                                   name="next_sent_fc.w_0",
                                   initializer=self.param_initializer),
                               bias_attr="next_sent_fc.b_0")
        nsp_loss, nsp_softmax = layers.softmax_with_cross_entropy(
            logits=nsp_fc_out, label=inputs["label"], return_softmax=True)

        nsp_acc = layers.accuracy(nsp_softmax, inputs["label"])
        mean_nsp_loss = layers.mean(nsp_loss)

        metrics["loss"] = mean_lm_loss + mean_nsp_loss
        metrics["lm_loss"] = mean_lm_loss
        metrics["nsp_loss"] = mean_nsp_loss
        metrics["nsp_acc"] = nsp_acc
        return metrics
Example No. 4
 def test_softmax_with_cross_entropy(self):
     program = Program()
     with program_guard(program):
         x = layers.data(name='x', shape=[16], dtype='float32')
         y = layers.data(name='label', shape=[1], dtype='int64')
         loss, softmax = layers.softmax_with_cross_entropy(
             x, y, return_softmax=True)
         self.assertIsNotNone(loss)
         self.assertIsNotNone(softmax)
         loss = layers.softmax_with_cross_entropy(x, y)
         self.assertIsNotNone(loss)
     print(str(program))
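The test above only constructs the static program. A minimal sketch of actually executing it, assuming Paddle 1.x static graph and that `program` and `loss` are still in scope right after the `with` block (the feed data is made up):

import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()
exe = fluid.Executor(place)
# The graph holds no parameters, so no startup program needs to be run; just feed data.
x_data = np.random.rand(4, 16).astype('float32')               # [batch_size, num_classes] logits
y_data = np.random.randint(0, 16, size=(4, 1)).astype('int64')  # class ids in [0, 16)
loss_val, = exe.run(program,
                    feed={'x': x_data, 'label': y_data},
                    fetch_list=[loss])
print(loss_val.shape)  # (4, 1): one cross-entropy value per example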
Example No. 5
def dynamic(train_data, use_cuda=False, use_parallel_exe=False):
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        fluid.default_startup_program().random_seed = SEED
        fluid.default_main_program().random_seed = SEED
        dy_layer = DygraphLayer()
        adam = fluid.optimizer.Adam(learning_rate=LR,
                                    parameter_list=dy_layer.parameters())
        sgd = fluid.optimizer.SGD(learning_rate=LR,
                                  parameter_list=dy_layer.parameters())

        for epoch in range(EPOCH_NUM):
            image_data, label = train_data[epoch]
            var_input = fluid.dygraph.to_variable(image_data)
            var_label = fluid.dygraph.to_variable(label)
            hidden, prediction = dy_layer(var_input)

            if epoch % 2 == 0:
                cross_entropy_loss = layers.cross_entropy(
                    prediction, var_label)
                loss = layers.mean(cross_entropy_loss)
                loss.backward()
                adam.minimize(loss)
            else:
                softmax_loss = layers.softmax_with_cross_entropy(
                    prediction, var_label)
                loss = layers.mean(softmax_loss)
                loss.backward()
                sgd.minimize(loss)

            dy_layer.clear_gradients()
        return hidden.numpy(), prediction.numpy(), loss.numpy()
Example No. 6
 def fn_2(opt, avg_loss=None, pred=None, label=None):
     if avg_loss is None:
         loss = layers.softmax_with_cross_entropy(logits=pred,
                                                  label=label)
         avg_loss = layers.mean(loss, name='mean_softmax_loss')
     opt.minimize(avg_loss)
     return avg_loss
Example No. 7
    def infer(self, inputs, outputs):
        """Run model inference.

        Only generation is supported for now.
        """
        if self.do_generation:
            return self.generator.inference(self, inputs, outputs)
        else:
            tgt_logits = self._calc_logits(outputs["enc_out"], inputs["tgt_idx"])
            tgt_lm_loss = layers.softmax_with_cross_entropy(
                logits=tgt_logits, label=inputs["tgt_label"])
            lm_loss = layers.fill_constant_batch_size_like(
                outputs["enc_out"], [-1], self.dtype, 0)
            lm_loss = layers.scatter(lm_loss, inputs["tgt_idx"][:, 0], tgt_lm_loss[:, 0], overwrite=False)
            tokens_num = layers.fill_constant_batch_size_like(
                outputs["enc_out"], [-1], self.dtype, 0)
            tgt_tokens_num = layers.fill_constant_batch_size_like(
                tgt_lm_loss, [-1], self.dtype, 1)
            tokens_num = layers.scatter(tokens_num, inputs["tgt_idx"][:, 0], tgt_tokens_num, overwrite=False)
            predictions = {
                "lm_loss": lm_loss,
                "tokens_num": tokens_num,
                "data_id": inputs["data_id"]
            }
            return predictions
Example No. 8
 def loss(ground_truth, prediction):
     # ground_truth: [batch_size, seq_len]
     # prediction: [batch_size, seq_len, vocab_size]
     ground_truth = layers.unsqueeze(ground_truth, axes=[2])
     ground_truth.stop_gradient = True
     loss = layers.softmax_with_cross_entropy(prediction, ground_truth, axis=-1)
     return loss
Example No. 9
    def get_metrics(self, inputs, outputs):
        """Get metrics."""
        metrics = {}
        pooled_out = self._get_pooled_output(outputs["enc_out"])
        cls_logits = self._get_classifier_output(pooled_out,
                                                 num_classes=self.num_classes,
                                                 name="cls")
        cls_loss, cls_softmax = layers.softmax_with_cross_entropy(
            logits=cls_logits, label=inputs["label"], return_softmax=True)

        cls_acc = layers.accuracy(cls_softmax, inputs["label"])
        mean_cls_loss = layers.mean(cls_loss)

        metrics["loss"] = mean_cls_loss
        metrics["cls_loss"] = mean_cls_loss
        metrics["cls_acc"] = cls_acc

        # statistics for recall & precision & f1
        if self.num_classes == 2:
            pred = layers.argmax(cls_softmax, axis=1)
            label = layers.squeeze(inputs["label"], axes=[1])
            metrics["stat_tp"] = layers.reduce_sum(
                layers.logical_and(pred == 1, label == 1).astype("float32"))
            metrics["stat_fp"] = layers.reduce_sum(
                layers.logical_and(pred == 1, label == 0).astype("float32"))
            metrics["stat_tn"] = layers.reduce_sum(
                layers.logical_and(pred == 0, label == 0).astype("float32"))
            metrics["stat_fn"] = layers.reduce_sum(
                layers.logical_and(pred == 0, label == 1).astype("float32"))
        return metrics
Example No. 10
    def forward(self, *args, **kwargs):
        """
        Args:
            labels (optional, `Variable` of shape [batch_size, seq_len]): 
                ground truth label id for each token
        Returns:
            loss (`Variable` of shape []):
                Cross-entropy loss averaged over batch and time; positions where label == -100 are ignored.
                Returns None if labels are not given.
            logits (`Variable` of shape [batch_size, seq_len, num_labels]):
                output logits of the classifier
        """

        labels = kwargs.pop('labels', None)
        pooled, encoded = super(ErnieModelForTokenClassification, self).forward(*args, **kwargs)
        hidden = self.dropout(encoded) # maybe not?
        logits = self.classifier(hidden)

        if labels is not None:
            if len(labels.shape) == 2:
                labels = L.unsqueeze(labels, axes=[-1])
            loss = L.softmax_with_cross_entropy(logits, labels)
            loss = L.reduce_mean(loss)
        else:
            loss = None
        return loss, logits
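The `label == -100` behavior mentioned in the docstring comes from the op's default `ignore_index`. A minimal, self-contained dygraph sketch of it (shapes and values are made up, not from the source):

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as L

with fluid.dygraph.guard():
    # [batch=2, seq_len=4, num_labels=3] logits and [batch, seq_len, 1] int64 labels,
    # with padded positions labeled -100.
    logits = fluid.dygraph.to_variable(np.random.rand(2, 4, 3).astype('float32'))
    labels = fluid.dygraph.to_variable(
        np.array([[[0], [1], [-100], [-100]],
                  [[2], [-100], [-100], [-100]]], dtype='int64'))
    loss = L.softmax_with_cross_entropy(logits, labels)  # ignore_index defaults to -100
    print(loss.numpy().shape)  # (2, 4, 1); entries at the ignored positions are 0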
Example No. 11
 def loss(self, predictions, labels):
     labels = L.softmax(labels)
     loss = L.softmax_with_cross_entropy(predictions,
                                         labels,
                                         soft_label=True)
     loss = L.mean(loss)
     return loss
Example No. 12
def transformer(
        src_vocab_size,
        trg_vocab_size,
        max_length,
        n_layer,
        n_head,
        d_key,
        d_value,
        d_model,
        d_inner_hid,
        dropout_rate,
        label_smooth_eps, ):
    enc_inputs = make_all_inputs(encoder_data_input_fields +
                                 encoder_util_input_fields)

    enc_output = wrap_encoder(
        src_vocab_size,
        max_length,
        n_layer,
        n_head,
        d_key,
        d_value,
        d_model,
        d_inner_hid,
        dropout_rate,
        enc_inputs, )
    dec_inputs = make_all_inputs(decoder_data_input_fields[:-1] +
                                 decoder_util_input_fields)

    predict = wrap_decoder(
        trg_vocab_size,
        max_length,
        n_layer,
        n_head,
        d_key,
        d_value,
        d_model,
        d_inner_hid,
        dropout_rate,
        dec_inputs,
        enc_output, )

    # Padding indices do not contribute to the total loss. The weights are used
    # to mask out padding indices when calculating the loss.
    label, weights = make_all_inputs(label_data_input_fields)
    if label_smooth_eps:
        label = layers.label_smooth(
            label=layers.one_hot(
                input=label, depth=trg_vocab_size),
            epsilon=label_smooth_eps)
    cost = layers.softmax_with_cross_entropy(
        logits=predict,
        label=label,
        soft_label=True if label_smooth_eps else False)
    # cost = layers.softmax_with_cross_entropy(logits=predict, label=gold)
    weighted_cost = cost * weights
    sum_cost = layers.reduce_sum(weighted_cost)
    token_num = layers.reduce_sum(weights)
    avg_cost = sum_cost / token_num
    return sum_cost, avg_cost, predict, token_num
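A minimal, self-contained sketch that isolates the label-smoothing + `soft_label=True` pattern used above (the vocabulary size and epsilon are made up):

import paddle.fluid as fluid
import paddle.fluid.layers as layers

vocab_size, eps = 100, 0.1
prog = fluid.Program()
with fluid.program_guard(prog):
    logits = fluid.data(name='logits', shape=[None, vocab_size], dtype='float32')
    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
    # Convert hard ids to a smoothed distribution, then use the soft-label form of the loss.
    smoothed = layers.label_smooth(
        label=layers.one_hot(input=label, depth=vocab_size), epsilon=eps)
    cost = layers.softmax_with_cross_entropy(
        logits=logits, label=smoothed, soft_label=True)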
Example No. 13
    def forward(self):
        """forward"""
        features_list = [self.gw.node_feat["attr"]]

        for i in range(self.num_layers):
            h = gin(self.gw,
                    features_list[i],
                    hidden_size=self.hidden_size,
                    activation="relu",
                    name="gin_%s" % (i),
                    init_eps=0.0,
                    train_eps=self.train_eps)

            h = fl.batch_norm(h)
            h = fl.relu(h)

            features_list.append(h)

        output = 0
        for i, h in enumerate(features_list):
            pooled_h = pgl.layers.graph_pooling(self.gw, h, self.pool_type)
            drop_h = fl.dropout(pooled_h,
                                self.dropout_prob,
                                dropout_implementation="upscale_in_train")
            output += fl.fc(drop_h,
                            size=self.num_class,
                            act=None,
                            param_attr=fluid.ParamAttr(name="final_fc_%s" %
                                                       (i)))

        # calculate loss
        self.loss = fl.softmax_with_cross_entropy(output, self.labels)
        self.loss = fl.reduce_mean(self.loss)
        self.acc = fl.accuracy(fl.softmax(output), self.labels)
Example No. 14
    def forward(self, *args, **kwargs):
        """
        Args:
            labels (optional, `Variable` of shape [batch_size]): 
                ground truth label id for each sentence
        Returns:
            loss (`Variable` of shape []):
                Cross-entropy loss averaged over the batch.
                Returns None if labels are not given.
            logits (`Variable` of shape [batch_size, num_labels]):
                output logits of the classifier
        """
        labels = kwargs.pop('labels', None)
        pooled, encoded = super(ErnieModelForSequenceClassification,
                                self).forward(*args, **kwargs)
        hidden = self.dropout(pooled)
        logits = self.classifier(hidden)

        if labels is not None:
            if len(labels.shape) == 1:
                labels = L.reshape(labels, [-1, 1])
            loss = L.softmax_with_cross_entropy(logits, labels)
            loss = L.reduce_mean(loss)
        else:
            loss = None
        return loss, logits
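A minimal, self-contained dygraph sketch of the label handling and loss in the branch above, using stand-in classifier logits (batch size, label count, and values are made up):

import numpy as np
import paddle.fluid.dygraph as D
import paddle.fluid.layers as L

with D.guard():
    # Stand-in logits for batch_size=3, num_labels=2, and 1-D int64 labels.
    logits = D.to_variable(np.random.rand(3, 2).astype('float32'))
    labels = D.to_variable(np.array([0, 1, 1], dtype='int64'))
    if len(labels.shape) == 1:
        labels = L.reshape(labels, [-1, 1])
    loss = L.reduce_mean(L.softmax_with_cross_entropy(logits, labels))
    print(loss.numpy())  # scalar mean cross-entropy over the batch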
Example No. 15
    def forward(self, enc_inputs, dec_inputs, label, weights):
        """
        forward
        :param enc_inputs:
        :param dec_inputs:
        :param label:
        :param weights:
        :return:
        """
        enc_output = self._wrap_encoder_layer(enc_inputs)
        predict = self._wrap_decoder_layer(dec_inputs, enc_output)
        if self._label_smooth_eps:
            label_out = layers.label_smooth(label=layers.one_hot(
                input=label, depth=self._trg_vocab_size),
                                            epsilon=self._label_smooth_eps)
        else:
            label_out = label

        cost = layers.softmax_with_cross_entropy(
            logits=predict,
            label=label_out,
            soft_label=True if self._label_smooth_eps else False)
        weighted_cost = cost * weights
        sum_cost = layers.reduce_sum(weighted_cost)
        token_num = layers.reduce_sum(weights)
        token_num.stop_gradient = True
        avg_cost = sum_cost / token_num
        return sum_cost, avg_cost, predict, token_num
Example No. 16
    def forward(self, is_test=False):
        """
        Build the network.
        """
        graph_wrapper = GraphWrapper(name="graph",
                node_feat=[
                    ('atom_type', [None, 1], "int64"), 
                    ('chirality_tag', [None, 1], "int64")],
                edge_feat=[
                    ('bond_type', [None, 1], "int64"),
                    ('bond_direction', [None, 1], "int64")])
        masked_node_indice = layers.data(name="masked_node_indice", shape=[-1, 1], dtype="int64")
        masked_node_label = layers.data(name="masked_node_label", shape=[-1, 1], dtype="int64")

        node_repr = self.gnn_model.forward(graph_wrapper, is_test=is_test)
        masked_node_repr = layers.gather(node_repr, masked_node_indice)
        logits = layers.fc(masked_node_repr, 
                size=len(CompoundConstants.atom_num_list),
                name="masked_node_logits")

        loss, pred = layers.softmax_with_cross_entropy(
                logits, masked_node_label, return_softmax=True)
        loss = layers.reduce_mean(loss)
        acc = layers.accuracy(pred, masked_node_label)

        self.graph_wrapper = graph_wrapper
        self.loss = loss
Example No. 17
 def loss(self, predictions, labels):
     ce_loss, probs = L.softmax_with_cross_entropy(logits=predictions,
                                                   label=labels,
                                                   return_softmax=True)
     #L.Print(ce_loss, message='per_example_loss')
     loss = L.mean(x=ce_loss)
     return loss
Example No. 18
 def forward(self, src_ids, *args, **kwargs):
     tgt_labels = kwargs.pop('tgt_labels', None)
     tgt_pos = kwargs.pop('tgt_pos', None)
     encode_only = kwargs.pop('encode_only', False)
     _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs)
     #log.debug('hidden_-1 %r'% L.reduce_mean(info['hiddens'][0]).numpy())
     #log.debug('hidden_0 %r'% L.reduce_mean(info['hiddens'][1]).numpy())
     if encode_only:
         return None, None, info
     elif tgt_labels is None:
         encoded = self.mlm(encoded)
         encoded = self.mlm_ln(encoded)
         logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias
         output_ids = L.argmax(logits, -1)
         return output_ids, logits, info
     else:
         encoded_2d = L.gather_nd(encoded, tgt_pos)
         #log.debug('input shape %s' % repr(src_ids.shape))
         #log.debug(L.gather_nd(src_ids, tgt_pos).numpy())
         encoded_2d = self.mlm(encoded_2d)
         encoded_2d = self.mlm_ln(encoded_2d)
         logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
         if len(tgt_labels.shape) == 1:
             tgt_labels = L.reshape(tgt_labels, [-1, 1])
         
         loss = L.reduce_mean(
                 L.softmax_with_cross_entropy(logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1))
                 )
         return loss, logits_2d, info
Example No. 19
 def train_program(self, ):
     label = F.data(name="label", shape=[None, 1], dtype="int64")
     train_idx = F.data(name='train_idx', shape=[None], dtype="int64")
     prediction = L.gather(self.out_feat, train_idx, overwrite=False)
     label = L.gather(label, train_idx, overwrite=False)
     cost = L.softmax_with_cross_entropy(logits=prediction, label=label)
     avg_cost = L.mean(cost)
     self.avg_cost = avg_cost
Example No. 20
 def test_softmax_with_cross_entropy(self):
     program = Program()
     with program_guard(program):
         x = layers.data(name='x', shape=[16], dtype='float32')
         y = layers.data(name='label', shape=[1], dtype='int64')
         loss = layers.softmax_with_cross_entropy(x, y)
         self.assertIsNotNone(loss)
     print(str(program))
Example No. 21
 def forward(self, *inputs, **kwargs):
     labels = kwargs.pop('labels', None)
     logits = super(MoCo, self).forward(*inputs, **kwargs)
     if labels is not None:
         if len(labels.shape) == 1:
             labels = L.reshape(labels, [-1, 1])
         loss = L.softmax_with_cross_entropy(logits, labels)
         loss = L.reduce_mean(loss)
     else:
         loss = None
     return loss, logits
Example No. 22
 def matrixwise_loss(self):
     """listwise model"""
     self.logits = L.matmul(
         self.query_repr, self.poi_repr, transpose_y=True)
     self.score = L.softmax(self.logits)
     self.loss = L.softmax_with_cross_entropy(self.logits, self.labels)
     self.loss = L.reduce_mean(self.loss)
     self.acc = L.accuracy(L.softmax(self.logits), self.labels)
     self.metrics = [self.loss, self.acc]
Example No. 23
 def loss(self, predictions, labels):
     logits, input_seqlen = predictions
     logits = L.flatten(logits, axis=2)
     labels = L.flatten(labels, axis=2)
     ce_loss, probs = L.softmax_with_cross_entropy(logits=logits,
                                                   label=labels,
                                                   return_softmax=True)
     loss = L.mean(x=ce_loss)
     return loss
Example No. 24
    def create_model(self, decoding=False):
        """create model for training"""
        if decoding:
            return self.fast_decode()

        if self.task_type == "dialog":
            emb_num = 4
        else:
            emb_num = 3
        input_shapes = [[-1, self.max_seq_len, 1]] * emb_num + \
                       [[-1, self.max_seq_len, self.max_seq_len]]
        input_dtypes = ['int64'] * emb_num + ['float32']
        input_lod_levels = [0] * emb_num + [0]
        shapes = input_shapes + [[-1, 1], [-1, 1]]
        dtypes = input_dtypes + ['int64', 'int64']
        lod_levels = input_lod_levels + [0, 0]

        inputs = self.to_tensor(shapes, dtypes, lod_levels)
        pyreader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                      capacity=70,
                                                      iterable=False)

        emb_ids = {}
        for key, value in zip(self.emb_keys, inputs[:emb_num]):
            emb_ids[key] = value  # for embeddings

        # src_ids, sent_ids, pos_ids = inputs[:emb_num]
        input_mask = inputs[emb_num]
        tgt_labels, tgt_pos = inputs[-2:]

        unimo = UNIMOModel(emb_ids=emb_ids,
                           input_mask=input_mask,
                           config=self.gene_config,
                           task_type=self.task_type)

        enc_out = unimo.get_sequence_output()
        fc_out = self.cal_logit(enc_out, tgt_pos)

        if self.label_smooth:
            out_size = self.gene_config['vocab_size']
            labels = fluid.layers.label_smooth(label=fluid.layers.one_hot(
                input=tgt_labels, depth=out_size),
                                               epsilon=self.label_smooth)

            ce_loss = layers.softmax_with_cross_entropy(logits=fc_out,
                                                        label=labels,
                                                        soft_label=True)
        else:
            ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
                logits=fc_out, label=tgt_labels, return_softmax=True)

        loss = fluid.layers.mean(x=ce_loss)
        graph_vars = {"loss": loss}
        for k, v in graph_vars.items():
            v.persistable = True

        return pyreader, graph_vars
Example No. 25
    def _get_metrics(self, inputs, outputs):
        metrics = {}

        fc_out = self._calc_logits(outputs["enc_out"], outputs["checkpoints"], inputs["tgt_pos"])
        tgt_lm_loss = layers.softmax_with_cross_entropy(logits=fc_out, label=inputs["tgt_label"])
        mean_tgt_lm_loss = layers.mean(tgt_lm_loss)
        loss = mean_tgt_lm_loss
        metrics["token_lm_loss"] = mean_tgt_lm_loss

        metrics["loss"] = loss
        return metrics
Example No. 26
File: loss.py Project: zzs95/PGL
 def factory(cls, config):
     loss_type = config.loss_type
     if loss_type == "hinge":
         return HingeLoss(config)
     elif loss_type == "global_hinge":
         return GlobalHingeLoss(config)
     elif loss_type == "softmax_with_cross_entropy":
         return lambda logits, label: L.reduce_mean(
             L.softmax_with_cross_entropy(logits, label))
     else:
         raise ValueError
Example No. 27
    def forward(self, outputs, labels):
        predict, (trg_length, label) = outputs[0], labels
        # for target padding mask
        mask = layers.sequence_mask(
            trg_length, maxlen=layers.shape(predict)[1], dtype=predict.dtype)

        cost = layers.softmax_with_cross_entropy(
            logits=predict, label=label, soft_label=False)
        masked_cost = layers.elementwise_mul(cost, mask, axis=0)
        batch_mean_cost = layers.reduce_mean(masked_cost, dim=[0])
        seq_cost = layers.reduce_sum(batch_mean_cost)
        return seq_cost
Example No. 28
    def get_metrics(self, inputs, outputs):
        """Get metrics."""
        metrics = {}

        tgt_logits = self._calc_logits(outputs["enc_out"], inputs["tgt_idx"])
        tgt_lm_loss = layers.softmax_with_cross_entropy(
            logits=tgt_logits, label=inputs["tgt_label"])
        mean_tgt_lm_loss = layers.mean(tgt_lm_loss)
        metrics["token_lm_loss"] = mean_tgt_lm_loss

        loss = mean_tgt_lm_loss
        metrics["loss"] = loss
        return metrics
Example No. 29
def def_seq2seq_model(num_layers, hidden_size, dropout_prob, src_vocab_size,
                      trg_vocab_size):
    "vanilla seq2seq model"
    # data
    source = fluid.data(name="src", shape=[None, None], dtype="int64")
    source_length = fluid.data(name="src_sequence_length",
                               shape=[None],
                               dtype="int64")
    target = fluid.data(name="trg", shape=[None, None], dtype="int64")
    target_length = fluid.data(name="trg_sequence_length",
                               shape=[None],
                               dtype="int64")
    label = fluid.data(name="label", shape=[None, None, 1], dtype="int64")

    # embedding
    src_emb = fluid.embedding(source, (src_vocab_size, hidden_size))
    tar_emb = fluid.embedding(target, (trg_vocab_size, hidden_size))

    # encoder
    enc_cell = EncoderCell(num_layers, hidden_size, dropout_prob)
    enc_output, enc_final_state = dynamic_rnn(cell=enc_cell,
                                              inputs=src_emb,
                                              sequence_length=source_length)

    # decoder
    dec_cell = DecoderCell(num_layers, hidden_size, dropout_prob)
    dec_output, dec_final_state = dynamic_rnn(cell=dec_cell,
                                              inputs=tar_emb,
                                              initial_states=enc_final_state)
    logits = layers.fc(dec_output,
                       size=trg_vocab_size,
                       num_flatten_dims=len(dec_output.shape) - 1,
                       bias_attr=False)

    # loss
    loss = layers.softmax_with_cross_entropy(logits=logits,
                                             label=label,
                                             soft_label=False)
    loss = layers.unsqueeze(loss, axes=[2])
    max_tar_seq_len = layers.shape(target)[1]
    tar_mask = layers.sequence_mask(target_length,
                                    maxlen=max_tar_seq_len,
                                    dtype="float32")
    loss = loss * tar_mask
    loss = layers.reduce_mean(loss, dim=[0])
    loss = layers.reduce_sum(loss)

    # optimizer
    optimizer = fluid.optimizer.Adam(0.001)
    optimizer.minimize(loss)
    return loss
Example No. 30
    def _compute_loss(self, dec_output):
        loss = layers.softmax_with_cross_entropy(logits=dec_output,
                                                 label=self.label,
                                                 soft_label=False)
        loss = layers.unsqueeze(loss, axes=[2])

        max_tar_seq_len = layers.shape(self.tar)[1]
        tar_mask = layers.sequence_mask(self.tar_sequence_length,
                                        maxlen=max_tar_seq_len,
                                        dtype='float32')
        loss = loss * tar_mask
        loss = layers.reduce_mean(loss, dim=[0])
        loss = layers.reduce_sum(loss)
        return loss
Example No. 31
    def _compute_loss(self, pred):

        no_grad_set = []

        label = layers.cast(self.label, dtype="int64")
        label = layers.reshape(label, [-1, 1])
        pred = layers.reshape(pred, [-1, 2])

        no_grad_set.append(label.name)

        loss = layers.softmax_with_cross_entropy(pred, label)
        loss = layers.reshape(loss, shape=[self.batch_size, -1])
        loss = layers.reduce_mean(loss)

        return loss, no_grad_set