Example #1
 def metrics(self, predictions, labels):
     predictions = L.argmax(predictions, axis=1)
     labels = L.argmax(labels, axis=1)
     #predictions = L.unsqueeze(predictions, axes=[1])
     acc = propeller.metrics.Acc(labels, predictions)
     #auc = propeller.metrics.Auc(labels, predictions)
     return {'acc': acc}
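For reference, a minimal dygraph sketch of what the argmax calls above produce, assuming Paddle 1.x with the usual aliases (import paddle.fluid as fluid, import paddle.fluid.layers as L); shapes and values are illustrative only:

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as L

with fluid.dygraph.guard():
    # [batch, num_classes] logits -> [batch] class ids
    logits = fluid.dygraph.to_variable(
        np.array([[0.1, 2.3, -1.0], [4.0, 0.2, 0.3]], dtype='float32'))
    pred = L.argmax(logits, axis=1)
    print(pred.numpy())  # [1 0]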
Example #2
def greedy_search_infilling(model,
                            q_ids,
                            q_sids,
                            sos_id,
                            eos_id,
                            attn_id,
                            max_encode_len=640,
                            max_decode_len=100,
                            tgt_type_id=3):
    model.eval()
    _, logits, info = model(q_ids, q_sids)
    gen_ids = L.argmax(logits, -1)
    d_batch, d_seqlen = q_ids.shape
    seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True)
    has_stopped = np.zeros([d_batch], dtype=bool)  # np.bool is removed in recent NumPy
    gen_seq_len = np.zeros([d_batch], dtype=np.int64)
    output_ids = []

    past_cache = info['caches']

    cls_ids = L.ones([d_batch], dtype='int64') * sos_id
    attn_ids = L.ones([d_batch], dtype='int64') * attn_id
    ids = L.stack([cls_ids, attn_ids], -1)
    for step in range(max_decode_len):
        bias = gen_bias(q_ids, ids, step)
        pos_ids = D.to_variable(
            np.tile(np.array([[step, step + 1]], dtype=np.int64),
                    [d_batch, 1]))
        pos_ids += seqlen
        _, logits, info = model(ids,
                                L.ones_like(ids) * tgt_type_id,
                                pos_ids=pos_ids,
                                attn_bias=bias,
                                past_cache=past_cache)
        gen_ids = L.argmax(logits, -1)

        past_cached_k, past_cached_v = past_cache
        cached_k, cached_v = info['caches']
        cached_k = [
            L.concat([pk, k[:, :1, :]], 1)
            for pk, k in zip(past_cached_k, cached_k)
        ]  # concat cached
        cached_v = [
            L.concat([pv, v[:, :1, :]], 1)
            for pv, v in zip(past_cached_v, cached_v)
        ]
        past_cache = (cached_k, cached_v)

        gen_ids = gen_ids[:, 1]
        ids = L.stack([gen_ids, attn_ids], 1)

        gen_ids = gen_ids.numpy()
        has_stopped |= (gen_ids == eos_id).astype(bool)
        gen_seq_len += (1 - has_stopped.astype(np.int64))
        output_ids.append(gen_ids.tolist())
        if has_stopped.all():
            break
    output_ids = np.array(output_ids).transpose([1, 0])
    return output_ids
Example #3
def decode(args, s_arc, s_rel, mask):
    """Decode function"""
    mask = mask.numpy()
    lens = np.sum(mask, -1)
    # prevent self-loops
    arc_preds = layers.argmax(s_arc, -1).numpy()
    bad = [not utils.istree(seq[:i + 1]) for i, seq in zip(lens, arc_preds)]
    if args.tree and any(bad):
        arc_preds[bad] = utils.eisner(s_arc.numpy()[bad], mask[bad])
    arc_preds = dygraph.to_variable(arc_preds, zero_copy=False)
    rel_preds = layers.argmax(s_rel, axis=-1)
    # batch_size, seq_len, _ = rel_preds.shape
    rel_preds = nn.index_sample(rel_preds, layers.unsqueeze(arc_preds, -1))
    rel_preds = layers.squeeze(rel_preds, axes=[-1])
    return arc_preds, rel_preds
Example #4
    def get_metrics(self, inputs, outputs):
        """Get metrics."""
        metrics = {}
        pooled_out = self._get_pooled_output(outputs["enc_out"])
        cls_logits = self._get_classifier_output(pooled_out,
                                                 num_classes=self.num_classes,
                                                 name="cls")
        cls_loss, cls_softmax = layers.softmax_with_cross_entropy(
            logits=cls_logits, label=inputs["label"], return_softmax=True)

        cls_acc = layers.accuracy(cls_softmax, inputs["label"])
        mean_cls_loss = layers.mean(cls_loss)

        metrics["loss"] = mean_cls_loss
        metrics["cls_loss"] = mean_cls_loss
        metrics["cls_acc"] = cls_acc

        # statistics for recall & precision & f1
        if self.num_classes == 2:
            pred = layers.argmax(cls_softmax, axis=1)
            label = layers.squeeze(inputs["label"], axes=[1])
            metrics["stat_tp"] = layers.reduce_sum(
                layers.logical_and(pred == 1, label == 1).astype("float32"))
            metrics["stat_fp"] = layers.reduce_sum(
                layers.logical_and(pred == 1, label == 0).astype("float32"))
            metrics["stat_tn"] = layers.reduce_sum(
                layers.logical_and(pred == 0, label == 0).astype("float32"))
            metrics["stat_fn"] = layers.reduce_sum(
                layers.logical_and(pred == 0, label == 1).astype("float32"))
        return metrics
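The four stat_* sums above are all that is needed to recover precision, recall, and F1 once the graph has been evaluated; a small, hypothetical post-processing helper over the fetched values:

def prf_from_stats(tp, fp, tn, fn, eps=1e-8):
    # hypothetical helper: tp/fp/tn/fn are the accumulated stat_tp/stat_fp/stat_tn/stat_fn sums
    precision = tp / (tp + fp + eps)
    recall = tp / (tp + fn + eps)
    f1 = 2 * precision * recall / (precision + recall + eps)
    return precision, recall, f1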
Example #5
    def create_cam_op(self, predict, class_dim, heatmaps):
        """compute loss with tensor

         Args:
         predict: model output tensor activated by softmax
         class_dim: dim of multi-class vector
         heatmaps: 全局池化前的特征图

         Returns:
         heatmaps: class activation map
         """
        if self.main_arch in DenseNetModels:
            weights_shape = 1024
            name = "fc_weights"
        elif self.main_arch == "xception":
            weights_shape = 2048
            name = "fc_weights"
        else:
            raise ValueError(
                "Calc CAM of model arch {} is not supported.".format(
                    self.main_arch))

        fc_weights = FL.create_parameter(shape=[weights_shape, class_dim],
                                         dtype='float32',
                                         name=name)  # 1024, 5

        pred_idx = FL.argmax(predict, 1)  # bs, 1
        fc_weights = FL.transpose(fc_weights, perm=[1, 0])  # 5, 1024
        fc_weights = FL.gather(fc_weights, index=pred_idx)  # bs, 1024

        heatmaps = heatmaps * fc_weights  # bs, 1024, 16, 16
        heatmaps = FL.reduce_sum(heatmaps, dim=1, keep_dim=False)

        return heatmaps
Example #6
 def should_continue(i, mel_input, outputs, hidden, attention, state,
                     coeffs):
     T_enc = coeffs.shape[-1]
     attn_peak = F.argmax(coeffs[first_mono_attention_layer, 0, 0]) \
         if num_monotonic_attention_layers > 0 \
         else F.fill_constant([1], "int64", value=0)
     return i < MAX_STEP and F.reshape(attn_peak, [1]) < T_enc - 1
Example #7
def node_classify_model(word2id, num_labels, embed_dim=16):
    """Build node classify model.

    Args:
        word2id(dict): map word(node) to its corresponding index

        num_labels: The number of labels.

        embed_dim: The dimension of embedding.
    """

    nodes = fl.data('nodes', shape=[None, 1], dtype='int64')
    labels = fl.data('labels', shape=[None, 1], dtype='int64')

    embed_nodes = fl.embedding(input=nodes,
                               size=[len(word2id), embed_dim],
                               param_attr=fluid.ParamAttr(name='content'))

    embed_nodes.stop_gradient = True
    probs = fl.fc(input=embed_nodes, size=num_labels, act='softmax')
    predict = fl.argmax(probs, axis=-1)
    loss = fl.cross_entropy(input=probs, label=labels)
    loss = fl.reduce_mean(loss)

    return {
        'loss': loss,
        'probs': probs,
        'predict': predict,
        'labels': labels,
    }
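A hypothetical usage sketch for the static-graph program above (Paddle 1.x; the tiny vocabulary and the feed arrays are placeholders):

import numpy as np
import paddle.fluid as fluid

word2id = {'a': 0, 'b': 1, 'c': 2}  # placeholder vocabulary
graph = node_classify_model(word2id, num_labels=2, embed_dim=16)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

feed = {
    'nodes': np.array([[0], [1], [2]], dtype='int64'),
    'labels': np.array([[0], [1], [0]], dtype='int64'),
}
loss_val, pred_val = exe.run(fluid.default_main_program(),
                             feed=feed,
                             fetch_list=[graph['loss'], graph['predict']])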
Example #8
 def define_network(self, l_src_ids, l_position_ids, l_sentence_ids,
                    l_input_mask, r_src_ids, r_position_ids, r_sentence_ids,
                    r_input_mask):
     conf = ErnieConfig(self.conf_path)
     l_model = ErnieModel(l_src_ids,
                          l_position_ids,
                          l_sentence_ids,
                          task_ids=None,
                          input_mask=l_input_mask,
                          config=conf)
     l_pool_feature = l_model.get_pooled_output()
     r_model = ErnieModel(r_src_ids,
                          r_position_ids,
                          r_sentence_ids,
                          task_ids=None,
                          input_mask=r_input_mask,
                          config=conf)
     r_pool_feature = r_model.get_pooled_output()
     l_pool_feature.stop_gradient = self.clock
     r_pool_feature.stop_gradient = self.clock
     # l_pool_feature = layers.fc(l_pool_feature,128)
     # r_pool_feature = layers.fc(r_pool_feature,128)
     self.confidence = layers.cos_sim(l_pool_feature, r_pool_feature)
     out = layers.fc([l_pool_feature, r_pool_feature], 128)
     out = layers.fc(out, 32)
     self.layers_out = layers.fc(out, 11, name="kea_out")
     # self.confidence = layers.softmax(self.layers_out)
     layers_out = layers.argmax(self.layers_out, axis=1)
     return layers_out
Example #9
 def forward(self, src_ids, *args, **kwargs):
     tgt_labels = kwargs.pop('tgt_labels', None)
     tgt_pos = kwargs.pop('tgt_pos', None)
     encode_only = kwargs.pop('encode_only', False)
     _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs)
     #log.debug('hidden_-1 %r'% L.reduce_mean(info['hiddens'][0]).numpy())
     #log.debug('hidden_0 %r'% L.reduce_mean(info['hiddens'][1]).numpy())
     if encode_only:
         return None, None, info
     elif tgt_labels is None:
         encoded = self.mlm(encoded)
         encoded = self.mlm_ln(encoded)
         logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias
         output_ids = L.argmax(logits, -1)
         return output_ids, logits, info
     else:
         encoded_2d = L.gather_nd(encoded, tgt_pos)
         #log.debug('input shape %s' % repr(src_ids.shape))
         #log.debug(L.gather_nd(src_ids, tgt_pos).numpy())
         encoded_2d = self.mlm(encoded_2d)
         encoded_2d = self.mlm_ln(encoded_2d)
         logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
         if len(tgt_labels.shape) == 1:
             tgt_labels = L.reshape(tgt_labels, [-1, 1])
         
         loss = L.reduce_mean(
                 L.softmax_with_cross_entropy(logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1))
                 )
         return loss, logits_2d, info
Example #10
    def build_model(self):
        node_features = self.graph_wrapper.node_feat["feat"]

        output = self.gcn(gw=self.graph_wrapper,
                          feature=node_features,
                          hidden_size=self.hidden_size,
                          activation="relu",
                          norm=self.graph_wrapper.node_feat["norm"],
                          name="gcn_layer_1")
        output1 = output
        output = self.gcn(gw=self.graph_wrapper,
                          feature=output,
                          hidden_size=self.hidden_size,
                          activation="relu",
                          norm=self.graph_wrapper.node_feat["norm"],
                          name="gcn_layer_2")
        output2 = output
        output = self.gcn(gw=self.graph_wrapper,
                          feature=output,
                          hidden_size=self.hidden_size,
                          activation="relu",
                          norm=self.graph_wrapper.node_feat["norm"],
                          name="gcn_layer_3")

        output = L.concat(input=[output1, output2, output], axis=-1)

        output, ratio_length = sag_pool(gw=self.graph_wrapper,
                                        feature=output,
                                        ratio=self.pooling_ratio,
                                        graph_id=self.graph_id,
                                        dataset=self.args.dataset_name,
                                        name="sag_pool_1")
        output = L.lod_reset(output, self.graph_wrapper.graph_lod)
        cat1 = L.sequence_pool(output, "sum")
        ratio_length = L.cast(ratio_length, dtype="float32")
        cat1 = L.elementwise_div(cat1, ratio_length, axis=-1)
        cat2 = L.sequence_pool(output, "max")
        output = L.concat(input=[cat2, cat1], axis=-1)

        output = L.fc(output, size=self.hidden_size, act="relu")
        output = L.dropout(output, dropout_prob=self.dropout_ratio)
        output = L.fc(output, size=self.hidden_size // 2, act="relu")
        output = L.fc(output,
                      size=self.num_classes,
                      act=None,
                      param_attr=fluid.ParamAttr(name="final_fc"))

        self.labels = L.cast(self.labels, dtype="float32")
        loss = L.sigmoid_cross_entropy_with_logits(x=output, label=self.labels)
        self.loss = L.mean(loss)
        pred = L.sigmoid(output)
        self.pred = L.argmax(x=pred, axis=-1)
        correct = L.equal(self.pred, self.labels_1dim)
        correct = L.cast(correct, dtype="int32")
        self.correct = L.reduce_sum(correct)
Example #11
def evaluate_student(model, dataset):
    all_pred, all_label = [], []
    with D.base._switch_tracer_mode_guard_(is_train=False):
        model.eval()
        for step, (ids_student, ids, _, labels) in enumerate(dataset.start()):
            _, logits = model(ids_student)
            pred = L.argmax(logits, -1)
            all_pred.extend(pred.numpy())
            all_label.extend(labels.numpy())
        f1 = f1_score(all_label, all_pred, average='macro')
        model.train()
        return f1
Example #12
    def forward(self, q, k, v, lengths, speaker_embed, start_index, 
                force_monotonic=False, prev_coeffs=None, window=None):
        # add position encoding as an inductive bias 
        if self.has_bias: # multi-speaker model
            omega_q = 2 * F.sigmoid(
                F.squeeze(self.q_pos_affine(speaker_embed), axes=[-1]))
            omega_k = 2 * self.omega_initial * F.sigmoid(F.squeeze(
                self.k_pos_affine(speaker_embed), axes=[-1]))
        else: # single-speaker case
            batch_size = q.shape[0]
            omega_q = F.ones((batch_size, ), dtype="float32")
            omega_k = F.ones((batch_size, ), dtype="float32") * self.omega_default
        q += self.position_encoding_weight * positional_encoding(q, start_index, omega_q)
        k += self.position_encoding_weight * positional_encoding(k, 0, omega_k)

        q, k, v = self.q_affine(q), self.k_affine(k), self.v_affine(v)
        activations = F.matmul(q, k, transpose_y=True)
        activations /= np.sqrt(self.attention_dim)

        if self.training:
            # mask the <pad> parts from the encoder
            mask = F.sequence_mask(lengths, dtype="float32")
            attn_bias = F.scale(1. - mask, -1000)
            activations += F.unsqueeze(attn_bias, [1])
        elif force_monotonic:
            assert window is not None
            backward_step, forward_step = window
            T_enc = k.shape[1]
            batch_size, T_dec, _ = q.shape

            # actually T_dec = 1 here
            alpha = F.fill_constant((batch_size, T_dec), value=0, dtype="int64") \
                   if prev_coeffs is None \
                   else F.argmax(prev_coeffs, axis=-1)
            backward = F.sequence_mask(alpha - backward_step, maxlen=T_enc, dtype="bool")
            forward = F.sequence_mask(alpha + forward_step, maxlen=T_enc, dtype="bool")
            mask = F.cast(F.logical_xor(backward, forward), "float32")
            # print("mask's shape:", mask.shape)
            attn_bias = F.scale(1. - mask, -1000)
            activations += attn_bias

        # softmax
        coefficients = F.softmax(activations, axis=-1)
        # context vector
        coefficients = F.dropout(coefficients, 1. - self.keep_prob,
                                 dropout_implementation='upscale_in_train')
        contexts = F.matmul(coefficients, v)
        # context normalization
        enc_lengths = F.cast(F.unsqueeze(lengths, axes=[1, 2]), "float32")
        contexts *= F.sqrt(enc_lengths)
        # out affine
        contexts = self.out_affine(contexts)
        return contexts, coefficients
Example #13
    def metrics(self, predictions, label):
        qid, logits = predictions

        positive_class_logits = L.slice(logits, axes=[1], starts=[1], ends=[2])
        mrr = propeller.metrics.Mrr(qid, label, positive_class_logits)

        predictions = L.argmax(logits, axis=1)
        predictions = L.unsqueeze(predictions, axes=[1])
        f1 = propeller.metrics.F1(label, predictions)
        acc = propeller.metrics.Acc(label, predictions)
        #auc = propeller.metrics.Auc(label, predictions)

        return {'acc': acc, 'f1': f1, 'mrr': mrr}
Example #14
def _select_column(condition,
                   inputs,
                   column_enc,
                   column_len,
                   ptr_net,
                   grammar,
                   column2table_mask,
                   name=None):
    """select_column.

    Args:
        condition (TYPE): NULL
        inputs (Variable): shape = [batch_size, max_len, hidden_size]. At inference time, max_len is always 1.
        column_enc (TYPE): NULL
        column_len (TYPE): NULL
        ptr_net (TYPE): NULL
        grammar (TYPE): NULL
        column2table_mask (Variable):
        name (str):

    Returns: TODO

    Raises: NULL
    """
    condition = layers.cast(condition, dtype='float32')

    column_mask = layers.sequence_mask(column_len,
                                       maxlen=grammar.MAX_COLUMN,
                                       dtype='float32')
    column_mask = layers.reshape(column_mask, [-1, grammar.MAX_COLUMN])
    predicts = ptr_net.forward(inputs, column_enc, column_mask)

    pred_ids = layers.argmax(predicts, axis=-1)
    valid_table_mask = nn_utils.batch_gather(column2table_mask, pred_ids)

    ## concat zeros to vocab size
    zeros_l = tensor.fill_constant_batch_size_like(
        predicts,
        shape=[-1, grammar.grammar_size + grammar.MAX_TABLE],
        dtype='float32',
        value=-INF)
    zeros_r = tensor.fill_constant_batch_size_like(
        predicts, shape=[-1, grammar.MAX_VALUE], dtype='float32', value=-INF)
    final_output = tensor.concat([zeros_l, predicts, zeros_r], axis=-1)
    true_final_output = layers.elementwise_mul(final_output, condition, axis=0)
    true_valid_table_mask = layers.elementwise_mul(valid_table_mask,
                                                   condition,
                                                   axis=0)
    return true_final_output, true_valid_table_mask
Example #15
 def loss_cardinality(self, outputs, targets, indices, num_boxes):
     """
     Compute the cardinality error, i.e. the absolute error in the number of predicted non-empty boxes.
     This is not really a loss; it is intended for logging purposes only and does not propagate gradients.
     """
     with dg.no_grad():
         pred_logits = outputs[
             "pred_logits"]  # [bs, num_queries, num_classes]
         tgt_lengths = dg.to_variable(  # wrap in np.array so to_variable always accepts it
             np.array([len(v["labels"]) for v in targets])).astype("float32")
         # Count the number of predictions that are NOT "no-object" (which is the last class)
         card_pred = L.reduce_sum(
             (L.argmax(pred_logits, -1) !=
              pred_logits.shape[-1] - 1).astype("float32"))
         card_err = F.loss.l1_loss(card_pred, tgt_lengths)
         losses = {"cardinality_error": card_err}
         return losses
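For intuition, a NumPy sketch of the metric the docstring describes (per-image count of non-"no-object" predictions compared with the number of target boxes); this illustrates the idea rather than the exact reduction used above:

import numpy as np

def cardinality_error(pred_logits, tgt_lengths):
    # pred_logits: [bs, num_queries, num_classes], last class is "no-object"
    # tgt_lengths: [bs], number of ground-truth boxes per image
    no_object = pred_logits.shape[-1] - 1
    card_pred = (pred_logits.argmax(-1) != no_object).sum(axis=-1)
    return np.abs(card_pred - tgt_lengths).mean()  # L1 error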
Example #16
    def forward(self, *args, **kwargs):
        """
        Args
            tgt_labels(`Variable` of shape [batch_size, seqlen] or [batch, seqlen, vocab_size]):
                ground truth target sequence ids (hard label) or distribution (soft label)
            tgt_pos(`Variable` of shape [n_targets, 2]):
                index of tgt_labels in `src_ids`, can be obtained from `fluid.layers.where(src_ids==mask_id)`
            encode_only(Bool):
                if set, will not return loss or logits_2d
        Returns:
            loss(`Variable` of shape []):
                cross entropy loss mean over every target label. if `encode_only`, returns None.
            logits(`Variable` of shape [n_targets, vocab_size]):
                logits for every target. if `encode_only`, returns None.
            info(Dictionary): see `ErnieModel`
        """
        tgt_labels = kwargs.pop('tgt_labels', None)
        tgt_pos = kwargs.pop('tgt_pos', None)
        encode_only = kwargs.pop('encode_only', False)
        _, encoded, info = ErnieModel.forward(self, *args, **kwargs)
        if encode_only:
            return None, None, info
        elif tgt_labels is None or tgt_pos is None:
            encoded = self.mlm(encoded)
            encoded = self.mlm_ln(encoded)
            logits = L.matmul(encoded, self.word_emb.weight,
                              transpose_y=True) + self.mlm_bias
            output_ids = L.argmax(logits, -1)
            return output_ids, logits, info
        else:
            encoded_2d = L.gather_nd(encoded, tgt_pos)
            encoded_2d = self.mlm(encoded_2d)
            encoded_2d = self.mlm_ln(encoded_2d)
            logits_2d = L.matmul(encoded_2d,
                                 self.word_emb.weight,
                                 transpose_y=True) + self.mlm_bias
            if len(tgt_labels.shape) == 1:
                tgt_labels = L.reshape(tgt_labels, [-1, 1])

            loss = L.reduce_mean(
                L.softmax_with_cross_entropy(
                    logits_2d,
                    tgt_labels,
                    soft_label=(tgt_labels.shape[-1] != 1)))
            return loss, logits_2d, info
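A hypothetical call sketch for the head above, following the docstring's hint that tgt_pos can come from fluid.layers.where(src_ids == mask_id) (dygraph mode; model, src_ids, sent_ids, tgt_labels, and mask_id are placeholders, and L is paddle.fluid.layers):

# Training-style pass over the masked positions only.
tgt_pos = L.where(src_ids == mask_id)  # [n_targets, 2] positions of [MASK]
loss, logits_2d, info = model(src_ids, sent_ids,
                              tgt_labels=tgt_labels, tgt_pos=tgt_pos)

# Inference-only pass: no targets, greedy ids for every position are returned.
output_ids, logits, info = model(src_ids, sent_ids)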
Example #17
    def build_model(self, enc_input, dec_input, tgt_label, label_weights):
        """Build the model with source encoding and target decoding"""

        enc_word_output, enc_sen_output = self.encode(enc_input)
        dec_output = self.decode(dec_input, enc_word_output, enc_sen_output)

        predict_token_idx = layers.argmax(dec_output, axis=-1)
        correct_token_idx = layers.cast(layers.equal(
            tgt_label, layers.reshape(predict_token_idx, shape=[-1, 1])),
                                        dtype='float32')
        weighted_correct = layers.elementwise_mul(x=correct_token_idx,
                                                  y=label_weights,
                                                  axis=0)
        sum_correct = layers.reduce_sum(weighted_correct)
        sum_correct.stop_gradient = True

        # Padding index does not contribute to the total loss. The weights are used to
        # cancel the padding index when calculating the loss.
        if self._label_smooth_eps:
            # TODO: use fluid.input.one_hot after softmax_with_cross_entropy removing
            # the enforcement that the last dimension of label must be 1.
            tgt_label = layers.label_smooth(label=layers.one_hot(
                input=tgt_label, depth=self.voc_size),
                                            epsilon=self._label_smooth_eps)

        cost = layers.softmax_with_cross_entropy(
            logits=dec_output,
            label=tgt_label,
            soft_label=True if self._label_smooth_eps else False)

        weighted_cost = layers.elementwise_mul(x=cost, y=label_weights, axis=0)
        sum_cost = layers.reduce_sum(weighted_cost)
        token_num = layers.reduce_sum(label_weights)
        token_num.stop_gradient = True
        avg_cost = sum_cost / token_num

        graph_vars = {
            "loss": avg_cost,
            "sum_correct": sum_correct,
            "token_num": token_num,
        }
        for k, v in graph_vars.items():
            v.persistable = True

        return graph_vars
Example #18
def model_fn(features, mode, params, run_config):
    ernie = ErnieModelForSequenceClassification(params, name='')
    if mode is not propeller.RunMode.TRAIN:
        ernie.eval()

    metrics, loss = None, None
    if mode is propeller.RunMode.PREDICT:
        src_ids, sent_ids = features
        _, logits = ernie(src_ids, sent_ids)
        predictions = [
            logits,
        ]
    else:
        src_ids, sent_ids, labels = features
        if mode is propeller.RunMode.EVAL:
            loss, logits = ernie(src_ids, sent_ids, labels=labels)
            pred = L.argmax(logits, axis=1)
            acc = propeller.metrics.Acc(labels, pred)
            metrics = {'acc': acc}
            predictions = [pred]
        else:
            loss, logits = ernie(src_ids, sent_ids, labels=labels)
            scheduled_lr, _ = optimization(
                loss=loss,
                warmup_steps=int(run_config.max_steps *
                                 params['warmup_proportion']),
                num_train_steps=run_config.max_steps,
                learning_rate=params['learning_rate'],
                train_program=F.default_main_program(),
                startup_prog=F.default_startup_program(),
                use_fp16=params.use_fp16,
                weight_decay=params['weight_decay'],
                scheduler="linear_warmup_decay",
            )
            propeller.summary.scalar('lr', scheduled_lr)
            predictions = [
                logits,
            ]

    return propeller.ModelSpec(loss=loss,
                               mode=mode,
                               metrics=metrics,
                               predictions=predictions)
Example #19
def node_classify_model(config):
    """Build node classify model.
    """
    nodes = fl.data('nodes', shape=[None, 1], dtype='int64')
    labels = fl.data('labels', shape=[None, 1], dtype='int64')

    embed_nodes = fl.embedding(input=nodes,
                               size=[config.num_nodes, config.embed_dim],
                               param_attr=fluid.ParamAttr(name='weight'))

    embed_nodes.stop_gradient = True
    probs = fl.fc(input=embed_nodes, size=config.num_labels, act='softmax')
    predict = fl.argmax(probs, axis=-1)
    loss = fl.cross_entropy(input=probs, label=labels)
    loss = fl.reduce_mean(loss)

    return {
        'loss': loss,
        'probs': probs,
        'predict': predict,
        'labels': labels,
    }
Example #20
def infer(model, infer_data, max_seq_len=300, is_tensor=True, logits_softmax=True):
    """ 用dygraph模型预测
    [IN]  model: dygraph模型结构
          infer_data: list[(input1[, input2, ...])], 待预测数据
          max_seq_len: int, 最大长度
          is_tensor: boolean, true则infer_data已经是paddle可处理的tensor
          logits_softmax: boolean, true则预测结果为softmax后的logits
    [OUT] pred: list[float], 预测结果
    """
    # ERNIE does not compute gradients inside this with-block;
    with D.base._switch_tracer_mode_guard_(is_train=False):
        # switch the model to eval mode, which disables all dropout;
        model.eval()
        # if infer_data has not been converted yet, turn it into a tensor Paddle accepts
        if not is_tensor:
            infer_data = D.to_variable(np.array(infer_data))

        logits = model(infer_data, logits_softmax=logits_softmax)
        # TODO: return the probability values
        pred = L.argmax(logits, -1).numpy()
        # switch back to train mode
        model.train()
    return pred
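A hypothetical usage sketch (dygraph mode already active, model is a trained classifier; the padded id sequences are made up):

ids = [[1, 2, 3, 0, 0],
       [4, 5, 6, 7, 0]]  # already padded id sequences
preds = infer(model, ids, is_tensor=False)  # one predicted label id per row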
Example #21
    def forward(self, outputs, target_sizes):
        """
        Perform the computation
        Parameters:
            outputs: raw outputs of the model
            target_sizes: tensor of dimension [batch_size x 2] containing the size of each image
                          For evaluation, this must be the original image size (before any data augmentation)
                          For visualization, this should be the image size after data augmentation, but before padding
        """
        out_logits, out_bbox = outputs["pred_logits"], outputs["pred_boxes"]

        assert len(out_logits) == len(target_sizes)
        assert target_sizes.shape[1] == 2

        prob = L.softmax(out_logits, -1)  # [bs, num_queries, num_classes + 1]
        labels = L.argmax(prob[:, :, :], axis=-1)  # [bs, num_queries]
        scores = L.reduce_max(prob, dim=-1)  # [bs, num_queries]

        # convert to [x0, y0, x1, y1] format
        bs, num_queries, _ = out_bbox.shape
        out_bbox = L.reshape(out_bbox, (-1, 4))
        boxes = box_ops.box_cxcywh_to_xyxy(out_bbox)
        boxes = L.reshape(boxes, (bs, num_queries, 4))
        # and from relative [0, 1] to absolute [0, height] coordinates
        img_h, img_w = target_sizes[:, 0], target_sizes[:, 1]
        scale_fct = L.stack([img_w, img_h, img_w, img_h], 1)  # [bs, 4]
        scale_fct = L.expand(L.unsqueeze(scale_fct, [1]), (1, num_queries, 1))
        boxes = boxes * scale_fct

        results = [{
            'scores': s,
            'labels': l,
            'boxes': b
        } for s, l, b in zip(scores.numpy(), labels.numpy(), boxes.numpy())]

        return results
Example #22
 def metrics(self, predictions, label):
     pred, seqlen = predictions
     pred = L.argmax(pred, axis=-1)
     pred = L.unsqueeze(pred, axes=[-1])
     f1 = propeller.metrics.ChunkF1(label, pred, seqlen, self.num_label)
     return {'f1': f1}
Example #23
    def call(self, global_img_feat, p_img_feat, embedding_fn, words=None):
        # image features
        img_feat = layers.fc(p_img_feat, self.hid_size, num_flatten_dims=2, act='tanh')  # [batch, k, hid]
        img_feat_emb = layers.fc(p_img_feat, self.hid_size, num_flatten_dims=2)

        if self.mode == 'eval':
            word = layers.fill_constant_batch_size_like(global_img_feat, [-1],
                                                        dtype='int64',
                                                        value=config.data['start_idx'])
        else:
            words = layers.transpose(words, [1, 0])  # [seq, batch]
            words.stop_gradient = True
        # LSTM state initialization
        hid, cell = create_zero_state(global_img_feat), create_zero_state(global_img_feat)

        # while-loop variable initialization
        mx = decoder_config['sentence_length'] - 1 if self.mode == 'train' else decoder_config['infer_max_length']
        if self.mode == 'eval':
            mx = decoder_config['infer_max_length']
            while_op_output = layers.create_array('int64')
        else:
            while_op_output = layers.create_array('float32')
        max_step = layers.fill_constant(shape=[1], dtype='int64', value=mx)
        step = layers.fill_constant(shape=[1], dtype='int64', value=0)
        cond = layers.less_than(step, max_step)
        while_op = layers.While(cond)

        with while_op.block():
            if self.mode == 'train':
                st = layers.cast(step, 'int32')
                word = layers.slice(words, axes=[0], starts=st, ends=st + 1)
                word = layers.squeeze(word, [0])
                word.stop_gradient = True

            word_emb = embedding_fn(word)
            # maybe using + here would work better?
            xt = layers.concat([word_emb, global_img_feat], axis=-1)  # [batch, feat]
            h, c = layers.lstm_unit(xt, hid, cell, param_attr=fluid.ParamAttr('lstm_w'),
                                    bias_attr=fluid.ParamAttr('lstm_b'))
            p_word_emb = layers.fc(xt, size=self.hid_size)
            p_hidden = layers.fc(hid, size=self.hid_size)
            sentinel_gate = layers.sigmoid(p_word_emb + p_hidden)  # [batch, hidden]
            sentinel = layers.elementwise_mul(sentinel_gate, layers.tanh(c))  # [batch, hidden]

            layers.assign(h, hid)
            layers.assign(c, cell)

            k = layers.shape(p_img_feat)[1]

            p_hid = layers.fc(h, self.hid_size, act='tanh')
            # attention part
            #     alpha
            hid_emb = layers.fc(p_hid, self.hid_size)  # [batch, hidden]
            exp_hid_emb = layers.expand(layers.unsqueeze(hid_emb, 1), [1, k + 1, 1])  # [batch, k+1, hidden]
            sentinel_emb = layers.unsqueeze(layers.fc(sentinel, self.hid_size), axes=1)  # [batch, 1, hidden]
            feat_emb = layers.concat([img_feat_emb, sentinel_emb], axis=1)  # [batch, k+1, hidden]
            z = layers.tanh(feat_emb + exp_hid_emb)  # [batch, k+1, 1]
            alpha = layers.fc(z, size=1, num_flatten_dims=2, act='softmax')  # [batch, k+1, 1]

            #     context vector

            context = layers.concat([img_feat, layers.unsqueeze(sentinel, axes=1)], axis=1)  # [batch, k+1, hidden]
            context = layers.elementwise_mul(context, alpha, axis=0)
            context = layers.reduce_mean(context, dim=1)  # [batch, hidden]

            out = layers.fc(context + p_hid, self.hid_size, act='tanh')

            word_pred = weight_tying_fc(out)  # [batch, vocab]

            if self.mode == 'eval':
                next_word = layers.argmax(word_pred, axis=-1)
                layers.assign(next_word, word)
                next_word = layers.cast(next_word, 'float32')
                layers.array_write(next_word, step, array=while_op_output)
            else:
                layers.array_write(word_pred, step, array=while_op_output)
            layers.increment(step)
            layers.less_than(step, max_step, cond=cond)
        if self.mode == 'train':
            output_time_major, _ = layers.tensor_array_to_tensor(while_op_output, axis=0, use_stack=True)
            output = layers.transpose(output_time_major, [1, 0, 2])
        else:
            output_time_major = layers.tensor_array_to_tensor(while_op_output, axis=0, use_stack=True)[0]
            output = layers.transpose(output_time_major, [1, 0])

        return output
Example #24
    def _forward(self, inputs, is_training):
        """ Real forward process of the model in different modes (train/test). """
        outputs = {}

        src_token = inputs["src_token"]
        src_mask = inputs["src_mask"]
        src_pos = inputs["src_pos"]
        src_type = inputs["src_type"]
        src_turn = inputs["src_turn"]

        tgt_token = inputs["tgt_token"][:, :-1]
        tgt_mask = inputs["tgt_mask"][:, :-1]
        tgt_pos = inputs["tgt_pos"][:, :-1]
        tgt_type = inputs["tgt_type"][:, :-1]
        tgt_turn = inputs["tgt_turn"][:, :-1]

        input_mask = layers.concat([src_mask, tgt_mask], axis=1)
        input_mask.stop_gradient = True
        src_embed = self.embedder(src_token, src_pos, src_type, src_turn)
        tgt_embed = self.embedder(tgt_token, tgt_pos, tgt_type, tgt_turn)
        embed = layers.concat([src_embed, tgt_embed], axis=1)
        embed = self.embed_layer_norm(embed)

        batch_size = src_token.shape[0]
        src_len = src_token.shape[1]
        tgt_len = tgt_token.shape[1]

        if self.num_latent > 0:
            post_embed, post_probs, post_logits = self._posteriori_network(
                input_mask, embed, batch_size, src_len, tgt_len)
            outputs["post_logits"] = post_logits

            if self.use_discriminator:
                pos_probs, neg_probs = self._discriminator_network(
                    input_mask, embed, batch_size, src_len, tgt_len, post_embed)
                outputs["pos_probs"] = pos_probs
                outputs["neg_probs"] = neg_probs

            if is_training:
                z = F.gumbel_softmax(post_logits, self.tau)
            else:
                indices = layers.argmax(post_logits, axis=1)
                z = layers.one_hot(F.unsqueeze(indices, [1]), self.num_latent)
            latent_embeddings = self.latent_embeddings
            latent_embed = layers.matmul(z, latent_embeddings)
            outputs["latent_embed"] = latent_embed
        else:
            latent_embed = None

        latent_embed, dec_probs = self._generation_network(
            input_mask, embed, batch_size, src_len, tgt_len, latent_embed)
        outputs["dec_probs"] = dec_probs

        if self.num_latent > 0 and self.with_bow:
            if self.two_layer_predictor:
                latent_embed = self.pre_bow_predictor(latent_embed)
            bow_logits = self.bow_predictor(latent_embed)
            bow_probs = layers.softmax(bow_logits)
            outputs["bow_probs"] = bow_probs

        return outputs
Example #25
                model.clear_gradients()
                if global_step % args.save_steps == 0:
                    F.save_dygraph(model.state_dict(),
                                   args.save_dir + '_%s' % global_step)
                if global_step % args.eval_steps == 0 and step > 0:
                    y_true, y_pred = [], []
                    with FD.base._switch_tracer_mode_guard_(is_train=False):
                        model.eval()
                        for step, d in enumerate(
                                tqdm(dev_batch_data,
                                     desc='evaluating %d' % epoch)):
                            ids, sids, labels = d
                            ids, sids, labels = FD.to_variable(
                                ids), FD.to_variable(sids), FD.to_variable(
                                    labels)
                            loss, logits = model(ids, sids, labels=labels)
                            #print('\n'.join(map(str, logits.numpy().tolist())))
                            y_pred += L.argmax(logits, -1).numpy().tolist()
                            y_true += labels.numpy().tolist()
                        model.train()

                    if args.debug:
                        print(y_pred[:10], y_true[:10])
                    f1 = f1_score(y_true, y_pred)
                    print('f1 %.5f' % f1)
                    print(classification_report(y_true, y_pred))

                    if f1 > bst_f1:
                        F.save_dygraph(model.state_dict(), args.save_dir)
                        bst_f1 = f1
                        print('saving model with best f1: %.3f' % bst_f1)
Example #26
 def _sampling(self, logits):
     """ Implement greedy sampling. """
     preds = layers.argmax(logits, axis=1)
     return preds
Example #27
                            tea_acc = []
                            with FD.base._switch_tracer_mode_guard_(
                                    is_train=False):
                                ofa_model.model.eval()
                                for step, d in enumerate(
                                        tqdm(dev_ds.start(place),
                                             desc='evaluating %d' % epoch)):
                                    ids, sids, label = d
                                    [loss, logits,
                                     _], [_, tea_logits, _] = ofa_model(
                                         ids,
                                         sids,
                                         labels=label,
                                         num_layers=model_cfg[
                                             'num_hidden_layers'])
                                    a = L.argmax(logits, -1) == label
                                    acc.append(a.numpy())

                                    ta = L.argmax(tea_logits, -1) == label
                                    tea_acc.append(ta.numpy())
                                ofa_model.model.train()
                            print(
                                'width_mult: %f, depth_mult: %f: acc %.5f, teacher acc %.5f'
                                % (width_mult, depth_mult,
                                   np.concatenate(acc).mean(),
                                   np.concatenate(tea_acc).mean()))
        if args.save_dir is not None:
            if not os.path.exists(args.save_dir):
                os.makedirs(args.save_dir)
            F.save_dygraph(ofa_model.model.state_dict(), args.save_dir)
Example #28
    def forward(self, ref_image, ref_label, label, k):
        """
        Encode the reference image to get features for weight generation.

        Args:

            ref_image ((NxK)x3xHxW): Reference images.
            ref_label ((NxK)xCxHxW): Reference labels.
            label (NxCxHxW): Target label.
            k (int): Number of reference images.
        
        Returns: (tuple)
            - x (NxC2xH2xW2): Encoded features from reference images
              for the main branch (as input to the decoder).
            - encoded_ref (list of Variable): Encoded features from reference
              images for the weight generation branch.
            - attention (Nx(KxH1xW1)x(H1xW1)): Attention maps.
            - atn_vis (1x1xH1xW1): Visualization for attention scores.
            - ref_idx (Nx1): Index for which image to use from the
              reference image.
        """
        if self.concat_ref_label:
            # concat reference label map and image together for encoding.
            concat_ref = L.concat([ref_image, ref_label], axis=1)
            x = self.ref_img_first(concat_ref)
        elif self.mul_ref_label:
            x = self.ref_img_first(ref_image)
            x_label = self.ref_label_first(ref_label)
        else:
            x = self.ref_img_first(ref_image)

        atn_ref_image = atn_ref_label = None
        atn = atn_vis = ref_idx = None
        for i in range(self.num_downsamples):
            x = getattr(self, 'ref_img_down_' + str(i))(x)
            if self.mul_ref_label:
                x_label = getattr(self, 'ref_label_down_' + str(i))(x_label)
            # Preserve reference for attention module.
            if k > 1 and i == self.num_downsample_atn - 1:
                x, atn, atn_vis = self.attention_module(x, label, ref_label)
                if self.mul_ref_label:
                    x_label, _, _ = self.attention_module(
                        x_label, None, None, atn)

                atn_sum = L.reshape(atn,
                                    (label.shape[0], k, -1))  # [b, k, h*w*h*w]
                atn_sum = L.reduce_sum(atn_sum, dim=2)
                ref_idx = L.argmax(atn_sum, axis=1)

        # Get all corresponding layers in the encoder output for generating
        # weights in corresponding layers.
        encoded_image_ref = [x]
        if self.mul_ref_label:
            encoded_ref_label = [x_label]

        for i in reversed(range(self.num_downsamples)):  # 4 -> 0
            conv = getattr(self, 'ref_img_up_' + str(i))(encoded_image_ref[-1])
            encoded_image_ref.append(conv)
            if self.mul_ref_label:
                conv_label = getattr(self, 'ref_label_up_' + str(i))(
                    encoded_ref_label[-1])
                encoded_ref_label.append(conv_label)

        if self.mul_ref_label:
            encoded_ref = []
            for i in range(len(encoded_image_ref)):
                conv, conv_label = encoded_image_ref[i], encoded_ref_label[i]
                b, c, h, w = conv.shape
                conv_label = L.softmax(conv_label, axis=1)
                conv_label = L.reshape(conv_label, (b, 1, c, h * w))
                # conv_label = L.expand(conv_label, (1, c, 1, 1))
                conv = L.reshape(conv, (b, c, 1, h * w))
                # conv = L.expand(conv, (1, 1, c, 1))
                conv_prod = conv * conv_label  # (b, c, c, h * w)
                conv_prod = L.reduce_sum(conv_prod, dim=3,
                                         keep_dim=True)  # (b, c, c, 1)
                encoded_ref.append(conv_prod)
        else:
            encoded_ref = encoded_image_ref

        encoded_ref = encoded_ref[::-1]  # level0 -> level4
        return x, encoded_ref, atn, atn_vis, ref_idx
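A small NumPy sketch of how ref_idx is chosen above: the attention map is folded back to one row per reference image, its mass is summed, and the reference with the largest total attention wins (batch size, k, h, w are made up):

import numpy as np

b, k, h, w = 2, 3, 16, 16
atn = np.random.rand(b, k * h * w, h * w)    # attention maps, as in the docstring
atn_sum = atn.reshape(b, k, -1).sum(axis=2)  # total attention mass per reference
ref_idx = atn_sum.argmax(axis=1)             # [b]: best reference per sample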
Example #29
                        log.debug('train loss %.5f lr %.3e' %
                                  (loss.numpy(), opt.current_step_lr()))
                    opt.minimize(loss)
                    model.clear_gradients()
                    if step % 100 == 0:
                        acc = []
                        with FD.base._switch_tracer_mode_guard_(
                                is_train=False):
                            model.eval()
                            for step, d in enumerate(
                                    tqdm(dev_ds.start(place),
                                         desc='evaluating %d' % epoch)):
                                ids, sids, label = d
                                loss, logits = model(ids, sids, labels=label)
                                #print('\n'.join(map(str, logits.numpy().tolist())))
                                a = L.argmax(logits, -1) == label
                                acc.append(a.numpy())
                            model.train()
                        log.debug('acc %.5f' % np.concatenate(acc).mean())
            if args.save_dir is not None:
                F.save_dygraph(model.state_dict(), args.save_dir)
        else:
            feature_column = propeller.data.FeatureColumns([
                propeller.data.TextColumn('seg_a',
                                          unk_id=tokenizer.unk_id,
                                          vocab_dict=tokenizer.vocab,
                                          tokenizer=tokenizer.tokenize),
            ])

            assert args.save_dir is not None
            sd, _ = FD.load_dygraph(args.save_dir)
Example #30
         loss.backward()
         if step % 10 == 0:
             log.debug('train loss %.5f lr %.3e' % (loss.numpy(), opt.current_step_lr()))
         opt.minimize(loss)
         model.clear_gradients()
     with FD.base._switch_tracer_mode_guard_(is_train=False):
         model.eval()
         FP = 0
         TP = 0
         FN = 0
         TN = 0
         for step, d in enumerate(tqdm(dev_ds.start(place), desc='evaluating %d' % epoch)):
             ids, sids, label = d
             loss, logits = model(ids, sids, labels=label)
             #print('\n'.join(map(str, logits.numpy().tolist())))
             a = L.argmax(logits, -1).numpy()
             label = label.numpy()
             length = a.shape[0]
             label = np.reshape(label,[length])
             for i in range(length):
                 if a[i] == label[i] and a[i] == 1:
                     TP += 1
                 elif a[i] == label[i] and a[i] == 0:
                     TN += 1
                 elif a[i] != label[i] and a[i] == 1:
                     FP += 1
                 elif a[i] != label[i] and a[i] == 0:
                     FN += 1
         mcc.append((TP * TN - FP * FN) / math.sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)))
         print('mcc:', mcc[-1])
 if args.save_dir is not None: