Example 1
  def __call__(self,
               prediction_tensor,
               target_tensor,
               ignore_nan_targets=False,
               scope=None,
               **params):
    """Call the loss function.

    Args:
      prediction_tensor: an N-d tensor of shape [batch, anchors, ...]
        representing predicted quantities.
      target_tensor: an N-d tensor of shape [batch, anchors, ...] representing
        regression or classification targets.
      ignore_nan_targets: whether to ignore nan targets in the loss computation.
        E.g. can be used if the target tensor is missing groundtruth data that
        shouldn't be factored into the loss.
      scope: Op scope name. Defaults to 'Loss' if None.
      **params: Additional keyword arguments for specific implementations of
              the Loss.

    Returns:
      loss: a tensor representing the value of the loss function.
    """
    if ignore_nan_targets:
      target_tensor = paddle.where(paddle.isnan(target_tensor),
                                prediction_tensor,
                                target_tensor)
    return self._compute_loss(prediction_tensor, target_tensor, **params)
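
A minimal, self-contained sketch of the masking that `ignore_nan_targets=True` performs above (the tensors and the elementwise L1 loss are illustrative, not part of the original class):

import paddle

pred = paddle.to_tensor([[1.0, 2.0]])
target = paddle.to_tensor([[1.5, float('nan')]])

# NaN targets are replaced by the predictions, so those positions contribute
# zero to an elementwise loss.
masked_target = paddle.where(paddle.isnan(target), pred, target)
print(paddle.abs(pred - masked_target))  # [[0.5, 0.0]]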
Example 2
    def get_loss(self, model, batch_data, pred_dict, train=True, flag=0):
        n_support_train = self.args.n_shot_train
        n_support_test = self.args.n_shot_test
        n_query = self.args.n_query
        if not train:
            losses_adapt = self.criterion(pred_dict['s_logits'].reshape((2*n_support_test*n_query,2)), 
                                          paddle.expand(batch_data['s_label'],[n_query,n_support_test*2]).reshape((1,2*n_support_test*n_query)).squeeze(0))
        else:
            if flag:
                losses_adapt = self.criterion(pred_dict['s_logits'].reshape((2*n_support_train*n_query,2)), 
                                              paddle.expand(batch_data['s_label'],[n_query,n_support_train*2]).reshape((1,2*n_support_train*n_query)).squeeze(0))
            else:
                losses_adapt = self.criterion(pred_dict['q_logits'], batch_data['q_label'])

        if paddle.isnan(losses_adapt).any() or paddle.isinf(losses_adapt).any():
            print('!!!!!!!!!!!!!!!!!!! Nan value for supervised CE loss', losses_adapt)
            print(pred_dict['s_logits'])
            losses_adapt = paddle.zeros_like(losses_adapt)

        if self.args.reg_adj > 0:
            n_support = batch_data['s_label'].shape[0]
            adj = pred_dict['adj'][-1]
            if train:
                if flag:
                    s_label = paddle.expand(batch_data['s_label'], [n_query,batch_data['s_label'].shape[0]])
                    n_d = n_query * n_support
                    label_edge = model.layers.label2edge(s_label).reshape((n_d, -1))
                    pred_edge = adj[:,:,:-1,:-1].reshape((n_d, -1))
                else:
                    s_label = paddle.expand(batch_data['s_label'], [n_query,batch_data['s_label'].shape[0]])
                    q_label = batch_data['q_label'].unsqueeze(1)
                    total_label = paddle.concat([s_label, q_label], 1)
                    label_edge = model.layers.label2edge(total_label)[:,:,-1,:-1]
                    pred_edge = adj[:,:,-1,:-1]
            else:
                s_label = batch_data['s_label'].unsqueeze(0)
                n_d = n_support * self.args.rel_edge
                label_edge = model.layers.label2edge(s_label).reshape((n_d, -1))
                pred_edge = adj[:, :, :n_support, :n_support].mean(0).reshape((n_d, -1))
            adj_loss_val = F.mse_loss(pred_edge, label_edge)
            if paddle.isnan(adj_loss_val).any() or paddle.isinf(adj_loss_val).any():
                print('!!!!!!!!!!!!!!!!!!!  Nan value for adjacency loss', adj_loss_val)
                adj_loss_val = paddle.zeros_like(adj_loss_val)

            losses_adapt += self.args.reg_adj * adj_loss_val

        return losses_adapt
Example 3
    def get_loss(self, model, batch_data, pred_dict, train=True):
        if not train and self.update_s_q:
            losses_adapt = self.criterion(pred_dict['s_logits'],
                                          batch_data['s_label'])
        else:
            losses_adapt = self.criterion(pred_dict['logits'],
                                          batch_data['label'])

        if paddle.isnan(losses_adapt).any() or paddle.isinf(
                losses_adapt).any():
            print('!!!!!!!!!!!!!!!!!!! Nan value for supervised CE loss',
                  losses_adapt)
            print(pred_dict['s_logits'])
            losses_adapt = paddle.zeros_like(losses_adapt)

        if self.args.reg_adj > 0:
            n_support = batch_data['s_label'].shape[0]
            adj = pred_dict['adj'][-1]
            if train:
                n_query = batch_data['q_label'].shape[0]
                s_label = paddle.expand(
                    batch_data['s_label'],
                    [n_query, batch_data['s_label'].shape[0]])
                q_label = batch_data['q_label'].unsqueeze(1)
                total_label = paddle.concat([s_label, q_label], 1)
                n_d = n_query * self.args.rel_edge * (n_support + 1)
                label_edge = model.layers.label2edge(total_label).reshape(
                    (n_d, -1))
                pred_edge = adj.reshape((n_d, -1))
            else:
                s_label = batch_data['s_label'].unsqueeze(0)
                n_d = n_support * self.args.rel_edge
                label_edge = model.layers.label2edge(s_label).reshape(
                    (n_d, -1))
                pred_edge = adj[:, :, :n_support, :n_support].mean(0).reshape(
                    (n_d, -1))
            adj_loss_val = F.mse_loss(pred_edge, label_edge)
            if paddle.isnan(adj_loss_val).any() or paddle.isinf(
                    adj_loss_val).any():
                print('!!!!!!!!!!!!!!!!!!!  Nan value for adjacency loss',
                      adj_loss_val)
                adj_loss_val = paddle.zeros_like(adj_loss_val)

            losses_adapt += self.args.reg_adj * adj_loss_val

        return losses_adapt
Example 4
def set_grad(params, params_with_grad, scale=1.0):
    # Copy (and unscale) the gradients computed on `params_with_grad` onto
    # `params`. Returns True (invalid) if any gradient contains NaN/Inf, so
    # the caller can skip the optimizer step.
    for param, param_w_grad in zip(params, params_with_grad):
        grad = param_w_grad.grad
        if grad is None:
            continue
        if scale is not None:
            grad = grad / scale
        if paddle.isnan(grad).any() or paddle.isinf(grad).any():
            return True  # invalid grad
        if param.grad is not None:
            paddle.assign(grad, output=param.grad)
    return False
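
A minimal check of the invalid-gradient path using the adapted `set_grad` above (the toy tensor and the NaN-producing loss are illustrative only):

import paddle

p = paddle.to_tensor([1.0, 2.0], stop_gradient=False)
(p * float('nan')).sum().backward()   # p.grad is now all-NaN
print(set_grad([p], [p], scale=1.0))  # True: NaN gradient detected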
Example 5
 def test_case(x, axis=None, keepdim=False):
     if isinstance(axis, list):
         axis = list(axis)
         if len(axis) == 0:
             axis = None
     x_tensor = paddle.to_tensor(x, stop_gradient=False)
     y = paddle.nanmean(x_tensor, axis, keepdim)
     dx = paddle.grad(y, x_tensor)[0].numpy()
     sum_dx_ref = np.prod(y.shape)
     if np.isnan(y.numpy()).sum():
         sum_dx_ref -= np.isnan(y.numpy()).sum()
     cnt = paddle.sum(~paddle.isnan(x_tensor),
                      axis=axis,
                      keepdim=keepdim)
     if (cnt == 0).sum():
         dx[np.isnan(dx)] = 0
     sum_dx = dx.sum()
     self.assertEqual(np.allclose(sum_dx, sum_dx_ref, rtol=1e-04), True)
Example 6
 def forward(self, inputs):
     return paddle.cast(paddle.isnan(inputs), "int32")
Example 7
def median(x, axis=None, keepdim=False, name=None):
    """
    Compute the median along the specified axis.

    Args:
        x (Tensor): The input Tensor, it's data type can be bool, float16, float32, float64, int32, int64.
        axis (int, optional): The axis along which to compute the median. ``axis`` should be an int.
            ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
            If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
            If ``axis`` is None, median is calculated over all elements of ``x``. Default is None.
        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
            in the output Tensor. If ``keepdim`` is True, the dimensions of
            the output Tensor is the same as ``x`` except in the reduced
            dimensions(it is of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed in ``axis`` . Default is False.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, results of median along ``axis`` of ``x``. If data type of ``x`` is float64, data type of results will be float64, otherwise data type will be float32.

    Examples:
        .. code-block:: python

            import paddle

            x = paddle.arange(12).reshape([3, 4])
            # Tensor(shape=[3, 4], dtype=int64, place=Place(cpu), stop_gradient=True,
            #        [[0 , 1 , 2 , 3 ],
            #         [4 , 5 , 6 , 7 ],
            #         [8 , 9 , 10, 11]])

            y1 = paddle.median(x)
            # Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
            #        [5.50000000])

            y2 = paddle.median(x, axis=0)
            # Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
            #        [4., 5., 6., 7.])

            y3 = paddle.median(x, axis=1)
            # Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
            #        [1.50000000, 5.50000000, 9.50000000])

            y4 = paddle.median(x, axis=0, keepdim=True)
            # Tensor(shape=[1, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
            #        [[4., 5., 6., 7.]])

    """
    if not isinstance(x, Variable):
        raise TypeError("In median, the input x should be a Tensor.")
    is_flatten = axis is None
    dims = len(x.shape)
    if is_flatten:
        x = paddle.flatten(x)
        axis = 0
    else:
        if not isinstance(axis, int) or not (axis < dims and axis >= -dims):
            raise ValueError(
                "In median, axis should be none or an integer in range [-rank(x), rank(x))."
            )
        if axis < 0:
            axis += dims
    sz = x.shape[axis]
    kth = sz >> 1
    tensor_topk, idx = paddle.topk(x, kth + 1, axis=axis, largest=False)
    dtype = 'float64' if x.dtype == core.VarDesc.VarType.FP64 else 'float32'
    if sz & 1 == 0:
        out_tensor = paddle.slice(
            tensor_topk, axes=[axis], starts=[kth - 1],
            ends=[kth]) + paddle.slice(
                tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1])
        out_tensor = paddle.cast(out_tensor, dtype=dtype) / 2
    else:
        out_tensor = paddle.cast(
            paddle.slice(
                tensor_topk, axes=[axis], starts=[kth], ends=[kth + 1]),
            dtype=dtype)
    out_tensor = out_tensor + paddle.sum(
        paddle.cast(
            paddle.isnan(x), dtype=dtype) * x, axis=axis, keepdim=True)
    if not keepdim or is_flatten:
        if not is_flatten:
            newshape = x.shape[:axis] + x.shape[axis + 1:]
        elif not keepdim:
            newshape = [1]
        else:
            newshape = [1] * dims
    else:
        newshape = out_tensor.shape
    out_tensor = out_tensor.reshape(newshape, name=name)
    return out_tensor
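
The trailing `paddle.sum(paddle.cast(paddle.isnan(x), dtype) * x, ...)` term exists purely to propagate NaN: if any element of ``x`` is NaN, that product is NaN and the sum poisons the otherwise-finite result. A minimal sketch of the effect (expected outputs inferred from the code above):

import paddle

x = paddle.to_tensor([1.0, float('nan'), 3.0])
print(paddle.median(x))                              # NaN: the isnan(x) * x term is NaN
print(paddle.median(paddle.to_tensor([1.0, 3.0])))   # 2.0: no NaN, so the term adds 0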
Example 8
 def test_quantile_include_NaN(self):
     input_data = np.random.randn(2, 3, 4)
     input_data[0, 1, 1] = np.nan
     x = paddle.to_tensor(input_data)
     paddle_res = paddle.quantile(x, q=0.35, axis=0)
     self.assertTrue(paddle.isnan(paddle_res[1, 1]))
Example 9
    def train_step(self,
                   interaction,
                   max_generation_length,
                   snippet_alignment_probability=1.,
                   db2id=None,
                   id2db=None,
                   step=None):
        """ Trains the interaction-level model on a single interaction.

        Args:
            interaction (Interaction): The interaction to train on.
            learning_rate (float): Learning rate to use.
            snippet_keep_age (int): Age of oldest snippets to use.
            snippet_alignment_probability (float): The probability that a snippet will
                be used in constructing the gold sequence.
        """
        # assert self.params.discourse_level_lstm

        losses = []
        total_gold_tokens = 0

        input_hidden_states = []
        input_sequences = []

        final_utterance_states_c = []
        final_utterance_states_h = []

        previous_query_states = []
        previous_queries = []

        decoder_states = []

        discourse_state = None
        if self.params.discourse_level_lstm:
            discourse_state, discourse_lstm_states = self._initialize_discourse_states(
            )
        discourse_states = []

        # Schema and schema embeddings
        input_schema = interaction.get_schema()
        schema_states = []

        if input_schema and not self.params.use_bert:
            schema_states = self.encode_schema_bow_simple(input_schema)

        # Get the intra-turn graph and cross-turn graph
        inner = []
        for i, ele in enumerate(
                interaction.interaction.schema.column_names_surface_form):
            for j in range(
                    i + 1,
                    len(interaction.interaction.schema.
                        column_names_surface_form)):
                if ele.split(
                        '.'
                )[0] == interaction.interaction.schema.column_names_surface_form[
                        j].split('.')[0]:
                    inner.append([i, j])
        adjacent_matrix = self.get_adj_matrix(
            inner, input_schema.table_schema['foreign_keys'],
            input_schema.num_col)
        adjacent_matrix_cross = self.get_adj_utterance_matrix(
            inner, input_schema.table_schema['foreign_keys'],
            input_schema.num_col)
        adjacent_matrix = paddle.to_tensor(adjacent_matrix)
        adjacent_matrix_cross = paddle.to_tensor(adjacent_matrix_cross)

        previous_schema_states = paddle.zeros(
            [input_schema.num_col, self.params.encoder_state_size])

        for utterance_index, utterance in enumerate(
                interaction.gold_utterances()):

            if interaction.identifier in LIMITED_INTERACTIONS and utterance_index > LIMITED_INTERACTIONS[
                    interaction.identifier]:
                break

            input_sequence = utterance.input_sequence()

            available_snippets = utterance.snippets()
            previous_query = utterance.previous_query()

            # Get the gold query: reconstruct if the alignment probability is less than one
            if snippet_alignment_probability < 1.:
                gold_query = sql_util.add_snippets_to_query(
                    available_snippets,
                    utterance.contained_entities(),
                    utterance.anonymized_gold_query(),
                    prob_align=snippet_alignment_probability) + [
                        vocab.EOS_TOK
                    ]
            else:
                gold_query = utterance.gold_query()

            final_utterance_state, utterance_states, schema_states = self.get_bert_encoding(
                input_sequence, input_schema, discourse_state, dropout=True)

            # temp1=final_utterance_state

            schema_states = paddle.stack(schema_states, axis=0)
            for i in range(self.params.gnn_layer_number):
                schema_states = self.gnn_history[2 * i](schema_states,
                                                        adjacent_matrix_cross,
                                                        previous_schema_states)
                schema_states = self.gnn_history[2 * i + 1](
                    schema_states, adjacent_matrix_cross,
                    previous_schema_states)
                schema_states = self.gnn[i](schema_states, adjacent_matrix)
            previous_schema_states = schema_states
            schema_states_ls = paddle.split(schema_states,
                                            schema_states.shape[0],
                                            axis=0)
            schema_states = [ele.squeeze(0) for ele in schema_states_ls]

            input_hidden_states.extend(utterance_states)
            input_sequences.append(input_sequence)

            num_utterances_to_keep = min(self.params.maximum_utterances,
                                         len(input_sequences))

            if self.params.discourse_level_lstm:
                discourse_state, discourse_lstm_states = self.discourse_lstms(
                    final_utterance_state[0].unsqueeze(0),
                    discourse_lstm_states)
                discourse_state = discourse_state.squeeze()

            if self.params.use_utterance_attention:

                final_utterance_states_c, final_utterance_states_h, final_utterance_state = self.get_utterance_attention(
                    final_utterance_states_c, final_utterance_states_h,
                    final_utterance_state, num_utterances_to_keep)

            if self.params.state_positional_embeddings:
                utterance_states, flat_sequence = self._add_positional_embeddings(
                    input_hidden_states, input_sequences)

            snippets = None

            if self.params.use_previous_query:
                if len(previous_query) > 0:
                    previous_queries, previous_query_states = self.get_previous_queries(
                        previous_queries, previous_query_states,
                        previous_query, input_schema)

            if len(gold_query) <= max_generation_length and len(
                    previous_query) <= max_generation_length:
                prediction = self.predict_turn(
                    final_utterance_state,
                    utterance_states,
                    schema_states,
                    max_generation_length,
                    gold_query=gold_query,
                    snippets=snippets,
                    input_sequence=flat_sequence,
                    previous_queries=previous_queries,
                    previous_query_states=previous_query_states,
                    input_schema=input_schema,
                    feed_gold_tokens=True,
                    training=True)
                loss = prediction[1]
                decoder_states = prediction[3]
                total_gold_tokens += len(gold_query)
                losses.append(loss)
            else:
                # Break if previous decoder snippet encoding -- because the previous
                # sequence was too long to run the decoder.
                if self.params.previous_decoder_snippet_encoding:
                    break
                continue

        if losses:
            average_loss = paddle.sum(paddle.stack(losses)) / total_gold_tokens
            print(f"total_gold_tokens:{total_gold_tokens}, step:{step}")
            print(f"LOSS:{float(average_loss.numpy())}")
            if paddle.sum(paddle.cast(paddle.isinf(average_loss),
                                      'int32')) == paddle.ones([1]):
                self.save("./inf_checkpoint")

            # Renormalize so the effect is normalized by the batch size.
            normalized_loss = average_loss
            if self.params.reweight_batch:
                normalized_loss = len(losses) * average_loss / float(
                    self.params.batch_size)

            normalized_loss.backward()

            if step <= self.params.warmup_step:
                self.set_learning_rate(step / self.params.warmup_step *
                                       self.params.initial_learning_rate)
            step += 1

            self.trainer.step()
            if self.params.fine_tune_bert:
                self.bert_trainer.step()
                self.bert_trainer.clear_grad()
            self.trainer.clear_grad()

            loss_scalar = float(normalized_loss.numpy())
            # Note: isNan is True only when the loss contains NO NaN values,
            # so the assert below aborts training if a NaN slipped through.
            isNan = sum(
                paddle.cast(paddle.isnan(normalized_loss),
                            'float32').numpy().tolist()) == 0
            if paddle.isnan(normalized_loss):
                print("nan error but keep running")
            assert isNan

        else:
            loss_scalar = 0.

        return loss_scalar, step
Example 10
 def forward(self, inputs):
     """
     forward
     """
     x = paddle.isnan(inputs)
     return x.astype('float32')