Esempio n. 1
0
    def forward(self, logits, labels, length_ratio, source_length,
                target_length):
        """
        :param logits: Model logits. Shape: (batch, length, vocab_size).
        :param labels: Gold targets. Shape: (batch, length).
        :param length_ratio: Length Ratios. Shape: (batch,).
        :param source_length: Source lengths. Shape: (batch,).
        :param target_length: Target lengths. Shape: (batch,).
        :return: Sequence scores. Shape: (batch,).
        """
        logprobs = npx.log_softmax(logits,
                                   axis=-1,
                                   temperature=self.softmax_temperature)

        # Select the label probability, then take their logs.
        # probs and scores: (batch_size, target_seq_len)
        token_scores = npx.pick(logprobs, labels, axis=-1)
        if self.score_type == C.SCORING_TYPE_NEGLOGPROB:
            token_scores = token_scores * -1

        # Sum, then apply length penalty. The call to `np.where` masks out invalid values from scores.
        # zeros and sums: (batch_size,)
        scores = np.sum(np.where(labels != 0, token_scores,
                                 np.zeros_like(token_scores)),
                        axis=1)

        if self.constant_length_ratio is not None and self.constant_length_ratio > 0.0:
            predicted_output_length = source_length * self.constant_length_ratio
        else:
            predicted_output_length = source_length * length_ratio

        scores = self.scorer(scores, target_length, predicted_output_length)

        return scores
Esempio n. 2
0
    def forward(self, scores, target_dists, finished, best_hyp_indices):
        """
        Choose an extension of each hypothesis from its softmax distribution.

        :param scores: Vocabulary scores for the next beam step. (batch_size * beam_size, target_vocabulary_size)
        :param target_dists: The non-cumulative target distributions (ignored).
        :param finished: The list of finished hypotheses.
        :param best_hyp_indices: Best hypothesis indices constant.
        :return: The row indices, column indices, and values of the sampled words.
        """
        # Map the negative logprobs to probabilities so as to have a distribution
        target_dists = np.exp(-target_dists)

        # n == 0 means sample from the full vocabulary. Otherwise, we sample from the top n.
        if self.n != 0:
            # select the top n in each row, via a mask
            masked_items = npx.topk(target_dists, k=self.n, ret_typ='mask', axis=1, is_ascend=False)
            # set unmasked items to 0
            masked_items = np.where(masked_items, target_dists, masked_items)
            # renormalize
            target_dists = masked_items / np.sum(masked_items, axis=1, keepdims=True)

        # Sample from the target distributions over words, then get the corresponding values from the cumulative scores
        best_word_indices = npx.random.categorical(target_dists, get_prob=False)
        # Zeroes for finished hypotheses.
        best_word_indices = np.where(finished, np.zeros_like(best_word_indices), best_word_indices)
        values = npx.pick(scores, best_word_indices, axis=1, keepdims=True)

        best_hyp_indices = npx.slice_like(best_hyp_indices, best_word_indices, axes=(0,))

        return best_hyp_indices, best_word_indices, values
Esempio n. 3
0
    def forward(self, logits: np.ndarray, labels: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        pred = npx.log_softmax(logits, axis=-1)

        # (batch, len)
        neg_log_likelihood = - npx.pick(pred,  # pylint: disable=invalid-unary-operand-type
                                        labels, axis=-1, keepdims=False)

        # label smoothing as in
        # https://github.com/dmlc/gluon-nlp/blob/b714eaccc67619d7bdcbd1574d30be87d9c73f0c/src/gluonnlp/loss.py#L4
        if self._alpha > 0:
            all_scores = np.sum(pred, axis=-1)
            neg_log_likelihood = (1 - self._alpha) * neg_log_likelihood - self._alpha / self._num_labels * all_scores

        # (batch, len,)
        valid_mask = labels != self.ignore_label

        # (batch, len)
        loss = neg_log_likelihood * valid_mask

        # (1,)
        num_valid = np.sum(valid_mask)

        # (1,)
        ce = np.sum(loss) * self.weight

        # we need to divide by num_valid here to backpropagate a 'valid' normalized loss value like in SoftmaxOutput.
        return ce / num_valid, np.ones((1,))
def test_pick():
    A = np.zeros((INT_OVERFLOW, 2))
    B = np.zeros((INT_OVERFLOW))
    A.attach_grad()
    B.attach_grad()
    with mx.autograd.record():
        C = npx.pick(A, B)
    assert C.shape == (INT_OVERFLOW, )
    assert C[0] == 0
    C.backward()
    assert A.grad.shape == (INT_OVERFLOW, 2)
    assert B.grad.shape == (INT_OVERFLOW, )
    assert A.grad[0][0] == 1
Esempio n. 5
0
    def forward(self, hidden, target):
        """

        Parameters
        ----------
        hidden
            The hidden representation
            Shape (..., in_units)
        target
            The target representation
            Shape (...,)

        Returns
        -------
        sel_logits
            The log probability that each hidden has when label == target
        """
        # TODO(sxjscience) The computation here can be greatly accelerated! Due to the
        #  missing feature of index_update, we are not able to do this here.
        logits = self.get_logits(hidden)
        sel_logits = npx.pick(logits, target, axis=-1)
        return sel_logits
Esempio n. 6
0
    def forward(self, pred, label):
        """

        Parameters
        ----------
        pred :
            The predictions of the network. Shape (..., V)
        label :
            The labels. Shape (..., )

        Returns
        -------
        loss :
            Shape (..., )
        """
        if not self._from_logits:
            pred = npx.log_softmax(pred, axis=-1)
        log_likelihood = npx.pick(pred, label, axis=-1)
        all_scores = pred.sum(axis=-1)
        loss = - (1 - self._alpha) * log_likelihood\
               - self._alpha / float(self._num_labels) * all_scores
        return loss