Code Example #1
    def forward(self,
                queries: np.ndarray,
                key_values: np.ndarray,
                heads: np.ndarray,
                lengths: Optional[np.ndarray] = None,
                bias: Optional[np.ndarray] = None):

        # (n*h, lq, lk)
        logits = npx.interleaved_matmul_encdec_qk(queries,
                                                  key_values,
                                                  heads=heads)

        if bias is not None:
            logits = logits + bias

        if lengths is not None:
            # required shape for lengths: (n*h, lq); required dtype: int32
            probs = npx.softmax(logits,
                                axis=-1,
                                length=lengths,
                                use_length=True)
        else:
            probs = npx.softmax(logits, axis=-1)

        probs = npx.dropout(probs,
                            p=self.dropout) if self.dropout > 0.0 else probs

        # key_values: (lk, n, dv * 2)
        # probs: (n*h, lq, lk)
        # result: (n, lq, dv)
        return npx.interleaved_matmul_encdec_valatt(key_values,
                                                    probs,
                                                    heads=heads)
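For reference, here is a minimal non-fused sketch of the same attention pattern (the helper name and per-head layout are assumptions; the scaling and interleaving details of the fused operators are omitted). With q, k, v already split per head into shapes (n*h, lq, dk), (n*h, lk, dk) and (n*h, lk, dv), the snippet above computes roughly:

def simple_attention(q, k, v, lengths=None, bias=None, dropout=0.0):
    # attention logits: (n*h, lq, lk)
    logits = npx.batch_dot(q, k, transpose_b=True)
    if bias is not None:
        logits = logits + bias
    if lengths is not None:
        # lengths: (n*h, lq), dtype int32
        probs = npx.softmax(logits, axis=-1, length=lengths, use_length=True)
    else:
        probs = npx.softmax(logits, axis=-1)
    if dropout > 0.0:
        probs = npx.dropout(probs, p=dropout)
    # weighted sum of values: (n*h, lq, dv)
    return npx.batch_dot(probs, v)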
Code Example #2
    def decode_step(self,
                    step_input: np.ndarray,
                    states: List[np.ndarray],
                    vocab_slice_ids: Optional[np.ndarray] = None):
        outputs = []  # type: List[np.ndarray]
        new_states = []  # type: List[np.ndarray]
        factor_outputs = []  # type: List[List[np.ndarray]]
        state_index = 0
        for model, model_state_structure in zip(self._models, self.state_structure()):
            model_states = states[state_index:state_index+len(model_state_structure)]
            state_index += len(model_state_structure)
            logits, model_states, target_factor_outputs = model.decode_step(step_input, model_states, vocab_slice_ids)
            probs = npx.softmax(logits, axis=-1, temperature=self._softmax_temperature)
            outputs.append(probs)
            target_factor_probs = [npx.softmax(tfo, axis=-1) for tfo in target_factor_outputs]
            factor_outputs.append(target_factor_probs)
            new_states += model_states
        scores = self._interpolation(outputs)

        target_factors = None  # type: Optional[np.ndarray]
        if factor_outputs:
            # target factors are greedily 'decoded'.
            factor_predictions = [npx.cast(np.expand_dims(np.argmin(self._interpolation(fs), axis=-1), axis=1), dtype='int32')
                                  for fs in zip(*factor_outputs)]
            if factor_predictions:
                target_factors = factor_predictions[0] if len(factor_predictions) == 1 \
                    else np.concatenate(factor_predictions, axis=1)
        return scores, new_states, target_factors
Code Example #3
def masked_softmax(X, valid_len):  # TODO: why is a masked softmax necessary, and what is valid_len? See the sketch after this function.
    # X: 3-D tensor, valid_len: 1-D or 2-D tensor
    if valid_len is None:
        return npx.softmax(X)
    else:
        shape = X.shape
        if valid_len.ndim == 1:
            valid_len = valid_len.repeat(shape[1], axis=0)
        else:
            valid_len = valid_len.reshape(-1)
        # Fill masked elements with a large negative value, whose exp is 0
        X = npx.sequence_mask(X.reshape(-1, shape[-1]), valid_len, True, axis=1, value=-1e6)
        return npx.softmax(X).reshape(shape)
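To answer the TODO above: in attention over padded mini-batches, positions beyond a sequence's real length carry no information, so their scores are pushed to a large negative value before the softmax and end up with (near-)zero weight. valid_len holds the number of valid positions per example (1-D) or per query row (2-D). A minimal usage sketch, assuming np/npx are mxnet.np/mxnet.npx:

X = np.random.uniform(size=(2, 2, 4))   # (batch, queries, keys)
valid_len = np.array([2, 3])            # keep 2 keys in example 0, 3 in example 1
weights = masked_softmax(X, valid_len)
# Each row of `weights` sums to 1, with (near-)zero weight past the valid length.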
Code Example #4
def masked_softmax(att_score, mask, axis: int = -1, temperature=None):
    """Ignore the masked elements when calculating the softmax. The mask can be broadcastable.

    Parameters
    ----------
    att_score : Symbol or NDArray
        Shape (..., length, ...)
    mask : Symbol or NDArray or None
        Shape (..., length, ...)
        1 --> The element is not masked
        0 --> The element is masked
    axis
        The axis to calculate the softmax. att_score.shape[axis] must be the same as mask.shape[axis]
    temperature
        The temperature. It scales down the scores before applying the softmax.

    Returns
    -------
    att_weights : Symbol or NDArray
        Shape (..., length, ...)
    """
    if mask is None:
        return npx.softmax(att_score, axis=axis, temperature=temperature)
    else:
        return npx.masked_softmax(att_score, mask=mask.astype(np.bool),
                                  axis=axis, temperature=temperature)
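A minimal usage sketch (shapes and values are assumptions), again with np/npx being mxnet.np/mxnet.npx:

scores = np.random.normal(size=(2, 3, 4))   # (batch, num_queries, num_keys)
mask = np.ones((2, 3, 4))
mask[:, :, 2:] = 0                          # hide the last two key positions everywhere
weights = masked_softmax(scores, mask)      # zero weight wherever mask == 0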
Code Example #5
def masked_softmax(X, valid_lens):
    """Perform softmax operation by masking elements on the last axis."""
    # `X`: 3D tensor, `valid_lens`: 1D or 2D tensor
    if valid_lens is None:
        return npx.softmax(X)
    else:
        shape = X.shape
        if valid_lens.ndim == 1:
            valid_lens = valid_lens.repeat(shape[1])
        else:
            valid_lens = valid_lens.reshape(-1)
        # On the last axis, replace masked elements with a very large negative
        # value, whose exponentiation outputs 0
        X = npx.sequence_mask(X.reshape(-1, shape[-1]), valid_lens, True,
                              value=-1e6, axis=1)
        return npx.softmax(X).reshape(shape)
Code Example #6
def masked_softmax(X, valid_len):
    """Perform softmax by filtering out some elements."""
    # X: 3-D tensor, valid_len: 1-D or 2-D tensor
    if valid_len is None:
        return npx.softmax(X)
    else:
        shape = X.shape
        if valid_len.ndim == 1:
            valid_len = valid_len.repeat(shape[1], axis=0)
        else:
            valid_len = valid_len.reshape(-1)
        # Fill masked elements with a large negative value, whose exp is 0
        X = npx.sequence_mask(X.reshape(-1, shape[-1]),
                              valid_len,
                              True,
                              axis=1,
                              value=-1e6)
        return npx.softmax(X).reshape(shape)
Code Example #7
def test_softmax():
    # SMALL_Y and LARGE_X are size constants defined elsewhere in the test module.
    input_data = np.ones((SMALL_Y, LARGE_X))
    for axis in [0, 1]:
        true_output = np.full((SMALL_Y, LARGE_X), (1 / input_data.shape[axis]))
        output = npx.softmax(input_data, axis=axis)
        assert_almost_equal(output.asnumpy(),
                            true_output,
                            rtol=1e-5,
                            atol=1e-5)
Code Example #8
def gumbel_softmax(logits,
                   temperature: float = 1.0,
                   eps: float = 1E-10,
                   hard=True,
                   use_np_gumbel: bool = True):
    r"""Perform the gumbel-softmax trick to generate differentiable one-hot vectors from the input
    logits.

    Here, the gumbel distribution is

    Gumbel(\alpha) = -log (-log U) + \log \alpha, in which U is the uniform(0, 1) distribution.

    A nice property of Gumbel is:

    \argmax({Gumbel(\alpha_i)}) \sim multinomial(\alpha_i)

    The Gumbel-Softmax trick is to use the softmax + straight-through estimator to produce
    one-hot vectors that represent the sampling result.

    References:

        1. https://en.wikipedia.org/wiki/Gumbel_distribution
        2. [ICLR2017] Categorical Reparameterization with Gumbel-Softmax

    Parameters
    ----------
    logits
        Logits. Shape (..., V)
    temperature
        The temperature that controls the smoothness of the samples; lower values give sharper, more one-hot-like outputs.
    eps
        The eps for stability of gradient
    hard
        Whether to use the straight-through estimator to produce one-hot vectors.
    use_np_gumbel
        Whether to use the np.random.gumbel operator.

    Returns
    -------
    ret
        The returned output. Shape (..., V)
    """
    # TODO(sxjscience) Investigate the impact of random.gumbel:
    #  Actually, random.gumbel has no eps and may have problems calculating the gradient.
    if use_np_gumbel:
        gumbels = np.random.gumbel(np.zeros_like(logits))
    else:
        u = np.random.uniform(np.zeros_like(logits), 1)
        gumbels = -np.log(-np.log(u + eps) + eps)
    y = npx.softmax((gumbels + logits) / temperature, axis=-1)
    if hard:
        y_hard = np.max(y, axis=-1, keepdims=True) == y
        y_hard = npx.stop_gradient(y_hard - y) + y
        return y_hard
    else:
        return y
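A short usage sketch (values are assumptions), drawing a differentiable one-hot sample from a batch of logits:

logits = np.array([[1.0, 2.0, 0.5]])
sample = gumbel_softmax(logits, temperature=0.5, hard=True)
# `sample` has shape (1, 3); its forward value is one-hot, while gradients flow
# through the underlying softmax via the straight-through estimator.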
Code Example #9
 def forward(self, A, B):
     # Shape of `A`/`B`: (`batch_size`, no. of words in sequence A/B,
     # `embed_size`)
     # Shape of `f_A`/`f_B`: (`batch_size`, no. of words in sequence A/B,
     # `num_hiddens`)
     f_A = self.f(A)
     f_B = self.f(B)
     # Shape of `e`: (`batch_size`, no. of words in sequence A,
     # no. of words in sequence B)
     e = npx.batch_dot(f_A, f_B, transpose_b=True)
     # Shape of `beta`: (`batch_size`, no. of words in sequence A,
     # `embed_size`), where sequence B is softly aligned with each word
     # (axis 1 of `beta`) in sequence A
     beta = npx.batch_dot(npx.softmax(e), B)
     # Shape of `alpha`: (`batch_size`, no. of words in sequence B,
     # `embed_size`), where sequence A is softly aligned with each word
     # (axis 1 of `alpha`) in sequence B
     alpha = npx.batch_dot(npx.softmax(e.transpose(0, 2, 1)), A)
     return beta, alpha
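A small shape walk-through of the soft-alignment step (sizes are assumptions):

f_A = np.random.normal(size=(2, 5, 8))          # (batch, len_A, num_hiddens)
f_B = np.random.normal(size=(2, 7, 8))          # (batch, len_B, num_hiddens)
B = np.random.normal(size=(2, 7, 16))           # (batch, len_B, embed_size)
e = npx.batch_dot(f_A, f_B, transpose_b=True)   # (2, 5, 7)
beta = npx.batch_dot(npx.softmax(e), B)         # (2, 5, 16): B softly aligned to each word of A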
Code Example #10
def masked_softmax(att_score, mask, dtype=np.float32, axis: int = -1):
    """Ignore the masked elements when calculating the softmax. The mask can be broadcastable.

    Parameters
    ----------
    att_score : Symbol or NDArray
        Shape (..., length, ...)
    mask : Symbol or NDArray or None
        Shape (..., length, ...)
        1 --> The element is not masked
        0 --> The element is masked
    dtype
        data type
    axis
        The axis to calculate the softmax. att_score.shape[axis] must be the same as mask.shape[axis]
    Returns
    -------
    att_weights : Symbol or NDArray
        Shape (..., length, ...)
    """
    if mask is not None:
        # Fill in the masked scores with a very small value
        neg = -1e18
        if _np.dtype(dtype) == np.float16:
            neg = -1e4
        else:
            try:
                # if AMP (automatic mixed precision) is enabled, -1e18 will cause NaN.
                from mxnet import amp
                if amp.amp._amp_initialized:
                    neg = -1e4
            except ImportError:
                pass

        att_score = np.where(mask, att_score, neg)
        logits = npx.softmax(att_score, axis=axis) * mask
    else:
        logits = npx.softmax(att_score, axis=axis)
    return logits
Code Example #11
File: SSD.py  Project: ar-mine/Captcha-Crack
def predict(X):
    anchors, cls_preds, bbox_preds = net(X.as_in_context(ctx[0]))
    cls_probs = npx.softmax(cls_preds).transpose(0, 2, 1)
    output = npx.multibox_detection(cls_probs, bbox_preds, anchors)
    idx = [i for i, row in enumerate(output[0]) if row[0] != -1]
    return output[0, idx]
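A hypothetical call (input shape assumed): with X a preprocessed image batch of shape (1, 3, height, width),

detections = predict(X)
# Each row of `detections` is [class_id, confidence, xmin, ymin, xmax, ymax],
# with corner coordinates normalized to [0, 1]; rows with class_id == -1
# (background / suppressed boxes) were already filtered out above.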
Code Example #12
"""## Classifying the Testing Set and Submitting Results on Kaggle

After obtaining a satisfactory model design and hyperparameters, we retrain the model on the full training dataset (including the validation set) and then classify the test set. Note that predictions are made by the output network we just trained.

"""

net = get_net(devices)
net.hybridize()
train(net, train_valid_iter, None, num_epochs, lr, wd, devices, lr_period,
      lr_decay)

preds = []
for data, label in test_iter:
    output_features = net.features(data.as_in_ctx(devices[0]))
    output = npx.softmax(net.output_new(output_features))
    preds.extend(output.asnumpy())
ids = sorted(os.listdir(
    os.path.join(data_dir, 'train_valid_test', 'test', 'unknown')))
with open('submission.csv', 'w') as f:
    f.write('id,' + ','.join(train_valid_ds.synsets) + '\n')
    for i, output in zip(ids, preds):
        f.write(i.split('.')[0] + ',' + ','.join(
            [str(num) for num in output]) + '\n')

"""After executing the above code, we will generate a "submission.csv" file. The
format of this file is consistent with the Kaggle competition requirements. The
method for submitting results is similar to method in
:numref:`sec_kaggle_house`.

Code Example #13
File: actor_critic.py  Project: tlby/mxnet
 def forward(self, x):
     x = self.dense(x)
     probs = self.action_pred(x)
     values = self.value_pred(x)
     return npx.softmax(probs), values