Example #1
File: loss.py  Project: bricksdont/sockeye
    def forward(self, logits: np.ndarray, labels: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        pred = npx.log_softmax(logits, axis=-1)

        # (batch, len)
        neg_log_likelihood = - npx.pick(pred,  # pylint: disable=invalid-unary-operand-type
                                        labels, axis=-1, keepdims=False)

        # label smoothing as in
        # https://github.com/dmlc/gluon-nlp/blob/b714eaccc67619d7bdcbd1574d30be87d9c73f0c/src/gluonnlp/loss.py#L4
        if self._alpha > 0:
            all_scores = np.sum(pred, axis=-1)
            neg_log_likelihood = (1 - self._alpha) * neg_log_likelihood - self._alpha / self._num_labels * all_scores

        # (batch, len,)
        valid_mask = labels != self.ignore_label

        # (batch, len)
        loss = neg_log_likelihood * valid_mask

        # (1,)
        num_valid = np.sum(valid_mask)

        # (1,)
        ce = np.sum(loss) * self.weight

        # we need to divide by num_valid here to backpropagate a 'valid' normalized loss value like in SoftmaxOutput.
        return ce / num_valid, np.ones((1,))
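A minimal NumPy-only sketch of the label-smoothing arithmetic above, using a hypothetical 1x3 logits row and toy values standing in for self._alpha and self._num_labels:

import numpy as np

logits = np.array([[2.0, 0.5, -1.0]])
labels = np.array([0])
alpha, num_labels = 0.1, 3

log_probs = logits - np.log(np.exp(logits).sum(axis=-1, keepdims=True))   # log_softmax
neg_log_likelihood = -log_probs[np.arange(len(labels)), labels]           # picked NLL, shape (1,)
all_scores = log_probs.sum(axis=-1)
smoothed = (1 - alpha) * neg_log_likelihood - alpha / num_labels * all_scores
print(smoothed)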
Example #2
 def forward(self, x):
     square_of_sum = np.sum(self.embedding(x), axis=1)**2
     sum_of_square = np.sum(self.embedding(x)**2, axis=1)
     x = self.linear_layer(self.fc(x).sum(1)) \
         + 0.5 * (square_of_sum - sum_of_square).sum(1, keepdims=True)
     x = npx.sigmoid(x)
     return x
Example #3
def optimize_quantization_mse(tensor, rounds=10):
    """
    Minimize mean squared error of quantizing a tensor, returning the top value
    (i.e. the one that quantizes to 127).  Scaling = 127.0 / return value.

    This is a convex optimization problem.  EM works but makes slow steps.
    Instead of EM, use binary search in the direction minimization suggests.
    """
    best_mse = math.inf
    best_top = None
    maxabs = npx.intgemm_maxabsolute(tensor)
    low = 0.0
    high = maxabs
    for _ in range(rounds):
        value = (low + high) / 2.0
        quant = npx.intgemm_prepare_data(tensor, value)
        quant_float = quant.astype(C.DTYPE_FP32)
        mse = (quant_float *
               (value / 127.0) - tensor).norm().item() / math.sqrt(
                   float(tensor.size))
        if mse < best_mse:
            best_mse = mse
            best_top = value
        # This optimizes scaling subject to cluster assignment.
        # It can be used for EM but the step is really slow, so use it for direction.
        scale = np.sum(quant_float * quant_float) / np.sum(
            quant_float * tensor)
        top = 127.0 / scale.item()
        if top < value:
            high = value
        else:
            low = value
    return best_top
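For reference, a NumPy-only sketch of the same binary search, where the npx.intgemm_* calls are approximated by clipping and rounding to the int8 range at scale 127.0 / value (an illustration of the idea, not the library kernel, whose rounding may differ):

import math
import numpy as np

def optimize_quantization_mse_numpy(tensor, rounds=10):
    best_mse, best_top = math.inf, None
    low, high = 0.0, float(np.abs(tensor).max())
    for _ in range(rounds):
        value = (low + high) / 2.0
        # stand-in for npx.intgemm_prepare_data: quantize to the int8 range
        quant_float = np.clip(np.round(tensor * (127.0 / value)), -127, 127)
        mse = np.linalg.norm(quant_float * (value / 127.0) - tensor) / math.sqrt(tensor.size)
        if mse < best_mse:
            best_mse, best_top = mse, value
        # closed-form rescaling step, used only to choose the search direction
        scale = np.sum(quant_float * quant_float) / np.sum(quant_float * tensor)
        top = 127.0 / scale
        if top < value:
            high = value
        else:
            low = value
    return best_top

print(optimize_quantization_mse_numpy(np.random.randn(1024).astype(np.float32)))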
Example #4
 def forward(self, x):
     embed_x = self.embedding(x)
     square_of_sum = np.sum(embed_x, axis=1)**2
     sum_of_square = np.sum(embed_x**2, axis=1)
     inputs = np.reshape(embed_x, (-1, self.embed_output_dim))
     x = self.linear_layer(self.fc(x).sum(1)) \
         + 0.5 * (square_of_sum - sum_of_square).sum(1, keepdims=True) \
         + self.mlp(inputs)
     x = npx.sigmoid(x)
     return x
Example #5
def evaluator(network, inter_matrix, test_data, ctx):
    scores = []
    for values in inter_matrix:
        feat = gluon.utils.split_and_load(values, ctx, even_split=False)
        scores.extend([network(i).asnumpy() for i in feat])
    recons = np.array([item for sublist in scores for item in sublist])
    # Calculate the test RMSE.
    rmse = np.sqrt(
        np.sum(np.square(test_data - np.sign(test_data) * recons)) /
        np.sum(np.sign(test_data)))
    return float(rmse)
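A toy check of the masked RMSE above with plain NumPy arrays (hypothetical 2x2 ratings; np.sign(test_data) keeps only the observed, non-zero entries in the error):

import numpy as np

test_data = np.array([[5.0, 0.0], [3.0, 4.0]])
recons = np.array([[4.0, 2.0], [3.0, 5.0]])
mask = np.sign(test_data)
rmse = np.sqrt(np.sum(np.square(test_data - mask * recons)) / np.sum(mask))
print(rmse)  # sqrt((1 + 0 + 0 + 1) / 3)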
Example #6
def test_fully_connected():
    a = np.ones(shape=(LARGE_X, SMALL_Y))
    b = np.ones(shape=(SMALL_Y, SMALL_Y))
    c = np.ones(shape=(b.shape[0], ))

    # w/o bias
    res = mx.npx.fully_connected(a, b, num_hidden=b.shape[0], no_bias=True)
    assert np.sum(res[-1] == a.shape[1]) == b.shape[0]

    # w/ bias
    res = mx.npx.fully_connected(a, b, c, num_hidden=b.shape[0], no_bias=False)
    assert np.sum(res[-1] == a.shape[1] + 1) == b.shape[0]
Example #7
File: loss.py  Project: bricksdont/sockeye
    def forward(self, length_predictions, labels):
        """
        Returns Poisson loss and output given data and expected integers as labels.

        :param length_predictions: Length predictions. Shape: (batch_size,).
        :param labels: Targets. Shape: (batch_size,).
        :return: Poisson loss of length predictions of the batch, and number of samples (batch size).
        """
        # (batch_size,)
        loss = length_predictions - labels * np.log(np.maximum(1e-10, length_predictions))
        # (1,)
        loss = np.sum(loss * self.weight)
        num_samples = np.sum(np.ones_like(length_predictions))
        return loss, num_samples
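A quick numeric check of the Poisson term above with standard NumPy, on a hypothetical two-sample batch (self.weight assumed to be 1.0 here):

import numpy as np

length_predictions = np.array([2.0, 5.0])
labels = np.array([3.0, 4.0])
loss = length_predictions - labels * np.log(np.maximum(1e-10, length_predictions))
print(np.sum(loss * 1.0), np.sum(np.ones_like(length_predictions)))  # loss, num_samples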
Example #8
File: loss.py  Project: bricksdont/sockeye
    def forward(self, length_predictions, labels):
        """
        Returns MSE loss.

        :param length_predictions: Length predictions. Shape: (batch_size,).
        :param labels: Targets. Shape: (batch_size,).
        :return: MSE loss of length predictions of the batch.
        """
        # (batch_size,)
        loss = (self.weight / 2) * np.square(length_predictions - labels)
        # (1,)
        loss = np.sum(loss)
        num_samples = np.sum(np.ones_like(length_predictions))
        return loss, num_samples
Example #9
    def forward(self, logits, labels, length_ratio, source_length,
                target_length):
        """
        :param logits: Model logits. Shape: (batch, length, vocab_size).
        :param labels: Gold targets. Shape: (batch, length).
        :param length_ratio: Length Ratios. Shape: (batch,).
        :param source_length: Source lengths. Shape: (batch,).
        :param target_length: Target lengths. Shape: (batch,).
        :return: Sequence scores. Shape: (batch,).
        """
        logprobs = npx.log_softmax(logits,
                                   axis=-1,
                                   temperature=self.softmax_temperature)

        # Select the label probability, then take their logs.
        # probs and scores: (batch_size, target_seq_len)
        token_scores = npx.pick(logprobs, labels, axis=-1)
        if self.score_type == C.SCORING_TYPE_NEGLOGPROB:
            token_scores = token_scores * -1

        # Sum, then apply length penalty. The call to `np.where` masks out invalid values from scores.
        # zeros and sums: (batch_size,)
        scores = np.sum(np.where(labels != 0, token_scores,
                                 np.zeros_like(token_scores)),
                        axis=1)

        if self.constant_length_ratio is not None and self.constant_length_ratio > 0.0:
            predicted_output_length = source_length * self.constant_length_ratio
        else:
            predicted_output_length = source_length * length_ratio

        scores = self.scorer(scores, target_length, predicted_output_length)

        return scores
Example #10
    def forward(self, scores, target_dists, finished, best_hyp_indices):
        """
        Choose an extension of each hypothesis from its softmax distribution.

        :param scores: Vocabulary scores for the next beam step. (batch_size * beam_size, target_vocabulary_size)
        :param target_dists: The non-cumulative target distributions (ignored).
        :param finished: The list of finished hypotheses.
        :param best_hyp_indices: Best hypothesis indices constant.
        :return: The row indices, column indices, and values of the sampled words.
        """
        # Map the negative logprobs to probabilities so as to have a distribution
        target_dists = np.exp(-target_dists)

        # n == 0 means sample from the full vocabulary. Otherwise, we sample from the top n.
        if self.n != 0:
            # select the top n in each row, via a mask
            masked_items = npx.topk(target_dists, k=self.n, ret_typ='mask', axis=1, is_ascend=False)
            # set unmasked items to 0
            masked_items = np.where(masked_items, target_dists, masked_items)
            # renormalize
            target_dists = masked_items / np.sum(masked_items, axis=1, keepdims=True)

        # Sample from the target distributions over words, then get the corresponding values from the cumulative scores
        best_word_indices = npx.random.categorical(target_dists, get_prob=False)
        # Zeroes for finished hypotheses.
        best_word_indices = np.where(finished, np.zeros_like(best_word_indices), best_word_indices)
        values = npx.pick(scores, best_word_indices, axis=1, keepdims=True)

        best_hyp_indices = npx.slice_like(best_hyp_indices, best_word_indices, axes=(0,))

        return best_hyp_indices, best_word_indices, values
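A NumPy-only sketch of the top-n renormalization step, on a hypothetical 2x5 distribution with n=2 (note that, unlike the npx.topk mask, ties at the n-th value may keep more than n entries here):

import numpy as np

rng = np.random.default_rng(0)
target_dists = np.array([[0.05, 0.10, 0.50, 0.30, 0.05],
                         [0.40, 0.10, 0.10, 0.20, 0.20]])
n = 2
kth_largest = np.sort(target_dists, axis=1)[:, -n][:, None]
masked_items = np.where(target_dists >= kth_largest, target_dists, 0.0)
target_dists = masked_items / np.sum(masked_items, axis=1, keepdims=True)
# one categorical draw per row from the renormalized distribution
best_word_indices = np.array([rng.choice(row.size, p=row) for row in target_dists])
print(target_dists)
print(best_word_indices)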
Example #11
def get_rmse_log(net, X_train, y_train):
    """Gets root mse between the logarithms of the prediction and the truth."""
    num_train = X_train.shape[0]
    clipped_preds = np.clip(net(X_train), 1, float('inf'))
    return np.sqrt(
        2 *
        np.sum(square_loss(np.log(clipped_preds), np.log(y_train))).item() /
        num_train)
Example #12
def products(A):
    x = np.arange(4)
    y = np.ones(4)
    print("x . y : {}, {}".format(np.dot(x, y), np.sum(x * y)))
    print("A . x : {} has shape {}".format(np.dot(A, x), np.dot(A, x).shape))
    B = np.ones(shape=(4, 3))
    print("A . B : {} has shape {}".format(np.dot(A, B), np.dot(A, B).shape))
    print("{}.{} has shape {}".format(A.shape, B.shape, np.dot(A, B).shape))
Example #13
def test_np_sum():
    class TestSum(HybridBlock):
        def __init__(self, axis=None, dtype=None, keepdims=False):
            super(TestSum, self).__init__()
            self._axis = axis
            self._dtype = dtype
            self._keepdims = keepdims

        def hybrid_forward(self, F, a, *args, **kwargs):
            return F.np.sum(a, axis=self._axis, dtype=self._dtype, keepdims=self._keepdims)

    def is_int(dtype):
        return 'int' in dtype

    in_data_dim = random.choice([2, 3, 4])
    shape = rand_shape_nd(in_data_dim, dim=3)
    acc_type = {'float16': 'float32', 'float32': 'float64', 'float64': 'float64',
                'int8': 'int32', 'int32': 'int64', 'int64': 'int64'}
    for hybridize in [False, True]:
        for keepdims in [True, False]:
            for axis in ([i for i in range(in_data_dim)] + [(), None]):
                for itype in ['float16', 'float32', 'float64', 'int8', 'int32', 'int64']:
                    for dtype in ['float16', 'float32', 'float64', 'int8', 'int32', 'int64']:
                        if is_int(dtype) and not is_int(itype):
                            continue
                        # test gluon
                        test_sum = TestSum(axis=axis, dtype=dtype, keepdims=keepdims)
                        if hybridize:
                            test_sum.hybridize()
                        if is_int(itype):
                            x = _np.random.randint(-128, 128, shape, dtype=itype)
                            x = mx.nd.array(x)
                        else:
                            x = mx.nd.random.uniform(-1.0, 1.0, shape=shape, dtype=itype)
                        x = x.as_np_ndarray()
                        x.attach_grad()
                        expected_ret = _np.sum(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims)
                        expected_ret = expected_ret.astype(dtype)
                        with mx.autograd.record():
                            y = test_sum(x)
                        assert y.shape == expected_ret.shape
                        assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3 if dtype == 'float16' else 1e-3,
                                            atol=1e-5 if dtype == 'float16' else 1e-5)

                        y.backward()
                        assert same(x.grad.asnumpy(), _np.ones(shape=x.shape, dtype=x.dtype))

                        # test numeric
                        if itype == 'float32' and dtype == 'float32':
                            x_sym = mx.sym.Variable("x").as_np_ndarray()
                            mx_sym = mx.sym.np.sum(x_sym, axis=axis, dtype=dtype, keepdims=keepdims).as_nd_ndarray()
                            check_numeric_gradient(mx_sym, [x.as_nd_ndarray()],
                                                   numeric_eps=1e-3, rtol=1e-3, atol=1e-4, dtype=_np.float32)

                        # test imperative
                        mx_out = np.sum(x, axis=axis, dtype=dtype, keepdims=keepdims)
                        np_out = _np.sum(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims).astype(dtype)
                        assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
Example #14
 def forward(self, user_id, item_id):
     p_mf = self.P(user_id)
     q_mf = self.Q(item_id)
     gmf = p_mf * q_mf
     p_mlp = self.U(user_id)
     q_mlp = self.V(item_id)
     mlp = self.mlp(np.concatenate([p_mlp, q_mlp], axis=1))  # 1024*20
     con_res = np.concatenate([gmf, mlp], axis=1)
     return np.sum(con_res, axis=-1)  # 1024*1
Example #15
def test_sum():
    inp = np.zeros((2, INT_OVERFLOW))
    inp[-1, -1] = 10
    inp.attach_grad()
    with mx.autograd.record():
        out1 = np.sum(inp, axis=1)
        out1.backward()
    assert out1.shape == (2, )
    assert out1[0] == 0 and out1[1] == 10
    assert inp.grad.shape == inp.shape
    assert inp.grad[-1, -1] == 1
    with mx.autograd.record():
        out2 = np.sum(inp, axis=0)
        out2.backward()
    assert out2.shape == (INT_OVERFLOW, )
    assert out2[0] == 0 and out2[-1] == 10
    assert inp.grad.shape == inp.shape
    assert inp.grad[-1, -1] == 1
Example #16
def test_samplek_func(batch_size, beam_size, target_vocab_size, top_n):
    pytest.importorskip("mxnet")
    from mxnet import np
    import sockeye.beam_search

    # arrange scores increasing values from left to right, so the best item is always index 0, next-best 1, and so on
    scores = np.array([
        list(range(1, target_vocab_size + 1))
        for _ in range(batch_size * beam_size)
    ])
    # normalize
    target_dists = scores / scores.sum(axis=1, keepdims=True)

    samplek = sockeye.beam_search.SampleK(n=top_n)
    samplek.initialize()

    sample_best_hyp_indices = np.arange(0,
                                        batch_size * beam_size,
                                        dtype='int32')

    # 0..(batch_size * beam_size)-1
    expected_hyps = np.array(range(batch_size * beam_size), dtype='int32')
    finished = (np.random.uniform(0, 1, (batch_size * beam_size)) >
                0.5).astype('int32')

    for i in [1, 2]:
        if i == 2:
            samplek.hybridize()

        hyps, words, values = samplek(scores, scores, finished,
                                      sample_best_hyp_indices)
        assert hyps.shape[0] == batch_size * beam_size

        # The indices should always be the integers from 0 to batch*beam-1
        assert sum(hyps == expected_hyps).item() == (batch_size * beam_size)
        if top_n != 0:
            # Scores are increasing left-to-right, so best items are all the lowest word IDs.
            # No word id greater than the cap (top_n) should be selected
            assert np.sum(words >= top_n).item() == 0

        # word index should be zero for all finished hypotheses
        assert np.sum(np.where(finished, words, finished)).item() == 0
Example #17
def test_np_loss_ndarray():
    # Ported from test_loss.test_loss_ndarray
    output = np.array([1, 2, 3, 4])
    label = np.array([1, 3, 5, 7])
    weighting = np.array([0.5, 1, 0.5, 1])

    loss = gluon.loss.L1Loss()
    assert float(np.sum(loss(output, label))) == 6.
    loss = gluon.loss.L1Loss(weight=0.5)
    assert float(np.sum(loss(output, label))) == 3.
    loss = gluon.loss.L1Loss()
    assert float(np.sum(loss(output, label, weighting))) == 5.

    loss = gluon.loss.L2Loss()
    assert float(np.sum(loss(output, label))) == 7.
    loss = gluon.loss.L2Loss(weight=0.25)
    assert float(np.sum(loss(output, label))) == 1.75
    loss = gluon.loss.L2Loss()
    assert float(np.sum(loss(output, label, weighting))) == 6

    output = np.array([[0, 2], [1, 4]])
    label = np.array([0, 1])
    weighting = np.array([[0.5], [1.0]])

    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    L = loss(output, label).asnumpy()
    assert_almost_equal(L, _np.array([2.12692809,  0.04858733]), use_broadcast=False, rtol=1e-3)

    L = loss(output, label, weighting).asnumpy()
    assert_almost_equal(L, _np.array([1.06346405,  0.04858733]), use_broadcast=False, rtol=1e-3)
Example #18
def pixel_accuracy(output, y):
    '''
        binary class prediction accuracy. 
        output is dim(B, 1, W, H, {D})
        target is dim(B,  W, H, {D})
    '''
    true_pos = np.sum(y)
    if output.shape[1] == 1:
        classes = (output > 0.5).astype('float32')
    if output.shape[1] == 2:
        classes = np.argmax(output, axis=1)
    acc = (classes.astype('bool') * y.astype('bool')).sum()
    # print('Acc:',acc)
    if true_pos > 0:
        pix_acc = acc / y.sum().astype('float32')
    if true_pos == 0 and acc == 0:
        pix_acc = 1.0
    if true_pos == 0 and acc != 0:
        pix_acc = 0.0

    return pix_acc
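A toy walk-through of the single-channel branch above, done with plain NumPy and hypothetical shapes (B=1, one channel, a 2x2 image):

import numpy as np

output = np.array([[[[0.9, 0.2],
                     [0.6, 0.1]]]])           # (B, 1, W, H)
y = np.array([[[1.0, 0.0],
               [1.0, 0.0]]])                  # (B, W, H)
classes = (output > 0.5).astype('float32')    # threshold the single channel
acc = (classes.astype('bool') * y.astype('bool')).sum()
print(acc / y.sum().astype('float32'))        # 2 correct positives / 2 positives = 1.0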
Example #19
    def forward(self, source_encoded: np.ndarray,
                source_encoded_length: np.ndarray) -> np.ndarray:
        """
        Transformation to the length ratio. Returns a vector.

        :param source_encoded: Encoder representation for n elements. Shape: (n, source_encoded_length, hidden_size).
        :param source_encoded_length: A vector of encoded sequence lengths. Shape: (n,).
        :return: Predictions of the ratio length(hypothesis)/length(reference). Shape: (n, 1).
        """
        # source_masked: (n, source_encoded_length, hidden_size)
        source_masked = npx.sequence_mask(
            source_encoded,
            axis=1,
            sequence_length=source_encoded_length,
            use_sequence_length=True,
            value=0.)
        # calculate the proper means of encoded sources
        # data: (n, hidden_size)
        data = np.sum(source_masked, axis=1, keepdims=False) / np.reshape(
            source_encoded_length, (-1, 1))
        # MLP. Shape: (n, 1)
        data = self.layers(data)
        # Shape: (n,)
        return np.squeeze(data)
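The masked mean can be reproduced with a small NumPy-only sketch (toy shapes, a boolean length mask standing in for npx.sequence_mask):

import numpy as np

source_encoded = np.random.rand(2, 4, 3)       # (n, source_encoded_length, hidden_size)
source_encoded_length = np.array([2, 4])       # (n,)
mask = (np.arange(4)[None, :] < source_encoded_length[:, None])[:, :, None]
source_masked = source_encoded * mask          # zero out padded time steps
data = np.sum(source_masked, axis=1) / np.reshape(source_encoded_length, (-1, 1))
print(data.shape)                              # (2, 3)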
Example #20
    def inner_prod(self, prob, label):
        prod = prob * label
        prod = FF.sum(prod, axis=self.axis, keepdims=True)

        return prod
Example #21
def test_get_training_data_iters():
    pytest.importorskip('mxnet')
    from sockeye import data_io
    from mxnet import np
    from sockeye.test_utils import tmp_digits_dataset
    train_line_count = 100
    train_line_count_empty = 0
    train_max_length = 30
    dev_line_count = 20
    dev_max_length = 30
    expected_mean = 1.0
    expected_std = 0.0
    test_line_count = 20
    test_line_count_empty = 0
    test_max_length = 30
    batch_size = 5
    num_source_factors = num_target_factors = 1
    with tmp_digits_dataset("tmp_corpus",
                            train_line_count, train_line_count_empty, train_max_length - C.SPACE_FOR_XOS,
                            dev_line_count, dev_max_length - C.SPACE_FOR_XOS,
                            test_line_count, test_line_count_empty,
                            test_max_length - C.SPACE_FOR_XOS) as data:
        # tmp common vocab
        vcb = vocab.build_from_paths([data['train_source'], data['train_target']])

        train_iter, val_iter, config_data, data_info = data_io.get_training_data_iters(
            sources=[data['train_source']],
            targets=[data['train_target']],
            validation_sources=[data['dev_source']],
            validation_targets=[data['dev_target']],
            source_vocabs=[vcb],
            target_vocabs=[vcb],
            source_vocab_paths=[None],
            target_vocab_paths=[None],
            shared_vocab=True,
            batch_size=batch_size,
            batch_type=C.BATCH_TYPE_SENTENCE,
            batch_num_devices=1,
            max_seq_len_source=train_max_length,
            max_seq_len_target=train_max_length,
            bucketing=True,
            bucket_width=10)
        assert isinstance(train_iter, data_io.ParallelSampleIter)
        assert isinstance(val_iter, data_io.ParallelSampleIter)
        assert isinstance(config_data, data_io.DataConfig)
        assert data_info.sources == [data['train_source']]
        assert data_info.targets == [data['train_target']]
        assert data_info.source_vocabs == [None]
        assert data_info.target_vocabs == [None]
        assert config_data.data_statistics.max_observed_len_source == train_max_length
        assert config_data.data_statistics.max_observed_len_target == train_max_length
        assert np.isclose(config_data.data_statistics.length_ratio_mean, expected_mean)
        assert np.isclose(config_data.data_statistics.length_ratio_std, expected_std)

        assert train_iter.batch_size == batch_size
        assert val_iter.batch_size == batch_size
        assert train_iter.default_bucket_key == (train_max_length, train_max_length)
        assert val_iter.default_bucket_key == (dev_max_length, dev_max_length)
        assert train_iter.dtype == 'float32'

        # test some batches
        bos_id = vcb[C.BOS_SYMBOL]
        eos_id = vcb[C.EOS_SYMBOL]
        expected_first_target_symbols = np.full((batch_size, 1), bos_id, dtype='float32')
        for epoch in range(2):
            while train_iter.iter_next():
                batch = train_iter.next()
                assert isinstance(batch, data_io.Batch)
                source = batch.source
                target = batch.target
                label = batch.labels[C.TARGET_LABEL_NAME]  # TODO: still 2-shape: (batch, length)
                length_ratio_label = batch.labels[C.LENRATIO_LABEL_NAME]
                assert source.shape[0] == target.shape[0] == label.shape[0] == batch_size
                assert source.shape[2] == target.shape[2] == num_source_factors == num_target_factors
                # target first symbol should be BOS
                # each source sequence contains one EOS symbol
                assert np.sum(source == eos_id) == batch_size
                assert np.array_equal(target[:, 0], expected_first_target_symbols)
                # label first symbol should be 2nd target symbol
                assert np.array_equal(label[:, 0], target[:, 1, 0])
                # each label sequence contains one EOS symbol
                assert np.sum(label == eos_id) == batch_size
            train_iter.reset()
Example #22
A / sum_A

# we can call the cumsum function
# this function will not reduce the input tensor along any axis.
A.cumsum(axis=0)



############### 2.3.7. Dot Products ###############
y = np.ones(4)
x
y
np.dot(x, y)

# we can express the dot product of two vectors equivalently by performing an elementwise multiplication and then a sum:
np.sum(x * y)



############### 2.3.8. Matrix-Vector Products ###############
# we can begin to understand matrix-vector products
A.shape, x.shape, np.dot(A, x)



############### 2.3.9. Matrix-Matrix Multiplication ###############
# if you have gotten the hang of dot products and matrix-vector products, then matrix-matrix multiplication should be straightforward.
B = np.ones(shape=(4, 3))
np.dot(A, B)
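The same products can be reproduced with the standard NumPy module alone; A and x are assumed to have been defined earlier in the chapter, so the shapes below are illustrative:

import numpy as _np

A = _np.arange(20, dtype=_np.float32).reshape(5, 4)
x = _np.arange(4, dtype=_np.float32)
y = _np.ones(4, dtype=_np.float32)
B = _np.ones((4, 3), dtype=_np.float32)

print(_np.dot(x, y), _np.sum(x * y))   # dot product, two equivalent ways
print(_np.dot(A, x).shape)             # matrix-vector product: (5,)
print(_np.dot(A, B).shape)             # matrix-matrix product: (5, 3)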

Example #23
 def forward(self, positive, negative, margin=1):
     distances = positive - negative
     loss = np.sum(np.maximum(- distances + margin, 0))
     return loss
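A toy check of the margin loss above: only pairs where the positive score fails to beat the negative score by at least margin contribute.

import numpy as np

positive = np.array([2.0, 0.5])
negative = np.array([1.0, 1.5])
margin = 1
distances = positive - negative
print(np.sum(np.maximum(-distances + margin, 0)))  # 0 + 2 = 2.0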
Example #24
    def inner_prod(self, prob, label):
        prod = prob * label
        prod = FF.sum(prod, axis=self.axis)

        return prod
Example #25
    def dynamic_masking(self, input_ids, valid_lengths):
        # TODO(zheyuye), two additional flags `disallow_from_mask` and `already_masked`
        # that control the masking status for each position in the sequence.
        """
        Generate masking positions on-the-fly instead of during preprocessing
        Parameters
        ----------
        input_ids
            The batchified input_ids with shape (batch_size, max_seq_length)
        valid_lengths
            The batchified valid_lengths with shape (batch_size, )
        Returns
        -------
        masked_input_ids
            The masked input sequence, in which 15% of the tokens are masked with [MASK]
            shape (batch_size, max_seq_length)
        length_masks
            The masking matrix for the whole sequence that indicates the positions
            are greater than valid_length.

            shape (batch_size, max_seq_length)
        unmasked_tokens
            The original tokens that appear in the unmasked input sequence
            shape (batch_size, num_masked_positions)
        masked_positions
            The masking positions in mx.np.ndarray with shape (batch_size, num_masked_positions)
            shape (batch_size, num_masked_positions)
        masked_lm_weights
            The weight matrix containing 0 or 1 to mark the actual effect of masked positions
            shape (batch_size, num_masked_positions)
        """
        N = self._max_num_masked_position
        # Only valid tokens that are not special tokens are allowed to be masked
        valid_candidates = np.ones_like(input_ids, dtype=np.bool)
        ignore_tokens = [
            self.vocab.cls_id, self.vocab.sep_id, self.vocab.pad_id
        ]

        for ignore_token in ignore_tokens:
            # TODO(zheyuye), Update when operation += supported
            valid_candidates = valid_candidates * \
                np.not_equal(input_ids, ignore_token)
        valid_lengths = valid_lengths.astype(np.float32)
        valid_candidates = valid_candidates.astype(np.float32)
        num_masked_position = mxnp.maximum(
            1, np.minimum(N, round(valid_lengths * self._mask_prob)))

        # Get the masking probability of each position
        sample_probs = self._proposal_distribution * valid_candidates
        sample_probs /= mxnp.sum(sample_probs, axis=-1, keepdims=True)
        sample_probs = npx.stop_gradient(sample_probs)
        gumbels = mxnp.random.gumbel(np.zeros_like(sample_probs))
        # Following the instruction of the official repo to avoid duplicate positions
        # with Top_k Sampling as https://github.com/google-research/electra/issues/41
        masked_positions = npx.topk(mxnp.log(sample_probs) + gumbels,
                                    k=N,
                                    axis=-1,
                                    ret_typ='indices',
                                    dtype=np.int32)

        masked_weights = npx.sequence_mask(mxnp.ones_like(masked_positions),
                                           sequence_length=num_masked_position,
                                           use_sequence_length=True,
                                           axis=1,
                                           value=0)
        masked_positions = masked_positions * masked_weights
        length_masks = npx.sequence_mask(mxnp.ones_like(input_ids,
                                                        dtype=np.float32),
                                         sequence_length=valid_lengths,
                                         use_sequence_length=True,
                                         axis=1,
                                         value=0)
        unmasked_tokens = select_vectors_by_position(
            input_ids, masked_positions) * masked_weights
        masked_weights = masked_weights.astype(np.float32)
        replaced_positions = (mxnp.random.uniform(
            mxnp.zeros_like(masked_positions), mxnp.ones_like(
                masked_positions)) < self._replace_prob) * masked_positions
        # dealing with multiple zero values in replaced_positions, which would cause
        # the [CLS] token to be replaced
        filled = mxnp.where(replaced_positions, self.vocab.mask_id,
                            self.vocab.cls_id).astype(np.int32)
        # Masking token by replacing with [MASK]
        masked_input_ids = update_vectors_by_position(input_ids, filled,
                                                      replaced_positions)

        # Note: masked_positions is likely to contain multiple zero values if the number of masked
        # positions has not reached the maximum. However, this case hardly occurs since valid_length
        # is almost always equal to max_seq_length
        masked_input = self.MaskedInput(input_ids=masked_input_ids,
                                        masks=length_masks,
                                        unmasked_tokens=unmasked_tokens,
                                        masked_positions=masked_positions,
                                        masked_weights=masked_weights)
        return masked_input
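The key sampling step above (drawing masked positions without replacement) is the Gumbel-top-k trick; a NumPy-only sketch with a hypothetical 1x4 proposal distribution and k=2:

import numpy as np

rng = np.random.default_rng(0)
sample_probs = np.array([[0.1, 0.4, 0.2, 0.3]])
k = 2
gumbels = rng.gumbel(size=sample_probs.shape)
# the top-k indices of log p + Gumbel noise behave like k draws without replacement from p
masked_positions = np.argsort(np.log(sample_probs) + gumbels, axis=-1)[:, ::-1][:, :k]
print(masked_positions)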
Example #26
 def forward(self, positive, negative):
     distances = positive - negative
     loss = - np.sum(np.log(npx.sigmoid(distances)), 0, keepdims=True)
     return loss
Example #27
############### 2.1.2. Operations ###############

# import
from mxnet import np, npx
npx.set_np()

# basic arithmetic
x = np.array([1, 2, 4, 8])
y = np.array([2, 2, 2, 2])
x + y  # addition
x - y  # subtraction
x * y  # multiplication
x / y  # division
x**y  # exponentiation

# Many more operations can be applied elementwise, including unary operators like exponentiation.
np.exp(x)

# concatenate multiple ndarrays together
x = np.arange(12).reshape(3, 4)
y = np.array([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
np.concatenate([x, y], axis=0)
np.concatenate([x, y], axis=1)

# expressed as logical statements
x == y

# sum all the elements
x.sum()
np.sum(x)
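# A short follow-up sketch (not from the original page): np.sum can also reduce
# along a single axis, and keepdims preserves the reduced dimension.
x.sum(axis=0)                     # column sums, shape (4,)
x.sum(axis=1)                     # row sums, shape (3,)
np.sum(x, axis=1, keepdims=True)  # row sums kept as a (3, 1) column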
Example #28
        train_acc_sum = sum(
            d2l.accuracy(py.asnumpy(), y.asnumpy()) for py, y in zip(pys, ys))
        l, acc = train_loss_sum, train_acc_sum
        metric.add(l, acc, ys_in.shape[0], ys_in.size)
        timer.stop()
        if (i + 1) % (num_batches // 5) == 0:
            animator.add(
                epoch + i / num_batches,
                (metric[0] / metric[2], metric[1] / metric[3], None, None))

    # val_acc = d2l.evaluate_accuracy_gpus(net, val_iter, split_f)
    metric_val = d2l.Accumulator(2)  # num_corrected_examples, num_examples
    for i, (Xs_in, ys_in) in enumerate(DataLoader_Single_test):
        Xs = gluon.utils.split_and_load(Xs_in.astype("float32"), ctx)
        ys = gluon.utils.split_and_load(ys_in.astype("float32"), ctx)
        pys = [net(X) for X in Xs]
        ls = [loss(py, y) for py, y in zip(pys, ys)]
        val_loss_sum = sum([float(l.sum().asnumpy()[0]) for l in ls])

        OA_val = np.sum(
            np.argmax(pys[0].asnumpy(), axis=1) == ys[0].asnumpy()).astype(
                "float32") / np.prod(ys[0].shape)
        metric_val.add(OA_val, len(ys))

    val_acc = OA_val
    animator.add(epoch + 1,
                 (None, None, val_loss_sum / ys_in.shape[0], val_acc))
print('loss %.3f, train acc %.3f, val acc %.3f' %
      (metric[0] / metric[2], metric[1] / metric[3], val_acc))
print('%.1f examples/sec on %s' %
      (metric[2] * num_epochs / timer.sum(), d2l.try_all_gpus()))
Example #29
def loss(y_hat, y):
    m = y.shape[0]
    p = softmax(y_hat)
    return np.sum(-np.log(p[range(m), y]))
Example #30
def softmax(y_hat):
    exps = np.exp(y_hat - np.max(y_hat, axis=1, keepdims=True))
    return exps / np.sum(exps, axis=1, keepdims=True)
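A toy usage combining the softmax and loss functions from Examples #29 and #30, assuming np here is standard NumPy (as the integer indexing p[range(m), y] in loss() suggests); the 2x3 logits and labels are hypothetical:

import numpy as np

y_hat = np.array([[2.0, 1.0, 0.1],
                  [0.5, 2.5, 0.3]])
y = np.array([1, 1])
print(softmax(y_hat).sum(axis=1))  # each row sums to 1
print(loss(y_hat, y))              # summed cross-entropy over the batch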