Example #1
import math
import numpy as np


def relative_position_bucket(relative_position,
                             bidirectional: bool = True,
                             num_buckets: int = 32,
                             max_distance: int = 128):
    """Map the relative position to buckets. The implementation is consistent with that
    in [mesh_tensorflow](https://github.com/tensorflow/mesh/blob/c59988047e49b4d2af05603e3170724cdbadc467/mesh_tensorflow/transformer/transformer_layers.py#L595-L637)
    where relative position is defined as `mem_i - query_j`. Thus, a positive value indicates
    that the memory slot lies at a later timestamp than the query slot.

    After handling the bidirectional case (see below), the implementation uses the first half
    of the buckets to store exact differences and the second half to store the differences
    after a logarithmic transformation.

    Parameters
    ----------
    relative_position
        Shape (...,)
    bidirectional
        Whether we are dealing with bidirectional attention.
        If it's bidirectional, positive shifts are mapped to [0, num_buckets // 2),
        and negative shifts are mapped to [num_buckets // 2, num_buckets). 
    num_buckets
        The number of buckets.
    max_distance
        Maximum distance. Positions that fall outside of `max_distance` will be trimmed.

    Returns
    -------
    buckets
        Shape (...,).
        It has the same shape as the `relative_position`. It will have int32 type.
    """
    ret = 0
    relative_position = -relative_position
    if bidirectional:
        assert num_buckets % 2 == 0, 'When bidirectional is True, the number of buckets must be ' \
                                     'divisible by 2.'
        num_buckets //= 2
        ret = ret + (relative_position < 0).astype(np.int32) * num_buckets
        relative_position = np.abs(relative_position)
    else:
        # Clip all the negative values to 0
        relative_position = np.clip(relative_position, a_min=0, a_max=None)
    # Now, the relative_position is in the range [0, inf)

    # Half of the buckets deal with the exact increments,
    # i.e., 0, 1, 2, ..., max_exact - 1, where max_exact = num_buckets // 2
    max_exact = num_buckets // 2
    is_small = relative_position < max_exact

    # The other half of the buckets are for logarithmically bigger bins in positions up to
    # max_distance
    val_if_large = max_exact + (
        np.log(relative_position.astype(np.float32) / max_exact) /
        math.log(max_distance / max_exact) *
        (num_buckets - max_exact)).astype(np.int32)
    val_if_large = np.minimum(val_if_large, num_buckets - 1)
    ret = ret + np.where(is_small, relative_position, val_if_large)
    return ret
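As a quick sanity check (hypothetical, not part of the original source), calling the function with a plain NumPy array under the default arguments shows small offsets landing in the exact buckets and large ones in the logarithmic half; because of the initial negation, positive inputs end up in the upper half [16, 32):

import numpy as np

# Hypothetical check with defaults (bidirectional=True, num_buckets=32,
# max_distance=128): |offset| < max_exact = 8 maps to itself, larger
# offsets map to log-scaled buckets.
rel_pos = np.array([-64, -8, -2, 2, 8, 64])
print(relative_position_bucket(rel_pos))  # expected: [14  8  2 18 24 30]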
Example #2
import numpy as _np
from mxnet import np
from mxnet.test_utils import same


def check_minimum(x1, x2):
    mx_out = np.minimum(x1, x2)
    if isinstance(x1, np.ndarray) or isinstance(x2, np.ndarray):
        assert type(mx_out) == np.ndarray
    np_out = _np.minimum(
        x1.asnumpy() if isinstance(x1, np.ndarray) else x1,
        x2.asnumpy() if isinstance(x2, np.ndarray) else x2)
    assert same(
        mx_out.asnumpy() if isinstance(mx_out, np.ndarray) else mx_out,
        np_out)
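A hedged usage sketch (the call sites below are hypothetical): since each operand is converted with asnumpy() only when it is an ndarray, the helper accepts any mix of MXNet ndarrays and Python scalars.

from mxnet import np

# Hypothetical invocations of the helper above.
check_minimum(np.array([1.0, 3.0]), np.array([2.0, 2.0]))  # ndarray vs. ndarray
check_minimum(np.array([1.0, 3.0]), 2.0)                   # ndarray vs. scalar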
Example #3
def forward(self, hyp_lengths, reference_lengths):
    if self.weight == 0.0:
        if isinstance(hyp_lengths, (int, float)):
            return 0.0
        else:
            # Subtract so that MXNet does not warn about an unused argument;
            # this branch is not (and should not be) used during inference.
            return np.zeros_like(hyp_lengths - reference_lengths)
    else:
        # log_bp is always <= 0.0
        if isinstance(hyp_lengths, (int, float)):
            log_bp = min(0.0, 1.0 - reference_lengths / hyp_lengths)
        else:
            log_bp = np.minimum(np.zeros_like(hyp_lengths, dtype='float32'),
                                1.0 - reference_lengths / hyp_lengths)
        return self.weight * log_bp
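The `log_bp` term is the logarithm of the BLEU brevity penalty, BP = exp(min(0, 1 - ref_len / hyp_len)). A small scalar walk-through of the branch above, with hypothetical lengths:

import math

# A hypothesis shorter than the reference is penalized.
hyp_len, ref_len = 8, 10
log_bp = min(0.0, 1.0 - ref_len / hyp_len)  # 1 - 10/8 = -0.25
assert math.isclose(log_bp, -0.25)

# A hypothesis at least as long as the reference incurs no penalty.
hyp_len = 12
assert min(0.0, 1.0 - ref_len / hyp_len) == 0.0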
Example #4
import mxnet as mx
from mxnet import np

INT_OVERFLOW = 2**31  # assumed threshold, as in MXNet's large-tensor test suite


def test_minimum():
    inp1 = np.ones((INT_OVERFLOW, 2))
    inp1[-1, -1] = -1
    inp2 = np.zeros((INT_OVERFLOW, 1))
    inp1.attach_grad()
    inp2.attach_grad()
    with mx.autograd.record():
        out = np.minimum(inp1, inp2)
        out.backward()
    assert out.shape == inp1.shape
    assert out[-1, -1] == -1
    assert inp1.grad.shape == inp1.shape
    assert inp1.grad[-1, -1] == 1 and inp1.grad[0, 0] == 0
    assert inp2.grad.shape == inp2.shape
    assert inp2.grad[-1] == 1 and inp2.grad[0] == 2
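The gradient pattern the test asserts can be reproduced at a small scale (the huge INT_OVERFLOW shape only stresses int64 indexing). A hypothetical shape-(3, 2) replica of the same check:

import mxnet as mx
from mxnet import np, npx
npx.set_np()

# The gradient of minimum flows to the smaller operand; the broadcast input
# accumulates gradient over the broadcast axis (hence 2 in rows where it
# wins both columns).
a = np.ones((3, 2))
a[-1, -1] = -1
b = np.zeros((3, 1))
a.attach_grad()
b.attach_grad()
with mx.autograd.record():
    out = np.minimum(a, b)
out.backward()
print(a.grad)  # 1 only at [-1, -1], the one position where a < b
print(b.grad)  # [[2.] [2.] [1.]]: b wins both columns except at [-1, -1]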
Example #5
import numpy as np


def box_iou(boxes1, boxes2):
    """Compute IoU between two sets of boxes of shape (N, 4) and (M, 4)."""
    # Compute box areas
    box_area = lambda boxes: ((boxes[:, 2] - boxes[:, 0]) *
                              (boxes[:, 3] - boxes[:, 1]))
    area1 = box_area(boxes1)
    area2 = box_area(boxes2)
    lt = np.maximum(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    rb = np.minimum(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]
    wh = (rb - lt).clip(min=0)  # [N,M,2]
    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]
    union = area1[:, None] + area2 - inter
    return inter / union
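A hypothetical sanity check, assuming boxes in (x1, y1, x2, y2) corner format: a box compared with itself yields IoU 1, and disjoint boxes yield 0.

import numpy as np

boxes1 = np.array([[0.0, 0.0, 2.0, 2.0]])
boxes2 = np.array([[0.0, 0.0, 2.0, 2.0],   # identical to boxes1[0]
                   [3.0, 3.0, 4.0, 4.0]])  # disjoint from boxes1[0]
print(box_iou(boxes1, boxes2))  # expected: [[1. 0.]]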
Example #6
    def get_corrupted_tokens(self, inputs, original_tokens, masked_positions, logits):
        """
        Sample from the generator to create corrupted input.

        Parameters
        ----------
        inputs
            The masked input
            - layout = 'NT'
                Shape (batch_size, seq_length)
            - layout = 'TN'
                Shape (seq_length, batch_size)
        original_tokens
            The original tokens that appear in the unmasked input sequence
            Shape (batch_size, num_masked_positions).
        masked_positions
            The masked position of the sequence
            Shape (batch_size, num_masked_positions).
        logits
            The logits of each token.
            Shape (batch_size, num_masked_positions, vocab_size)

        Returns
        -------
        corrupted_tokens
            Shape (batch_size, num_masked_positions)
        fake_data
            - layout = 'NT'
                Shape (batch_size, seq_length)
            - layout = 'TN'
                Shape (seq_length, batch_size)
        labels
            - layout = 'NT'
                Shape (batch_size, seq_length)
            - layout = 'TN'
                Shape (seq_length, batch_size)
        """

        if self._disallow_correct:
            # TODO(sxjscience), Revise the implementation
            disallow = npx.one_hot(masked_positions, depth=self.vocab_size, dtype=self._dtype)
            logits = logits - 1000.0 * disallow
        # gumbel_softmax() samples from the logits with Gumbel-distributed noise
        prob = gumbel_softmax(
            logits,
            temperature=self._temperature,
            eps=self._gumbel_eps,
            use_np_gumbel=False)
        corrupted_tokens = np.argmax(prob, axis=-1).astype(np.int32)

        if self.disc_backbone.layout == 'TN':
            inputs = inputs.T
        original_data = update_vectors_by_position(inputs, original_tokens,
                                                   masked_positions)
        fake_data = update_vectors_by_position(inputs, corrupted_tokens,
                                               masked_positions)
        updates_mask = add_vectors_by_position(np.zeros_like(inputs),
                                               np.ones_like(masked_positions),
                                               masked_positions)
        # Padded masked_positions contain repeated zeros, which would otherwise
        # accumulate a value > 1 at the first index ([CLS]); clip the mask to 1.
        updates_mask = np.minimum(updates_mask, 1)
        labels = updates_mask * np.not_equal(fake_data, original_data)
        if self.disc_backbone.layout == 'TN':
            return corrupted_tokens, fake_data.T, labels.T
        else:
            return corrupted_tokens, fake_data, labels
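The np.minimum(updates_mask, 1) step is easiest to see with plain NumPy. A hypothetical stand-in that uses np.add.at as a scatter-add in place of add_vectors_by_position:

import numpy as np

# Padded masked_positions repeat index 0, so a plain scatter-add leaves a
# count of 2 on the [CLS] slot; clipping with np.minimum restores a clean
# 0/1 mask.
inputs = np.zeros((1, 6), dtype=np.int32)    # (batch_size, seq_length)
masked_positions = np.array([[2, 4, 0, 0]])  # trailing zeros are padding
updates_mask = np.zeros_like(inputs)
np.add.at(updates_mask[0], masked_positions[0], 1)
print(updates_mask)                 # [[2 0 1 0 1 0]] -- [CLS] counted twice
print(np.minimum(updates_mask, 1))  # [[1 0 1 0 1 0]] -- binary mask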