def relative_position_bucket(relative_position,
                             bidirectional: bool = True,
                             num_buckets: int = 32,
                             max_distance: int = 128):
    """Map the relative position to buckets.

    The implementation is consistent with that in
    [mesh_tensorflow](https://github.com/tensorflow/mesh/blob/c59988047e49b4d2af05603e3170724cdbadc467/mesh_tensorflow/transformer/transformer_layers.py#L595-L637)
    where relative position is defined as `mem_i - query_j`. Thus, a positive value indicates
    that the memory slot is in a later timestamp than the query slot.

    After handling the bidirectional case (see below), the implementation uses the first half
    of buckets to store exact differences and the second half to store the differences after
    a logarithmic transformation.

    Parameters
    ----------
    relative_position
        Shape (...,)
    bidirectional
        Whether we are dealing with bidirectional attention.
        If it's bidirectional, inputs with `relative_position <= 0` are mapped to
        [0, num_buckets // 2) and inputs with `relative_position > 0` are mapped to
        [num_buckets // 2, num_buckets); within each half the bucket depends only on the
        absolute distance.
        If it's not bidirectional, every input with `relative_position > 0` is treated as
        distance 0 and all `num_buckets` buckets are used for `relative_position <= 0`.
    num_buckets
        The number of buckets. Must be even when `bidirectional` is True.
    max_distance
        Maximum distance. Positions that fall outside of 'max_distance' will be trimmed
        to the last bucket.

    Returns
    -------
    buckets
        Shape (...,).
        It has the same shape as the `relative_position` and holds integer bucket ids.
        NOTE(review): the log-scaled part is cast to int32, but the final dtype also depends
        on the dtype of `relative_position` -- confirm if callers rely on int32.
    """
    ret = 0
    # Work with `query_j - mem_i` from here on, mirroring the reference implementation.
    relative_position = -relative_position
    if bidirectional:
        assert num_buckets % 2 == 0, 'When bidirectional is True, the number of buckets must be ' \
                                     'divisible by 2.'
        num_buckets //= 2
        # The sign selects which half of the bucket range is used.
        ret = ret + (relative_position < 0).astype(np.int32) * num_buckets
        relative_position = np.abs(relative_position)
    else:
        # Clip all the negative values to 0
        relative_position = np.clip(relative_position, a_min=0, a_max=None)
    # Now, the relative_position is in the range [0, inf)

    # Half of the buckets deal with the exact increments,
    # i.e., 0, 1, 2, ..., max_exact - 1, where max_exact = num_buckets // 2
    max_exact = num_buckets // 2
    is_small = relative_position < max_exact

    # The other half of the buckets are for logarithmically bigger bins in positions up to
    # max_distance.
    # Entries below max_exact are discarded by the `np.where` below, so clamp them to
    # max_exact first: this keeps log() away from 0 (-inf) and avoids casting a
    # non-finite value to int32, without changing any selected result.
    log_input = np.maximum(relative_position, max_exact)
    val_if_large = max_exact + (
        np.log(log_input.astype(np.float32) / max_exact)
        / math.log(max_distance / max_exact) * (num_buckets - max_exact)).astype(np.int32)
    val_if_large = np.minimum(val_if_large, num_buckets - 1)
    ret = ret + np.where(is_small, relative_position, val_if_large)
    return ret
def check_minimum(x1, x2):
    """Check `np.minimum(x1, x2)` against the reference `_np.minimum`.

    Each operand may be either an `np.ndarray` or a plain value. When at least
    one operand is an `np.ndarray`, the result must be exactly an `np.ndarray`.
    The result is then compared with `same` against `_np.minimum` applied to
    the operands after `.asnumpy()` conversion.

    NOTE(review): `np` is presumably the framework's NumPy-compatible module
    (its arrays expose `.asnumpy()`) and `_np` the reference NumPy -- confirm
    against the file's imports.
    """
    def _to_reference(value):
        # Only framework arrays need converting; other values pass through.
        return value.asnumpy() if isinstance(value, np.ndarray) else value

    mx_out = np.minimum(x1, x2)
    if any(isinstance(operand, np.ndarray) for operand in (x1, x2)):
        assert type(mx_out) == np.ndarray
    np_out = _np.minimum(_to_reference(x1), _to_reference(x2))
    assert same(_to_reference(mx_out), np_out)
def forward(self, hyp_lengths, reference_lengths):
    """Return the weighted log brevity penalty.

    The log penalty is `min(0, 1 - reference_lengths / hyp_lengths)`, so it is
    never positive; the result is that value multiplied by `self.weight`.

    Parameters
    ----------
    hyp_lengths
        Hypothesis length(s). A Python `int`/`float` selects the scalar code
        path; anything else is handled with array operations.
    reference_lengths
        Reference length(s), combined element-wise with `hyp_lengths`.

    Returns
    -------
    `0.0` (scalar path) or an all-zero array (array path) when `self.weight`
    is `0.0`; otherwise `self.weight * log_bp`.
    """
    scalar_input = isinstance(hyp_lengths, (int, float))

    if self.weight == 0.0:
        if scalar_input:
            return 0.0
        # subtract to avoid MxNet's warning of not using both arguments
        # this branch should not and is not used during inference
        return np.zeros_like(hyp_lengths - reference_lengths)

    # log_bp is always <= 0.0
    if scalar_input:
        log_bp = min(0.0, 1.0 - reference_lengths / hyp_lengths)
    else:
        zero_ceiling = np.zeros_like(hyp_lengths, dtype='float32')
        log_bp = np.minimum(zero_ceiling, 1.0 - reference_lengths / hyp_lengths)
    return self.weight * log_bp
def test_minimum():
    """Exercise `np.minimum` forward and backward on an `INT_OVERFLOW`-row input.

    The first operand is all ones except for a single -1 in its last element;
    the second is an all-zero column that is broadcast against it. The
    assertions pin the output shape and last value, plus the gradient shapes
    and selected gradient entries of both operands.
    """
    lhs = np.ones((INT_OVERFLOW, 2))
    lhs[-1, -1] = -1
    rhs = np.zeros((INT_OVERFLOW, 1))
    lhs.attach_grad()
    rhs.attach_grad()
    with mx.autograd.record():
        out = np.minimum(lhs, rhs)
        out.backward()

    # Forward: same shape as the wider operand; the lone -1 wins over 0.
    assert out.shape == lhs.shape
    assert out[-1, -1] == -1

    # Gradient w.r.t. the first operand: 1 where it supplied the minimum, else 0.
    assert lhs.grad.shape == lhs.shape
    assert lhs.grad[-1, -1] == 1
    assert lhs.grad[0, 0] == 0

    # Gradient w.r.t. the broadcast operand: expected to be 2 in the first row
    # and 1 in the last row, where one of the two columns picked the -1 instead.
    assert rhs.grad.shape == rhs.shape
    assert rhs.grad[-1] == 1
    assert rhs.grad[0] == 2
def box_iou(boxes1, boxes2):
    """Compute IOU between two sets of boxes of shape (N,4) and (M,4).

    Each row is read as two corners: columns 0-1 hold the lower corner and
    columns 2-3 the upper corner, so a box's area is
    `(col2 - col0) * (col3 - col1)`.

    Parameters
    ----------
    boxes1
        Boxes of shape (N, 4).
    boxes2
        Boxes of shape (M, 4).

    Returns
    -------
    Array of shape (N, M) whose entry [i, j] is the intersection area of
    `boxes1[i]` and `boxes2[j]` divided by the area of their union. A pair
    whose union area is zero is not special-cased.
    """
    def _box_area(boxes):
        # Width times height of every box.
        return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    area1 = _box_area(boxes1)
    area2 = _box_area(boxes2)
    # Corners of the pairwise intersection rectangles.
    lt = np.maximum(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    rb = np.minimum(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]
    # Non-overlapping pairs would give a negative extent; clamp those to 0.
    wh = (rb - lt).clip(min=0)  # [N,M,2]
    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]
    union = area1[:, None] + area2 - inter
    return inter / union
def get_corrupted_tokens(self, inputs, original_tokens, masked_positions, logits):
    """
    Sample from the generator to create corrupted input.

    Parameters
    ----------
    inputs
        The masked input
        - layout = 'NT'
            Shape (batch_size, seq_length)
        - layout = 'TN'
            Shape (seq_length, batch_size)
    original_tokens
        The original tokens that appear in the unmasked input sequence
        Shape (batch_size, num_masked_positions).
    masked_positions
        The masked position of the sequence
        Shape (batch_size, num_masked_positions).
    logits
        The logits of each tokens
        Shape (batch_size, num_masked_positions, vocab_size)

    Returns
    -------
    corrupted_tokens
        The argmax over the last axis of the sampled distribution, as int32.
        Presumably shape (batch_size, num_masked_positions) given the
        documented `logits` shape -- NOTE(review): the earlier documentation
        said (batch_size, ); confirm.
    fake_data
        - layout = 'NT'
            Shape (batch_size, seq_length)
        - layout = 'TN'
            Shape (seq_length, batch_size)
    labels
        - layout = 'NT'
            Shape (batch_size, seq_length)
        - layout = 'TN'
            Shape (seq_length, batch_size)
    """
    # NOTE(review): `F` is neither a parameter nor a local of this method; it
    # must come from an enclosing/module scope. Confirm it is defined and that
    # the helpers called below still take it as their first argument.
    if self._disallow_correct:
        # TODO(sxjscience), Revise the implementation
        penalised = npx.one_hot(masked_positions, depth=self.vocab_size, dtype=self._dtype)
        logits = logits - 1000.0 * penalised

    # gumbel_softmax() samples from the logits with a noise of Gumbel distribution
    sampled = gumbel_softmax(
        F,
        logits,
        temperature=self._temperature,
        eps=self._gumbel_eps,
        use_np_gumbel=False)
    corrupted_tokens = np.argmax(sampled, axis=-1).astype(np.int32)

    # The position-based updates below run on the transposed view for 'TN'
    # inputs; the results are transposed back before returning.
    time_major = self.disc_backbone.layout == 'TN'
    if time_major:
        inputs = inputs.T

    original_data = update_vectors_by_position(F,
                                               inputs, original_tokens, masked_positions)
    fake_data = update_vectors_by_position(F,
                                           inputs, corrupted_tokens, masked_positions)
    updates_mask = add_vectors_by_position(np.zeros_like(inputs),
                                           np.ones_like(masked_positions),
                                           masked_positions)
    # Dealing with multiple zeros in masked_positions which
    # results in a non-zero value in the first index [CLS]
    updates_mask = np.minimum(updates_mask, 1)
    # A position is labelled only if it was updated AND the sampled token
    # differs from the original one.
    token_changed = np.not_equal(fake_data, original_data)
    labels = updates_mask * token_changed

    if time_major:
        return corrupted_tokens, fake_data.T, labels.T
    return corrupted_tokens, fake_data, labels