def permute_hidden(self, hx, index=None):
    """Permute the hidden states by index.

    Args:
        hx: tuple(h, c), hidden and cell state.
        index: batch reorder indices; if None, hx is returned unchanged.
    Returns:
        The (h, c) tuple reordered along the batch dimension.
    """
    if index is None:
        return hx
    h = layers.index_select(hx[0], index, dim=1)
    c = layers.index_select(hx[1], index, dim=1)
    return h, c
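# A minimal standalone numpy sketch of what permute_hidden does (toy shapes,
# plain arrays rather than the Paddle tensors above): selecting along dim=1
# reorders the batch axis of [num_layers, batch, hidden] states.
import numpy as np

h = np.arange(2 * 3 * 4, dtype='float32').reshape(2, 3, 4)  # [num_layers, batch, hidden]
c = np.zeros_like(h)
index = np.array([1, 0, 2])                                  # batch reorder indices
h_perm, c_perm = h[:, index], c[:, index]                    # same effect as index_select(..., dim=1)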
def reorder_neurons_matrix(linearLayer, index, dim):
    """Reorder the neurons of a Linear layer in place along `dim` by `index`."""
    W = L.index_select(linearLayer.weight, index, dim=dim).detach()
    if linearLayer.bias is not None:
        if dim == 0:
            # input neurons are reordered; the bias is unaffected
            b = L.assign(linearLayer.bias).detach()
        else:
            # output neurons are reordered; the bias must follow the same index
            b = L.assign(L.index_select(linearLayer.bias, index, dim=0)).detach()
    linearLayer.weight.stop_gradient = True
    linearLayer.weight.set_value(W)
    linearLayer.weight.stop_gradient = False
    if linearLayer.bias is not None:
        linearLayer.bias.stop_gradient = True
        linearLayer.bias.set_value(b)
        linearLayer.bias.stop_gradient = False
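# Standalone numpy sketch of the reordering above (toy shapes, not the Paddle
# layer itself). For a Linear weight stored as [in_features, out_features],
# selecting on dim=1 reorders output neurons, so the bias must follow the same
# index; selecting on dim=0 only reorders input neurons and leaves the bias alone.
import numpy as np

W = np.arange(12, dtype='float32').reshape(3, 4)   # [in=3, out=4]
b = np.array([0., 1., 2., 3.], dtype='float32')
out_index = np.array([2, 0, 3, 1])                 # reorder output neurons (dim=1)
in_index = np.array([1, 0, 2])                     # reorder input neurons (dim=0)

W_out, b_out = W[:, out_index], b[out_index]       # bias follows the output permutation
W_in = W[in_index, :]                              # bias stays untouched for dim=0

x = np.ones((1, 3), dtype='float32')
assert np.allclose((x @ W + b)[:, out_index], x @ W_out + b_out)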
def pack_padded_sequence(self, x, mask, pad_index):
    """Packs a padded sequence x.

    Args:
        x: input matrix
        mask: mask matrix
        pad_index: pad index
    Returns:
        new_x: packed output
        batch_sizes: number of valid tokens at each time step.
        sorted_indices: indices that sort x by length in descending order.

    >>> x
    [
        [5, 6, 7, 0],
        [1, 2, 3, 4],
        [8, 9, 0, 0]
    ]
    >>> mask
    [
        [True, True, True, False],
        [True, True, True, True],
        [True, True, False, False]
    ]
    >>> self.pack_padded_sequence(x, mask, 0)
    [1, 5, 8, 2, 6, 9, 3, 7, 4]
    """
    # sentence lengths
    mask = layers.cast(mask, 'int64')
    lens = layers.reduce_sum(mask, dim=-1)
    # sort by sentence length in descending order
    _, sorted_indices = layers.argsort(lens, descending=True)
    sorted_x = layers.index_select(x, sorted_indices)
    sorted_mask = layers.index_select(mask, sorted_indices)
    # transpose to time-major layout
    t_x = layers.transpose(sorted_x, perm=[1, 0, 2])
    t_mask = layers.transpose(sorted_mask, perm=[1, 0])
    # keep only the valid tokens of each step
    new_x = nn.masked_select(t_x, t_mask)
    # batch size at each step
    batch_sizes = layers.reduce_sum(t_mask, -1)
    # drop steps with no valid tokens
    batch_sizes = nn.masked_select(batch_sizes, batch_sizes != 0)
    return new_x, batch_sizes.numpy().tolist(), sorted_indices
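# Standalone numpy sketch of the packing above on the docstring example (plain
# arrays, not Paddle tensors): sort by length, switch to time-major order, and
# keep only the valid tokens of each step.
import numpy as np

x = np.array([[5, 6, 7, 0],
              [1, 2, 3, 4],
              [8, 9, 0, 0]])
mask = x != 0
lens = mask.sum(-1)                                   # [3, 4, 2]
order = np.argsort(-lens)                             # [1, 0, 2]
t_x, t_mask = x[order].T, mask[order].T               # time-major: [max_len, batch]
packed = t_x[t_mask]                                  # [1, 5, 8, 2, 6, 9, 3, 7, 4]
batch_sizes = t_mask.sum(-1)                          # [3, 3, 2, 1]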
def pad_packed_sequence(self, x, batch_sizes, unsorted_indices):
    """Pads a packed sequence back to [batch, max_len, hidden]."""
    h_size = x.shape[1]
    split_x = layers.split(x, batch_sizes, dim=0)
    max_bs = batch_sizes[0]
    step_embs = []
    for step, cur_bs in enumerate(batch_sizes):
        # right-pad each time step back to the largest batch size
        pad_emb = layers.zeros(shape=(max_bs - cur_bs, h_size), dtype=x.dtype)
        step_emb = layers.concat(input=(split_x[step], pad_emb))
        step_embs.append(step_emb)
    new_x = layers.stack(step_embs, axis=1)
    # restore the original (unsorted) batch order
    new_x = layers.index_select(new_x, unsorted_indices)
    return new_x
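# Standalone numpy sketch of the unpacking above, continuing the packing example
# (plain arrays, a hypothetical 1-dim "hidden" value per token): split the packed
# stream by batch_sizes, right-pad each step, stack along the time axis, then
# undo the length sort.
import numpy as np

packed = np.array([1, 5, 8, 2, 6, 9, 3, 7, 4], dtype='float32')
batch_sizes = [3, 3, 2, 1]
max_bs = batch_sizes[0]

steps, offset = [], 0
for bs in batch_sizes:
    step = packed[offset:offset + bs]
    steps.append(np.concatenate([step, np.zeros(max_bs - bs, dtype='float32')]))
    offset += bs
padded = np.stack(steps, axis=1)          # sorted order: [[1,2,3,4],[5,6,7,0],[8,9,0,0]]
unsorted = padded[np.array([1, 0, 2])]    # back to the original batch order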
def _transpose_shift(E):
    """
    Converts
        -3   -2   -1    0    1    2
        -30  -20  -10   00   10   20
        -300 -200 -100  000  100  200
    to
        0   -10  -200
        1    00  -100
        2    10   000
    :param E: batch_size x n_head x max_len x 2max_len
    :return: batch_size x n_head x max_len x max_len
    """
    bsz, n_head, max_len, _ = E.shape
    zero_pad = layers.zeros(shape=(bsz, n_head, max_len, 1), dtype=E.dtype)
    # append a zero column, then fold the last dimension so every row holds max_len entries
    E = layers.reshape(x=layers.concat([E, zero_pad], axis=-1),
                       shape=(bsz, n_head, -1, max_len))
    # pick rows 1, 3, 5, ...; together with the final transpose this realigns the
    # relative offsets as shown in the docstring example
    indice = layers.arange(start=0, end=max_len, dtype='int64') * 2 + 1
    E = layers.index_select(input=E, index=indice, dim=-2)
    E = layers.transpose(E, perm=[0, 1, 3, 2])
    return E
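# Standalone numpy sketch of the shift trick above, checked against the docstring
# example (plain 2-D arrays; the real function works on
# [bsz, n_head, max_len, 2*max_len] tensors).
import numpy as np

max_len = 3
E = np.array([[-3, -2, -1, 0, 1, 2],
              [-30, -20, -10, 0, 10, 20],
              [-300, -200, -100, 0, 100, 200]])
padded = np.concatenate([E, np.zeros((max_len, 1), dtype=E.dtype)], axis=-1)
rows = padded.reshape(-1, max_len)            # (2*max_len + 1) x max_len
shifted = rows[np.arange(max_len) * 2 + 1].T  # rows 1, 3, 5, then transpose
# shifted == [[0, -10, -200], [1, 0, -100], [2, 10, 0]]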
def reorder_head(layer, idx):
    """Reorder the attention heads of `layer` in place according to `idx`."""
    n, a = layer.n_head, layer.d_key
    # expand the head permutation to a permutation over the n_head * d_key projection columns
    index = L.reshape(
        L.index_select(
            L.reshape(L.arange(0, n * a, dtype='int64'), shape=[n, a]), idx, dim=0),
        shape=[-1])

    def reorder_head_matrix(linearLayer, index, dim=1):
        W = L.index_select(linearLayer.weight, index, dim=dim).detach()
        if linearLayer.bias is not None:
            if dim == 0:
                b = L.assign(linearLayer.bias).detach()
            else:
                b = L.assign(L.index_select(linearLayer.bias, index, dim=0)).detach()
        linearLayer.weight.stop_gradient = True
        linearLayer.weight.set_value(W)
        linearLayer.weight.stop_gradient = False
        if linearLayer.bias is not None:
            linearLayer.bias.stop_gradient = True
            linearLayer.bias.set_value(b)
            linearLayer.bias.stop_gradient = False

    # q/k/v projections are reordered along their output columns,
    # the output projection o along its input rows (dim=0)
    reorder_head_matrix(layer.q.fn if hasattr(layer.q, 'fn') else layer.q, index)
    reorder_head_matrix(layer.k.fn if hasattr(layer.k, 'fn') else layer.k, index)
    reorder_head_matrix(layer.v.fn if hasattr(layer.v, 'fn') else layer.v, index)
    reorder_head_matrix(layer.o.fn if hasattr(layer.o, 'fn') else layer.o, index, dim=0)
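# Standalone numpy sketch of the index construction above (hypothetical small
# sizes): a permutation over heads is expanded into a permutation over the
# n_head * d_key flattened projection columns, so whole head blocks move together.
import numpy as np

n_head, d_key = 3, 2
idx = np.array([2, 0, 1])                                # keep heads in this order
index = np.arange(n_head * d_key).reshape(n_head, d_key)[idx].reshape(-1)
# index == [4, 5, 0, 1, 2, 3]: columns of head 2 first, then head 0, then head 1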
def SinusoidalEmbedding(self, input):
    """Produces sinusoidal positional embeddings of any length.

    Padding symbols are ignored.

    Args:
        input: tensor shaped like [bsz, seq_len]; only its shape is used.
    Returns:
        Relative position embeddings of shape [2 * seq_len, embedding_dim],
        taken from the table built by self.get_embedding with
        self.embedding_dim and self.padding_idx.
    """
    bsz, seq_len = input.shape
    max_pos = self.padding_idx + seq_len
    if max_pos > self.origin_shift:
        # the cached table is too small for this sequence length; rebuild it
        self.weights = self.get_embedding(
            max_pos * 2,
            self.embedding_dim,
            self.padding_idx
        )
    # relative offsets -seq_len .. seq_len-1, shifted into valid row indices
    positions = layers.arange(-seq_len, seq_len, dtype='int64') + self.origin_shift
    embed = layers.index_select(input=self.weights, index=positions, dim=0)
    return embed
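# Standalone numpy sketch of the lookup above (the sin/cos table builder is a
# hypothetical stand-in for self.get_embedding, and origin_shift is assumed to be
# the row index of relative position 0): offsets -seq_len .. seq_len-1 are shifted
# by origin_shift so they index valid rows of the table.
import numpy as np

def sinusoidal_table(num_embeddings, embedding_dim):
    # one row per (shifted) relative position, half sin and half cos features
    half = embedding_dim // 2
    freqs = np.exp(np.arange(half) * -(np.log(10000.0) / (half - 1)))
    angles = np.arange(num_embeddings)[:, None] * freqs[None, :]
    return np.concatenate([np.sin(angles), np.cos(angles)], axis=1)

seq_len, embedding_dim = 4, 8
origin_shift = seq_len                                    # row of relative position 0
weights = sinusoidal_table(2 * seq_len, embedding_dim)
positions = np.arange(-seq_len, seq_len) + origin_shift   # 0 .. 2*seq_len - 1
embed = weights[positions]                                # [2*seq_len, embedding_dim]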