def lookup(self, data_index: nd.NDArray):
    """Return the distribution for the data batch."""
    shape = self._mean_emb.shape
    self.mean = nd.Embedding(data_index, self._mean_emb.data(), *shape)
    variance_arg = nd.Embedding(
        data_index, self._variance_arg_emb.data(), *shape)
    self.variance = self.link_function(variance_arg)
    if hasattr(self._mean_emb, 'n_repeats'):
        self.mean_repeated = util.repeat_emb(self._mean_emb, self.mean)
    if hasattr(self._variance_arg_emb, 'n_repeats'):
        self.variance_repeated = self.link_function(
            util.repeat_emb(self._variance_arg_emb, variance_arg))
    return self
def __getitem__(self, tokens):
    """Looks up embedding vectors of text tokens.

    Parameters
    ----------
    tokens : str or list of strs
        A token or a list of tokens.

    Returns
    -------
    mxnet.ndarray.NDArray:
        The embedding vector(s) of the token(s). According to numpy
        conventions, if `tokens` is a string, returns a 1-D NDArray
        (vector); if `tokens` is a list of strings, returns a 2-D NDArray
        (matrix) of shape=(len(tokens), vec_len).
    """
    to_reduce = not isinstance(tokens, (list, tuple))
    if to_reduce:
        tokens = [tokens]

    indices = [self._token_to_idx[token] for token in tokens]

    vecs = nd.Embedding(nd.array(indices), self.idx_to_vec,
                        self.idx_to_vec.shape[0], self.idx_to_vec.shape[1])

    return vecs[0] if to_reduce else vecs
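# Minimal usage sketch for the lookup above. The `embedding` object and its
# vocabulary are assumptions made for illustration; only the indexing
# behaviour comes from the docstring:
#
#     vec = embedding['hello']             # 1-D NDArray of shape (vec_len,)
#     mat = embedding[['hello', 'world']]  # 2-D NDArray of shape (2, vec_len)
#
# A single string drops the batch axis, a list keeps it, matching the numpy
# convention the docstring describes.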
def lookup(self, labels: nd.NDArray, repeat: bool = True):
    """Return the distribution for the data batch."""
    shape = self._mean_arg_emb.shape
    mean_arg_emb = nd.Embedding(labels, self._mean_arg_emb.data(), *shape)
    self.mean = nd.maximum(5e-3, self.link_function(mean_arg_emb))
    if hasattr(self._mean_arg_emb, 'n_repeats') and repeat:
        self.mean_repeated = self.link_function(
            util.repeat_emb(self._mean_arg_emb, mean_arg_emb))
    return self
def forward(self, x, sampled_values, label):
    """Forward computation."""
    sampled_candidates, _, _ = sampled_values
    # (batch_size,)
    label = label.reshape(shape=(-1,))
    # (num_sampled+batch_size,)
    ids = nd.concat(sampled_candidates, label, dim=0)
    # lookup weights and biases
    weight = self.weight.row_sparse_data(ids)
    bias = self.bias.data(ids.context)
    # (num_sampled+batch_size, dim)
    w_all = nd.Embedding(data=ids, weight=weight, **self._kwargs)
    # (num_sampled+batch_size,)
    b_all = nd.take(bias, indices=ids)
    out, new_targets = self._logits(x, sampled_values, label, w_all, b_all)
    return out, new_targets
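# Shape sketch for the sampled lookup above. The values are illustrative and
# the candidate sampler and `_logits` helper are not shown in the snippet:
#
#     batch_size, num_sampled, dim = 4, 8, 16
#     # ids   : (num_sampled + batch_size,)      = (12,)
#     # w_all : (num_sampled + batch_size, dim)  = (12, 16)
#     # b_all : (num_sampled + batch_size,)      = (12,)
#
# Only these few rows of the (vocab_size, dim) row_sparse weight are fetched
# per step, which is what keeps sampled training cheap.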
def __getitem__(self, tokens):
    """Looks up embedding vectors of text tokens.

    Parameters
    ----------
    tokens : str or list of strs
        A token or a list of tokens.

    Returns
    -------
    mxnet.ndarray.NDArray:
        The embedding vector(s) of the token(s). According to numpy
        conventions, if `tokens` is a string, returns a 1-D NDArray
        (vector); if `tokens` is a list of strings, returns a 2-D NDArray
        (matrix) of shape=(len(tokens), vec_len).
    """
    to_reduce = not isinstance(tokens, (list, tuple))
    if to_reduce:
        tokens = [tokens]

    if self.unknown_lookup is not None:
        if self.idx_to_vec is None:
            # May raise KeyError, but we cannot fallback to idx_to_vec's
            # unknown vector, as idx_to_vec has not been initialized yet.
            # Cannot initialize it, as we don't know the dimension.
            vecs = self.unknown_lookup[tokens]
        else:
            vecs = [
                self.idx_to_vec[self.token_to_idx[token]]
                if (token in self.token_to_idx
                    or token not in self.unknown_lookup)
                else self.unknown_lookup[token] for token in tokens
            ]
            vecs = nd.stack(*vecs, axis=0)
    else:
        indices = [self._token_to_idx[token] for token in tokens]
        vecs = nd.Embedding(nd.array(indices), self.idx_to_vec,
                            self.idx_to_vec.shape[0], self.idx_to_vec.shape[1])

    return vecs[0] if to_reduce else vecs
def __getitem__(self, tokens):
    """Looks up embedding vectors of text tokens.

    Parameters
    ----------
    tokens : str or list of strs
        A token or a list of tokens.

    Returns
    -------
    mxnet.ndarray.NDArray:
        The embedding vector(s) of the token(s). According to numpy
        conventions, if `tokens` is a string, returns a 1-D NDArray
        (vector); if `tokens` is a list of strings, returns a 2-D NDArray
        (matrix) of shape=(len(tokens), vec_len).
    """
    to_reduce = not isinstance(tokens, (list, tuple))
    if to_reduce:
        tokens = [tokens]

    if self.unknown_lookup is not None and (not self.allow_extend
                                            or not self.unknown_autoextend):
        vecs = [
            self.idx_to_vec[self.token_to_idx[token]]
            if token in self.token_to_idx else self.unknown_lookup[token]
            for token in tokens
        ]
        vecs = nd.stack(*vecs, axis=0)
    else:
        if (self.unknown_lookup is not None and self.allow_extend
                and self.unknown_autoextend):
            new_tokens = [t for t in tokens if t not in self.token_to_idx]
            self[new_tokens] = self.unknown_lookup[new_tokens]

        indices = [self._token_to_idx[token] for token in tokens]

        vecs = nd.Embedding(
            nd.array(indices), self.idx_to_vec, self.idx_to_vec.shape[0],
            self.idx_to_vec.shape[1])

    return vecs[0] if to_reduce else vecs
def log_prob_sum(self, nonzero_index: nd.NDArray) -> nd.NDArray:
    """Returns log prob. Argument is batch of indices of nonzero classes.

    log p(x) = term_1 + term_2

    term_1 = sum_c log p(x_c = 0)
    term_2 = sum_{c: x_c = 1} [log p(x_c = 1) - log p(x_c = 0)]

    term_1 takes O(CK) to calculate. term_2 takes O(CK) + O(sK), with s
    being the number of nonzero entries in x.
    """
    mean_arg = -(nd.dot(self._positive_latent, nd.exp(self._weight))
                 + nd.exp(self._bias))
    assert mean_arg.shape[1] == 1, "Fast Bernoulli only supports batch size 1!"
    mean_arg = mean_arg[:, 0, :]
    term_1 = nd.sum(mean_arg, -1)
    n_factors, n_classes = self._weight.shape
    # weight_nonzero = nd.Embedding(
    #     nonzero_index, self._weight.T, n_classes, n_factors).T
    # nonzero_arg = -nd.dot(self._positive_latent, nd.exp(weight_nonzero))
    # raise NotImplementedError('need to add bias lookup!')
    batch_size = mean_arg.shape[0]
    nonzero_arg = nd.Embedding(
        nonzero_index, mean_arg.T, n_classes, batch_size).T
    term_2 = nd.sum(nd.log(1. - nd.exp(nonzero_arg)) - nonzero_arg, -1)
    res = term_1 + term_2
    return nd.expand_dims(res, 1)
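# Self-contained sanity check of the decomposition stated in the docstring
# above. Plain numpy, independent of the class; the per-class probabilities
# are made up for illustration:
#
#     import numpy as np
#     p = np.array([0.1, 0.7, 0.3])           # p(x_c = 1) for C = 3 classes
#     x = np.array([0, 1, 0])                 # observed binary vector
#     direct = np.sum(np.where(x == 1, np.log(p), np.log1p(-p)))
#     term_1 = np.sum(np.log1p(-p))           # sum_c log p(x_c = 0)
#     term_2 = np.sum((np.log(p) - np.log1p(-p))[x == 1])
#     assert np.isclose(direct, term_1 + term_2)
#
# Only the s nonzero classes need the correction term, so the dense sum over
# all C classes is computed once and then adjusted via the Embedding lookup.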
def forward(self, words, subwords, wordsmask=None, subwordsmask=None,
            words_to_unique_subwords_indices=None):
    """Compute embedding of words in batch.

    Parameters
    ----------
    words : mx.nd.NDArray
        Array of token indices.
    subwords : mx.nd.NDArray
        The subwords associated with the tokens in `words`. If
        `words_to_unique_subwords_indices` is specified, may contain only
        the subwords of the unique tokens in `words`, with
        `words_to_unique_subwords_indices` containing the reverse mapping.
    wordsmask : mx.nd.NDArray, optional
        Mask for embeddings returned by the word level embedding operator.
    subwordsmask : mx.nd.NDArray, optional
        A mask for the subword embeddings looked up from `subwords`.
        Applied before sum reducing the subword embeddings.
    words_to_unique_subwords_indices : mx.nd.NDArray, optional
        Mapping from the position in the `words` array to the position in
        the `subwords` array.

    """
    # pylint: disable=arguments-differ
    embeddings = self.embedding(words)
    if wordsmask is not None:
        wordsmask = nd.expand_dims(wordsmask, axis=-1)
        embeddings = nd.broadcast_mul(embeddings, wordsmask)
    else:
        wordsmask = 1

    if words_to_unique_subwords_indices is None:
        assert words.shape[0] == subwords.shape[0]

        if subwordsmask is None:
            subwordsmask = nd.ones_like(subwords)

        num_embeddings = \
            nd.sum(subwordsmask, axis=-1, keepdims=True) + wordsmask

        subword_embeddings = self.subword_embedding(subwords, subwordsmask)
        return nd.broadcast_div(embeddings + subword_embeddings,
                                num_embeddings)
    else:
        if subwordsmask is None:
            subwordsmask = nd.ones_like(subwords)

        subword_embedding_weights = self.subword_embedding(
            subwords, subwordsmask)
        words_to_unique_subwords_indices = \
            words_to_unique_subwords_indices.reshape(words.shape)

        subword_embeddings = nd.Embedding(
            data=words_to_unique_subwords_indices,
            weight=subword_embedding_weights,
            input_dim=subword_embedding_weights.shape[0],
            output_dim=self.embedding_size)

        num_embeddings = nd.Embedding(
            data=words_to_unique_subwords_indices,
            weight=nd.sum(subwordsmask, axis=-1, keepdims=True),
            input_dim=subword_embedding_weights.shape[0],
            output_dim=1).reshape(words.shape).expand_dims(-1) + wordsmask

        return nd.broadcast_div(embeddings + subword_embeddings,
                                num_embeddings)
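# Sketch of how the unique-subword indirection used above can be produced on
# the data side. This preprocessing is not shown in the snippet and the names
# are illustrative; np.unique(..., return_inverse=True) yields exactly the
# kind of reverse mapping the docstring describes:
#
#     import numpy as np
#     words = np.array([3, 7, 3, 9])
#     unique_words, words_to_unique_subwords_indices = np.unique(
#         words, return_inverse=True)
#     # subwords are then computed only for `unique_words`, and
#     # words_to_unique_subwords_indices maps every position in `words`
#     # to the matching row of that smaller `subwords` array.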
def forward(self, X):
    """Add a learned positional embedding to the input sequence."""
    # (1, seq_len): one position index per step, shared across the batch
    pos_seq = nd.arange(X.shape[1]).expand_dims(0)
    # (1, seq_len, units): look up one positional vector per step
    emb = nd.Embedding(pos_seq, self.position_weight, self._max_len,
                       self._units)
    return self.dropout(X + emb)
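# Hedged usage sketch for the positional-embedding block above. The class name
# `PositionalEmbedding` and its constructor arguments are assumptions made for
# illustration; only the `forward` body comes from the snippet:
#
#     import mxnet as mx
#     layer = PositionalEmbedding(max_len=512, units=64, dropout=0.1)
#     layer.initialize()
#     X = mx.nd.random.normal(shape=(8, 20, 64))  # (batch, seq_len, units)
#     Y = layer(X)                                # same shape, positions added
#     assert Y.shape == X.shape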