def random_generate(self, num_images, path):
    # Generate from the uniform prior of the base model
    indices = F.randint(low=0, high=self.num_embedding,
                        shape=[num_images] + self.latent_shape)
    indices = F.reshape(indices, (-1,), inplace=True)
    quantized = F.embed(indices, self.base_model.vq.embedding_weight)
    quantized = F.transpose(
        quantized.reshape([num_images] + self.latent_shape +
                          [quantized.shape[-1]]), (0, 3, 1, 2))
    img_gen_uniform_prior = self.base_model(
        quantized, quantized_as_input=True, test=True)

    # Generate images using the pixelcnn prior
    indices = nn.Variable.from_numpy_array(
        np.zeros(shape=[num_images] + self.latent_shape))
    labels = F.randint(low=0, high=self.num_classes, shape=(num_images, 1))
    labels = F.one_hot(labels, shape=(self.num_classes,))

    # Sample from pixelcnn - pixel by pixel
    import torch  # numpy sampling behaves differently and did not give correct output
    for i in range(self.latent_shape[0]):
        for j in range(self.latent_shape[1]):
            quantized = F.embed(indices.reshape((-1,)),
                                self.base_model.vq.embedding_weight)
            quantized = F.transpose(
                quantized.reshape([num_images] + self.latent_shape +
                                  [quantized.shape[-1]]), (0, 3, 1, 2))
            indices_sample = self.prior(quantized, labels)
            indices_prob = F.reshape(
                indices_sample,
                indices.shape + (indices_sample.shape[-1],),
                inplace=True)[:, i, j]
            indices_prob = F.softmax(indices_prob)

            # Sample the (i, j) latent index from the categorical distribution.
            indices_prob_tensor = torch.from_numpy(indices_prob.d)
            sample = indices_prob_tensor.multinomial(1).squeeze().numpy()
            indices[:, i, j] = sample

    quantized = F.embed(indices.reshape((-1,)),
                        self.base_model.vq.embedding_weight)
    quantized = F.transpose(
        quantized.reshape([num_images] + self.latent_shape +
                          [quantized.shape[-1]]), (0, 3, 1, 2))
    img_gen_pixelcnn_prior = self.base_model(
        quantized, quantized_as_input=True, test=True)

    self.save_image(img_gen_uniform_prior,
                    os.path.join(path, 'generate_uniform.png'))
    self.save_image(img_gen_pixelcnn_prior,
                    os.path.join(path, 'generate_pixelcnn.png'))

    print('Random labels generated for pixelcnn prior:',
          list(F.max(labels, axis=1, only_index=True).d))
def embed(inp, n_inputs, n_features, initializer=None,
          fix_parameters=False, apply_w=None):
    """ Embed.

    Embed slices a matrix/tensor with an indexing array/tensor.
    Weights are initialized with :obj:`nnabla.initializer.UniformInitializer`
    within the range of :math:`-\\sqrt{3}` and :math:`\\sqrt{3}`.

    Args:
        inp (~nnabla.Variable): [Integer] Indices with shape
            :math:`(I_0, ..., I_N)`
        n_inputs : number of possible inputs, words or vocabularies
        n_features : number of embedding features
        initializer (:obj:`nnabla.initializer.BaseInitializer`): Initializer
            for the embedding weight.
        fix_parameters (bool): When set to `True`, the embedding weight
            matrix will not be updated.
        apply_w (function): Lambda, function, or callable object applied
            to the weights.

    Returns:
        ~nnabla.Variable: Output with shape
        :math:`(I_0, ..., I_N, n_features)`
    """
    if initializer is None:
        initializer = UniformInitializer((-np.sqrt(3.), np.sqrt(3.)))
    w = get_parameter_or_create("W", [n_inputs, n_features],
                                initializer, True, not fix_parameters)
    if apply_w is not None:
        w = apply_w(w)
    return F.embed(inp, w)
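# A minimal usage sketch of the parametric embed above, not part of the
# original API. The 'word_embed' scope name, the helper name, and all shapes
# are illustrative assumptions; the standard aliases `import nnabla as nn`,
# `import nnabla.functions as F`, and `import numpy as np` are assumed.
def _embed_usage_sketch():
    # Two sequences of three token ids each, drawn from a 100-word vocabulary.
    ids = nn.Variable.from_numpy_array(
        np.array([[1, 2, 4], [0, 3, 5]], dtype=np.int32))
    with nn.parameter_scope('word_embed'):
        h = embed(ids, n_inputs=100, n_features=16)  # shape: (2, 3, 16)
    h.forward()
    return h.d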
def backward_impl(self, inputs, outputs, prop_down, accum):
    # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
    # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

    # Inputs
    x0 = inputs[0].data
    w0 = inputs[1].data
    dy = inputs[2].data
    # Outputs
    dx0 = outputs[0].data
    dw0 = outputs[1].data
    # Grads of inputs
    g_x0 = inputs[0].grad
    g_w0 = inputs[1].grad
    g_dy = inputs[2].grad
    # Grads of outputs
    g_dx0 = outputs[0].grad
    g_dw0 = outputs[1].grad

    # Computation
    if prop_down[2]:
        # The weight grad of embed scatters dy at the indices x0, so its
        # grad wrt dy is the corresponding gather: embed(x0, g_dw0).
        g_dy_ = F.embed(x0, g_dw0)
        if accum[2]:
            g_dy += g_dy_
        else:
            g_dy.copy_from(g_dy_)
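# Hedged sketch of how the double backward above gets exercised: build the
# first-order grad of an embed output with nn.grad, then backpropagate a
# scalar through that grad graph. This is a verification sketch under assumed
# aliases (nn, F, np), not part of the backward implementation itself.
def _embed_double_backward_sketch():
    x = nn.Variable.from_numpy_array(np.array([0, 2, 1], dtype=np.int32))
    w = nn.Variable.from_numpy_array(
        np.random.randn(5, 4).astype(np.float32))
    w.need_grad = True
    y = F.embed(x, w)
    dw = nn.grad([F.sum(y)], [w])[0]  # first-order grad wrt the weight
    z = F.sum(dw * w)                 # scalar that depends on the grad graph
    z.forward()
    z.backward()                      # triggers the backward_impl above
    return w.g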
def embed(inp, n_inputs, n_features, initializer=None, itr=1,
          fix_parameters=False, sn=True, test=False):
    """Embedding layer with optional spectral normalization (`sn`) applied
    to the weight; `itr` is the number of power-iteration steps.
    """
    w = get_parameter_or_create("W", [n_inputs, n_features],
                                initializer, not fix_parameters)
    w_sn = spectral_normalization_for_affine(
        w, itr=itr, test=test) if sn else w
    return F.embed(inp, w_sn)
def encode_text(text):
    param_dict = nn.get_parameters()

    # Infer the architecture from the shapes of the loaded parameters.
    embed_dim = param_dict['text_projection'].shape[1]
    context_length = param_dict['positional_embedding'].shape[0]
    vocab_size = param_dict['token_embedding/W'].shape[0]
    transformer_width = param_dict['ln_final/W'].shape[0]
    transformer_heads = transformer_width // 64
    transformer_layers = len(set(
        k.split('/')[2] for k in param_dict.keys()
        if k.startswith('transformer/resblocks')))

    token_embedding = nn.parameter.get_parameter_or_create(
        name='token_embedding/W', shape=(vocab_size, transformer_width))
    x = F.embed(text, token_embedding)  # [batch_size, n_ctx, d_model]

    positional_embedding = nn.parameter.get_parameter_or_create(
        name='positional_embedding',
        shape=(context_length, transformer_width)).reshape(
            (1, context_length, transformer_width))
    x = x + positional_embedding

    x = F.transpose(x, (1, 0, 2))  # NLD -> LND
    x = transformer(x, transformer_width, transformer_layers,
                    transformer_heads,
                    attn_mask=build_attn_mask(context_length))
    x = F.transpose(x, (1, 0, 2))  # LND -> NLD

    ln_final_W = nn.parameter.get_parameter_or_create(
        name='ln_final/W', shape=(transformer_width,)).reshape(
            (1, 1, transformer_width))
    ln_final_b = nn.parameter.get_parameter_or_create(
        name='ln_final/b', shape=(transformer_width,)).reshape(
            (1, 1, transformer_width))
    x = F.layer_normalization(x, ln_final_b, ln_final_W, batch_axis=(0, 1))

    # Take the features at the eot token, which has the highest token id
    # in each sequence.
    idx = F.max(text, axis=-1, only_index=True)
    idx.forward()
    x = x[list(range(x.shape[0])), idx.d].reshape((1, x.shape[0], -1))

    text_projection = nn.parameter.get_parameter_or_create(
        name='text_projection',
        shape=(transformer_width, embed_dim)).reshape(
            (1, transformer_width, embed_dim))
    x = F.batch_matmul(x, text_projection)
    x = x.reshape((-1, embed_dim))

    return x
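# Hedged usage sketch for encode_text: it assumes the CLIP parameters have
# already been loaded into the global parameter scope (for example via
# nn.load_parameters(...)) and that token ids come from a tokenizer that is
# not shown here; the context length of 77 matches CLIP's default.
def _encode_text_sketch(token_ids):
    # token_ids: numpy int array of shape (batch_size, 77)
    text = nn.Variable.from_numpy_array(token_ids.astype(np.int32))
    features = encode_text(text)  # -> (batch_size, embed_dim)
    features.forward()
    return features.d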
def embed_filter_grad_backward(inputs):
    """
    Args:
        inputs (list of nn.Variable): Incoming grads/inputs to/of the
            forward function.
        kwargs (dict of arguments): Dictionary of the corresponding
            function arguments.

    Return:
        list of Variable: Return the gradients wrt inputs of the
        corresponding function.
    """
    gdw = inputs[0]
    dy = inputs[1]
    x0 = inputs[2]
    # The filter grad of embed scatters dy at the indices x0, so its grad
    # wrt dy is the corresponding gather: another embed with the same indices.
    gdy = F.embed(x0, gdw)
    return gdy, None
def embed(inp, n_inputs, n_features):
    """ Embed.

    Embed slices a matrix/tensor with an indexing array/tensor.

    Args:
        inp (~nnabla.Variable): [Integer] Indices with shape
            :math:`(I_0, ..., I_N)`
        n_inputs : number of possible inputs, words or vocabularies
        n_features : number of embedding features

    Returns:
        ~nnabla.Variable: Output with shape
        :math:`(I_0, ..., I_N, n_features)`
    """
    w = get_parameter_or_create(
        "W", [n_inputs, n_features],
        UniformInitializer((-np.sqrt(3.), np.sqrt(3.))), True)
    return F.embed(inp, w)
def __call__(self, x, return_encoding_indices=False):
    # NCHW -> NHWC so that the channel (embedding) axis comes last.
    x = F.transpose(x, (0, 2, 3, 1))
    x_flat = x.reshape((-1, self.embedding_dim))

    # Squared Euclidean distance between every input vector and every
    # codebook entry: ||x||^2 + ||e||^2 - 2 x.e
    x_flat_squared = F.broadcast(
        F.sum(x_flat**2, axis=1, keepdims=True),
        (x_flat.shape[0], self.num_embedding))
    emb_wt_squared = F.transpose(
        F.sum(self.embedding_weight**2, axis=1, keepdims=True), (1, 0))
    distances = x_flat_squared + emb_wt_squared - 2 * \
        F.affine(x_flat, F.transpose(self.embedding_weight, (1, 0)))

    # Index of the nearest codebook entry for each input vector.
    encoding_indices = F.min(
        distances, only_index=True, axis=1, keepdims=True)
    encoding_indices.need_grad = False

    quantized = F.embed(
        encoding_indices.reshape(encoding_indices.shape[:-1]),
        self.embedding_weight).reshape(x.shape)

    if return_encoding_indices:
        return encoding_indices, F.transpose(quantized, (0, 3, 1, 2))

    encodings = F.one_hot(encoding_indices, (self.num_embedding,))

    # VQ-VAE losses: commit the encoder output to the codebook entries
    # (e_latent_loss) and pull the codebook toward the encoder output
    # (q_latent_loss).
    e_latent_loss = F.mean(F.squared_error(
        quantized.get_unlinked_variable(need_grad=False), x))
    q_latent_loss = F.mean(F.squared_error(
        quantized, x.get_unlinked_variable(need_grad=False)))
    loss = q_latent_loss + self.commitment_cost * e_latent_loss

    # Straight-through estimator: copy gradients from quantized back to x.
    quantized = x + (quantized - x).get_unlinked_variable(need_grad=False)

    # Perplexity of the codebook usage (exp of the entropy of the
    # average encoding distribution).
    avg_probs = F.mean(encodings, axis=0)
    perplexity = F.exp(-F.sum(avg_probs * F.log(avg_probs + 1.0e-10)))

    return loss, F.transpose(quantized, (0, 3, 1, 2)), perplexity, encodings
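# Hedged usage sketch for the vector quantizer above. Only __call__ is shown,
# so a ready instance `vq` exposing embedding_dim is assumed rather than a
# constructor; the feature-map shape is arbitrary, with the channel count
# matching embedding_dim.
def _vq_usage_sketch(vq):
    x = nn.Variable.from_numpy_array(
        np.random.randn(4, vq.embedding_dim, 8, 8).astype(np.float32))
    loss, quantized, perplexity, encodings = vq(x)
    loss.forward()
    # quantized keeps the NCHW layout of x; loss drives the codebook update.
    return loss.d, quantized.shape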
def __call__(self, inp):
    return F.embed(inp, self.W)
def LSTMAttentionDecoder(inputs=None,
                         encoder_output=None,
                         initial_state=None,
                         return_sequences=False,
                         return_state=False,
                         inference_params=None,
                         name='lstm'):
    if inputs is None:
        assert inference_params is not None, \
            'if inputs is None, inference_params must not be None.'
    else:
        sentence_length = inputs.shape[1]

    assert type(initial_state) is tuple or type(initial_state) is list, \
        'initial_state must be a tuple or a list.'
    assert len(initial_state) == 2, \
        'initial_state must have only two states.'

    c0, h0 = initial_state
    assert c0.shape == h0.shape, 'shapes of initial_state must be the same.'

    batch_size, units = c0.shape
    cell = c0
    hidden = h0

    hs = []

    if inference_params is None:
        xs = F.split(F.slice(inputs,
                             stop=(batch_size, sentence_length - 1, units)),
                     axis=1)
        # Shift the target sequence right by prepending the pad embedding.
        pad = nn.Variable.from_numpy_array(
            np.array([w2i_source['pad']] * batch_size))
        xs = [PF.embed(pad, vocab_size_source, embedding_size,
                       name='enc_embeddings')] + list(xs)

        compute_context = GlobalAttention(encoder_output, 1024)

        for x in xs:
            with nn.parameter_scope(name):
                cell, hidden = lstm_cell(x, cell, hidden)
                context = compute_context(hidden)
                h_t = F.tanh(PF.affine(F.concatenate(context, hidden, axis=1),
                                       1024, with_bias=False, name='Wc'))
            hs.append(h_t)
    else:
        assert batch_size == 1, 'batch size of inference mode must be 1.'
        embed_weight, output_weight, output_bias = inference_params

        pad = nn.Variable.from_numpy_array(
            np.array([w2i_source['pad']] * batch_size))
        x = PF.embed(pad, vocab_size_source, embedding_size,
                     name='enc_embeddings')

        compute_context = GlobalAttention(encoder_output, 1024)

        word_index = 0
        ret = []
        i = 0
        # Decode greedily until the end-of-sentence mark or 20 steps.
        while i2w_target[word_index] != '。' and i < 20:
            with nn.parameter_scope(name):
                cell, hidden = lstm_cell(x, cell, hidden)
                context = compute_context(hidden)
                h_t = F.tanh(PF.affine(F.concatenate(context, hidden, axis=1),
                                       1024, with_bias=False, name='Wc'))
            output = F.affine(h_t, output_weight, bias=output_bias)
            # output.d is read directly; this assumes the graph runs eagerly
            # (auto-forward) or is forwarded elsewhere.
            word_index = np.argmax(output.d[0])
            ret.append(word_index)
            x = nn.Variable.from_numpy_array(
                np.array([word_index], dtype=np.int32))
            x = F.embed(x, embed_weight)
            i += 1
        return ret

    if return_sequences:
        ret = F.stack(*hs, axis=1)
    else:
        ret = hs[-1]

    if return_state:
        return ret, cell, hidden
    return ret
def LSTMDecoder(inputs=None,
                initial_state=None,
                return_sequences=False,
                return_state=False,
                inference_params=None,
                name='lstm'):
    if inputs is None:
        assert inference_params is not None, \
            'if inputs is None, inference_params must not be None.'
    else:
        sentence_length = inputs.shape[1]

    assert type(initial_state) is tuple or type(initial_state) is list, \
        'initial_state must be a tuple or a list.'
    assert len(initial_state) == 2, \
        'initial_state must have only two states.'

    c0, h0 = initial_state
    assert c0.shape == h0.shape, 'shapes of initial_state must be the same.'

    batch_size, units = c0.shape
    cell = c0
    hidden = h0

    hs = []

    if inference_params is None:
        xs = F.split(F.slice(inputs,
                             stop=(batch_size, sentence_length - 1, units)),
                     axis=1)
        # Shift right: feed an all-ones vector as the first decoder input.
        xs = [nn.Variable.from_numpy_array(np.ones(xs[0].shape))] + list(xs)

        for x in xs:
            with nn.parameter_scope(name):
                cell, hidden = lstm_cell(x, cell, hidden)
            hs.append(hidden)
    else:
        assert batch_size == 1, 'batch size of inference mode must be 1.'
        embed_weight, output_weight, output_bias = inference_params

        x = nn.Variable.from_numpy_array(np.ones((1, embed_weight.shape[1])))
        word_index = 0
        ret = []
        i = 0
        # Decode greedily until the end-of-sentence mark or 20 steps.
        while i2w_target[word_index] != period and i < 20:
            with nn.parameter_scope(name):
                cell, hidden = lstm_cell(x, cell, hidden)
            output = F.affine(hidden, output_weight, bias=output_bias)
            # output.d is read directly; this assumes the graph runs eagerly
            # (auto-forward) or is forwarded elsewhere.
            word_index = np.argmax(output.d[0])
            ret.append(word_index)
            x = nn.Variable.from_numpy_array(
                np.array([word_index], dtype=np.int32))
            x = F.embed(x, embed_weight)
            i += 1
        return ret

    if return_sequences:
        ret = F.stack(*hs, axis=1)
    else:
        ret = hs[-1]

    if return_state:
        return ret, cell, hidden
    return ret
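# Hedged sketch of inference-time decoding with LSTMDecoder; the same pattern
# applies to LSTMAttentionDecoder with an extra encoder_output argument. The
# parameter keys below are hypothetical placeholders for however the training
# graph was scoped; look up the real keys with nn.get_parameters().
def _decode_sketch(c0, h0):
    # c0, h0: initial LSTM states of shape (1, units); inference requires
    # batch size 1.
    params = nn.get_parameters()
    inference_params = (params['dec_embeddings/embed/W'],  # hypothetical key
                        params['output/affine/W'],         # hypothetical key
                        params['output/affine/b'])         # hypothetical key
    return LSTMDecoder(initial_state=(c0, h0),
                       inference_params=inference_params)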