Example #1
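(Note: the snippets on this page are excerpted from NNabla projects and omit
their module-level imports. They appear to assume the usual NNabla aliases;
the following import block is a reconstruction for reading convenience, not
part of the original files.)

import os
import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF
from nnabla.parameter import get_parameter_or_create
from nnabla.initializer import UniformInitializer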
    def random_generate(self, num_images, path):

        # Generate from the uniform prior of the base model
        indices = F.randint(low=0,
                            high=self.num_embedding,
                            shape=[num_images] + self.latent_shape)
        indices = F.reshape(indices, (-1, ), inplace=True)
        quantized = F.embed(indices, self.base_model.vq.embedding_weight)
        quantized = F.transpose(
            quantized.reshape([num_images] + self.latent_shape +
                              [quantized.shape[-1]]), (0, 3, 1, 2))

        img_gen_uniform_prior = self.base_model(quantized,
                                                quantized_as_input=True,
                                                test=True)

        # Generate images using pixelcnn prior
        indices = nn.Variable.from_numpy_array(
            np.zeros(shape=[num_images] + self.latent_shape))
        labels = F.randint(low=0, high=self.num_classes, shape=(num_images, 1))
        labels = F.one_hot(labels, shape=(self.num_classes, ))

        # Sample from the pixelcnn prior, pixel by pixel.
        # torch is used for the multinomial sampling below; the NumPy
        # equivalent behaved differently here and did not give correct output.
        import torch
        for i in range(self.latent_shape[0]):
            for j in range(self.latent_shape[1]):
                quantized = F.embed(indices.reshape((-1, )),
                                    self.base_model.vq.embedding_weight)
                quantized = F.transpose(
                    quantized.reshape([num_images] + self.latent_shape +
                                      [quantized.shape[-1]]), (0, 3, 1, 2))
                indices_sample = self.prior(quantized, labels)
                indices_prob = F.reshape(indices_sample,
                                         indices.shape +
                                         (indices_sample.shape[-1], ),
                                         inplace=True)[:, i, j]
                indices_prob = F.softmax(indices_prob)

                indices_prob_tensor = torch.from_numpy(indices_prob.d)
                sample = indices_prob_tensor.multinomial(1).squeeze().numpy()
                indices[:, i, j] = sample

        print('Sampled pixelcnn prior indices:', indices.d)
        quantized = F.embed(indices.reshape((-1, )),
                            self.base_model.vq.embedding_weight)
        quantized = F.transpose(
            quantized.reshape([num_images] + self.latent_shape +
                              [quantized.shape[-1]]), (0, 3, 1, 2))

        img_gen_pixelcnn_prior = self.base_model(quantized,
                                                 quantized_as_input=True,
                                                 test=True)

        self.save_image(img_gen_uniform_prior,
                        os.path.join(path, 'generate_uniform.png'))
        self.save_image(img_gen_pixelcnn_prior,
                        os.path.join(path, 'generate_pixelcnn.png'))

        print('Random labels generated for pixelcnn prior:',
              list(F.max(labels, axis=1, only_index=True).d))
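
As a standalone illustration of the torch-based sampling step above: each row
of the probability tensor is an independent categorical distribution over
codebook indices, and multinomial(1) draws one index per row (a minimal
sketch; all sizes are made up):

import numpy as np
import torch

# four rows, eight categories, uniform probabilities (made-up sizes)
probs = torch.from_numpy(np.full((4, 8), 1.0 / 8))
sample = probs.multinomial(1).squeeze().numpy()  # shape (4,), one index per row
print(sample)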
Example #2
def embed(inp,
          n_inputs,
          n_features,
          initializer=None,
          fix_parameters=False,
          apply_w=None):
    """ Embed.

    Embed slices a matrix/tensor with indexing array/tensor. Weights are
    initialized with :obj:`nnabla.initializer.UniformInitializer` within
    the range of :math:`-\\sqrt{3}` and :math:`\\sqrt{3}`.

    Args:
        inp (~nnabla.Variable): [Integer] Indices with shape
            :math:`(I_0, ..., I_N)`
        n_inputs : number of possible inputs, words, or vocabularies
        n_features : number of embedding features
        initializer (:obj:`nnabla.initializer.BaseInitializer`): Initializer
            for the embedding weight. Defaults to the
            :obj:`nnabla.initializer.UniformInitializer` described above.
        fix_parameters (bool): When set to `True`, the embedding weight matrix
            will not be updated.
        apply_w (function): Lambda, function, or callable object applied to
            the weights.

    Returns:
        ~nnabla.Variable: Output with shape
            :math:`(I_0, ..., I_N, W_1, ..., W_M)`
    """
    if initializer is None:
        initializer = UniformInitializer((-np.sqrt(3.), np.sqrt(3)))
    w = get_parameter_or_create("W", [n_inputs, n_features], initializer, True,
                                not fix_parameters)
    if apply_w is not None:
        w = apply_w(w)
    return F.embed(inp, w)
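
A minimal usage sketch of the embed() defined above (the scope name
'token_embed' and all sizes are illustrative, not from the original project):

import numpy as np
import nnabla as nn

indices = nn.Variable.from_numpy_array(np.array([[1, 2], [3, 0]]))
with nn.parameter_scope('token_embed'):
    y = embed(indices, n_inputs=10, n_features=4)
y.forward()
print(y.shape)  # (2, 2, 4) == indices.shape + (n_features,)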
Example #3
File: embed.py Project: zeta1999/nnabla
    def backward_impl(self, inputs, outputs, prop_down, accum):
        # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
        # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

        # Inputs
        x0 = inputs[0].data
        w0 = inputs[1].data
        dy = inputs[2].data
        # Outputs
        dx0 = outputs[0].data
        dw0 = outputs[1].data
        # Grads of inputs
        g_x0 = inputs[0].grad
        g_w0 = inputs[1].grad
        g_dy = inputs[2].grad
        # Grads of outputs
        g_dx0 = outputs[0].grad
        g_dw0 = outputs[1].grad

        # Computation
        if prop_down[2]:
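            # dw scatters rows of dy into the weight gradient at the index
            # positions x0, so the gradient of dw0 with respect to dy gathers
            # g_dw0 back at the same indices, which is again an embed.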
            g_dy_ = F.embed(x0, g_dw0)
            if accum[2]:
                g_dy += g_dy_
            else:
                g_dy.copy_from(g_dy_)
Example #4
def embed(inp, n_inputs, n_features, initializer=None,
          itr=1, fix_parameters=False, sn=True, test=False):
    """
    """
    w = get_parameter_or_create("W", [n_inputs, n_features],
                                initializer, not fix_parameters)
    w_sn = spectral_normalization_for_affine(
        w, itr=itr, test=test) if sn else w
    return F.embed(inp, w_sn)
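
spectral_normalization_for_affine is project-specific and not shown here.
Conventionally, spectral normalization divides W by an estimate of its largest
singular value obtained by power iteration; below is a generic NumPy sketch of
that estimate (an assumption about the technique, not the project's
implementation):

import numpy as np

# power-iteration estimate of the spectral norm (largest singular value)
# of W; all sizes are made up
W = np.random.randn(10, 4)
u = np.random.randn(10)
for _ in range(5):  # analogous to the `itr` argument above
    v = W.T @ u
    v /= np.linalg.norm(v)
    u = W @ v
    u /= np.linalg.norm(u)
sigma = u @ W @ v   # approximate largest singular value
W_sn = W / sigma    # spectrally normalized weight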
Example #5
def encode_text(text):
    param_dict = nn.get_parameters()

    embed_dim = param_dict['text_projection'].shape[1]
    context_length = param_dict['positional_embedding'].shape[0]
    vocab_size = param_dict['token_embedding/W'].shape[0]
    transformer_width = param_dict['ln_final/W'].shape[0]
    transformer_heads = transformer_width // 64
    transformer_layers = len(
        set(
            k.split('/')[2] for k in param_dict.keys()
            if k.startswith('transformer/resblocks')))

    token_embedding = nn.parameter.get_parameter_or_create(
        name='token_embedding/W', shape=(vocab_size, transformer_width))
    x = F.embed(text, token_embedding)  # [batch_size, n_ctx, d_model]

    positional_embedding = nn.parameter.get_parameter_or_create(
        name='positional_embedding',
        shape=(context_length, transformer_width)).reshape(
            (1, context_length, transformer_width))
    x = x + positional_embedding

    x = F.transpose(x, (1, 0, 2))  # NLD -> LND

    x = transformer(x,
                    transformer_width,
                    transformer_layers,
                    transformer_heads,
                    attn_mask=build_attn_mask(context_length))

    x = F.transpose(x, (1, 0, 2))  # LND -> NLD

    ln_final_W = nn.parameter.get_parameter_or_create(
        name='ln_final/W', shape=(transformer_width, )).reshape(
            (1, 1, transformer_width))
    ln_final_b = nn.parameter.get_parameter_or_create(
        name='ln_final/b', shape=(transformer_width, )).reshape(
            (1, 1, transformer_width))
    x = F.layer_normalization(x, ln_final_b, ln_final_W, batch_axis=(0, 1))

    idx = F.max(text, axis=-1, only_index=True)
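    # idx: position of the end-of-text token; with CLIP-style tokenization the
    # EOT token has the largest id in each sequence, so argmax locates it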
    idx.forward()
    x = x[list(range(x.shape[0])), idx.d].reshape((1, x.shape[0], -1))
    text_projection = nn.parameter.get_parameter_or_create(
        name='text_projection', shape=(transformer_width, embed_dim)).reshape(
            (1, transformer_width, embed_dim))
    x = F.batch_matmul(x, text_projection)

    x = x.reshape((-1, embed_dim))

    return x
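
The token + positional embedding stage of encode_text, as a standalone sketch
(made-up sizes: vocab_size=100, context_length=8, transformer_width=16):

import numpy as np
import nnabla as nn
import nnabla.functions as F

text = nn.Variable.from_numpy_array(np.random.randint(0, 100, size=(2, 8)))
token_embedding = nn.parameter.get_parameter_or_create(
    name='token_embedding/W', shape=(100, 16))
positional_embedding = nn.parameter.get_parameter_or_create(
    name='positional_embedding', shape=(8, 16)).reshape((1, 8, 16))
x = F.embed(text, token_embedding) + positional_embedding  # [2, 8, 16]
x.forward()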
Example #6
def embed_filter_grad_backward(inputs):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.

    Returns:
      list of Variable: Gradients with respect to the inputs of the corresponding function.
    """
    gdw = inputs[0]
    dy = inputs[1]
    x0 = inputs[2]
    gdy = F.embed(x0, gdw)
    return gdy, None
Example #7
def embed(inp, n_inputs, n_features):
    """ Embed.

    Embed slices a matrix/tensor with indexing array/tensor

    Args:
        x(~nnabla.Variable): [Integer] Indices with shape :math:`(I_0, ..., I_N)`
        n_inputs : number of possible inputs, words, or vocabularies
        n_features : number of embedding features
    Returns:
        ~nnabla.Variable: Output with shape :math:`(I_0, ..., I_N, W_1, ..., W_M)`
    """
    w = get_parameter_or_create("W", [n_inputs, n_features],
                                UniformInitializer((-np.sqrt(3.), np.sqrt(3))), True)
    return F.embed(inp, w)
Example #8
    def __call__(self, x, return_encoding_indices=False):

        x = F.transpose(x, (0, 2, 3, 1))
        x_flat = x.reshape((-1, self.embedding_dim))

        x_flat_squared = F.broadcast(F.sum(x_flat**2, axis=1, keepdims=True),
                                     (x_flat.shape[0], self.num_embedding))
        emb_wt_squared = F.transpose(
            F.sum(self.embedding_weight**2, axis=1, keepdims=True), (1, 0))

        distances = x_flat_squared + emb_wt_squared - 2 * \
            F.affine(x_flat, F.transpose(self.embedding_weight, (1, 0)))

        encoding_indices = F.min(distances,
                                 only_index=True,
                                 axis=1,
                                 keepdims=True)
        encoding_indices.need_grad = False

        quantized = F.embed(
            encoding_indices.reshape(encoding_indices.shape[:-1]),
            self.embedding_weight).reshape(x.shape)

        if return_encoding_indices:
            return encoding_indices, F.transpose(quantized, (0, 3, 1, 2))

        encodings = F.one_hot(encoding_indices, (self.num_embedding, ))

        e_latent_loss = F.mean(
            F.squared_error(quantized.get_unlinked_variable(need_grad=False),
                            x))
        q_latent_loss = F.mean(
            F.squared_error(quantized,
                            x.get_unlinked_variable(need_grad=False)))
        loss = q_latent_loss + self.commitment_cost * e_latent_loss

        quantized = x + (quantized - x).get_unlinked_variable(need_grad=False)

        avg_probs = F.mean(encodings, axis=0)
        perplexity = F.exp(-F.sum(avg_probs * F.log(avg_probs + 1.0e-10)))

        return loss, F.transpose(quantized,
                                 (0, 3, 1, 2)), perplexity, encodings
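
The nearest-codebook lookup at the top of __call__ also works in isolation; a
minimal sketch with made-up sizes (num_embedding=8, embedding_dim=4):

import numpy as np
import nnabla as nn
import nnabla.functions as F

x_flat = nn.Variable.from_numpy_array(np.random.randn(6, 4))
codebook = nn.Variable.from_numpy_array(np.random.randn(8, 4))
# squared distances ||x||^2 + ||e||^2 - 2 x.e between rows and codewords
x2 = F.broadcast(F.sum(x_flat**2, axis=1, keepdims=True), (6, 8))
e2 = F.transpose(F.sum(codebook**2, axis=1, keepdims=True), (1, 0))
distances = x2 + e2 - 2 * F.affine(x_flat, F.transpose(codebook, (1, 0)))
indices = F.min(distances, axis=1, keepdims=True, only_index=True)
quantized = F.embed(indices.reshape((6,)), codebook)  # (6, 4)
quantized.forward()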
Example #9
    def __call__(self, inp):
        return F.embed(inp, self.W)
Example #10
def LSTMAttentionDecoder(inputs=None,
                         encoder_output=None,
                         initial_state=None,
                         return_sequences=False,
                         return_state=False,
                         inference_params=None,
                         name='lstm'):

    if inputs is None:
        assert inference_params is not None, 'if inputs is None, inference_params must not be None.'
    else:
        sentence_length = inputs.shape[1]

    assert type(initial_state) is tuple or type(initial_state) is list, \
           'initial_state must be a tuple or a list.'
    assert len(initial_state) == 2, \
           'initial_state must have only two states.'

    c0, h0 = initial_state

    assert c0.shape == h0.shape, 'shapes of initial_state must be the same.'
    batch_size, units = c0.shape

    cell = c0
    hidden = h0

    hs = []

    if inference_params is None:
        xs = F.split(F.slice(inputs,
                             stop=(batch_size, sentence_length - 1, units)),
                     axis=1)
        pad = nn.Variable.from_numpy_array(
            np.array([w2i_source['pad']] * batch_size))
        xs = [
            PF.embed(
                pad, vocab_size_source, embedding_size, name='enc_embeddings')
        ] + list(xs)

        compute_context = GlobalAttention(encoder_output, 1024)

        for x in xs:
            with nn.parameter_scope(name):
                cell, hidden = lstm_cell(x, cell, hidden)
                context = compute_context(hidden)
                h_t = F.tanh(
                    PF.affine(F.concatenate(context, hidden, axis=1),
                              1024,
                              with_bias=False,
                              name='Wc'))
            hs.append(h_t)
    else:
        assert batch_size == 1, 'batch size of inference mode must be 1.'
        embed_weight, output_weight, output_bias = inference_params
        pad = nn.Variable.from_numpy_array(
            np.array([w2i_source['pad']] * batch_size))
        x = PF.embed(pad,
                     vocab_size_source,
                     embedding_size,
                     name='enc_embeddings')

        compute_context = GlobalAttention(encoder_output, 1024)

        word_index = 0
        ret = []
        i = 0
        while i2w_target[word_index] != '。' and i < 20:
            with nn.parameter_scope(name):
                cell, hidden = lstm_cell(x, cell, hidden)
                context = compute_context(hidden)
                h_t = F.tanh(
                    PF.affine(F.concatenate(context, hidden, axis=1),
                              1024,
                              with_bias=False,
                              name='Wc'))
            output = F.affine(h_t, output_weight, bias=output_bias)
            word_index = np.argmax(output.d[0])
            ret.append(word_index)
            x = nn.Variable.from_numpy_array(
                np.array([word_index], dtype=np.int32))
            x = F.embed(x, embed_weight)

            i += 1
        return ret

    if return_sequences:
        ret = F.stack(*hs, axis=1)
    else:
        ret = hs[-1]

    if return_state:
        return ret, cell, hidden
    else:
        return ret
Example #11
def LSTMDecoder(inputs=None,
                initial_state=None,
                return_sequences=False,
                return_state=False,
                inference_params=None,
                name='lstm'):

    if inputs is None:
        assert inference_params is not None, 'if inputs is None, inference_params must not be None.'
    else:
        sentence_length = inputs.shape[1]

    assert type(initial_state) is tuple or type(initial_state) is list, \
           'initial_state must be a tuple or a list.'
    assert len(initial_state) == 2, \
           'initial_state must have only two states.'

    c0, h0 = initial_state

    assert c0.shape == h0.shape, 'shapes of initial_state must be the same.'
    batch_size, units = c0.shape

    cell = c0
    hidden = h0

    hs = []

    if inference_params is None:
        xs = F.split(F.slice(inputs,
                             stop=(batch_size, sentence_length - 1, units)),
                     axis=1)
        xs = [nn.Variable.from_numpy_array(np.ones(xs[0].shape))] + list(xs)
        for x in xs:
            with nn.parameter_scope(name):
                cell, hidden = lstm_cell(x, cell, hidden)
            hs.append(hidden)
    else:
        assert batch_size == 1, 'batch size of inference mode must be 1.'
        embed_weight, output_weight, output_bias = inference_params
        x = nn.Variable.from_numpy_array(np.ones((1, embed_weight.shape[1])))

        word_index = 0
        ret = []
        i = 0
        while i2w_target[word_index] != period and i < 20:
            with nn.parameter_scope(name):
                cell, hidden = lstm_cell(x, cell, hidden)
            output = F.affine(hidden, output_weight, bias=output_bias)
            word_index = np.argmax(output.d[0])
            ret.append(word_index)
            x = nn.Variable.from_numpy_array(
                np.array([word_index], dtype=np.int32))
            x = F.embed(x, embed_weight)

            i += 1
        return ret

    if return_sequences:
        ret = F.stack(*hs, axis=1)
    else:
        ret = hs[-1]

    if return_state:
        return ret, cell, hidden
    else:
        return ret