Code example #1
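The snippets below are excerpted from larger files; a minimal sketch of the import aliases they all assume (standard nnabla conventions):

import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF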
def sin_cos_positional_embedding(x,
                                 num_encoding_functions,
                                 include_input=True,
                                 log_sampling=True):
    """Given coordinate positions of sampling points as a (N,3) array, this functions returns embeds each point with the sine and cosine function

    Args:
        x (nn.Variable or nn.NdArray): Shape is (N, 3). 
        num_encoding_functions (int): number of frequencies to encode for each grid position
        include_input (bool, optional): Whether include the original grid position along with the encoding of the position. Defaults to True.
        log_sampling (bool, optional): Sample logarithmically and not linearly. Defaults to True.

    Returns:
        [nn.Variable or nn.NdArray]: (N, num_encoding_functions*3*2+3) if include_input is True else (N, num_encoding_functions*3*2)
    """

    encoding = [x] if include_input else []

    if log_sampling:
        frequency_increments = F.arange(0, num_encoding_functions)
        frequency_bands = F.pow2(
            F.constant(2, shape=frequency_increments.shape),
            frequency_increments)
    else:
        frequency_bands = F.arange(2**0,
                                   2**(num_encoding_functions - 1) + 1e-5,
                                   (2**(num_encoding_functions - 1) - 1) /
                                   (num_encoding_functions - 1.0))

    for freq in frequency_bands:
        for func in [F.sin, F.cos]:
            encoding.append(func(x * F.reshape(freq, (1, 1))))
    return F.concatenate(*encoding, axis=x.ndim - 1)
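
A minimal usage sketch (the sample coordinates and frequency count below are illustrative assumptions):

nn.set_auto_forward(True)
pts = nn.Variable.from_numpy_array(np.random.rand(8, 3).astype(np.float32))
emb = sin_cos_positional_embedding(pts, num_encoding_functions=6)
print(emb.shape)  # (8, 6*3*2 + 3) = (8, 39)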
Code example #2
def get_direction_grid(height, width, focal_length, return_ij_2d_grid=False):
    """Forms a mesh grid for a given height and width and assumes the camera position to be fixed at the center of the the grid 
    (with a sufficiently large enough offset in z direction). Based on the prefixed camera position, 
    computes ray direction for every point in the grid.

    Args:
        height (int): Height of the image/grid
        width (int): Width of the image/grid
        focal_length (float): Camera focal length (calibrated intrinsics)

    Returns:
        directions (nn.Variable or nn.NdArray): Shape is (height, width, 3) - direction of projected ray for every grid point.
    """
    x = F.arange(0, width)
    y = F.arange(0, height)

    xx, yy = F.meshgrid(x, y)

    if return_ij_2d_grid:
        return F.stack(*list(F.meshgrid(x, y, ij_indexing=True)), axis=2)

    directions = F.stack((xx - width * 0.5) / focal_length,
                         -(yy - height * 0.5) / focal_length,
                         F.constant(-1, xx.shape),
                         axis=2)
    return directions
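
A minimal usage sketch (height, width, and focal length are illustrative):

nn.set_auto_forward(True)
directions = get_direction_grid(height=4, width=6, focal_length=50.0)
print(directions.shape)  # (4, 6, 3); one ray direction per grid point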
Code example #3
def yolov2_image_coordinate(t_xy, t_wh, biases):
    # Convert raw YOLOv2 box predictions into normalized image coordinates:
    # grid-cell offsets built with F.arange are added to t_xy, and t_wh is
    # scaled by the anchor-box biases. The offset grids and biases are cached
    # in the parameter store to avoid rebuilding them at every call.
    import numpy as np
    from nnabla.parameter import pop_parameter, set_parameter
    h, w = t_xy.shape[-2:]
    xs = pop_parameter('xs')
    ys = pop_parameter('ys')
    if xs is None or (w != xs.shape[-1]):
        xs = F.arange(0, w).reshape((1, 1, 1, w))
        xs.need_grad = False
        set_parameter('xs', xs)
    if ys is None or (h != ys.shape[-2]):
        ys = F.arange(0, h).reshape((1, 1, h, 1))
        ys.need_grad = False
        set_parameter('ys', ys)
    t_x, t_y = F.split(t_xy, axis=2)
    oshape = list(t_x.shape)
    oshape.insert(2, 1)
    t_x = F.reshape((t_x + xs) / w, oshape)
    t_y = F.reshape((t_y + ys) / h, oshape)
    pop_parameter('biases')
    biases = biases.reshape(1, biases.shape[0], biases.shape[1], 1,
                            1) / np.array([w, h]).reshape(1, 1, 2, 1, 1)
    b = nn.Variable.from_numpy_array(biases)
    b.need_grad = False
    set_parameter('biases', b)
    t_wh = t_wh * b
    return t_x, t_y, t_wh
Code example #4
    def ray_march(self, camloc, raydir, t0, t1, N, n_chunks, t_argmin=False):
        # Uniformly sample N points along each ray between t0 and t1, evaluate
        # the SDF in chunks, and return the first sign-change interval
        # (outside -> inside) as the surface hit.
        # Points computation
        BR, _ = t0.shape
        t0 = F.reshape(t0, (BR, 1, 1))
        t1 = F.reshape(t1, (BR, 1, 1))
        camloc = F.reshape(camloc, (BR, 1, 3))
        raydir = F.reshape(raydir, (BR, 1, 3))
        step = (t1 - t0) / (N - 1)
        intervals = F.reshape(F.arange(0, N), (1, N, 1))
        ts = t0 + step * intervals
        points = camloc + ts * raydir
        points = F.reshape(points, (BR * N, 3))

        # SDF computation
        sdf_points = []
        batch = (BR * N) // n_chunks
        for r in range(0, BR * N, batch):
            sdf_points.append(self.sdf(points[r:r + batch, :]))
        sdf_points = F.reshape(F.concatenate(*sdf_points, axis=0), (BR, N, 1)) if n_chunks != 1 else \
            F.reshape(sdf_points[0], (BR, N, 1))

        # t_argmin computation
        if t_argmin:
            idx_min = F.min(sdf_points, axis=1, keepdims=True, only_index=True)
            t_argmin = F.reshape(F.gather(ts, idx_min, axis=1, batch_dims=1),
                                 (BR, 1))
            return t_argmin

        # Intersection check
        points = F.reshape(points, (BR, N, 3))
        sdf_pos = F.greater_equal_scalar(sdf_points[:, :-1, :], 0)
        sdf_neg = F.less_equal_scalar(sdf_points[:, 1:, :], 0)
        mask_hit = sdf_pos * sdf_neg

        # Weight earlier intervals more heavily so argmax returns the first hit
        decreasing_consts = F.reshape(F.arange(N, 1, -1), (1, N - 1, 1))
        vals = mask_hit * decreasing_consts
        idx_max = F.max(vals, axis=1, only_index=True)

        points = points[:, :-1, :]
        x_hit = F.gather(points, idx_max, axis=1, batch_dims=1)
        x_hit = F.reshape(x_hit, (BR, 3))
        mask_hit = F.greater_scalar(F.sum(mask_hit, axis=1), 0)
        mask_hit = F.reshape(mask_hit, (BR, 1))

        x_hit_rm0 = x_hit
        step = F.reshape(step, (BR, 1))
        raydir = F.reshape(raydir, (BR, 3))
        x_hit_rm1 = x_hit_rm0 + step * raydir

        return x_hit_rm0, x_hit_rm1, mask_hit
Code example #5
def bert_embed(input_ids, token_type_ids=None, position_ids=None, vocab_size=30522, embed_dim=768,
               num_pos_ids=512, dropout_prob=0.1, test=True):
    """Construct the embeddings from word, position and token type."""

    batch_size = input_ids.shape[0]
    seq_len = input_ids.shape[1]
    if position_ids is None:
        position_ids = F.arange(0, seq_len)
        position_ids = F.broadcast(F.reshape(
            position_ids, (1,)+position_ids.shape), (batch_size,) + position_ids.shape)
    if token_type_ids is None:
        token_type_ids = F.constant(val=0, shape=(batch_size, seq_len))

    embeddings = PF.embed(input_ids, vocab_size,
                          embed_dim, name='word_embeddings')
    position_embeddings = PF.embed(
        position_ids, num_pos_ids, embed_dim, name='position_embeddings')
    token_type_embeddings = PF.embed(
        token_type_ids, 2, embed_dim, name='token_type_embeddings')

    embeddings += position_embeddings
    embeddings += token_type_embeddings
    embeddings = PF.layer_normalization(
        embeddings, batch_axis=(0, 1), eps=1e-12, name='embed')

    if dropout_prob > 0.0 and not test:
        embeddings = F.dropout(embeddings, dropout_prob)

    return embeddings
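
A minimal usage sketch (batch and sequence sizes are illustrative; the embedding parameters are created on the fly in the current parameter scope):

nn.set_auto_forward(True)
input_ids = nn.Variable.from_numpy_array(np.random.randint(0, 30522, size=(2, 16)))
embeddings = bert_embed(input_ids)
print(embeddings.shape)  # (2, 16, 768)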
Code example #6
def compute_sample_points_for_variable_depth(ray_origins,
                                             ray_directions,
                                             near_plane,
                                             far_plane,
                                             num_samples,
                                             randomize=False):
    # Variable-depth counterpart of compute_sample_points_from_rays (code
    # example #12): near_plane and far_plane are per-ray arrays, so depth
    # steps are linearly interpolated between them and optionally jittered
    # (stratified sampling) when randomize is True.

    depth_steps = F.arange(0, 1 + 1 / num_samples, 1 / (num_samples - 1))
    depth_steps = F.broadcast(depth_steps[None, :],
                              (far_plane.shape[0], depth_steps.shape[0]))
    depth_values = near_plane[:, None] * \
        (1-depth_steps) + far_plane[:, None] * depth_steps

    if randomize:
        depth_vals_mid = 0.5 * (depth_values[:, :-1] + depth_values[:, 1:])
        # get intervals between samples
        upper = F.concatenate(depth_vals_mid, depth_values[:, -1:], axis=-1)
        lower = F.concatenate(depth_values[:, :1], depth_vals_mid, axis=-1)

        noise = F.rand(shape=depth_values.shape)
        depth_values = lower + (upper - lower) * noise

    sample_points = ray_origins[..., None, :] + \
        ray_directions[..., None, :]*depth_values[..., :, None]

    return sample_points, depth_values
Code example #7
File: modules.py Project: saccadic/nnabla-examples
def make_coordinate_grid(spatial_size):
    assert isinstance(spatial_size, tuple)

    h, w = spatial_size
    x = F.arange(0, w)
    y = F.arange(0, h)

    x = (2 * (x / (w - 1)) - 1)
    y = (2 * (y / (h - 1)) - 1)

    yy = F.tile(F.reshape(y, (-1, 1)), (1, w))
    xx = F.tile(F.reshape(x, (1, -1)), (h, 1))

    meshed = F.concatenate(F.reshape(xx, xx.shape + (1, )),
                           F.reshape(yy, xx.shape + (1, )),
                           axis=2)

    return meshed
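
A minimal usage sketch:

nn.set_auto_forward(True)
grid = make_coordinate_grid((4, 6))
print(grid.shape)  # (4, 6, 2); x and y coordinates both normalized to [-1, 1]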
Code example #8
def q_function(obs, num_actions, min_v, max_v, num_bins, scope):
    # Distributional Q-function: predicts a categorical distribution over
    # num_bins fixed value atoms per action (softmax over the last axis) and
    # returns the expected values, the probabilities, and the atom support.
    with nn.parameter_scope(scope):
        out = nature_head(obs)
        out = PF.affine(out, num_actions * num_bins, name='output')
        out = F.reshape(out, (-1, num_actions, num_bins))
    probs = F.exp(out) / F.sum(F.exp(out), axis=2, keepdims=True)
    dists = F.arange(0, num_bins) * (max_v - min_v) / (num_bins - 1) + min_v
    values = F.sum(probs * F.reshape(dists, (1, 1, num_bins)), axis=2)
    return values, probs, F.reshape(dists, (-1, 1))
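
The F.arange line builds the fixed support atoms of the categorical value distribution; a worked sketch with illustrative bounds (min_v=-10, max_v=10, num_bins=51, i.e. C51-style):

nn.set_auto_forward(True)
dists = F.arange(0, 51) * (10.0 - (-10.0)) / (51 - 1) + (-10.0)
print(dists.shape, dists.d[0], dists.d[-1])  # (51,) -10.0 10.0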
Code example #9
def sample_pdf(bins, weights, N_samples, det=False):
    """Sample additional points for training fine network

    Args:
      bins: int. Height in pixels.
      weights: int. Width in pixels.
      N_samples: float. Focal length of pinhole camera.
      det

    Returns:
      samples: array of shape [batch_size, 3]. Depth samples for fine network
    """
    weights += 1e-5  # avoid division by zero / nans in the pdf
    pdf = weights / F.sum(weights, axis=-1, keepdims=True)

    cdf = F.cumsum(pdf, axis=-1)
    cdf = F.concatenate(F.constant(0, cdf[..., :1].shape), cdf, axis=-1)

    if det:
        u = F.arange(0., 1., 1 / N_samples)
        u = F.broadcast(u[None, :], cdf.shape[:-1] + (N_samples, ))
        u = u.data if isinstance(cdf, nn.NdArray) else u
    else:
        u = F.rand(shape=cdf.shape[:-1] + (N_samples, ))

    indices = F.searchsorted(cdf, u, right=True)
    below = F.maximum_scalar(indices - 1, 0)
    above = F.minimum_scalar(indices, cdf.shape[-1] - 1)
    indices_g = F.stack(below, above, axis=below.ndim)
    cdf_g = F.gather(cdf,
                     indices_g,
                     axis=-1,
                     batch_dims=len(indices_g.shape) - 2)
    bins_g = F.gather(bins,
                      indices_g,
                      axis=-1,
                      batch_dims=len(indices_g.shape) - 2)

    denom = (cdf_g[..., 1] - cdf_g[..., 0])
    denom = F.where(F.less_scalar(denom, 1e-5), F.constant(1, denom.shape),
                    denom)
    t = (u - cdf_g[..., 0]) / denom
    samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0])

    return samples
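
A minimal usage sketch of NeRF-style hierarchical sampling (shapes are illustrative; note bins carries one more entry per ray than weights, so its size matches the zero-padded CDF):

nn.set_auto_forward(True)
bins = nn.Variable.from_numpy_array(np.linspace(2.0, 6.0, 9, dtype=np.float32)[None, :])  # (1, 9)
weights = nn.Variable.from_numpy_array(np.random.rand(1, 8).astype(np.float32))           # (1, 8)
samples = sample_pdf(bins, weights, N_samples=16, det=True)
print(samples.shape)  # (1, 16)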
Code example #10
File: functions.py Project: satopirka/nlp-nnabla
def position_encoding(x: nn.Variable) -> nn.Variable:
    batch_size, sequence_length, dim = x.shape

    position = F.reshape(F.arange(0, sequence_length),
                         shape=(sequence_length, 1))
    # -> (sequence_length, 1)
    div_term = F.exp(F.arange(0, dim, 2) * -(np.log(10000.0) / dim))
    # -> (dim//2, )
    sin_val = F.sin(position * F.reshape(div_term, shape=(1, dim // 2)))
    # -> (sequence_length, dim//2)
    cos_val = F.cos(position * F.reshape(div_term, shape=(1, dim // 2)))
    # -> (sequence_length, dim//2)
    ret = []
    for i in range(dim):
        if i % 2 == 0:
            ret.append(sin_val[:, i // 2:i // 2 + 1])
        else:
            ret.append(cos_val[:, i // 2:i // 2 + 1])
    pe = F.reshape(F.concatenate(*ret, axis=1),
                   shape=(1, sequence_length, dim))
    return x + F.broadcast(pe, shape=x.shape)
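
A minimal usage sketch (batch, length, and dimension are illustrative; dim is assumed even):

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.zeros((2, 5, 8), dtype=np.float32))
out = position_encoding(x)
print(out.shape)  # (2, 5, 8); x plus the fixed sinusoidal table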
Code example #11
File: modules.py Project: saccadic/nnabla-examples
def anti_alias_interpolate(input, channels, scale):
    # no trainable parameters exist.
    if scale == 1.0:
        # no interpolation executed
        return F.identity(input)

    sigma = (1 / scale - 1) / 2
    kernel_size = 2 * round(sigma * 4) + 1
    ka = kernel_size // 2
    if kernel_size % 2 == 0:
        kb = ka - 1
    else:
        kb = ka

    kernel_size = [kernel_size, kernel_size]
    sigma = [sigma, sigma]
    kernel = 1

    xa = F.reshape(F.arange(0, kernel_size[0]), (-1, 1))
    ya = F.reshape(F.arange(0, kernel_size[1]), (1, -1))
    meshgrids = (F.tile(xa,
                        (1, kernel_size[1])), F.tile(ya, (kernel_size[0], 1)))

    for size, std, mgrid in zip(kernel_size, sigma, meshgrids):
        mean = (size - 1) / 2
        kernel *= F.exp(-(mgrid - mean)**2 / (2 * std**2))

    kernel = kernel / F.sum(kernel, keepdims=True)
    # Reshape to depthwise convolutional weight
    kernel = F.reshape(kernel, (1, 1) + kernel.shape)
    kernel = F.broadcast(kernel, (channels, 1) + tuple(kernel_size))
    # if using the pre-computed kernel, no need to compute here.

    out = F.pad(input, (ka, kb, ka, kb))
    out = F.convolution(out, weight=kernel, group=channels)
    out = F.interpolate(out, scale=(scale, scale), mode="nearest")

    return out
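
A minimal usage sketch (an NCHW input is assumed; scale=0.5 halves the resolution after Gaussian smoothing):

nn.set_auto_forward(True)
img = nn.Variable.from_numpy_array(np.random.rand(1, 3, 64, 64).astype(np.float32))
out = anti_alias_interpolate(img, channels=3, scale=0.5)
print(out.shape)  # (1, 3, 32, 32)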
Code example #12
def compute_sample_points_from_rays(ray_origins,
                                    ray_directions,
                                    near_plane,
                                    far_plane,
                                    num_samples,
                                    randomize=False):
    """Given a bundle of rays, this function samples points along each ray which is later used in volumetric rendering integration

    Args:
        ray_origins (nn.Variable or nn.NdArray): Shape is (height, width, 3) - Center of each ray from camera to grid point
        ray_directions (nn.Variable or nn.NdArray): Shape is (height, width, 3) - Direction of each projected ray from camera to grid point
        near_plane (float): Position of the near clipping plane
        far_plane (float): Position of the far clipping plane
        num_samples (int): Number of points to sample along each ray
        randomize (bool, optional): Jitter the depth values if True. Defaults to False.

    Returns:
        sample_points: Shape is (height, width, num_samples, 3) - Sampled points along each ray
        depth_values: Shape is (1, num_samples) - Depth values between the near and far plane at which points along each ray are sampled
    """

    if isinstance(near_plane, nn.Variable) or isinstance(
            near_plane, nn.NdArray):
        return compute_sample_points_for_variable_depth(
            ray_origins, ray_directions, near_plane, far_plane, num_samples,
            randomize)

    depth_values = F.arange(near_plane,
                            far_plane + (far_plane - near_plane) / num_samples,
                            (far_plane - near_plane) / (num_samples - 1))
    depth_values = F.reshape(depth_values, (1, ) + depth_values.shape)
    if randomize:
        noise_shape = ray_origins.shape[:-1] + (num_samples, )
        if len(noise_shape) == 3:
            depth_values = depth_values[None, :, :] + F.rand(
                shape=noise_shape) * (far_plane - near_plane) / num_samples
        else:
            depth_values = depth_values + \
                F.rand(shape=noise_shape) * \
                (far_plane-near_plane) / num_samples

    sample_points = ray_origins[..., None, :] + \
        ray_directions[..., None, :]*depth_values[..., :, None]

    return sample_points, depth_values
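
A minimal usage sketch, reusing get_direction_grid from code example #2 (plane distances are illustrative):

nn.set_auto_forward(True)
ray_directions = get_direction_grid(4, 4, focal_length=50.0)  # (4, 4, 3)
ray_origins = nn.Variable.from_numpy_array(np.zeros((4, 4, 3), dtype=np.float32))
points, depths = compute_sample_points_from_rays(
    ray_origins, ray_directions, near_plane=2.0, far_plane=6.0, num_samples=8)
print(points.shape, depths.shape)  # (4, 4, 8, 3) (1, 8)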
Code example #13
def sinusoidal_embedding(timesteps, embedding_dim):
    """
    Sinusoidal embeddings originally proposed in "Attention Is All You Need" (https://arxiv.org/abs/1706.03762).
    """
    assert len(timesteps.shape) == 1

    half_dim = embedding_dim // 2
    denominator = -np.log(10000) / half_dim
    emb = F.exp(denominator * F.arange(start=0, stop=half_dim))
    emb = F.reshape(timesteps, (-1, 1)) * F.reshape(emb, (1, -1))
    emb = F.concatenate(F.cos(emb), F.sin(emb), axis=1)

    if embedding_dim & 1:  # zero pad to be divisible by two
        emb = F.pad(emb, [[0, 0], [0, 1]])

    assert emb.shape == (timesteps.shape[0], embedding_dim)

    return emb
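
A minimal usage sketch (timestep values and dimension are illustrative):

nn.set_auto_forward(True)
timesteps = nn.Variable.from_numpy_array(np.array([0., 1., 10., 100.], dtype=np.float32))
emb = sinusoidal_embedding(timesteps, embedding_dim=16)
print(emb.shape)  # (4, 16)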
Code example #14
    def call(self, x, y):
        # 1-D convolutional discriminator over waveforms; intermediate feature
        # maps are collected in `results`. The final layer emits one map per
        # speaker, and F.gather_nd selects the map matching the label y.
        hp = self.hp
        results = []
        with nn.parameter_scope('layer_0'):
            x = F.pad(x, (0, 0, 7, 7), 'reflect')
            x = wn_conv(x, hp.ndf, (15,))
            x = F.leaky_relu(x, 0.2, inplace=True)
            results.append(x)

        nf = hp.ndf
        stride = hp.downsamp_factor

        for i in range(1, hp.n_layers_D + 1):
            nf_prev = nf
            nf = min(nf * stride, 1024)
            with nn.parameter_scope(f'layer_{i}'):
                x = wn_conv(
                    x, nf, (stride * 10 + 1,),
                    stride=(stride,),
                    pad=(stride * 5,),
                    group=nf_prev // 4,
                )
                x = F.leaky_relu(x, 0.2, inplace=True)
                results.append(x)

        with nn.parameter_scope(f'layer_{hp.n_layers_D + 1}'):
            nf = min(nf * 2, 1024)
            x = wn_conv(x, nf, kernel=(5,), pad=(2,))
            x = F.leaky_relu(x, 0.2, inplace=True)
            results.append(x)

        with nn.parameter_scope(f'layer_{hp.n_layers_D + 2}'):
            x = wn_conv(x, hp.n_speakers, kernel=(3,), pad=(1,))
            if y is not None:
                idx = F.stack(
                    F.arange(0, hp.batch_size),
                    y.reshape((hp.batch_size,))
                )
                x = F.gather_nd(x, idx)
            results.append(x)

        return results
Code example #15
    def forward(self, output, inds, gt, reg_mask, channel_last=False):
        # TODO: refactor loss implementation for channel_last without transposing
        if channel_last:
            output = F.transpose(output, (0, 3, 1, 2))
        b = inds.shape[0]
        c = output.shape[1]
        max_objs = inds.shape[1]
        # count of regression targets (2 values per object) for loss normalization
        num_objs = F.sum(reg_mask) * 2
        f_map_size = output.shape[2] * output.shape[3]
        output = F.reshape(output, (-1, f_map_size))
        inds = F.broadcast(inds.reshape((b, 1, max_objs)), (b, c, max_objs))
        inds = inds.reshape((-1, max_objs))
        # gather the predictions at the object indices via advanced indexing
        y = output[F.broadcast(F.reshape(F.arange(0, b * c), (b * c, 1)),
                               (b * c, max_objs)), inds].reshape(
                                   (b, c, max_objs))
        y = F.transpose(y, (0, 2, 1))
        loss = F.sum(reg_mask * F.absolute_error(y, gt))
        loss = loss / (num_objs + 1e-4)
        return loss
Code example #16
    def __call__(self,
                 batch_size,
                 style_noises,
                 truncation_psi=1.0,
                 return_latent=False,
                 mixing_layer_index=None,
                 dlatent_avg_beta=0.995):

        with nn.parameter_scope(self.global_scope):
            # normalize noise inputs
            for i in range(len(style_noises)):
                style_noises[i] = F.div2(
                    style_noises[i],
                    F.pow_scalar(F.add_scalar(F.mean(style_noises[i]**2.,
                                                     axis=1,
                                                     keepdims=True),
                                              1e-8,
                                              inplace=False),
                                 0.5,
                                 inplace=False))

            # get latent code
            w = [
                mapping_network(style_noises[0],
                                outmaps=self.mapping_network_dim,
                                num_layers=self.mapping_network_num_layers)
            ]
            w += [
                mapping_network(style_noises[1],
                                outmaps=self.mapping_network_dim,
                                num_layers=self.mapping_network_num_layers)
            ]

            dlatent_avg = nn.parameter.get_parameter_or_create(
                name="dlatent_avg", shape=(1, 512))

            # Moving average update of dlatent_avg
            batch_avg = F.mean((w[0] + w[1]) * 0.5, axis=0, keepdims=True)
            update_op = F.assign(
                dlatent_avg, lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
            update_op.name = 'dlatent_avg_update'
            dlatent_avg = F.identity(dlatent_avg) + 0 * update_op

            # truncation trick
            w = [lerp(dlatent_avg, _, truncation_psi) for _ in w]

            # generate output from generator
            constant_bc = nn.parameter.get_parameter_or_create(
                name="G_synthesis/4x4/Const/const",
                shape=(1, 512, 4, 4),
                initializer=np.random.randn(1, 512, 4, 4).astype(np.float32))
            constant_bc = F.broadcast(constant_bc,
                                      (batch_size, ) + constant_bc.shape[1:])

            if mixing_layer_index is None:
                mixing_layer_index_var = F.randint(1,
                                                   len(self.resolutions) * 2,
                                                   (1, ))
            else:
                mixing_layer_index_var = F.constant(val=mixing_layer_index,
                                                    shape=(1, ))
            mixing_switch_var = F.clip_by_value(
                F.arange(0,
                         len(self.resolutions) * 2) - mixing_layer_index_var,
                0, 1)
            mixing_switch_var_re = F.reshape(
                mixing_switch_var, (1, mixing_switch_var.shape[0], 1),
                inplace=False)
            w0 = F.reshape(w[0], (batch_size, 1, w[0].shape[1]), inplace=False)
            w1 = F.reshape(w[1], (batch_size, 1, w[0].shape[1]), inplace=False)
            w_mixed = w0 * mixing_switch_var_re + \
                w1 * (1 - mixing_switch_var_re)

            rgb_output = self.synthesis(w_mixed, constant_bc)

            if return_latent:
                return rgb_output, w_mixed
            else:
                return rgb_output
Code example #17
File: rnn.py Project: Pandinosaurus/nnabla
def pack_padded_sequence(padded_sequence,
                         lengths,
                         batch_first=False,
                         enforce_sorted=True):
    r"""Pack a padded variable-length sequences.

    This method packs padded variable-length sequences.

    :math:`T` is the max length over the lengths of sequences.
    :math:`B` is the batch size equal to the number of sequences.
    :math:`*` is the remaining dimensions including none.

    .. note::
      This function **must** be used in the dynamic computation mode.


    Example:

    .. code-block:: python

      import numpy as np
      import nnabla as nn
      import nnabla.functions as F
      import nnabla.utils.rnn as rnn_utils

      nn.set_auto_forward(True)

      l2v = lambda ldata: nn.Variable.from_numpy_array(np.asarray(ldata))
      a = l2v([1, 1, 1, 1])
      b = l2v([2, 2, 2])
      c = l2v([2, 2, 2])
      d = l2v([3, 3])
      e = l2v([3, 3])
      sequences = [a, b, c, d, e]
      lengths = l2v([seq.shape[0] for seq in sequences])

      padded_sequence = rnn_utils.pad_sequence(sequences)
      print(padded_sequence.d)

      packed_sequence = rnn_utils.pack_padded_sequence(padded_sequence, lengths)
      print(packed_sequence.data.d)
      print(packed_sequence.batch_sizes.d)

    Args: 
      padded_sequence (:obj:`nnabla.Variable`): Padded sequence of (:math:`T \times B \times *`)
                                                or (:math:`B \times T \times *`) shape.
      lengths (:obj:`nnabla.Variable`): Sequence length for each batch element; always resides on the CPU.
      batch_first (bool): `padded_sequence` is of (:math:`T`, :math:`B`, :math:`*`) shape if False,
                          otherwise (:math:`B`, :math:`T`, :math:`*`).
      enforce_sorted (bool): Sequences are sorted by the length in a decreasing order if True. Default is True.

    Returns: 
        :obj:`PackedSequence`
    """
    if enforce_sorted:
        sorted_indices = None
        unsorted_indices = None
    else:
        # TODO: switch to the CUDA context once the sort bug is fixed
        with nn.context_scope(nn.Context()):
            lengths, sorted_indices = F.sort(lengths,
                                             axis=0,
                                             reverse=True,
                                             with_index=True)

        B = sorted_indices.shape[0]
        unsorted_indices = F.scatter_nd(F.arange(0, B),
                                        sorted_indices.reshape((1, B)),
                                        shape=(B, ))
        axis = 0 if batch_first else 1
        padded_sequence = F.gather(padded_sequence, sorted_indices, axis)

    packed_sequence, batch_sizes = F.pack_padded_sequence(
        padded_sequence, lengths, batch_first)
    packed_sequence0 = PackedSequence()
    packed_sequence0.data = packed_sequence
    packed_sequence0.batch_sizes = batch_sizes
    packed_sequence0.sorted_indices = sorted_indices
    packed_sequence0.unsorted_indices = unsorted_indices

    return packed_sequence0