def sin_cos_positional_embedding(x, num_encoding_functions, include_input=True, log_sampling=True):
    """Embed each sampling point with sine and cosine functions of several frequencies.

    Args:
        x (nn.Variable or nn.NdArray): Coordinates of the sampling points. Shape is (N, 3).
        num_encoding_functions (int): Number of frequencies to encode for each grid position.
        include_input (bool, optional): Whether to include the original grid position along
            with the encoding of the position. Defaults to True.
        log_sampling (bool, optional): Sample the frequencies logarithmically (powers of two)
            and not linearly. Defaults to True.

    Returns:
        nn.Variable or nn.NdArray: (N, num_encoding_functions*3*2+3) if include_input is True
        else (N, num_encoding_functions*3*2)
    """
    encoding = [x] if include_input else []

    if log_sampling:
        frequency_increments = F.arange(0, num_encoding_functions)
        frequency_bands = F.pow2(
            F.constant(2, shape=frequency_increments.shape), frequency_increments)
    elif num_encoding_functions == 1:
        # The linear step below would be 0 / 0 for a single frequency;
        # the only band in that case is 2**0.
        frequency_bands = F.constant(1, shape=(1,))
    else:
        max_frequency = 2 ** (num_encoding_functions - 1)
        step = (max_frequency - 1) / (num_encoding_functions - 1.0)
        # The small epsilon keeps max_frequency itself inside the half-open range.
        frequency_bands = F.arange(2 ** 0, max_frequency + 1e-5, step)

    for freq in frequency_bands:
        for func in [F.sin, F.cos]:
            encoding.append(func(x * F.reshape(freq, (1, 1))))
    return F.concatenate(*encoding, axis=x.ndim - 1)
def get_direction_grid(height, width, focal_length, return_ij_2d_grid=False):
    """Form a mesh grid for a given height and width and compute per-pixel ray directions.

    The camera position is assumed to be fixed at the center of the grid (with a
    sufficiently large offset in the z direction). Based on that camera position,
    a ray direction is computed for every point in the grid.

    Args:
        height (int): Height of the image/grid
        width (int): Width of the image/grid
        focal_length (float): Camera focal length (calibrated intrinsics)
        return_ij_2d_grid (bool, optional): If True, skip the direction computation and
            return the two ij-indexed meshgrid outputs of (x, y) stacked on axis 2.
            Defaults to False.

    Returns:
        directions (nn.Variable or nn.NdArray): Shape is (height, width, 3) - direction of
        projected ray for every grid point. When ``return_ij_2d_grid`` is True the stacked
        index grid is returned instead (presumably of shape (width, height, 2) -- confirm
        against ``F.meshgrid``).
    """
    x = F.arange(0, width)
    y = F.arange(0, height)

    if return_ij_2d_grid:
        # Only the index grid is requested, so do not build the direction mesh at all.
        return F.stack(*list(F.meshgrid(x, y, ij_indexing=True)), axis=2)

    xx, yy = F.meshgrid(x, y)
    # x grows to the right, y is flipped so it grows upwards, and z is fixed at -1.
    directions = F.stack((xx - width * 0.5) / focal_length,
                         -(yy - height * 0.5) / focal_length,
                         F.constant(-1, xx.shape),
                         axis=2)
    return directions
def yolov2_image_coordinate(t_xy, t_wh, biases):
    """Convert YOLOv2 outputs from per-cell values to image-relative coordinates.

    Per-cell x/y offsets are added to ``t_xy`` and the result is divided by the
    feature-map width/height. ``t_wh`` is multiplied by ``biases`` divided by (w, h).
    The offset grids and the normalised biases are registered in the current parameter
    scope under the names 'xs', 'ys' and 'biases'.

    Args:
        t_xy (nn.Variable): Its last two axes are taken as (h, w); it is split into
            exactly two parts (x and y) along axis 2.
        t_wh (nn.Variable): Multiplied element-wise by the normalised biases.
        biases (numpy.ndarray): 2-D array.
            NOTE(review): presumably (num_anchors, 2) holding (w, h) per anchor -- confirm.

    Returns:
        tuple: ``(t_x, t_y, t_wh)``; ``t_x`` and ``t_y`` have a singleton axis inserted
        at position 2.
    """
    import numpy as np
    from nnabla.parameter import pop_parameter, set_parameter

    h, w = t_xy.shape[-2:]
    # NOTE(review): pop_parameter removes the entry; when a cached grid is reused below it
    # is not registered again -- confirm whether that is intended.
    xs = pop_parameter('xs')
    ys = pop_parameter('ys')
    # xs has shape (1, 1, 1, w), so the cached grid is valid only if the width matches.
    if xs is None or (w != xs.shape[-1]):
        xs = F.arange(0, w).reshape((1, 1, 1, w))
        xs.need_grad = False
        set_parameter('xs', xs)
    # ys has shape (1, 1, h, 1), so the cached grid is valid only if the height matches.
    if ys is None or (h != ys.shape[-2]):
        ys = F.arange(0, h).reshape((1, 1, h, 1))
        ys.need_grad = False
        set_parameter('ys', ys)

    t_x, t_y = F.split(t_xy, axis=2)
    oshape = list(t_x.shape)
    oshape.insert(2, 1)
    t_x = F.reshape((t_x + xs) / w, oshape)
    t_y = F.reshape((t_y + ys) / h, oshape)

    pop_parameter('biases')
    biases = biases.reshape(1, biases.shape[0], biases.shape[1], 1, 1) / \
        np.array([w, h]).reshape(1, 1, 2, 1, 1)
    b = nn.Variable.from_numpy_array(biases)
    b.need_grad = False
    set_parameter('biases', b)
    t_wh = t_wh * b
    return t_x, t_y, t_wh
def ray_march(self, camloc, raydir, t0, t1, N, n_chunks, t_argmin=False):
    """March along each ray and find where the SDF changes sign.

    N evenly spaced points are sampled between ``t0`` and ``t1`` on every ray and
    ``self.sdf`` is evaluated on them in chunks.

    Args:
        camloc: Ray origins, reshaped to (BR, 1, 3).
        raydir: Ray directions, reshaped to (BR, 1, 3).
        t0: Start distances; must be 2-D with BR rows, reshaped to (BR, 1, 1).
        t1: End distances, reshaped to (BR, 1, 1).
        N (int): Number of samples per ray.
        n_chunks (int): Used to derive the chunk size ``(BR * N) // n_chunks`` for the
            SDF evaluation.
        t_argmin (bool): If True, return only the distance at which the SDF is minimal.

    Returns:
        If ``t_argmin`` is True: the (BR, 1) distance of the minimum SDF sample.
        Otherwise ``(x_hit_rm0, x_hit_rm1, mask_hit)``: the sample just before the first
        sign change (BR, 3), that point advanced by one step along the ray (BR, 3), and a
        (BR, 1) mask telling whether any sign change was found.
    """
    # Sample points along the rays
    num_rays, _ = t0.shape
    t0 = F.reshape(t0, (num_rays, 1, 1))
    t1 = F.reshape(t1, (num_rays, 1, 1))
    camloc = F.reshape(camloc, (num_rays, 1, 3))
    raydir = F.reshape(raydir, (num_rays, 1, 3))
    step = (t1 - t0) / (N - 1)
    sample_ids = F.reshape(F.arange(0, N), (1, N, 1))
    ts = t0 + step * sample_ids
    points = F.reshape(camloc + ts * raydir, (num_rays * N, 3))

    # Evaluate the SDF chunk by chunk
    total = num_rays * N
    chunk_size = total // n_chunks
    sdf_chunks = [self.sdf(points[begin:begin + chunk_size, :])
                  for begin in range(0, total, chunk_size)]
    if n_chunks != 1:
        sdf_points = F.reshape(F.concatenate(*sdf_chunks, axis=0), (num_rays, N, 1))
    else:
        sdf_points = F.reshape(sdf_chunks[0], (num_rays, N, 1))

    # Distance of the minimum SDF value
    if t_argmin:
        idx_min = F.min(sdf_points, axis=1, keepdims=True, only_index=True)
        return F.reshape(F.gather(ts, idx_min, axis=1, batch_dims=1), (num_rays, 1))

    # Sign-change detection between consecutive samples
    points = F.reshape(points, (num_rays, N, 3))
    non_negative = F.greater_equal_scalar(sdf_points[:, :-1, :], 0)
    non_positive = F.less_equal_scalar(sdf_points[:, 1:, :], 0)
    crossing = non_negative * non_positive
    # Decreasing weights N, N-1, ..., 2 make the earliest crossing the maximum.
    weights = F.reshape(F.arange(N, 1, -1), (1, N - 1, 1))
    idx_max = F.max(crossing * weights, axis=1, only_index=True)

    x_hit = F.gather(points[:, :-1, :], idx_max, axis=1, batch_dims=1)
    x_hit_rm0 = F.reshape(x_hit, (num_rays, 3))
    mask_hit = F.reshape(F.greater_scalar(F.sum(crossing, axis=1), 0), (num_rays, 1))

    step = F.reshape(step, (num_rays, 1))
    raydir = F.reshape(raydir, (num_rays, 3))
    x_hit_rm1 = x_hit_rm0 + step * raydir
    return x_hit_rm0, x_hit_rm1, mask_hit
def bert_embed(input_ids, token_type_ids=None, position_ids=None, vocab_size=30522,
               embed_dim=768, num_pos_ids=512, dropout_prob=0.1, test=True):
    """Build the input embeddings as the sum of word, position and token-type embeddings.

    When ``position_ids`` is None, positions 0..seq_len-1 are used for every batch row.
    When ``token_type_ids`` is None, all tokens get type 0. The summed embeddings are
    layer-normalised, and dropout is applied only when ``dropout_prob > 0`` and
    ``test`` is False.
    """
    batch_size, seq_len = input_ids.shape[0], input_ids.shape[1]

    if position_ids is None:
        position_ids = F.arange(0, seq_len)
        row = F.reshape(position_ids, (1,) + position_ids.shape)
        position_ids = F.broadcast(row, (batch_size,) + position_ids.shape)

    if token_type_ids is None:
        token_type_ids = F.constant(val=0, shape=(batch_size, seq_len))

    embeddings = PF.embed(input_ids, vocab_size, embed_dim, name='word_embeddings')
    position_embeddings = PF.embed(
        position_ids, num_pos_ids, embed_dim, name='position_embeddings')
    token_type_embeddings = PF.embed(
        token_type_ids, 2, embed_dim, name='token_type_embeddings')

    for extra in (position_embeddings, token_type_embeddings):
        embeddings += extra

    embeddings = PF.layer_normalization(
        embeddings, batch_axis=(0, 1), eps=1e-12, name='embed')

    apply_dropout = dropout_prob > 0.0 and not test
    if apply_dropout:
        embeddings = F.dropout(embeddings, dropout_prob)
    return embeddings
def compute_sample_points_for_variable_depth(ray_origins, ray_directions, near_plane,
                                             far_plane, num_samples, randomize=False):
    """Sample points along rays whose near/far planes differ per ray.

    Depths are interpolated linearly between ``near_plane`` and ``far_plane`` (both
    indexed per ray along axis 0). With ``randomize`` each depth is replaced by a
    uniform random draw from the interval bounded by the midpoints to its neighbours.

    Returns:
        tuple: ``(sample_points, depth_values)``.
    """
    fractions = F.arange(0, 1 + 1 / num_samples, 1 / (num_samples - 1))
    fractions = F.broadcast(fractions[None, :],
                            (far_plane.shape[0], fractions.shape[0]))
    depth_values = near_plane[:, None] * (1 - fractions) + \
        far_plane[:, None] * fractions

    if randomize:
        midpoints = 0.5 * (depth_values[:, :-1] + depth_values[:, 1:])
        # Interval bounds around every sample
        upper = F.concatenate(midpoints, depth_values[:, -1:], axis=-1)
        lower = F.concatenate(depth_values[:, :1], midpoints, axis=-1)
        noise = F.rand(shape=depth_values.shape)
        depth_values = lower + (upper - lower) * noise

    origins = ray_origins[..., None, :]
    directions = ray_directions[..., None, :]
    sample_points = origins + directions * depth_values[..., :, None]
    return sample_points, depth_values
def make_coordinate_grid(spatial_size):
    """Create an (h, w, 2) grid of (x, y) coordinates, each scaled to [-1, 1].

    Args:
        spatial_size (tuple): ``(h, w)`` of the grid.
    """
    assert isinstance(spatial_size, tuple)
    h, w = spatial_size

    # Map indices 0..size-1 linearly onto [-1, 1].
    x = 2 * (F.arange(0, w) / (w - 1)) - 1
    y = 2 * (F.arange(0, h) / (h - 1)) - 1

    yy = F.tile(F.reshape(y, (-1, 1)), (1, w))
    xx = F.tile(F.reshape(x, (1, -1)), (h, 1))

    xx_last = F.reshape(xx, xx.shape + (1, ))
    yy_last = F.reshape(yy, xx.shape + (1, ))
    return F.concatenate(xx_last, yy_last, axis=2)
def q_function(obs, num_actions, min_v, max_v, num_bins, scope):
    """Distributional Q-function: per-action value distributions over fixed bins.

    Args:
        obs: Observation passed to ``nature_head``.
        num_actions (int): Number of actions.
        min_v (float): Value of the first bin.
        max_v (float): Value of the last bin.
        num_bins (int): Number of bins; they are evenly spaced between ``min_v`` and
            ``max_v``.
        scope (str): Parameter scope under which the network parameters are created.

    Returns:
        tuple: ``(values, probs, dists)`` where ``probs`` has shape
        (-1, num_actions, num_bins) and sums to one over the bins, ``values`` is the
        expectation of the bin values under ``probs`` (the bins axis is summed out),
        and ``dists`` holds the bin values reshaped to (num_bins, 1).
    """
    with nn.parameter_scope(scope):
        out = nature_head(obs)
        out = PF.affine(out, num_actions * num_bins, name='output')
        out = F.reshape(out, (-1, num_actions, num_bins))
    # F.softmax is the same normalisation as exp(out) / sum(exp(out)) but does not
    # overflow to inf / inf = NaN for large logits.
    probs = F.softmax(out, axis=2)
    dists = F.arange(0, num_bins) * (max_v - min_v) / (num_bins - 1) + min_v
    values = F.sum(probs * F.reshape(dists, (1, 1, num_bins)), axis=2)
    return values, probs, F.reshape(dists, (-1, 1))
def sample_pdf(bins, weights, N_samples, det=False):
    """Sample additional points for training the fine network by inverse-transform sampling.

    ``weights`` are normalised into a PDF along the last axis and accumulated into a CDF
    (with a leading zero). ``N_samples`` values ``u`` in [0, 1) are then mapped through
    the inverse CDF, interpolating linearly between the two bracketing ``bins`` entries.

    Args:
        bins (nn.Variable or nn.NdArray): Bin positions, gathered along the last axis with
            the CDF indices. NOTE(review): presumably has one more entry on the last axis
            than ``weights``, matching the zero-prefixed CDF -- confirm against callers.
        weights (nn.Variable or nn.NdArray): Unnormalised weights per bin. Not modified.
        N_samples (int): Number of samples to draw per batch element.
        det (bool, optional): If True use evenly spaced ``u`` values (k / N_samples),
            otherwise draw ``u`` uniformly at random. Defaults to False.

    Returns:
        samples: Shape is ``weights.shape[:-1] + (N_samples,)``. Depth samples for the
        fine network.
    """
    # Out-of-place add: prevents a zero PDF without modifying the caller's weights.
    weights = weights + 1e-5
    pdf = weights / F.sum(weights, axis=-1, keepdims=True)

    cdf = F.cumsum(pdf, axis=-1)
    cdf = F.concatenate(F.constant(0, cdf[..., :1].shape), cdf, axis=-1)

    sample_shape = cdf.shape[:-1] + (N_samples, )
    if det:
        # Integer range scaled afterwards: always exactly N_samples values, whereas a
        # float step of 1 / N_samples can yield one element too many through rounding.
        u = F.arange(0, N_samples) / N_samples
        u = F.broadcast(u[None, :], sample_shape)
        u = u.data if isinstance(cdf, nn.NdArray) else u
    else:
        u = F.rand(shape=sample_shape)

    # Index of the first CDF entry greater than u, then the bracketing pair around it.
    indices = F.searchsorted(cdf, u, right=True)
    below = F.maximum_scalar(indices - 1, 0)
    above = F.minimum_scalar(indices, cdf.shape[-1] - 1)
    indices_g = F.stack(below, above, axis=below.ndim)

    batch_dims = len(indices_g.shape) - 2
    cdf_g = F.gather(cdf, indices_g, axis=-1, batch_dims=batch_dims)
    bins_g = F.gather(bins, indices_g, axis=-1, batch_dims=batch_dims)

    denom = (cdf_g[..., 1] - cdf_g[..., 0])
    # Avoid dividing by a (near-)zero CDF interval.
    denom = F.where(F.less_scalar(denom, 1e-5),
                    F.constant(1, denom.shape), denom)
    t = (u - cdf_g[..., 0]) / denom
    samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0])
    return samples
def position_encoding(x: nn.Variable) -> nn.Variable:
    """Add sinusoidal position encodings to ``x``.

    Even feature indices receive the sine and odd indices the cosine of
    ``position * exp(-2k * log(10000) / dim)`` for the pair index ``k``.

    Args:
        x (nn.Variable): Input of shape (batch_size, sequence_length, dim). ``dim`` must
            be even, otherwise the reshape of the frequency terms fails.

    Returns:
        nn.Variable: ``x`` plus the position encoding, same shape as ``x``.
    """
    _, sequence_length, dim = x.shape

    position = F.reshape(F.arange(0, sequence_length),
                         shape=(sequence_length, 1))  # -> (sequence_length, 1)
    div_term = F.exp(F.arange(0, dim, 2) *
                     -(np.log(10000.0) / dim))  # -> (dim//2, )
    # Shared by the sine and the cosine, so compute it once.
    angles = position * F.reshape(div_term, shape=(1, dim // 2))  # -> (sequence_length, dim//2)

    # Stacking on a new last axis and flattening it interleaves the two:
    # [sin_0, cos_0, sin_1, cos_1, ...] without slicing every column separately.
    interleaved = F.stack(F.sin(angles), F.cos(angles), axis=2)  # -> (sequence_length, dim//2, 2)
    pe = F.reshape(interleaved, shape=(1, sequence_length, dim))

    return x + F.broadcast(pe, shape=x.shape)
def anti_alias_interpolate(input, channels, scale):
    """Blur ``input`` with a Gaussian kernel and resize it by ``scale`` (nearest).

    No trainable parameters are involved. For ``scale == 1.0`` the input is returned
    through ``F.identity`` without any filtering.

    Args:
        input: Tensor to filter; its last two axes are padded and convolved.
        channels (int): Number of channels; used as the convolution group count.
        scale (float): Scale factor passed to the interpolation; also determines the
            Gaussian sigma as ``(1 / scale - 1) / 2``.
    """
    if scale == 1.0:
        # Nothing to interpolate.
        return F.identity(input)

    sigma = (1 / scale - 1) / 2
    kernel_size = 2 * round(sigma * 4) + 1
    ka = kernel_size // 2
    kb = ka - 1 if kernel_size % 2 == 0 else ka

    sizes = [kernel_size, kernel_size]
    sigmas = [sigma, sigma]

    # Coordinate grids along both kernel axes
    rows = F.reshape(F.arange(0, sizes[0]), (-1, 1))
    cols = F.reshape(F.arange(0, sizes[1]), (1, -1))
    meshgrids = (F.tile(rows, (1, sizes[1])),
                 F.tile(cols, (sizes[0], 1)))

    # Separable Gaussian: product of the per-axis factors
    kernel = 1
    for size, std, mgrid in zip(sizes, sigmas, meshgrids):
        mean = (size - 1) / 2
        kernel *= F.exp(-(mgrid - mean)**2 / (2 * std**2))
    kernel = kernel / F.sum(kernel, keepdims=True)

    # Depthwise convolution weight: one copy of the kernel per channel
    kernel = F.reshape(kernel, (1, 1) + kernel.shape)
    kernel = F.broadcast(kernel, (channels, 1) + tuple(sizes))

    padded = F.pad(input, (ka, kb, ka, kb))
    blurred = F.convolution(padded, weight=kernel, group=channels)
    return F.interpolate(blurred, scale=(scale, scale), mode="nearest")
def compute_sample_points_from_rays(ray_origins, ray_directions, near_plane, far_plane,
                                    num_samples, randomize=False):
    """Sample points along each ray of a bundle for volumetric rendering integration.

    Args:
        ray_origins (nn.Variable or nn.NdArray): Shape is (height, width, 3) - Center of
            each ray from camera to grid point
        ray_directions (nn.Variable or nn.NdArray): Shape is (height, width, 3) - Direction
            of each projected ray from camera to grid point
        near_plane (float): Position of the near clipping plane. If it is an nn.Variable
            or nn.NdArray, the work is delegated to
            ``compute_sample_points_for_variable_depth``.
        far_plane (float): Position of the far clipping plane
        num_samples (int): Number of points to sample along each ray
        randomize (bool, optional): Add uniform noise of up to
            ``(far_plane - near_plane) / num_samples`` to every depth. Defaults to False.

    Returns:
        sample_points: Shape is (height, width, num_samples, 3) - Sampled points along
            each ray
        depth_values: Depth values between the near and far plane at which each ray is
            sampled. Reshaped to a leading singleton axis, i.e. (1, num_samples), when
            ``randomize`` is False; with ``randomize`` the noise is added on top of that.
    """
    if isinstance(near_plane, (nn.Variable, nn.NdArray)):
        return compute_sample_points_for_variable_depth(
            ray_origins, ray_directions, near_plane, far_plane, num_samples, randomize)

    depth_values = F.arange(near_plane,
                            far_plane + (far_plane - near_plane) / num_samples,
                            (far_plane - near_plane) / (num_samples - 1))
    depth_values = F.reshape(depth_values, (1, ) + depth_values.shape)

    if randomize:
        noise_shape = ray_origins.shape[:-1] + (num_samples, )
        # A 3-D noise tensor needs one more leading axis on the depths.
        base = depth_values[None, :, :] if len(noise_shape) == 3 else depth_values
        jitter = F.rand(shape=noise_shape) * (far_plane - near_plane) / num_samples
        depth_values = base + jitter

    origins = ray_origins[..., None, :]
    directions = ray_directions[..., None, :]
    sample_points = origins + directions * depth_values[..., :, None]
    return sample_points, depth_values
def sinusoidal_embedding(timesteps, embedding_dim):
    """
    Sinusoidal embeddings originally proposed in "Attention Is All You Need" (https://arxiv.org/abs/1706.03762).

    Args:
        timesteps: 1-D tensor of timesteps, shape (B,).
        embedding_dim (int): Size of the embedding. For an odd value the result is
            zero-padded by one column.

    Returns:
        Tensor of shape (B, embedding_dim): the cosine half followed by the sine half.
    """
    assert len(timesteps.shape) == 1

    half_dim = embedding_dim // 2
    denominator = -np.log(10000) / half_dim
    emb = F.exp(denominator * F.arange(start=0, stop=half_dim))
    emb = F.reshape(timesteps, (-1, 1)) * F.reshape(emb, (1, -1))
    emb = F.concatenate(F.cos(emb), F.sin(emb), axis=1)

    if embedding_dim & 1:  # zero pad to be divisible by two
        # F.pad expects a flat (before, after, ...) sequence, as the other F.pad calls
        # in this file use, not nested per-axis pairs.
        emb = F.pad(emb, (0, 0, 0, 1))

    assert emb.shape == (timesteps.shape[0], embedding_dim)
    return emb
def call(self, x, y):
    """Run the discriminator and collect the output of every layer.

    Args:
        x: Input signal; reflect-padded by 7 on both sides of its last axis first.
        y: Optional indices (presumably speaker ids -- confirm). When given, the entry
            ``[batch_index, y]`` of the last layer's output is picked for every sample.

    Returns:
        list: Activations of all layers in order; the last element is the final output.
    """
    hp = self.hp
    feature_maps = []

    with nn.parameter_scope('layer_0'):
        x = F.pad(x, (0, 0, 7, 7), 'reflect')
        x = wn_conv(x, hp.ndf, (15,))
        x = F.leaky_relu(x, 0.2, inplace=True)
        feature_maps.append(x)

    channels = hp.ndf
    stride = hp.downsamp_factor

    # Strided, grouped down-sampling layers; the width grows by `stride`, capped at 1024.
    for layer in range(1, hp.n_layers_D + 1):
        prev_channels = channels
        channels = min(channels * stride, 1024)
        with nn.parameter_scope(f'layer_{layer}'):
            x = wn_conv(
                x, channels, (stride * 10 + 1,),
                stride=(stride,), pad=(stride * 5,),
                group=prev_channels // 4,
            )
            x = F.leaky_relu(x, 0.2, inplace=True)
            feature_maps.append(x)

    with nn.parameter_scope(f'layer_{hp.n_layers_D + 1}'):
        channels = min(channels * 2, 1024)
        x = wn_conv(x, channels, kernel=(5,), pad=(2,))
        x = F.leaky_relu(x, 0.2, inplace=True)
        feature_maps.append(x)

    with nn.parameter_scope(f'layer_{hp.n_layers_D + 2}'):
        x = wn_conv(x, hp.n_speakers, kernel=(3,), pad=(1,))
        if y is not None:
            batch_ids = F.arange(0, hp.batch_size)
            idx = F.stack(batch_ids, y.reshape((hp.batch_size,)))
            x = F.gather_nd(x, idx)
        feature_maps.append(x)

    return feature_maps
def forward(self, output, inds, gt, reg_mask, channel_last=False):
    """Masked absolute-error loss on the values of ``output`` gathered at ``inds``.

    ``output`` is flattened over its two spatial axes and, for every batch/channel pair,
    the entries at the positions ``inds`` are picked. The absolute error against ``gt``
    is weighted by ``reg_mask``, summed and divided by ``2 * sum(reg_mask) + 1e-4``.

    Args:
        output: Feature map; (B, C, H, W), or (B, H, W, C) when ``channel_last`` is True.
        inds: Flat spatial indices, shape (B, max_objs).
        gt: Targets compared with the gathered values of shape (B, max_objs, C).
        reg_mask: Weights applied to the absolute error.
        channel_last (bool): Whether ``output`` is given channel-last.
    """
    # TODO refactor loss implementation for channel_last without transposing
    if channel_last:
        output = F.transpose(output, (0, 3, 1, 2))

    b, max_objs = inds.shape[0], inds.shape[1]
    c = output.shape[1]

    # Normaliser of the loss: twice the mask sum
    num_objs = F.sum(reg_mask) * 2

    f_map_size = output.shape[2] * output.shape[3]
    output = F.reshape(output, (-1, f_map_size))

    # Repeat the indices for every channel so they line up with the flattened rows.
    inds = F.broadcast(inds.reshape((b, 1, max_objs)), (b, c, max_objs))
    inds = inds.reshape((-1, max_objs))

    rows = F.broadcast(F.reshape(F.arange(0, b * c), (b * c, 1)),
                       (b * c, max_objs))
    y = output[rows, inds].reshape((b, c, max_objs))
    y = F.transpose(y, (0, 2, 1))

    loss = F.sum(reg_mask * F.absolute_error(y, gt))
    return loss / (num_objs + 1e-4)
def __call__(self, batch_size, style_noises, truncation_psi=1.0, return_latent=False,
             mixing_layer_index=None, dlatent_avg_beta=0.995):
    """Generate an image from two style noises with style mixing.

    Args:
        batch_size (int): Batch size used for broadcasting the constant input.
        style_noises (list): At least two noise tensors; the entries are normalised
            in place (the list itself is modified).
        truncation_psi (float): Interpolation weight passed to ``lerp`` between the
            running average latent and each mapped latent.
        return_latent (bool): Also return the mixed latent codes.
        mixing_layer_index (int, optional): Layer index at which the styles switch;
            drawn at random when None.
        dlatent_avg_beta (float): Interpolation weight passed to ``lerp`` for the moving
            average update of ``dlatent_avg``.

    Returns:
        ``rgb_output``, or ``(rgb_output, w_mixed)`` when ``return_latent`` is True.
    """
    with nn.parameter_scope(self.global_scope):
        # Normalize every noise input by the root of its mean square
        for i, noise in enumerate(style_noises):
            mean_sq = F.mean(noise**2., axis=1, keepdims=True)
            rms = F.pow_scalar(
                F.add_scalar(mean_sq, 1e-8, inplace=False), 0.5, inplace=False)
            style_noises[i] = F.div2(noise, rms)

        # Latent codes of the first two noises
        w = [
            mapping_network(style_noises[0],
                            outmaps=self.mapping_network_dim,
                            num_layers=self.mapping_network_num_layers),
            mapping_network(style_noises[1],
                            outmaps=self.mapping_network_dim,
                            num_layers=self.mapping_network_num_layers),
        ]

        dlatent_avg = nn.parameter.get_parameter_or_create(
            name="dlatent_avg", shape=(1, 512))

        # Moving average update of dlatent_avg
        batch_avg = F.mean((w[0] + w[1]) * 0.5, axis=0, keepdims=True)
        update_op = F.assign(
            dlatent_avg, lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
        update_op.name = 'dlatent_avg_update'
        # Adding 0 * update_op ties the update into the graph without changing the value.
        dlatent_avg = F.identity(dlatent_avg) + 0 * update_op

        # Truncation trick
        w = [lerp(dlatent_avg, latent, truncation_psi) for latent in w]

        # Constant input of the synthesis network
        constant_bc = nn.parameter.get_parameter_or_create(
            name="G_synthesis/4x4/Const/const",
            shape=(1, 512, 4, 4),
            initializer=np.random.randn(1, 512, 4, 4).astype(np.float32))
        constant_bc = F.broadcast(
            constant_bc, (batch_size, ) + constant_bc.shape[1:])

        num_style_layers = len(self.resolutions) * 2
        if mixing_layer_index is None:
            mixing_layer_index_var = F.randint(1, num_style_layers, (1, ))
        else:
            mixing_layer_index_var = F.constant(
                val=mixing_layer_index, shape=(1, ))

        # 1 for layers after the mixing index, 0 up to and including it
        mixing_switch_var = F.clip_by_value(
            F.arange(0, num_style_layers) - mixing_layer_index_var, 0, 1)
        mixing_switch_var_re = F.reshape(
            mixing_switch_var, (1, mixing_switch_var.shape[0], 1), inplace=False)

        w0 = F.reshape(w[0], (batch_size, 1, w[0].shape[1]), inplace=False)
        w1 = F.reshape(w[1], (batch_size, 1, w[0].shape[1]), inplace=False)
        w_mixed = w0 * mixing_switch_var_re + \
            w1 * (1 - mixing_switch_var_re)

        rgb_output = self.synthesis(w_mixed, constant_bc)

        if return_latent:
            return rgb_output, w_mixed
        return rgb_output
def pack_padded_sequence(padded_sequence, lengths, batch_first=False, enforce_sorted=True):
    r"""Pack a padded batch of variable-length sequences.

    :math:`T` is the max length over the lengths of sequences.
    :math:`B` is the batch size equal to the length of the sequences.
    :math:`*` is the remaining dimensions including none.

    .. note::
      This function **must** be used in the dynamic computation mode.

    Example:

    .. code-block:: python

      import numpy as np
      import nnabla as nn
      import nnabla.functions as F
      import nnabla.utils.rnn as rnn_utils

      nn.set_auto_forward(True)

      l2v = lambda ldata: nn.Variable.from_numpy_array(np.asarray(ldata))
      a = l2v([1, 1, 1, 1])
      b = l2v([2, 2, 2])
      c = l2v([2, 2, 2])
      d = l2v([3, 3])
      e = l2v([3, 3])
      sequences = [a, b, c, d, e]
      lengths = l2v([seq.shape[0] for seq in sequences])

      padded_sequence = rnn_utils.pad_sequence(sequences)
      print(padded_sequence.d)

      packed_sequence = rnn_utils.pack_padded_sequence(padded_sequence, lengths)
      print(packed_sequence.data.d)
      print(packed_sequence.batch_sizes.d)

    Args:
      padded_sequence (:obj:`nnabla.Variable`): Padded sequence of (:math:`T \times B \times *`)
                                                or (:math:`B \times T \times *`) shape.
      lengths (:obj:`nnabla.Variable`): Sequence length for each batch and always resides in CPU.
      batch_first (bool): `padded_sequence` is of (:math:`T`, :math:`B`, :math:`*`) shape if False,
                          otherwise (:math:`B`, :math:`T`, :math:`*`).
      enforce_sorted (bool): If True, the sequences are expected to be sorted by length in
                             decreasing order already; if False, they are sorted here.
                             Default is True.

    Returns:
        :obj:`PackedSequence`
    """
    sorted_indices = None
    unsorted_indices = None

    if not enforce_sorted:
        # TODO: replace cuda context when the bug fix of the sort
        # (until then the sort runs under a default-constructed context)
        with nn.context_scope(nn.Context()):
            lengths, sorted_indices = F.sort(
                lengths, axis=0, reverse=True, with_index=True)

        batch = sorted_indices.shape[0]
        # Inverse permutation: position i of the sorted order goes back to sorted_indices[i].
        unsorted_indices = F.scatter_nd(
            F.arange(0, batch), sorted_indices.reshape((1, batch)), shape=(batch, ))
        batch_axis = 0 if batch_first else 1
        padded_sequence = F.gather(padded_sequence, sorted_indices, batch_axis)

    data, batch_sizes = F.pack_padded_sequence(
        padded_sequence, lengths, batch_first)

    packed = PackedSequence()
    packed.data = data
    packed.batch_sizes = batch_sizes
    packed.sorted_indices = sorted_indices
    packed.unsorted_indices = unsorted_indices
    return packed