def min_backward(inputs, axes=None, keep_dims=False, with_index=False, only_index=False):
    """Build the gradient of the "Min" reduction with respect to its input.

    Args:
      inputs (list of nn.Variable): ``inputs[0]`` is the incoming gradient,
        ``inputs[1]`` is the input of the forward function.
      axes: Axes that were reduced; ``None`` means every axis of the input.
      keep_dims (bool): If true, the gradient and the forward output are
        broadcast directly; otherwise they are first reshaped so the reduced
        axes have size 1.
      with_index (bool): Selects the ``(dx0, None)`` return form.
      only_index (bool): Selects the ``None`` return form (when ``with_index``
        is false).

    Return:
      ``dx0``, ``(dx0, None)`` or ``None`` depending on the two index flags.
    """
    grad_out, x0 = inputs[0], inputs[1]
    y_min = get_output(x0, "Min")

    if not keep_dims:
        reduced = list(range(x0.ndim)) if axes is None else force_list(axes)
        kept_shape = [1 if i in reduced else s for i, s in enumerate(x0.shape)]
        y_min = F.reshape(y_min, kept_shape, inplace=False)
        grad_out = F.reshape(grad_out, kept_shape, inplace=False)

    y_min = F.broadcast(y_min, x0.shape)
    grad_out = F.broadcast(grad_out, x0.shape)

    # The gradient flows only to the positions equal to the minimum.
    selector = no_grad(F.equal(x0, y_min))
    dx0 = grad_out * selector

    if with_index:
        return dx0, None
    if only_index:
        return None
    return dx0
def top_k_error(target_action, target_action_type, target_action_mask, rule_prob,
                terminal_gen_action_prob, token_prob, copy_prob, k=5):
    """Mean top-k error over rule, generated-token and copied-token actions.

    The per-step error is the rule error where the rule mask is set and the
    token error (over the concatenated generate/copy distribution) where the
    token or copy mask is set. Errors are summed along axis 1, divided by the
    number of masked steps (plus 1e-7) and averaged over the batch.

    ``target_action_mask`` is accepted but not used by this function.
    """
    batch_size, max_action_length, _ = target_action.shape
    _, _, _rule_num = rule_prob.shape  # unpacked only to enforce a rank-3 input
    _, _, token_num = token_prob.shape
    _, _, max_query_length = copy_prob.shape

    steps = (batch_size, max_action_length)
    steps_col = steps + (1,)

    # Each split result has shape (batch_size, max_action_length).
    rule_mask, token_mask, copy_mask = F.split(target_action_type, axis=2)
    target_rule, target_token, target_copy = F.split(target_action, axis=2)
    target_rule = F.reshape(target_rule, steps_col)

    # Gate the token / copy distributions by the generate-vs-copy probability.
    gen_gate, copy_gate = F.split(terminal_gen_action_prob, axis=2)
    gen_gate = F.broadcast(F.reshape(gen_gate, steps_col), steps + (token_num,))
    copy_gate = F.broadcast(F.reshape(copy_gate, steps_col),
                            steps + (max_query_length,))
    token_prob = gen_gate * token_prob
    copy_prob = copy_gate * copy_prob

    # (batch_size, max_action_length, token_num + max_query_length)
    gen_or_copy = F.concatenate(token_prob, copy_prob, axis=2)

    # Copy targets are offset by token_num to index into the concatenation.
    token_label = token_mask * target_token + (copy_mask * (target_copy + token_num))
    token_label = F.reshape(token_label, steps_col)

    rule_err = F.reshape(F.top_n_error(rule_prob, target_rule, axis=2, n=k), steps)
    token_err = F.reshape(F.top_n_error(gen_or_copy, token_label, axis=2, n=k), steps)

    step_err = rule_mask * rule_err + (token_mask + copy_mask) * token_err

    # (batch_size,)
    valid_steps = F.sum(rule_mask, axis=1) + F.sum(token_mask, axis=1) + F.sum(
        copy_mask, axis=1)
    per_sample = F.sum(step_err, axis=1) / (valid_steps + 1e-7)
    return F.mean(per_sample)
def pred(decoder_hidden_states, ctx_vectors, query_embed, query_embed_mask,
         rule_num, token_num, embedding_size, hidden_size):
    """Compute rule, generate-vs-copy, token and copy probabilities.

    decoder_hidden_states: (batch_size, max_action_length, decoder_state_size)
    ctx_vectors: (batch_size, max_action_length, encoder_state_size)

    Returns ``(rule_predict, terminal_gen_action_prob, token_predict,
    copy_prob)``.
    """
    batch_size, max_action_length, _ = decoder_hidden_states.shape
    dc = concatenate(decoder_hidden_states, ctx_vectors, axis=2)

    def _biased_softmax(logits, vocab):
        # Runs inside the caller's parameter scope: fetches (or creates) the
        # "embed/b" bias there, zeroes its data, and adds it before softmax.
        bias = nn.parameter.get_parameter_or_create("embed/b", (vocab, ),
                                                    need_grad=True)
        bias.data.zero()
        bias = F.reshape(bias, (1, 1, vocab), inplace=False)
        bias = F.broadcast(bias, (batch_size, max_action_length, vocab))
        return F.softmax(logits + bias)

    with nn.parameter_scope("decoder_state_rule"):
        # (batch_size, max_action_length, embedding_size)
        rule_state = dense(decoder_hidden_states, embedding_size, base_axis=2)

    with nn.parameter_scope("decoder_state_token"):
        # Input is the decoder state concatenated with the context vector.
        token_state = dense(dc, embedding_size, base_axis=2)

    with nn.parameter_scope("rule_embedding"):
        # (batch_size, max_action_length, rule_num)
        rule_logits = embed_inverse(rule_state, rule_num, embedding_size,
                                    base_axis=2)
        rule_predict = _biased_softmax(rule_logits, rule_num)

    with nn.parameter_scope("gen_action"):
        terminal_gen_action_prob = dense(decoder_hidden_states, 2, base_axis=2,
                                         activation=F.softmax)

    with nn.parameter_scope("token_embedding"):
        # (batch_size, max_action_length, token_num)
        token_logits = embed_inverse(token_state, token_num, embedding_size,
                                     base_axis=2)
        token_predict = _biased_softmax(token_logits, token_num)

    with nn.parameter_scope("copy_token"):
        # (batch_size, max_action_length, max_query_length)
        copy_prob = pointer_net(query_embed, query_embed_mask, dc, hidden_size)

    return rule_predict, terminal_gen_action_prob, token_predict, copy_prob
def f_layer_normalization(inp, beta, gamma):
    """Normalize ``inp`` over every axis except axis 0, then scale and shift.

    The input is centered by its mean and divided by the square root of the
    mean squared deviation (no epsilon is added). ``gamma`` and ``beta`` are
    broadcast to the input shape and applied as scale and offset.
    """
    reduce_axes = list(range(1, inp.ndim))
    centered = F.sub2(inp, F.mean(inp, axis=reduce_axes, keepdims=True))
    variance = F.mean(F.pow_scalar(centered, 2), axis=reduce_axes, keepdims=True)
    normalized = F.div2(centered, F.pow_scalar(variance, 0.5))
    scale = F.broadcast(gamma, normalized.shape)
    shift = F.broadcast(beta, normalized.shape)
    return normalized * scale + shift
def CCBN(h, y, n_classes, decay_rate=0.999, test=False, fix_parameters=False,
         coefs=None):
    """Categorical Conditional Batch Normalization.

    ``h`` is batch-normalized once with a constant unit scale and zero offset,
    then scaled and shifted by class-conditional embeddings of ``y``.

    Args:
      h: Input variable; axis 1 is used as the channel axis.
      y: Class label variable, or a list of label variables whose embeddings
        are combined as a weighted sum.
      n_classes (int): Number of classes for the embeddings.
      decay_rate (float): Passed to ``F.batch_normalization``.
      test (bool): If true, batch statistics are not used.
      fix_parameters (bool): Accepted for interface compatibility; unused here.
      coefs (list of float): Weights paired with the entries of ``y`` when it
        is a list. Defaults to ``[1.0]``.

    Returns:
      ``gamma * h + beta`` with both terms broadcast to the shape of ``h``.
    """
    # A fresh list per call instead of a shared mutable default argument.
    if coefs is None:
        coefs = [1.0]

    # Call the batch normalization once, with a neutral scale and offset.
    shape_stat = [1 for _ in h.shape]
    shape_stat[1] = h.shape[1]
    gamma_tmp = nn.Variable.from_numpy_array(np.ones(shape_stat))
    beta_tmp = nn.Variable.from_numpy_array(np.zeros(shape_stat))
    mean = get_parameter_or_create("mean", shape_stat, ConstantInitializer(0.0), False)
    var = get_parameter_or_create("var", shape_stat, ConstantInitializer(1.0), False)
    h = F.batch_normalization(h, beta_tmp, gamma_tmp, mean, var,
                              decay_rate=decay_rate, batch_stat=not test)

    # Condition the gamma and beta with the class label.
    b, c = h.shape[0:2]
    spatial_ones = [1] * (len(h.shape) - 2)

    def embed_func(labels, initializer):
        if not isinstance(labels, list):
            return embed(labels, n_classes, c, initializer=initializer,
                         sn=False, test=test)
        weighted = [
            coef * embed(label, n_classes, c, initializer=initializer,
                         sn=False, test=test)
            for coef, label in zip(coefs, labels)
        ]
        return reduce(lambda acc, term: acc + term, weighted)

    def conditioned(initializer):
        # Embed the labels, then expand to the shape of h.
        param = embed_func(y, initializer)
        param = F.reshape(param, [b, c] + spatial_ones)
        return F.broadcast(param, h.shape)

    with nn.parameter_scope("gamma"):
        gamma = conditioned(ConstantInitializer(1.0))
    with nn.parameter_scope("beta"):
        beta = conditioned(ConstantInitializer(0.0))
    return gamma * h + beta
def minibatch_stddev(x, eps=1e-8):
    """Append one channel holding the mean minibatch standard deviation.

    The standard deviation is taken over axis 0, averaged to a single value,
    broadcast to ``(b, 1, h, w)`` and concatenated to ``x`` along axis 1.
    ``x`` must be 4-dimensional.
    """
    b, _, h, w = x.shape
    batch_mean = F.mean(x, axis=0, keepdims=True)
    deviation = F.sub2(x, F.broadcast(batch_mean, x.shape))
    variance = F.mean(F.pow_scalar(deviation, 2.), axis=0, keepdims=True)
    std = F.pow_scalar(variance + eps, 0.5)
    std_channel = F.broadcast(F.mean(std, keepdims=True), (b, 1, h, w))
    return F.concatenate(x, std_channel, axis=1)
def sum_backward(inputs, axes=None, keep_dims=False):
    """Build the gradient of a sum reduction with respect to its input.

    Args:
      inputs (list of nn.Variable): ``inputs[0]`` is the incoming gradient,
        ``inputs[1]`` is the input of the forward function.
      axes: Reduced axes; ``None`` means every axis of the input.
      keep_dims (bool): Whether the incoming gradient still has the reduced
        axes (as size 1).

    Return:
      The incoming gradient broadcast to the input shape.
    """
    grad_out, x0 = inputs[0], inputs[1]
    reduced = list(range(x0.ndim)) if axes is None else force_list(axes)
    if keep_dims:
        return F.broadcast(grad_out, x0.shape)
    # Re-insert the reduced axes as size 1 so the gradient can be broadcast.
    kept_shape = [1 if i in reduced else s for i, s in enumerate(x0.shape)]
    return F.broadcast(F.reshape(grad_out, kept_shape), x0.shape)
def pointer_net(query_embed, query_embed_mask, decoder_states, hidden_dim):
    """Masked attention of every decoder step over the query tokens.

    query_embed: (batch_size, max_query_length, E1)
    decoder_states: (batch_size, max_action_length, E2)

    Returns scores of shape (batch_size, max_action_length, max_query_length),
    normalized along the last axis after masking.
    """
    def _linear(x):
        return x

    with nn.parameter_scope("pointer_net"):
        batch_size, max_query_length, _ = query_embed.shape
        _, max_action_length, _ = decoder_states.shape
        pair_shape = (batch_size, max_action_length, max_query_length)

        with nn.parameter_scope("layer1_input"):
            query_trans = dense(query_embed, hidden_dim, base_axis=2,
                                activation=_linear)
        with nn.parameter_scope("layer1_h"):
            state_trans = dense(decoder_states, hidden_dim, base_axis=2,
                                activation=_linear)

        # Expand both projections to every (action step, query token) pair.
        query_trans = F.reshape(
            query_trans, (batch_size, 1, max_query_length, hidden_dim))
        query_trans = F.broadcast(query_trans, pair_shape + (hidden_dim,))
        state_trans = F.reshape(
            state_trans, (batch_size, max_action_length, 1, hidden_dim))
        state_trans = F.broadcast(state_trans, pair_shape + (hidden_dim,))
        hidden = F.tanh(query_trans + state_trans)

        with nn.parameter_scope("layer2"):
            # (batch_size, max_action_length, max_query_length, 1)
            scores = dense(hidden, 1, base_axis=3, activation=_linear)

        scores = F.reshape(scores, pair_shape)
        # Subtract the row maximum before exponentiating.
        scores = F.exp(scores - F.max(scores, axis=2, keepdims=True))
        mask = F.reshape(query_embed_mask, (batch_size, 1, max_query_length))
        mask = F.broadcast(mask, pair_shape)
        scores = scores * mask
        return scores / F.sum(scores, axis=2, keepdims=True)
def backward_impl(self, inputs, outputs, prop_down, accum):
    """Propagate the gradient of ``outputs[0]`` into ``inputs[1]``.

    ``inputs`` is laid out as [inputs_fwd_graph] + [inputs_bwd_graph] or
    [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph].

    Only ``inputs[1]`` receives a gradient here: the gradient of
    ``outputs[0]`` broadcast to the ``shape`` argument of the forward
    function. It is accumulated when ``accum[1]`` is set and copied otherwise.
    Nothing is done when ``prop_down[1]`` is false.
    """
    # Args
    shape = self.forward_func.info.args["shape"]

    if not prop_down[1]:
        return

    g_dy = inputs[1].grad
    g_dx0 = outputs[0].grad

    g_dy_ = F.broadcast(g_dx0, shape)
    if accum[1]:
        g_dy += g_dy_
    else:
        g_dy.copy_from(g_dy_)
def bool_scatter_backward(inputs):
    """Build the gradients of bool_scatter with respect to its inputs.

    Args:
      inputs (list of nn.Variable): ``inputs[0]`` is the incoming gradient,
        ``inputs[2]`` is the mask. When the list has exactly four entries,
        ``inputs[3]`` is treated as an additional forward input.

    Return:
      ``(dx, None)``, or ``(dx, None, do)`` when the fourth input is present.
      The mask itself gets no gradient.
    """
    grad_out, mask = inputs[0], inputs[2]
    extra = inputs[3] if len(inputs) == 4 else None

    dx = F.bool_gather(grad_out, mask)
    if extra is None:
        return dx, None

    # Gradient for the fourth input flows where the mask is zero.
    inv_mask = F.equal_scalar(mask, 0)
    trailing = (1, ) * (grad_out.ndim - inv_mask.ndim)
    inv_mask = F.reshape(inv_mask, inv_mask.shape + trailing)
    inv_mask = no_grad(F.broadcast(inv_mask, grad_out.shape))
    return dx, None, grad_out * inv_mask
def compute_sample_points_for_variable_depth(ray_origins, ray_directions,
                                             near_plane, far_plane,
                                             num_samples, randomize=False):
    """Sample points along rays between per-ray near and far depths.

    Depths are interpolated linearly from ``near_plane`` to ``far_plane``.
    With ``randomize`` set, each depth is redrawn uniformly inside the
    interval bounded by the midpoints to its neighbouring depths.

    Returns ``(sample_points, depth_values)``.
    """
    fractions = F.arange(0, 1 + 1 / num_samples, 1 / (num_samples - 1))
    fractions = F.broadcast(fractions[None, :],
                            (far_plane.shape[0], fractions.shape[0]))
    near = near_plane[:, None]
    far = far_plane[:, None]
    depth_values = near * (1-fractions) + far * fractions

    if randomize:
        midpoints = 0.5 * (depth_values[:, :-1] + depth_values[:, 1:])
        # Interval bounds around every sample.
        upper = F.concatenate(midpoints, depth_values[:, -1:], axis=-1)
        lower = F.concatenate(depth_values[:, :1], midpoints, axis=-1)
        jitter = F.rand(shape=depth_values.shape)
        depth_values = lower + (upper - lower) * jitter

    origins = ray_origins[..., None, :]
    offsets = ray_directions[..., None, :]*depth_values[..., :, None]
    return origins + offsets, depth_values
def compute_context(prev_state):
    """Attention context over ``hs`` given the previous decoder state.

    Relies on ``hs``, ``attention_units``, ``sentence_length_source`` and
    ``time_distributed`` from the enclosing scope. Returns a variable of
    shape (batch_size, attention_units).
    """
    n = prev_state.shape[0]

    # (batch_size, attention_units) -> (batch_size, length, attention_units)
    query = PF.affine(prev_state, attention_units, with_bias=False, name='Waht')
    query = F.reshape(query, (n, 1, attention_units))
    query = F.broadcast(query, (n, sentence_length_source, attention_units))

    # (batch_size, sentence_length_source, 1), softmax over the source axis.
    energy = F.tanh(hs + query)
    energy = time_distributed(PF.affine)(energy, 1, with_bias=False,
                                         name='attention')
    weights = F.softmax(energy, axis=1)

    weighted = F.batch_matmul(hs, weights, transpose_a=True)
    return F.reshape(weighted, (n, attention_units))
def generate(batch_size, style_noises, noise_seed, mix_after, truncation_psi=0.5):
    """
    given style noises, noise seed and truncation value, generate an image.

    Each style noise is divided by its root mean square along axis 1, mapped
    to a latent code, interpolated with the "dlatent_avg" parameter using
    ``truncation_psi`` and passed to the synthesis network together with the
    learned constant broadcast to ``batch_size``.
    """
    def _rms_normalize(noise):
        rms = (F.mean(noise**2., axis=1, keepdims=True) + 1e-8)**0.5
        return F.div2(noise, rms)

    normalized = [_rms_normalize(noise) for noise in style_noises]

    # Latent codes from the mapping network.
    latents = [mapping_network(z, outmaps=512) for z in normalized]

    # Truncation trick.
    dlatent_avg = nn.parameter.get_parameter_or_create(name="dlatent_avg",
                                                       shape=(1, 512))
    latents = [lerp(dlatent_avg, w, truncation_psi) for w in latents]

    constant = nn.parameter.get_parameter_or_create(
        name="G_synthesis/4x4/Const/const", shape=(1, 512, 4, 4))
    constant_bc = F.broadcast(constant, (batch_size, ) + constant.shape[1:])

    return synthesis(latents, constant_bc, noise_seed, mix_after)
def bert_embed(input_ids, token_type_ids=None, position_ids=None,
               vocab_size=30522, embed_dim=768, num_pos_ids=512,
               dropout_prob=0.1, test=True):
    """Construct the embeddings from word, position and token type.

    Missing ``position_ids`` default to ``0..seq_len-1`` for every row and
    missing ``token_type_ids`` default to zeros. The three embeddings are
    summed, layer-normalized and, when not in test mode and
    ``dropout_prob > 0``, passed through dropout.
    """
    batch_size, seq_len = input_ids.shape[0], input_ids.shape[1]

    if position_ids is None:
        positions = F.arange(0, seq_len)
        position_ids = F.broadcast(
            F.reshape(positions, (1,)+positions.shape),
            (batch_size,) + positions.shape)
    if token_type_ids is None:
        token_type_ids = F.constant(val=0, shape=(batch_size, seq_len))

    embeddings = PF.embed(input_ids, vocab_size, embed_dim,
                          name='word_embeddings')
    position_embeddings = PF.embed(
        position_ids, num_pos_ids, embed_dim, name='position_embeddings')
    token_type_embeddings = PF.embed(
        token_type_ids, 2, embed_dim, name='token_type_embeddings')

    for extra in (position_embeddings, token_type_embeddings):
        embeddings += extra

    embeddings = PF.layer_normalization(
        embeddings, batch_axis=(0, 1), eps=1e-12, name='embed')

    if test or not dropout_prob > 0.0:
        return embeddings
    return F.dropout(embeddings, dropout_prob)
def stack(xs, axis=0):
    """Stack the variables in ``xs`` along a new axis.

    A single-element sequence is handled by reshaping that element so it
    gains a size-1 axis at ``axis``; otherwise ``F.stack`` is used.

    Args:
      xs: Sequence of variables. It is not modified.
      axis (int): Position of the new axis.

    Returns:
      The stacked variable.
    """
    if len(xs) != 1:
        return F.stack(*xs, axis=axis)

    only = xs[0]
    stacked_shape = list(only.shape)
    stacked_shape.insert(axis, 1)
    # Same-shape broadcast before reshaping, kept in a local rather than
    # written back into the caller's sequence.
    # NOTE(review): presumably this exists to get a separate variable before
    # the reshape - confirm.
    duplicate = F.broadcast(only, only.shape)
    return F.reshape(duplicate, stacked_shape)
def split(x, axis=0):
    """Split ``x`` along ``axis`` and return the pieces as a list.

    When the axis has size 1 the result is a one-element list containing
    ``x`` reshaped without that axis; otherwise ``F.split`` is used.
    """
    if x.shape[axis] != 1:
        return F.split(x, axis=axis)

    squeezed_shape = list(x.shape)
    squeezed_shape.pop(axis)
    x = F.broadcast(x, x.shape)
    return [F.reshape(x, squeezed_shape)]
def make_broadcast_matrix(_x):
    """Repeat every sample ``batch_size - 1`` times along a new axis 1.

    input
      _x : nn.Variable of shape (batch_size, *, *, *)
    output
      nn.Variable of shape (batch_size, batch_size - 1, *, *, *)
    """
    batch = _x.shape[0]
    tail = list(_x.shape[1:])
    expanded = F.reshape(_x, [batch, 1] + tail)
    return F.broadcast(expanded, [batch, batch - 1] + tail)
def prod_backward(inputs, axes=None, keep_dims=False):
    """Build the gradient of a product reduction with respect to its input.

    Args:
      inputs (list of nn.Variable): ``inputs[0]`` is the incoming gradient,
        ``inputs[1]`` is the input of the forward function.
      axes: Reduced axes; ``None`` means every axis of the input.
      keep_dims (bool): Whether the forward output kept the reduced axes.

    Return:
      ``dy * prod(x0) / x0`` broadcast to the input shape. The product is
      recomputed here and the result is divided by ``x0`` element-wise.
    """
    grad_out, x0 = inputs[0], inputs[1]
    reduced = list(range(x0.ndim)) if axes is None else force_list(axes)
    product = F.prod(x0, reduced, keep_dims)

    if keep_dims:
        return F.broadcast(grad_out * product / x0, x0.shape)

    # Re-insert the reduced axes as size 1 before dividing by the input.
    kept_shape = [1 if i in reduced else s for i, s in enumerate(x0.shape)]
    return F.broadcast(F.reshape(grad_out * product, kept_shape) / x0, x0.shape)
def sample_pdf(bins, weights, N_samples, det=False):
    """Draw samples by inverting the CDF defined by ``weights``.

    Args:
      bins: Values gathered along the last axis with the same indices as the
        CDF and interpolated between.
        NOTE(review): presumably the bin positions matching the CDF entries -
        confirm against the caller.
      weights: Unnormalized weights along the last axis. The argument is not
        modified.
      N_samples (int): Number of samples drawn per row.
      det (bool): If true, use evenly spaced values from
        ``F.arange(0, 1, 1 / N_samples)``; otherwise uniform random values.

    Returns:
      samples: One interpolated value of ``bins`` per drawn ``u``.
    """
    # Out-of-place add: `weights += 1e-5` would write into the caller's array.
    weights = weights + 1e-5
    pdf = weights / F.sum(weights, axis=-1, keepdims=True)
    cdf = F.cumsum(pdf, axis=-1)
    # Prepend a zero so the CDF starts at 0.
    cdf = F.concatenate(F.constant(0, cdf[..., :1].shape), cdf, axis=-1)

    sample_shape = cdf.shape[:-1] + (N_samples, )
    if det:
        u = F.arange(0., 1., 1 / N_samples)
        u = F.broadcast(u[None, :], sample_shape)
        u = u.data if isinstance(cdf, nn.NdArray) else u
    else:
        u = F.rand(shape=sample_shape)

    # Indices of the CDF entries surrounding each u, clamped to valid range.
    indices = F.searchsorted(cdf, u, right=True)
    below = F.maximum_scalar(indices - 1, 0)
    above = F.minimum_scalar(indices, cdf.shape[-1] - 1)
    indices_g = F.stack(below, above, axis=below.ndim)

    batch_dims = len(indices_g.shape) - 2
    cdf_g = F.gather(cdf, indices_g, axis=-1, batch_dims=batch_dims)
    bins_g = F.gather(bins, indices_g, axis=-1, batch_dims=batch_dims)

    # Guard against a (near-)zero CDF interval before dividing.
    denom = (cdf_g[..., 1] - cdf_g[..., 0])
    denom = F.where(F.less_scalar(denom, 1e-5), F.constant(1, denom.shape), denom)
    t = (u - cdf_g[..., 0]) / denom
    samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0])
    return samples
def forward(self, output, inds, gt, reg_mask, channel_last=False):
    """Masked L1 loss between values gathered from ``output`` and ``gt``.

    ``output`` is flattened over its last two axes and, for every batch and
    channel, the entries at ``inds`` are gathered. The absolute error against
    ``gt`` is masked by ``reg_mask``, summed and divided by
    ``2 * sum(reg_mask) + 1e-4``.
    """
    # TODO refactor loss implementation for channel_last without transposing
    if channel_last:
        output = F.transpose(output, (0, 3, 1, 2))

    b, max_objs = inds.shape[0], inds.shape[1]
    c = output.shape[1]
    rows = b * c

    # Normalizer: twice the number of masked entries.
    num_objs = F.sum(reg_mask) * 2

    f_map_size = output.shape[2] * output.shape[3]
    flat_output = F.reshape(output, (-1, f_map_size))

    # Repeat the indices for every channel: (b * c, max_objs).
    col_idx = F.broadcast(inds.reshape((b, 1, max_objs)), (b, c, max_objs))
    col_idx = col_idx.reshape((-1, max_objs))
    row_idx = F.broadcast(F.reshape(F.arange(0, rows), (rows, 1)),
                          (rows, max_objs))

    gathered = flat_output[row_idx, col_idx].reshape((b, c, max_objs))
    gathered = F.transpose(gathered, (0, 2, 1))

    total = F.sum(reg_mask * F.absolute_error(gathered, gt))
    return total / (num_objs + 1e-4)
def create_sparse_motions(source_image, kp_driving, kp_source, num_kp):
    """Build one coordinate grid per keypoint plus an identity grid.

    The identity grid is shifted by the driving keypoints, optionally
    transformed by ``J_source @ inv(J_driving)`` when jacobians are provided,
    and shifted by the source keypoints. The identity grid is prepended along
    axis 1 as the background motion.
    """
    bs, _, h, w = source_image.shape
    kp_shape = (bs, num_kp, 1, 1, 2)

    identity_grid = make_coordinate_grid((h, w))
    identity_grid = F.reshape(identity_grid, (1, 1, h, w, 2))
    coordinate_grid = identity_grid - \
        F.reshape(kp_driving['value'], kp_shape, inplace=False)

    if 'jacobian' in kp_driving:
        source_jacobian = kp_source['jacobian']
        driving_jacobian = kp_driving['jacobian']

        # Invert the driving jacobians as one flat batch of matrices, then
        # restore the original leading dimensions.
        flat_driving = F.reshape(
            driving_jacobian, (-1, ) + driving_jacobian.shape[-2:],
            inplace=False)
        inverse_driving = F.reshape(F.batch_inv(flat_driving),
                                    driving_jacobian.shape)
        jacobian = F.batch_matmul(source_jacobian, inverse_driving)

        # Expand to one matrix per grid location.
        lead, mat = jacobian.shape[:-2], jacobian.shape[-2:]
        jacobian = F.reshape(jacobian, lead + (1, 1) + mat)
        jacobian = F.broadcast(
            jacobian, jacobian.shape[:2] + (h, w) + jacobian.shape[-2:])

        # Apply the matrices to the coordinates treated as column vectors.
        column = F.reshape(coordinate_grid, coordinate_grid.shape + (1, ))
        coordinate_grid = F.batch_matmul(jacobian, column)
        coordinate_grid = F.reshape(coordinate_grid,
                                    coordinate_grid.shape[:-1])

    driving_to_source = coordinate_grid + \
        F.reshape(kp_source['value'], kp_shape, inplace=False)

    # background feature
    identity_grid = F.broadcast(identity_grid, (bs, 1, h, w, 2))
    return F.concatenate(identity_grid, driving_to_source, axis=1)
def __call__(self, gen_rgb_out):
    """Run the discriminator on ``gen_rgb_out`` and return its output.

    Pipeline: initial 1x1 convolution, one residual block per entry of
    ``self.resolutions`` after the first, a minibatch standard-deviation
    feature concatenated along axis 1, a final 3x3 convolution and two
    scaled affine layers.
    """
    out = conv_layer(gen_rgb_out, inmaps=3,
                     outmaps=self.channels[0], kernel_size=1,
                     name_scope='Discriminator/Convinitial')

    inmaps = self.channels[0]
    for level in range(1, len(self.resolutions)):
        outmaps = self.channels[level]
        out = res_block(out, res=out.shape[2], outmaps=outmaps, inmaps=inmaps)
        inmaps = outmaps

    N, C, H, W = out.shape
    group = min(N, self.stddev_group)

    # Minibatch standard deviation over groups of samples.
    grouped = F.reshape(
        out, (group, -1, self.stddev_feat, C // self.stddev_feat, H, W),
        inplace=False)
    group_mean = F.mul_scalar(F.sum(grouped, axis=0, keepdims=True),
                              1.0/grouped.shape[0], inplace=False)
    centered = F.sub2(grouped, F.broadcast(group_mean, grouped.shape))
    stddev = F.mean(F.pow_scalar(centered, 2.), axis=0, keepdims=False)
    stddev = F.pow_scalar(F.add_scalar(stddev, 1e-8, inplace=False),
                          0.5, inplace=False)
    stddev = F.mean(stddev, axis=[2, 3, 4], keepdims=True)
    stddev = F.reshape(stddev, stddev.shape[:2]+stddev.shape[3:],
                       inplace=False)

    out = F.concatenate(out, F.tile(stddev, (group, 1, H, W)), axis=1)

    out = conv_layer(out, inmaps=out.shape[1], outmaps=self.channels[-1],
                     kernel_size=3, name_scope='Discriminator/Convfinal')
    out = F.reshape(out, (N, -1), inplace=False)

    # Linear Layers
    lrmul = 1

    def _scaled_affine(x, out_dim, weight_var):
        # Affine layer whose weight is scaled by 1 / sqrt(input width).
        scale = 1/(x.shape[1]**0.5)*lrmul
        weight, bias = weight_init_fn((x.shape[-1], out_dim),
                                      weight_var=weight_var)
        return F.affine(x, weight*scale, bias*lrmul)

    out = _scaled_affine(out, self.channels[-1],
                         'Discriminator/final_linear_1/affine')
    out = F.mul_scalar(F.leaky_relu(out, alpha=0.2, inplace=False),
                       np.sqrt(2), inplace=False)
    out = _scaled_affine(out, 1, 'Discriminator/final_linear_2/affine')
    return out
def create_deformed_source_image(source_image, sparse_motions, num_kp):
    """Warp the source image once per motion grid.

    The image is repeated ``num_kp + 1`` times, warped with the matching
    grids from ``sparse_motions`` and returned with shape
    (bs, num_kp + 1, -1, h, w).
    """
    bs, c, h, w = source_image.shape
    n_motions = num_kp + 1

    repeated = F.reshape(source_image, (bs, 1, 1, c, h, w))
    repeated = F.broadcast(repeated, (bs, n_motions, 1, c, h, w))
    repeated = F.reshape(repeated, (bs * n_motions, -1, h, w))

    grids = F.reshape(sparse_motions, (bs * n_motions, h, w, -1))
    warped = F.warp_by_grid(repeated, grids, align_corners=True)
    return F.reshape(warped, (bs, n_motions, -1, h, w))
def bool_fill_backward(inputs, value=0):
    """Build the gradients of bool_fill with respect to its inputs.

    Args:
      inputs (list of nn.Variable): ``inputs[0]`` is the incoming gradient,
        ``inputs[2]`` is the mask.
      value: Forward function argument; not needed for the gradient.

    Return:
      ``(dx, None)`` where ``dx`` is the incoming gradient kept only where
      the mask equals zero. The mask itself gets no gradient.
    """
    grad_out, mask = inputs[0], inputs[2]
    keep = F.equal_scalar(mask, 0.0)
    keep = no_grad(F.broadcast(keep, grad_out.shape))
    return grad_out * keep, None
def __call__(self, x, return_encoding_indices=False):
    """Quantize ``x`` to the nearest rows of ``self.embedding_weight``.

    Returns ``(encoding_indices, quantized)`` when
    ``return_encoding_indices`` is set, otherwise
    ``(loss, quantized, perplexity, encodings)``. ``quantized`` is returned
    transposed with axes (0, 3, 1, 2).
    """
    x = F.transpose(x, (0, 2, 3, 1))
    flat = x.reshape((-1, self.embedding_dim))

    # Squared distances: |x|^2 + |e|^2 - 2 x.e
    flat_sq = F.broadcast(F.sum(flat**2, axis=1, keepdims=True),
                          (flat.shape[0], self.num_embedding))
    emb_sq = F.transpose(
        F.sum(self.embedding_weight**2, axis=1, keepdims=True), (1, 0))
    cross = F.affine(flat, F.transpose(self.embedding_weight, (1, 0)))
    distances = flat_sq + emb_sq - 2 * cross

    encoding_indices = F.min(distances, only_index=True, axis=1,
                             keepdims=True)
    encoding_indices.need_grad = False

    row_ids = encoding_indices.reshape(encoding_indices.shape[:-1])
    quantized = F.embed(row_ids, self.embedding_weight).reshape(x.shape)

    if return_encoding_indices:
        return encoding_indices, F.transpose(quantized, (0, 3, 1, 2))

    encodings = F.one_hot(encoding_indices, (self.num_embedding, ))

    e_latent_loss = F.mean(
        F.squared_error(quantized.get_unlinked_variable(need_grad=False), x))
    q_latent_loss = F.mean(
        F.squared_error(quantized, x.get_unlinked_variable(need_grad=False)))
    loss = q_latent_loss + self.commitment_cost * e_latent_loss

    # Forward the quantized value while detaching the (quantized - x) term.
    quantized = x + (quantized - x).get_unlinked_variable(need_grad=False)

    avg_probs = F.mean(encodings, axis=0)
    perplexity = F.exp(-F.sum(avg_probs * F.log(avg_probs + 1.0e-10)))

    return loss, F.transpose(quantized, (0, 3, 1, 2)), perplexity, encodings
def spectral_normalization_for_affine(w, itr=1, eps=1e-12, input_axis=1, test=False):
    """Divide an affine weight by its estimated largest singular value.

    The estimate comes from ``itr`` rounds of the power method, starting
    from the persistent "singular-vector" parameter, which is updated with
    the result. In test mode the stored "W_sn" parameter is returned as is.
    ``input_axis`` is accepted but not used.
    """
    W_sn = get_parameter_or_create("W_sn", w.shape, ConstantInitializer(0), False)
    if test:
        return W_sn

    d0 = np.prod(w.shape[0:-1])  # In
    d1 = np.prod(w.shape[-1])  # Out
    u0 = get_parameter_or_create("singular-vector", [d1], NormalInitializer(), False)
    u = F.reshape(u0, [d1, 1])

    def _unit(vec):
        # Scale vec to (approximately) unit L2 norm.
        norm = F.pow_scalar(F.sum(F.pow_scalar(vec, 2.), keepdims=True) + eps, 0.5)
        return F.div2(vec, norm)

    # Power method
    for _ in range(itr):
        v = F.reshape(_unit(F.affine(w, u)), [1, d0])
        u = F.reshape(_unit(F.affine(v, w)), [d1, 1])

    # Write the iterate back into the stored singular vector.
    u = F.identity(u, outputs=[u0.data])
    u.persistent = True

    # No grad
    u.need_grad = False
    v.need_grad = False

    # Spectral normalization
    sigma = F.affine(F.affine(v, w), u)
    sigma = F.broadcast(F.reshape(sigma, [1] * len(w.shape)), w.shape)
    w_sn = F.div2(w, sigma, outputs=[W_sn.data])
    w_sn.persistent = True
    return w_sn
def position_encoding(x: nn.Variable) -> nn.Variable:
    """Add a sinusoidal position encoding to ``x``.

    ``x`` has shape (batch_size, sequence_length, dim). Even feature indices
    receive the sine and odd indices the cosine of
    ``position * exp(-(2j) * log(10000) / dim)``.
    """
    batch_size, sequence_length, dim = x.shape
    half = dim // 2

    # (sequence_length, 1)
    position = F.reshape(F.arange(0, sequence_length),
                         shape=(sequence_length, 1))
    # (dim // 2, )
    div_term = F.exp(F.arange(0, dim, 2) * -(np.log(10000.0) / dim))

    # (sequence_length, dim // 2) each
    sin_val = F.sin(position * F.reshape(div_term, shape=(1, half)))
    cos_val = F.cos(position * F.reshape(div_term, shape=(1, half)))

    # Interleave the columns: sin, cos, sin, cos, ...
    columns = [
        (sin_val if i % 2 == 0 else cos_val)[:, i // 2:i // 2 + 1]
        for i in range(dim)
    ]
    pe = F.reshape(F.concatenate(*columns, axis=1),
                   shape=(1, sequence_length, dim))
    return x + F.broadcast(pe, shape=x.shape)
def kp2gaussian(kp, spatial_size, kp_variance):
    """Turn keypoints into Gaussian-shaped heatmaps over a coordinate grid.

    For every keypoint in ``kp['value']`` the result is
    ``exp(-0.5 * |grid - keypoint|^2 / kp_variance)`` evaluated at each grid
    location produced by ``make_coordinate_grid(spatial_size)``.
    """
    mean = kp['value']
    grid = make_coordinate_grid(spatial_size)
    grid_shape = grid.shape

    n_lead = len(mean.shape) - 1
    lead_shape = mean.shape[:n_lead]

    # Give the grid the keypoints' leading dimensions.
    grid = F.reshape(grid, (1, ) * n_lead + grid_shape)
    grid = F.broadcast(grid, lead_shape + grid_shape)

    # Preprocess kp shape
    mean = F.reshape(mean, lead_shape + (1, 1, 2), inplace=False)

    diff = grid - mean
    squared_distance = F.sum((diff**2), axis=diff.ndim - 1)
    return F.exp(-0.5 * squared_distance / kp_variance)
def anti_alias_interpolate(input, channels, scale):
    """Blur ``input`` with a Gaussian kernel, then rescale it by ``scale``.

    No trainable parameters are involved. The Gaussian uses
    ``sigma = (1 / scale - 1) / 2`` and an odd kernel size of
    ``2 * round(4 * sigma) + 1``; the kernel is applied as a grouped
    convolution with ``group=channels``, followed by nearest-neighbour
    interpolation.

    Args:
      input: Variable to filter; its last two axes are padded.
      channels (int): Number of channels, used as the convolution group count.
      scale (float): Rescaling factor. ``1.0`` returns ``F.identity(input)``.
    """
    if scale == 1.0:
        # no interpolation executed
        return F.identity(input)

    sigma = (1 / scale - 1) / 2
    kernel_size = 2 * round(sigma * 4) + 1
    # kernel_size is odd by construction, so the same padding goes on both
    # sides (the former even-size branch could never run).
    pad = kernel_size // 2

    kernel_size = [kernel_size, kernel_size]
    sigma = [sigma, sigma]

    xa = F.reshape(F.arange(0, kernel_size[0]), (-1, 1))
    ya = F.reshape(F.arange(0, kernel_size[1]), (1, -1))
    meshgrids = (F.tile(xa, (1, kernel_size[1])),
                 F.tile(ya, (kernel_size[0], 1)))

    # Product of the two 1-D Gaussians, one per spatial axis.
    kernel = 1
    for size, std, mgrid in zip(kernel_size, sigma, meshgrids):
        mean = (size - 1) / 2
        kernel *= F.exp(-(mgrid - mean)**2 / (2 * std**2))

    kernel = kernel / F.sum(kernel, keepdims=True)

    # Reshape to depthwise convolutional weight
    kernel = F.reshape(kernel, (1, 1) + kernel.shape)
    kernel = F.broadcast(kernel, (channels, 1) + tuple(kernel_size))

    out = F.pad(input, (pad, pad, pad, pad))
    out = F.convolution(out, weight=kernel, group=channels)
    out = F.interpolate(out, scale=(scale, scale), mode="nearest")
    return out
def get_ray_bundle(height, width, focal_length, cam2world_mat):
    """Compute the direction and origin of one ray per pixel.

    Args:
        height (int): Height of the image/grid
        width (int): Width of the image/grid
        focal_length (float): Camera focal length (calibrated intrinsics)
        cam2world_mat (nn.Variable or nn.NdArray): Transformation matrix from
            the camera to the world coordinate system. For a 3-dimensional
            input only the first matrix is used.

    Returns:
        ray_directions (nn.Variable or nn.NdArray): Shape is (height, width, 3) - Direction of each projected ray from camera to grid point
        ray_origins (nn.Variable or nn.NdArray): Shape is (height, width, 3) - Center of each ray from camera to grid point
    """
    if cam2world_mat.ndim == 3:
        cam2world_mat = cam2world_mat[0, :, :]

    directions = get_direction_grid(height, width, focal_length)

    # Apply the upper-left 3x3 block of the matrix to every direction.
    rotated = directions[..., None, :] * cam2world_mat[None, None, :3, :3]
    ray_directions = F.sum(rotated, axis=-1)

    # Every ray starts at the first three entries of the last matrix column.
    translation = F.reshape(cam2world_mat[:3, -1], (1, 1, 3))
    ray_origins = F.broadcast(translation, ray_directions.shape)

    return ray_directions, ray_origins
def main(args):
    """Train a CIFAR-10 model with supervised, consistency and mixup losses.

    Four losses are built and back-propagated each iteration: supervised
    (cross entropy + ``er_loss``), supervised mixup, unsupervised
    (``sr_loss`` + ``er_loss``) and unsupervised mixup. The model is
    evaluated once per epoch and the parameters with the lowest error so far
    are saved under ``args.model_save_path``.

    Args:
      args: Parsed command-line arguments providing ``device_id``,
        ``batch_size``, ``batch_size_eval``, ``context``, ``alpha`` and
        ``model_save_path``.

    Changes against the previous revision:
      * the supervised mixup loss is computed on the prediction for the mixed
        input (``pred_m``), matching the unsupervised mixup branch;
      * the mixup partner images and labels are shuffled with one shared
        permutation so each image keeps its own label;
      * ``args.batch_size_eval`` is honoured for the evaluation graph;
      * the home directory is resolved with ``os.path.expanduser`` so an
        unset ``HOME`` no longer breaks path construction.
    """
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = 4000
    n_train_data = 50000
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = 300
    iter_epoch = int(n_train_data / batch_size)
    n_iter = n_epoch * iter_epoch
    extension_module = args.context
    alpha = args.alpha
    m, h, w = 3, 32, 32

    # Supervised Model
    ## ERM
    ctx = extension_context(extension_module, device_id=device_id)
    x_l_0 = nn.Variable((batch_size, m, h, w))
    y_l_0 = nn.Variable((batch_size, 1))
    pred = cnn_model_003(ctx, x_l_0)
    loss_ce = ce_loss(ctx, pred, y_l_0)
    loss_er = er_loss(ctx, pred)
    loss_supervised = loss_ce + loss_er

    ## VRM (mixup)
    x_l_1 = nn.Variable((batch_size, m, h, w))
    y_l_1 = nn.Variable((batch_size, 1))
    coef = nn.Variable()
    coef_b = F.broadcast(coef.reshape([1]*x_l_0.ndim, unlink=True), x_l_0.shape)
    x_l_m = coef_b * x_l_0 + (1 - coef_b) * x_l_1
    coef_b = F.broadcast(coef.reshape([1]*pred.ndim, unlink=True), pred.shape)
    y_l_m = coef_b * F.one_hot(y_l_0, (n_cls, )) \
        + (1-coef_b) * F.one_hot(y_l_1, (n_cls, ))
    x_l_m.need_grad, y_l_m.need_grad = False, False
    pred_m = cnn_model_003(ctx, x_l_m)
    loss_er_m = er_loss(ctx, pred_m)  #todo: need?
    # The soft target belongs to the mixed input, so score pred_m (not pred).
    loss_ce_m = ce_loss_soft(ctx, pred_m, y_l_m)
    loss_supervised_m = loss_ce_m  #+ loss_er_m

    # Semi-Supervised Model
    ## ERM
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u1 = nn.Variable((batch_size, m, h, w))
    pred_x_u0 = cnn_model_003(ctx, x_u0)
    pred_x_u1 = cnn_model_003(ctx, x_u1)
    pred_x_u0.persistent, pred_x_u1.persistent = True, True
    loss_sr = sr_loss(ctx, pred_x_u0, pred_x_u1)
    loss_er0 = er_loss(ctx, pred_x_u0)
    loss_er1 = er_loss(ctx, pred_x_u1)
    loss_unsupervised = loss_sr + loss_er0 + loss_er1

    ## VRM (mixup)
    x_u2 = nn.Variable((batch_size, m, h, w))  # not to overwrite x_u1.d
    coef_u = nn.Variable()
    coef_u_b = F.broadcast(coef_u.reshape([1]*x_u0.ndim, unlink=True), x_u0.shape)
    x_u_m = coef_u_b * x_u0 + (1-coef_u_b) * x_u2
    pred_x_u0_ = nn.Variable(pred_x_u0.shape)  # unlink forward pass but reuse result
    pred_x_u1_ = nn.Variable(pred_x_u1.shape)
    pred_x_u0_.data = pred_x_u0.data
    pred_x_u1_.data = pred_x_u1.data
    coef_u_b = F.broadcast(coef_u.reshape([1]*pred_x_u0.ndim, unlink=True), pred_x_u0.shape)
    y_u_m = coef_u_b * pred_x_u0_ + (1-coef_u_b) * pred_x_u1_
    x_u_m.need_grad, y_u_m.need_grad = False, False
    pred_x_u_m = cnn_model_003(ctx, x_u_m)
    loss_er_u_m = er_loss(ctx, pred_x_u_m)  #todo: need?
    loss_ce_u_m = ce_loss_soft(ctx, pred_x_u_m, y_u_m)
    loss_unsupervised_m = loss_ce_u_m  #+ loss_er_u_m

    # Evaluation Model (sized by args.batch_size_eval)
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval = cnn_model_003(ctx, x_eval, test=True)

    # Solver
    with nn.context_scope(ctx):
        solver = S.Adam(alpha=learning_rate)
        solver.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset
    home = os.path.expanduser("~")
    fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz")
    u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")

    # data reader
    data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path,
                                    batch_size=batch_size,
                                    n_cls=n_cls,
                                    da=True,
                                    shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    ve_best = 1.
    save_path_prev = ""
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()

        x_l_0.d, y_l_0.d = x_l0_data, y_l_data
        x_u0.d, x_u1.d = x_u0_data, x_u1_data

        # Train
        ## forward (supervised and its mixup)
        loss_supervised.forward(clear_no_need_grad=True)
        coef.d = np.random.beta(alpha, alpha)
        # One shared permutation keeps every partner image with its label.
        perm = np.random.permutation(len(x_l0_data))
        x_l_1.d = np.asarray(x_l0_data)[perm]
        y_l_1.d = np.asarray(y_l_data)[perm]
        loss_supervised_m.forward(clear_no_need_grad=True)

        ## forward (unsupervised and its mixup)
        loss_unsupervised.forward(clear_no_need_grad=True)
        coef_u.d = np.random.beta(alpha, alpha)
        x_u2.d = np.random.permutation(x_u1_data)
        loss_unsupervised_m.forward(clear_no_need_grad=True)

        ## backward
        solver.zero_grad()
        loss_supervised.backward(clear_buffer=False)
        loss_supervised_m.backward(clear_buffer=False)
        loss_unsupervised.backward(clear_buffer=False)
        loss_unsupervised_m.backward(clear_buffer=True)
        solver.update()

        # Evaluate once per epoch
        if int((i+1) % iter_epoch) != 0:
            continue

        # Get data and set it to the variables
        x_data, y_data = data_reader.get_test_batch()

        # Evaluation loop
        ve = 0.
        iter_val = 0
        for k in range(0, len(x_data), batch_size_eval):
            x_eval.d = get_test_data(x_data, k, batch_size_eval)
            label = get_test_data(y_data, k, batch_size_eval)
            pred_eval.forward(clear_buffer=True)
            ve += categorical_error(pred_eval.d, label)
            iter_val += 1
        ve /= iter_val
        msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
            epoch,
            time.time() - st,
            (1. - ve) * 100)
        print(msg)

        # Keep only the best parameters seen so far.
        if ve < ve_best:
            if not os.path.exists(args.model_save_path):
                os.makedirs(args.model_save_path)
            if save_path_prev != "":
                os.remove(save_path_prev)
            save_path = os.path.join(
                args.model_save_path, 'params_%06d.h5' % epoch)
            nn.save_parameters(save_path)
            save_path_prev = save_path
            ve_best = ve

        st = time.time()
        epoch += 1