def gan_model(label_ph, pred, conf):
    """
    Define GAN model with adversarial and discriminator losses and their orchestration
    """
    # Define Discriminator
    # output of D for real images
    _, d_real_logits, d_real_fm_list = discriminator_fm(
        label_ph, conf.scaling_factor, scope="Discriminator_FM")
    # output of D for fake images
    _, d_fake_logits, d_fake_fm_list = discriminator_fm(
        pred, conf.scaling_factor, scope="Discriminator_FM")

    # Define Detail Discriminator
    # compute the detail layers for the discriminator (reuse)
    base_gt = guided_filter(label_ph, 5, 0.01)
    detail_gt = F.div2(label_ph, base_gt + 1e-15)
    base_pred = guided_filter(pred, 5, 0.01)
    detail_pred = F.div2(pred, base_pred + 1e-15)

    # detail layer output of D for real images
    _, d_detail_real_logits, d_detail_real_fm_list = \
        discriminator_fm(detail_gt, conf.scaling_factor,
                         scope="Discriminator_Detail")
    # detail layer output of D for fake images
    _, d_detail_fake_logits, d_detail_fake_fm_list = \
        discriminator_fm(detail_pred, conf.scaling_factor,
                         scope="Discriminator_Detail")

    # Loss
    # original GAN (hinge GAN)
    d_adv_loss = discriminator_loss(d_real_logits, d_fake_logits)
    d_adv_loss.persistent = True
    g_adv_loss = generator_loss(d_real_logits, d_fake_logits)
    g_adv_loss.persistent = True

    # detail GAN (hinge GAN)
    d_detail_adv_loss = conf.detail_lambda * \
        discriminator_loss(d_detail_real_logits, d_detail_fake_logits)
    d_detail_adv_loss.persistent = True
    g_detail_adv_loss = conf.detail_lambda * \
        generator_loss(d_detail_real_logits, d_detail_fake_logits)
    g_detail_adv_loss.persistent = True

    # feature matching (FM) loss
    fm_loss = feature_matching_loss(d_real_fm_list, d_fake_fm_list, 4)
    fm_loss.persistent = True
    fm_detail_loss = conf.detail_lambda * feature_matching_loss(
        d_detail_real_fm_list, d_detail_fake_fm_list, 4)
    fm_detail_loss.persistent = True

    jsigan = namedtuple('jsigan', [
        'd_adv_loss', 'd_detail_adv_loss', 'g_adv_loss', 'g_detail_adv_loss',
        'fm_loss', 'fm_detail_loss'
    ])
    return jsigan(d_adv_loss, d_detail_adv_loss, g_adv_loss, g_detail_adv_loss,
                  fm_loss, fm_detail_loss)
def generate(batch_size, style_noises, noise_seed, mix_after, truncation_psi=0.5):
    """
    Given style noises, a noise seed and a truncation value, generate an image.
    """
    # normalize noise inputs
    style_noises_normalized = []
    for style_noise in style_noises:
        noise_std = (F.mean(style_noise ** 2., axis=1, keepdims=True) + 1e-8) ** 0.5
        style_noise_normalized = F.div2(style_noise, noise_std)
        style_noises_normalized.append(style_noise_normalized)

    # get latent code
    w = [mapping_network(_, outmaps=512) for _ in style_noises_normalized]

    # truncation trick
    dlatent_avg = nn.parameter.get_parameter_or_create(name="dlatent_avg",
                                                       shape=(1, 512))
    w = [lerp(dlatent_avg, _, truncation_psi) for _ in w]

    constant = nn.parameter.get_parameter_or_create(
        name="G_synthesis/4x4/Const/const", shape=(1, 512, 4, 4))
    constant_bc = F.broadcast(constant, (batch_size,) + constant.shape[1:])

    rgb_output = synthesis(w, constant_bc, noise_seed, mix_after)
    return rgb_output
def normalize(x, resize_size):
    mean_std = MeanStd(resize_size, x.shape[0])
    ch_mean = mean_std.ch_mean
    ch_std = mean_std.ch_std
    x = F.sub2(x, ch_mean)
    x = F.div2(x, ch_std)
    return x
def f_layer_normalization(inp, beta, gamma):
    use_axis = [x for x in range(1, inp.ndim)]
    inp = F.sub2(inp, F.mean(inp, axis=use_axis, keepdims=True))
    inp = F.div2(
        inp,
        F.pow_scalar(
            F.mean(F.pow_scalar(inp, 2), axis=use_axis, keepdims=True), 0.5))
    return inp * F.broadcast(gamma, inp.shape) + F.broadcast(beta, inp.shape)
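# Hedged usage sketch for f_layer_normalization (nnabla assumed installed; the
# tensor shapes and the gamma/beta initialization below are illustrative, not
# from the original repository).
import numpy as np
import nnabla as nn

x = nn.Variable((4, 8, 16, 16))
x.d = np.random.randn(*x.shape)
gamma = nn.Variable((1, 8, 1, 1))
gamma.d = 1.0
beta = nn.Variable((1, 8, 1, 1))
beta.d = 0.0

y = f_layer_normalization(x, beta, gamma)
y.forward()
# per-sample mean should be close to 0 and std close to 1 over non-batch axes
print(y.d[0].mean(), y.d[0].std())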
def model(img, sf):
    """
    Define JSInet model
    """
    with nn.parameter_scope('Network'):
        with nn.parameter_scope('local_contrast_enhancement'):
            ## ================= Local Contrast Enhancement Subnet ============================ ##
            ch = 64
            b = guided_filter(img, 5, 0.01)
            n1 = conv_2d(b, ch, kernel=(3, 3), name='conv/0')
            for i in range(4):
                n1 = res_block(n1, ch, 'res_block/%d' % i)
            n1 = F.relu(n1, inplace=True)
            local_filter_2d = conv_2d(
                n1, (9 ** 2) * (sf ** 2), kernel=(3, 3),
                name='conv_k')  # [B, H, W, (9x9)*(sfxsf)]
            # dynamic 2D upsampling with 2D local filters
            pred_C = dyn_2d_up_operation(b, local_filter_2d, (9, 9), sf)
            # local contrast mask
            pred_C = 2 * F.sigmoid(pred_C)

        ## ================= Detail Restoration Subnet ============================ ##
        ch = 64
        d = F.div2(img, b + 1e-15)
        with nn.parameter_scope('detail_restoration'):
            n3 = conv_2d(d, ch, kernel=(3, 3), name='conv/0')
            for i in range(4):
                n3 = res_block(n3, ch, 'res_block/%d' % i)
                if i == 0:
                    d_feature = n3
            n3 = F.relu(n3, inplace=True)
            # separable 1D filters
            dr_k_h = conv_2d(n3, 41 * sf ** 2, kernel=(3, 3), name='conv_k_h')
            dr_k_v = conv_2d(n3, 41 * sf ** 2, kernel=(3, 3), name='conv_k_v')
            # dynamic separable upsampling with separable 1D local filters
            pred_D = dyn_sep_up_operation(d, dr_k_v, dr_k_h, 41, sf)

        ## ================= Image Reconstruction Subnet ============================ ##
        with nn.parameter_scope('image_reconstruction'):
            n4 = conv_2d(img, ch, kernel=(3, 3), name='conv/0')
            for i in range(4):
                if i == 1:
                    n4 = F.concatenate(n4, d_feature, axis=3)
                    n4 = res_block_concat(n4, ch, 'res_block/%d' % i)
                else:
                    n4 = res_block(n4, ch, 'res_block/%d' % i)
            n4 = F.relu(n4, inplace=True)
            n4 = F.relu(conv_2d(n4, ch * sf * sf, kernel=(3, 3), name='conv/1'),
                        inplace=True)
            # (1,100,170,1024) -> (1,100,170,4,4,64) -> (1,100,4,170,4,64)
            # pixel shuffle
            n4 = depth_to_space(n4, sf)
            pred_I = conv_2d(n4, 3, kernel=(3, 3), name='conv/2')

    pred = F.add2(pred_I, pred_D, inplace=True) * pred_C
    jsinet = namedtuple('jsinet', ['pred'])
    return jsinet(pred)
def spectral_normalization_for_affine(w, itr=1, eps=1e-12, input_axis=1, test=False):
    W_sn = get_parameter_or_create("W_sn", w.shape, ConstantInitializer(0), False)
    if test:
        return W_sn

    d0 = np.prod(w.shape[0:-1])  # In
    d1 = np.prod(w.shape[-1])    # Out
    u0 = get_parameter_or_create("singular-vector", [d1], NormalInitializer(), False)
    u = F.reshape(u0, [d1, 1])
    # Power method
    for _ in range(itr):
        # v
        v = F.affine(w, u)
        v = F.div2(
            v,
            F.pow_scalar(F.sum(F.pow_scalar(v, 2.), keepdims=True) + eps, 0.5))
        v = F.reshape(v, [1, d0])
        # u
        u = F.affine(v, w)
        u = F.div2(
            u,
            F.pow_scalar(F.sum(F.pow_scalar(u, 2.), keepdims=True) + eps, 0.5))
        u = F.reshape(u, [d1, 1])
    # Iterate
    u = F.identity(u, outputs=[u0.data])
    u.persistent = True
    # No grad
    u.need_grad = False
    v.need_grad = False
    # Spectral normalization
    wv = F.affine(v, w)
    sigma = F.affine(wv, u)
    sigma = F.broadcast(F.reshape(sigma, [1 for _ in range(len(w.shape))]),
                        w.shape)
    w_sn = F.div2(w, sigma, outputs=[W_sn.data])
    w_sn.persistent = True
    return w_sn
def spectral_normalization_for_conv(w, itr=1, eps=1e-12, test=False):
    w_shape = w.shape
    W_sn = get_parameter_or_create("W_sn", w_shape, ConstantInitializer(0), False)
    if test:
        return W_sn

    d0 = w.shape[0]            # Out
    d1 = np.prod(w.shape[1:])  # In
    w = F.reshape(w, [d0, d1], inplace=False)
    u0 = get_parameter_or_create("singular-vector", [d0], NormalInitializer(), False)
    u = F.reshape(u0, [1, d0])
    # Power method
    for _ in range(itr):
        # v
        v = F.affine(u, w)
        v = F.div2(
            v,
            F.pow_scalar(F.sum(F.pow_scalar(v, 2.), keepdims=True) + eps, 0.5))
        v = F.reshape(v, [d1, 1])
        # u
        u = F.affine(w, v)
        u = F.div2(
            u,
            F.pow_scalar(F.sum(F.pow_scalar(u, 2.), keepdims=True) + eps, 0.5))
        u = F.reshape(u, [1, d0])
    # Iterate
    u = F.identity(u, outputs=[u0.data])
    u.persistent = True
    # No grad
    u.need_grad = False
    v.need_grad = False
    # Spectral normalization
    wv = F.affine(w, v)
    sigma = F.affine(u, wv)
    w_sn = F.div2(w, sigma)
    w_sn = F.reshape(w_sn, w_shape)
    w_sn = F.identity(w_sn, outputs=[W_sn.data])
    w_sn.persistent = True
    return w_sn
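# Hedged verification sketch for spectral_normalization_for_conv: after the
# power iterations, the largest singular value of the returned weight should be
# close to 1. Assumes the imports used by the function above (nnabla as nn,
# nnabla.functions as F, numpy as np, parameter/initializer helpers) are in
# scope; the shapes and iteration count below are illustrative.
import numpy as np
import nnabla as nn

nn.clear_parameters()
w = nn.Variable((64, 3, 3, 3), need_grad=True)
w.d = np.random.randn(*w.shape)
with nn.parameter_scope("conv1"):
    w_sn = spectral_normalization_for_conv(w, itr=5)
w_sn.forward()
singular_values = np.linalg.svd(w_sn.d.reshape(64, -1), compute_uv=False)
print(singular_values[0])  # expected to be roughly 1.0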
def drop_path(x):
    """
    The same implementation as the PyTorch version.
    The drop rate is read from the parameter "drop_rate"; if the random value
    drawn from a uniform distribution is less than the drop rate, the
    corresponding sample becomes 0.
    """
    drop_prob = nn.parameter.get_parameter_or_create("drop_rate",
                                                     shape=(1, 1, 1, 1),
                                                     need_grad=False)
    mask = F.rand(shape=(x.shape[0], 1, 1, 1))
    mask = F.greater_equal(mask, drop_prob)
    x = F.div2(x, 1 - drop_prob)
    x = F.mul2(x, mask)
    return x
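# Hedged usage sketch for drop_path (assumes nnabla is available and that the
# "drop_rate" parameter is created before building the graph; shapes and the
# 0.2 rate are illustrative, not from the original repository).
import numpy as np
import nnabla as nn

nn.clear_parameters()
drop_rate = nn.parameter.get_parameter_or_create(
    "drop_rate", shape=(1, 1, 1, 1), need_grad=False)
drop_rate.d = 0.2  # drop 20% of the samples in the batch

x = nn.Variable((8, 16, 32, 32))
x.d = np.random.randn(*x.shape)
y = drop_path(x)  # kept samples are rescaled by 1 / (1 - drop_rate)
y.forward()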
def __rdiv__(self, other):
    """
    Element-wise division.
    Part of the implementation of the division operator.

    Args:
        other (float or ~nnabla.Variable): Internally calling
            :func:`~nnabla.functions.div2` or
            :func:`~nnabla.functions.r_div_scalar` according to the type.

    Returns: :class:`nnabla.Variable`
    """
    import nnabla.functions as F
    if isinstance(other, Variable):
        return F.div2(other, self)
    return F.r_div_scalar(self, other)
def build_model(train=True, get_embeddings=False):
    x = nn.Variable((batch_size, sentence_length, ptb_dataset.word_length))
    mask = expand_dims(F.sign(x), axis=-1)
    t = nn.Variable((batch_size, sentence_length))

    with nn.parameter_scope('char_embedding'):
        h = PF.embed(x, char_vocab_size, char_embedding_dim) * mask
    h = F.transpose(h, (0, 3, 1, 2))
    output = []
    for f, f_size in zip(filters, filster_sizes):
        _h = PF.convolution(h, f, kernel=(1, f_size), pad=(0, f_size//2),
                            name='conv_{}'.format(f_size))
        _h = F.max_pooling(_h, kernel=(1, ptb_dataset.word_length))
        output.append(_h)
    h = F.concatenate(*output, axis=1)
    h = F.transpose(h, (0, 2, 1, 3))

    mask = get_mask(F.sum(x, axis=2))
    embeddings = F.reshape(h, (batch_size, sentence_length, sum(filters))) * mask

    if get_embeddings:
        return x, embeddings

    with nn.parameter_scope('highway1'):
        h = time_distributed(highway)(embeddings)
    with nn.parameter_scope('highway2'):
        h = time_distributed(highway)(h)
    with nn.parameter_scope('lstm1'):
        h = lstm(h, lstm_size, mask=mask, return_sequences=True)
    with nn.parameter_scope('lstm2'):
        h = lstm(h, lstm_size, mask=mask, return_sequences=True)
    with nn.parameter_scope('hidden'):
        h = F.relu(time_distributed(PF.affine)(h, lstm_size))
    if train:
        h = F.dropout(h, p=dropout_ratio)
    with nn.parameter_scope('output'):
        y = time_distributed(PF.affine)(h, word_vocab_size)

    mask = F.sign(t)  # do not predict 'pad'.
    entropy = time_distributed_softmax_cross_entropy(y, expand_dims(t, axis=-1)) * mask
    count = F.sum(mask, axis=1)
    loss = F.mean(F.div2(F.sum(entropy, axis=1), count))
    return x, t, loss
def __div__(self, other):
    """
    Element-wise division.
    Implements the division operator expression ``A / B``, together with
    :func:`~nnabla.variable.__rdiv__` .
    When a scalar is specified for ``other``, this function performs an
    element-wise operation for all elements in ``self``.

    Args:
        other (float or ~nnabla.Variable): Internally calling
            :func:`~nnabla.functions.div2` or
            :func:`~nnabla.functions.mul_scalar` according to the type.

    Returns: :class:`nnabla.Variable`
    """
    import nnabla.functions as F
    if isinstance(other, Variable):
        return F.div2(self, other)
    return F.mul_scalar(self, 1. / other)
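# Hedged sketch of how the division operators above dispatch; `a` and `b` are
# illustrative nnabla Variables (nnabla assumed installed).
import nnabla as nn
import nnabla.functions as F

a = nn.Variable((2, 3))
b = nn.Variable((2, 3))

y1 = a / b     # Variable / Variable -> F.div2(a, b)
y2 = a / 2.0   # Variable / scalar   -> F.mul_scalar(a, 1. / 2.0)
y3 = 2.0 / a   # scalar / Variable   -> F.r_div_scalar(a, 2.0)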
def __call__(self, x):
    with nn.parameter_scope("VGG19"):
        self.x = F.div2(F.sub2(x, self.mean), self.std)
        return vgg_prediction(self.x, finetune=True)
def conv_block(input, w, noise=None, res=4, outmaps=512, inmaps=512,
               kernel_size=3, pad_size=1, demodulate=True, namescope="Conv",
               up=False, act=F.leaky_relu):
    """
    Single convolution block used in each resolution.
    """
    batch_size = input.shape[0]
    with nn.parameter_scope(f"G_synthesis/{res}x{res}/{namescope}"):
        runtime_coef = 1. / np.sqrt(512)
        W, bias = weight_init_fn(shape=(w.shape[1], inmaps))
        s = F.affine(w, W * runtime_coef, bias) + 1.0
    runtime_coef_for_conv = 1 / \
        np.sqrt(np.prod([inmaps, kernel_size, kernel_size]))

    if up:
        conv_weight = nn.parameter.get_parameter_or_create(
            name=f"G_synthesis/{res}x{res}/{namescope}/conv/W",
            shape=(inmaps, outmaps, kernel_size, kernel_size))
    else:
        conv_weight = nn.parameter.get_parameter_or_create(
            name=f"G_synthesis/{res}x{res}/{namescope}/conv/W",
            shape=(outmaps, inmaps, kernel_size, kernel_size))
    conv_weight = conv_weight * runtime_coef_for_conv

    if up:
        scale = F.reshape(s, (s.shape[0], s.shape[1], 1, 1, 1), inplace=True)
    else:
        scale = F.reshape(s, (s.shape[0], 1, s.shape[1], 1, 1), inplace=True)

    mod_w = F.mul2(
        F.reshape(conv_weight, (1, ) + conv_weight.shape, inplace=True), scale)

    if demodulate:
        if up:
            denom_w = F.pow_scalar(
                F.sum(F.pow_scalar(mod_w, 2.), axis=[1, 3, 4],
                      keepdims=True) + 1e-8, 0.5)
        else:
            denom_w = F.pow_scalar(
                F.sum(F.pow_scalar(mod_w, 2.), axis=[2, 3, 4],
                      keepdims=True) + 1e-8, 0.5)
        demod_w = F.div2(mod_w, denom_w)
    else:
        demod_w = mod_w

    input = F.reshape(input, (1, -1, input.shape[2], input.shape[3]),
                      inplace=True)
    demod_w = F.reshape(
        demod_w, (-1, demod_w.shape[2], demod_w.shape[3], demod_w.shape[4]),
        inplace=True)

    if up:
        k = [1, 3, 3, 1]
        conv_out = upsample_conv_2d(input, demod_w, k, factor=2, gain=1,
                                    group=batch_size)
    else:
        conv_out = F.convolution(input, demod_w, pad=(pad_size, pad_size),
                                 group=batch_size)

    conv_out = F.reshape(
        conv_out, (batch_size, -1, conv_out.shape[2], conv_out.shape[3]),
        inplace=True)

    if noise is not None:
        noise_coeff = nn.parameter.get_parameter_or_create(
            name=f"G_synthesis/{res}x{res}/{namescope}/noise_strength",
            shape=())
        output = conv_out + noise * \
            F.reshape(noise_coeff, (1, 1, 1, 1), inplace=False)
    else:
        output = conv_out

    bias = nn.parameter.get_parameter_or_create(
        name=f"G_synthesis/{res}x{res}/{namescope}/conv/b", shape=(outmaps, ))
    output = output + F.reshape(bias, (1, outmaps, 1, 1), inplace=False)

    if act == F.leaky_relu:
        output = F.leaky_relu(output, alpha=0.2) * np.sqrt(2)
    else:
        output = act(output)

    return output
def main():
    random.seed(args.seed)
    np.random.seed(args.seed)
    # Prepare for CUDA.
    ctx = get_extension_context('cudnn', device_id=args.gpus)
    nn.set_default_context(ctx)

    start_full_time = time.time()
    from iterator import data_iterator

    # Data lists for the KITTI dataset
    train_list = "./dataset/kitti_train.csv"
    test_list = "./dataset/kitti_test.csv"
    train = True
    validation = False

    # Set monitor path.
    monitor_path = './nnmonitor' + str(datetime.now().strftime("%Y%m%d%H%M%S"))

    img_left, img_right, disp_img = read_csv(train_list)
    img_left_test, img_right_test, disp_img_test = read_csv(test_list)
    train_samples = len(img_left)
    test_samples = len(img_left_test)
    train_size = int(len(img_left) / args.batchsize_train)
    test_size = int(len(img_left_test) / args.batchsize_test)

    # Create data iterators.
    data_iterator_train = data_iterator(
        train_samples, args.batchsize_train, img_left, img_right, disp_img,
        train, shuffle=True, dataset=args.dataset)
    data_iterator_test = data_iterator(
        test_samples, args.batchsize_test, img_left_test, img_right_test,
        disp_img_test, validation, shuffle=False, dataset=args.dataset)

    # Print data sizes.
    print(train_size, test_size)

    # Clear parameters.
    nn.clear_parameters()

    # Define data shapes for training.
    var_left = nn.Variable(
        (args.batchsize_train, 3, args.crop_height, args.crop_width))
    var_right = nn.Variable(
        (args.batchsize_train, 3, args.crop_height, args.crop_width))
    var_disp = nn.Variable(
        (args.batchsize_train, 1, args.crop_height, args.crop_width))

    # Define data shapes for testing.
    var_left_test = nn.Variable(
        (args.batchsize_test, 3, args.im_height, args.im_width))
    var_right_test = nn.Variable(
        (args.batchsize_test, 3, args.im_height, args.im_width))
    var_disp_test = nn.Variable(
        (args.batchsize_test, 1, args.im_height, args.im_width))

    if args.loadmodel is not None:
        # Load pretrained CNN parameters.
        nn.load_parameters(args.loadmodel)

    # === for Training ===
    # Definition of pred
    pred1, pred2, pred3 = psm_net(var_left, var_right, args.maxdisp, True)
    mask_train = F.greater_scalar(var_disp, 0)
    sum_mask = F.maximum_scalar(F.sum(mask_train), 1)
    print(sum_mask.d, "sum_mask_first")
    # Definition of loss
    loss = 0.5 * (0.5 * F.sum(F.huber_loss(pred1, var_disp)*mask_train)/(sum_mask)
                  + 0.7 * F.sum(F.huber_loss(pred2, var_disp)*mask_train)/(sum_mask)
                  + F.sum(F.huber_loss(pred3, var_disp)*mask_train)/(sum_mask))

    # === for Testing ===
    # Definition of pred
    pred_test = psm_net(var_left_test, var_right_test, args.maxdisp, False)
    var_gt = var_disp_test + F.less_equal_scalar(var_disp_test, 0) * -1
    var_pred = pred_test + F.less_equal_scalar(pred_test, 0) * -1
    E = F.abs(var_pred - var_gt)
    # 3-pixel error: disparity error > 3 px AND relative error > 5 %
    # (a standalone numpy sketch of this metric follows main())
    n_err = F.sum(F.logical_and(F.logical_and(F.greater_scalar(var_gt, 0.0),
                                              F.greater_scalar(E, 3.0)),
                                F.greater_scalar(F.div2(E, F.abs(var_gt)),
                                                 0.05)))
    n_total = F.sum(F.greater_scalar(var_gt, 0))
    test_loss = F.div2(n_err, n_total)

    # Prepare monitors.
    monitor = Monitor(monitor_path)
    monitor_train = MonitorSeries('Training loss', monitor, interval=1)
    monitor_test = MonitorSeries('Validation loss', monitor, interval=1)
    monitor_time_train = MonitorTimeElapsed(
        "Training time/epoch", monitor, interval=1)

    # Create a solver (parameter updater)
    solver = S.Adam(alpha=0.001, beta1=0.9, beta2=0.999)

    # Set parameters
    params = nn.get_parameters()
    solver.set_parameters(params)
    params2 = nn.get_parameters(grad_only=False)
    solver.set_parameters(params2)

    for epoch in range(1, args.epochs+1):
        print('This is %d-th epoch' % (epoch))
        total_train_loss = 0
        index = 0
        lr = adjust_learning_rate(epoch)

        ### Training ###
        while index < train_size:
            # Get mini batch
            # Preprocess
            var_left.d, var_right.d, var_disp.d = data_iterator_train.next()
            loss.forward(clear_no_need_grad=True)
            # Initialize gradients
            solver.zero_grad()
            # Backward execution
            loss.backward(clear_buffer=True)
            # Update parameters by computed gradients
            solver.set_learning_rate(lr)
            solver.update()
            print('Iter %d training loss = %.3f' % (index, loss.d))
            total_train_loss += loss.d
            index += 1

        train_error = total_train_loss/train_size
        print('epoch %d total training loss = %.3f' % (epoch, train_error))
        monitor_time_train.add(epoch)

        ### Testing ###
        total_test_loss = 0
        max_acc = 0
        index_test = 0
        while index_test < test_size:
            var_left_test.d, var_right_test.d, var_disp_test.d = data_iterator_test.next()
            test_loss.forward(clear_buffer=True)
            total_test_loss += test_loss.d
            print('Iter %d test loss = %.3f' % (index_test, test_loss.d*100))
            index_test += 1

        test_error = total_test_loss/test_size
        print('epoch %d total 3-px error in val = %.3f' % (epoch, test_error*100))

        if test_error > max_acc:
            max_acc = test_error*100
            print('MAX epoch %d total test error = %.3f' % (epoch, max_acc))

        # Pass validation loss to a monitor.
        monitor_test.add(epoch, test_error*100)
        # Pass training loss to a monitor.
        monitor_train.add(epoch, train_error)

    print('full training time = %.2f HR' % ((time.time() - start_full_time)/3600))

    # Save parameters
    out_param_file = os.path.join(
        args.savemodel, 'psmnet_trained_param_' + str(epoch) + '.h5')
    nn.save_parameters(out_param_file)
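# Hedged numpy sketch of the KITTI 3-pixel error computed in main() above: a
# pixel counts as erroneous when its absolute disparity error exceeds 3 px AND
# its relative error exceeds 5 %, evaluated only where ground truth is valid.
# The toy arrays below are illustrative.
import numpy as np

gt = np.array([10.0, 40.0, 0.0, 80.0])   # 0 marks an invalid pixel
pred = np.array([12.0, 47.0, 5.0, 81.0])

valid = gt > 0
err = np.abs(pred - gt)
rel = err / np.maximum(np.abs(gt), 1e-6)
bad = valid & (err > 3.0) & (rel > 0.05)
three_px_error = bad.sum() / valid.sum()
print(three_px_error)  # 1 bad pixel out of 3 valid ones -> ~0.333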
def feature_normalize(feature_in):
    feature_in_norm = F.norm(feature_in, p=2, axis=1,
                             keepdims=True) + sys.float_info.epsilon
    feature_in_norm = F.div2(feature_in, feature_in_norm)
    return feature_in_norm
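# Hedged usage sketch for feature_normalize: L2-normalize a feature map along
# the channel axis (axis=1). Assumes the module imports used above (sys,
# nnabla.functions as F); the shape is illustrative.
import numpy as np
import nnabla as nn

feat = nn.Variable((2, 256, 16, 16))
feat.d = np.random.randn(*feat.shape)
feat_n = feature_normalize(feat)
feat_n.forward()
# every spatial location should now have (close to) unit L2 norm over channels
print(np.linalg.norm(feat_n.d[0, :, 0, 0]))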
def styled_conv_block(conv_input, w, noise=None, res=4, inmaps=512, outmaps=512,
                      kernel_size=3, pad_size=1, demodulate=True,
                      namescope="Conv", up=False, act=F.leaky_relu):
    """
    Conv block with skip connection for Generator
    """
    batch_size = conv_input.shape[0]
    with nn.parameter_scope(f'G_synthesis/{res}x{res}/{namescope}'):
        W, bias = weight_init_fn(shape=(w.shape[1], inmaps))
        runtime_coef = (1. / np.sqrt(512)).astype(np.float32)
        style = F.affine(w, W * runtime_coef, bias) + 1.0
    runtime_coef_for_conv = (
        1 / np.sqrt(np.prod([inmaps, kernel_size, kernel_size]))).astype(
            np.float32)

    if up:
        init_function = weight_init_fn(shape=(inmaps, outmaps, kernel_size,
                                              kernel_size), return_init=True)
        conv_weight = nn.parameter.get_parameter_or_create(
            name=f'G_synthesis/{res}x{res}/{namescope}/conv/W',
            shape=(inmaps, outmaps, kernel_size, kernel_size),
            initializer=init_function)
    else:
        init_function = weight_init_fn(shape=(outmaps, inmaps, kernel_size,
                                              kernel_size), return_init=True)
        conv_weight = nn.parameter.get_parameter_or_create(
            name=f'G_synthesis/{res}x{res}/{namescope}/conv/W',
            shape=(outmaps, inmaps, kernel_size, kernel_size),
            initializer=init_function)
    conv_weight = F.mul_scalar(conv_weight, runtime_coef_for_conv)

    if up:
        scale = F.reshape(style, (style.shape[0], style.shape[1], 1, 1, 1),
                          inplace=False)
    else:
        scale = F.reshape(style, (style.shape[0], 1, style.shape[1], 1, 1),
                          inplace=False)

    mod_w = F.mul2(
        F.reshape(conv_weight, (1, ) + conv_weight.shape, inplace=False), scale)

    if demodulate:
        if up:
            denom_w = F.pow_scalar(
                F.sum(F.pow_scalar(mod_w, 2.), axis=[1, 3, 4],
                      keepdims=True) + 1e-8, 0.5)
        else:
            denom_w = F.pow_scalar(
                F.sum(F.pow_scalar(mod_w, 2.), axis=[2, 3, 4],
                      keepdims=True) + 1e-8, 0.5)
        demod_w = F.div2(mod_w, denom_w)
    else:
        demod_w = mod_w

    conv_input = F.reshape(conv_input,
                           (1, -1, conv_input.shape[2], conv_input.shape[3]),
                           inplace=False)
    demod_w = F.reshape(
        demod_w, (-1, demod_w.shape[2], demod_w.shape[3], demod_w.shape[4]),
        inplace=False)

    if up:
        k = [1, 3, 3, 1]
        conv_out = upsample_conv_2d(conv_input, demod_w, k, factor=2, gain=1,
                                    group=batch_size)
    else:
        conv_out = F.convolution(conv_input, demod_w, pad=(pad_size, pad_size),
                                 group=batch_size)

    conv_out = F.reshape(
        conv_out, (batch_size, -1, conv_out.shape[2], conv_out.shape[3]),
        inplace=False)

    if noise is not None:
        noise_coeff = nn.parameter.get_parameter_or_create(
            name=f'G_synthesis/{res}x{res}/{namescope}/noise_strength',
            shape=())
        conv_out = F.add2(conv_out,
                          noise * F.reshape(noise_coeff, (1, 1, 1, 1)))
    else:
        conv_out = conv_out

    bias = nn.parameter.get_parameter_or_create(
        name=f'G_synthesis/{res}x{res}/{namescope}/conv/b', shape=(outmaps, ),
        initializer=np.random.randn(outmaps, ).astype(np.float32))
    conv_out = F.add2(conv_out,
                      F.reshape(bias, (1, outmaps, 1, 1), inplace=False))

    if act == F.leaky_relu:
        conv_out = F.mul_scalar(F.leaky_relu(conv_out, alpha=0.2,
                                             inplace=False),
                                np.sqrt(2), inplace=False)
    else:
        conv_out = act(conv_out)

    return conv_out
def nonlocal_net(B_lab_map, relu_layers, temperature=0.001 * 5,
                 detach_flag=False, WTA_scale_weight=1, feature_noise=0):
    batch_size = B_lab_map.shape[0]
    channel = B_lab_map.shape[1]
    image_height = B_lab_map.shape[2]
    image_width = B_lab_map.shape[3]
    feature_height = int(image_height / 4)
    feature_width = int(image_width / 4)

    feature_channel = 64
    in_channels = feature_channel * 4
    inter_channels = 256

    # layer2_1
    A_feature2_1 = layer2_1(relu_layers[0])
    B_feature2_1 = layer2_1(relu_layers[4])
    # layer3_1
    A_feature3_1 = layer3_1(relu_layers[1])
    B_feature3_1 = layer3_1(relu_layers[5])
    # layer4_1
    A_feature4_1 = layer4_1(relu_layers[2])
    B_feature4_1 = layer4_1(relu_layers[6])
    # layer5_1
    A_feature5_1 = layer5_1(relu_layers[3])
    B_feature5_1 = layer5_1(relu_layers[7])

    if A_feature5_1.shape[2] != A_feature2_1.shape[2] or \
            A_feature5_1.shape[3] != A_feature2_1.shape[3]:
        A_feature5_1 = pad_replicate(A_feature5_1)
        B_feature5_1 = pad_replicate(B_feature5_1)

    A_features = layer(
        F.concatenate(A_feature2_1, A_feature3_1, A_feature4_1, A_feature5_1,
                      axis=1), feature_channel * 4)
    B_features = layer(
        F.concatenate(B_feature2_1, B_feature3_1, B_feature4_1, B_feature5_1,
                      axis=1), feature_channel * 4)

    # pairwise cosine similarity
    theta = PF.convolution(A_features, inter_channels, kernel=(1, 1),
                           stride=(1, 1), name='theta')
    theta_re = F.reshape(theta, (batch_size, inter_channels, -1))
    # center the feature
    theta_re = theta_re - F.mean(theta_re, axis=2, keepdims=True)
    theta_norm = F.norm(theta_re, p=2, axis=1,
                        keepdims=True) + sys.float_info.epsilon
    theta_re = F.div2(theta_re, theta_norm)
    # 2*(feature_height*feature_width)*256
    theta_permute = F.transpose(theta_re, (0, 2, 1))

    phi = PF.convolution(B_features, inter_channels, kernel=(1, 1),
                         stride=(1, 1), name='phi')
    phi_re = F.reshape(phi, (batch_size, inter_channels, -1))
    # center the feature
    phi_re = phi_re - F.mean(phi_re, axis=2, keepdims=True)
    phi_norm = F.norm(phi_re, p=2, axis=1, keepdims=True) + \
        sys.float_info.epsilon
    phi_re = F.div2(phi_re, phi_norm)

    # 2*(feature_height*feature_width)*(feature_height*feature_width)
    f = F.batch_matmul(theta_permute, phi_re)
    f_shape = f.shape
    f = F.reshape(f, (1,) + f_shape)
    f_similarity = F.reshape(f, (1,) + f_shape)
    similarity_map = F.max(f_similarity, axis=3, keepdims=True)
    similarity_map = F.reshape(
        similarity_map, (batch_size, 1, feature_height, feature_width))

    # f can be negative
    # if WTA_scale_weight == 1:
    f_WTA = f
    f_WTA = f_WTA / temperature
    f_WTA_sp = f_WTA.shape
    f_WTA = F.reshape(f_WTA, (f_WTA_sp[1], f_WTA_sp[2], f_WTA_sp[3]))
    # 2*1936*1936; softmax along the horizontal line (dim=-1)
    f_div_C = F.softmax(f_WTA, axis=2)

    # downsample the reference color
    B_lab = F.average_pooling(B_lab_map, (4, 4))
    B_lab = F.reshape(B_lab, (batch_size, channel, -1))
    B_lab = F.transpose(B_lab, (0, 2, 1))  # 2*1936*channel

    # multiply the corr map with color
    y = F.batch_matmul(f_div_C, B_lab)  # 2*1936*channel
    y = F.transpose(y, (0, 2, 1))
    y = F.reshape(
        y, (batch_size, channel, feature_height, feature_width))  # 2*3*44*44
    y = F.interpolate(y, scale=(4, 4), mode='nearest', align_corners=False)
    similarity_map = F.interpolate(
        similarity_map, scale=(4, 4), mode='nearest', align_corners=False)
    return y, similarity_map
def __call__(self, batch_size, style_noises, truncation_psi=1.0,
             return_latent=False, mixing_layer_index=None,
             dlatent_avg_beta=0.995):
    with nn.parameter_scope(self.global_scope):
        # normalize noise inputs
        for i in range(len(style_noises)):
            style_noises[i] = F.div2(
                style_noises[i],
                F.pow_scalar(F.add_scalar(F.mean(style_noises[i] ** 2.,
                                                 axis=1,
                                                 keepdims=True),
                                          1e-8, inplace=False),
                             0.5, inplace=False))

        # get latent code
        w = [
            mapping_network(style_noises[0],
                            outmaps=self.mapping_network_dim,
                            num_layers=self.mapping_network_num_layers)
        ]
        w += [
            mapping_network(style_noises[1],
                            outmaps=self.mapping_network_dim,
                            num_layers=self.mapping_network_num_layers)
        ]

        dlatent_avg = nn.parameter.get_parameter_or_create(
            name="dlatent_avg", shape=(1, 512))

        # Moving average update of dlatent_avg
        batch_avg = F.mean((w[0] + w[1]) * 0.5, axis=0, keepdims=True)
        update_op = F.assign(
            dlatent_avg, lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
        update_op.name = 'dlatent_avg_update'
        dlatent_avg = F.identity(dlatent_avg) + 0 * update_op

        # truncation trick
        w = [lerp(dlatent_avg, _, truncation_psi) for _ in w]

        # generate output from generator
        constant_bc = nn.parameter.get_parameter_or_create(
            name="G_synthesis/4x4/Const/const", shape=(1, 512, 4, 4),
            initializer=np.random.randn(1, 512, 4, 4).astype(np.float32))
        constant_bc = F.broadcast(constant_bc,
                                  (batch_size, ) + constant_bc.shape[1:])

        if mixing_layer_index is None:
            mixing_layer_index_var = F.randint(1, len(self.resolutions) * 2,
                                               (1, ))
        else:
            mixing_layer_index_var = F.constant(val=mixing_layer_index,
                                                shape=(1, ))
        mixing_switch_var = F.clip_by_value(
            F.arange(0, len(self.resolutions) * 2) - mixing_layer_index_var,
            0, 1)
        mixing_switch_var_re = F.reshape(
            mixing_switch_var, (1, mixing_switch_var.shape[0], 1),
            inplace=False)
        w0 = F.reshape(w[0], (batch_size, 1, w[0].shape[1]), inplace=False)
        w1 = F.reshape(w[1], (batch_size, 1, w[0].shape[1]), inplace=False)
        w_mixed = w0 * mixing_switch_var_re + \
            w1 * (1 - mixing_switch_var_re)

        rgb_output = self.synthesis(w_mixed, constant_bc)

        if return_latent:
            return rgb_output, w_mixed
        else:
            return rgb_output
def generate_data(args):
    if not os.path.isfile(os.path.join(args.weights_path, 'gen_params.h5')):
        os.makedirs(args.weights_path, exist_ok=True)
        print("Downloading the pretrained tf-converted weights. Please wait...")
        url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
        from nnabla.utils.data_source_loader import download
        download(url, os.path.join(args.weights_path, 'gen_params.h5'), False)
    nn.load_parameters(os.path.join(args.weights_path, 'gen_params.h5'))
    print('Loaded pretrained weights from tensorflow!')

    os.makedirs(args.save_image_path, exist_ok=True)

    batches = [
        args.batch_size for _ in range(args.num_images // args.batch_size)
    ]
    if args.num_images % args.batch_size != 0:
        batches.append(args.num_images -
                       (args.num_images // args.batch_size) * args.batch_size)

    for idx, batch_size in enumerate(batches):
        z = [
            F.randn(shape=(batch_size, 512)).data,
            F.randn(shape=(batch_size, 512)).data
        ]

        for i in range(len(z)):
            z[i] = F.div2(
                z[i],
                F.pow_scalar(F.add_scalar(F.mean(z[i] ** 2., axis=1,
                                                 keepdims=True),
                                          1e-8),
                             0.5, inplace=True))

        # get latent code
        w = [mapping_network(z[0], outmaps=512, num_layers=8)]
        w += [mapping_network(z[1], outmaps=512, num_layers=8)]

        # truncation trick
        dlatent_avg = nn.parameter.get_parameter_or_create(name="dlatent_avg",
                                                           shape=(1, 512))
        w = [lerp(dlatent_avg, _, 0.7) for _ in w]

        # Load direction
        if not args.face_morph:
            attr_delta = nn.NdArray.from_numpy_array(
                np.load(args.attr_delta_path))
            attr_delta = F.reshape(attr_delta[0], (1, -1))
            w_plus = [w[0] + args.coeff * attr_delta, w[1]]
            w_minus = [w[0] - args.coeff * attr_delta, w[1]]
        else:
            w_plus = [w[0], w[0]]   # content
            w_minus = [w[1], w[1]]  # style

        constant_bc = nn.parameter.get_parameter_or_create(
            name="G_synthesis/4x4/Const/const", shape=(1, 512, 4, 4))
        constant_bc = F.broadcast(constant_bc,
                                  (batch_size, ) + constant_bc.shape[1:])

        gen_plus = synthesis(w_plus, constant_bc, noise_seed=100, mix_after=8)
        gen_minus = synthesis(w_minus, constant_bc, noise_seed=100, mix_after=8)
        gen = synthesis(w, constant_bc, noise_seed=100, mix_after=8)

        image_plus = convert_images_to_uint8(gen_plus, drange=[-1, 1])
        image_minus = convert_images_to_uint8(gen_minus, drange=[-1, 1])
        image = convert_images_to_uint8(gen, drange=[-1, 1])

        for j in range(batch_size):
            filepath = os.path.join(args.save_image_path,
                                    f'image_{idx*batch_size+j}')
            imsave(f'{filepath}_o.png', image_plus[j], channel_first=True)
            imsave(f'{filepath}_y.png', image_minus[j], channel_first=True)
            imsave(f'{filepath}.png', image[j], channel_first=True)
            print(f"Generated. Saved {filepath}")
def sample_from_controller(args):
    """
    2-layer RNN (LSTM) based controller which outputs an architecture of a CNN,
    represented as a sequence of integers.
    Given the number of layers, for each layer it executes 2 types of
    computation: one for sampling the operation at that layer,
    another for sampling the skip connection pattern.
    """

    entropys = nn.Variable([1, 1], need_grad=True)
    log_probs = nn.Variable([1, 1], need_grad=True)
    skip_penaltys = nn.Variable([1, 1], need_grad=True)
    entropys.d = log_probs.d = skip_penaltys.d = 0.0  # initialize them all

    num_layers = args.num_layers
    lstm_size = args.lstm_size
    state_size = args.state_size
    lstm_num_layers = args.lstm_layers
    skip_target = args.skip_prob
    temperature = args.temperature
    tanh_constant = args.tanh_constant
    num_branch = args.num_ops

    arc_seq = []
    initializer = I.UniformInitializer((-0.1, 0.1))

    prev_h = [
        nn.Variable([1, lstm_size], need_grad=True)
        for _ in range(lstm_num_layers)
    ]
    prev_c = [
        nn.Variable([1, lstm_size], need_grad=True)
        for _ in range(lstm_num_layers)
    ]
    for i in range(len(prev_h)):
        prev_h[i].d = 0  # initialize variables in lstm layers.
        prev_c[i].d = 0

    inputs = nn.Variable([1, lstm_size])
    inputs.d = np.random.normal(0, 0.5, [1, lstm_size])

    g_emb = nn.Variable([1, lstm_size])
    g_emb.d = np.random.normal(0, 0.5, [1, lstm_size])

    skip_targets = nn.Variable([1, 2])
    skip_targets.d = np.array([[1.0 - skip_target, skip_target]])

    for layer_id in range(num_layers):
        # One-step stacked LSTM.
        with nn.parameter_scope("controller_lstm"):
            next_h, next_c = stack_lstm(inputs, prev_h, prev_c, state_size)
        prev_h, prev_c = next_h, next_c  # shape: (1, lstm_size)

        # Compute for operation.
        with nn.parameter_scope("ops"):
            logit = PF.affine(next_h[-1], num_branch, w_init=initializer,
                              with_bias=False)

        if temperature is not None:
            logit = F.mul_scalar(logit, (1 / temperature))
        if tanh_constant is not None:
            logit = F.mul_scalar(F.tanh(logit), tanh_constant)  # (1, num_branch)

        # normalizing logits.
        normed_logit = np.e ** logit.d
        normed_logit = normed_logit / np.sum(normed_logit)

        # Sampling operation id from multinomial distribution.
        ops_id = np.random.multinomial(1, normed_logit[0], 1).nonzero()[1]
        ops_id = nn.Variable.from_numpy_array(ops_id)  # (1, )
        arc_seq.append(ops_id.d)

        # log policy for operation.
        log_prob = F.softmax_cross_entropy(logit,
                                           F.reshape(ops_id, shape=(1, 1)))  # (1, )
        # accumulate log policy as log probs
        log_probs = F.add2(log_probs, log_prob)

        entropy = log_prob * F.exp(-log_prob)
        entropys = F.add2(entropys, entropy)  # accumulate entropy as entropys.

        w_emb = nn.parameter.get_parameter_or_create("w_emb",
                                                     [num_branch, lstm_size],
                                                     initializer,
                                                     need_grad=False)
        inputs = F.reshape(w_emb[int(ops_id.d)],
                           (1, w_emb.shape[1]))  # (1, lstm_size)

        with nn.parameter_scope("controller_lstm"):
            next_h, next_c = stack_lstm(inputs, prev_h, prev_c, lstm_size)
        prev_h, prev_c = next_h, next_c  # (1, lstm_size)

        with nn.parameter_scope("skip_affine_3"):
            adding_w_1 = PF.affine(next_h[-1], lstm_size, w_init=initializer,
                                   with_bias=False)  # (1, lstm_size)

        if layer_id == 0:
            inputs = g_emb  # (1, lstm_size)
            anchors = next_h[-1]  # (1, lstm_size)
            anchors_w_1 = adding_w_1
            # then goes back to the entry point of the loop

        else:
            # (layer_id, lstm_size) is the shape during this process
            query = anchors_w_1
            with nn.parameter_scope("skip_affine_1"):
                query = F.tanh(
                    F.add2(
                        query,
                        PF.affine(next_h[-1], lstm_size, w_init=initializer,
                                  with_bias=False)))
                # (layer_id, lstm_size) + (1, lstm_size); broadcast occurs here,
                # and the resulting shape is (layer_id, lstm_size)

            with nn.parameter_scope("skip_affine_2"):
                query = PF.affine(query, 1, w_init=initializer,
                                  with_bias=False)  # (layer_id, 1)
            # note that each weight for skip_affine_X is shared across all steps of LSTM.

            # re-define logits; now its shape is (layer_id, 2)
            logit = F.concatenate(-query, query, axis=1)

            if temperature is not None:
                logit = F.mul_scalar(logit, (1 / temperature))
            if tanh_constant is not None:
                logit = F.mul_scalar(F.tanh(logit), tanh_constant)

            skip_prob_unnormalized = F.exp(logit)  # (layer_id, 2)

            # normalizing skip_prob_unnormalized.
            summed = F.sum(skip_prob_unnormalized, axis=1,
                           keepdims=True).apply(need_grad=False)
            summed = F.concatenate(summed, summed, axis=1)
            skip_prob_normalized = F.div2(
                skip_prob_unnormalized, summed)  # (layer_id, 2)

            # Sampling skip_pattern from multinomial distribution.
            skip_pattern = np.random.multinomial(
                1, skip_prob_normalized.d[0],
                layer_id).nonzero()[1]  # (layer_id, 1)
            arc_seq.append(skip_pattern)
            skip = nn.Variable.from_numpy_array(skip_pattern)

            # compute skip penalty.
            # (layer_id, 2) broadcast occurs here too
            kl = F.mul2(skip_prob_normalized,
                        F.log(F.div2(skip_prob_normalized, skip_targets)))
            kl = F.sum(kl, keepdims=True)
            # get the mean value here in advance.
            kl = kl * (1.0 / (num_layers - 1))
            # accumulate kl divergence as skip penalty.
            skip_penaltys = F.add2(skip_penaltys, kl)

            # log policy for connection.
            log_prob = F.softmax_cross_entropy(
                logit, F.reshape(skip, shape=(skip.shape[0], 1)))
            log_probs = F.add2(log_probs, F.sum(log_prob, keepdims=True))

            entropy = F.sum(log_prob * F.exp(-log_prob), keepdims=True)
            # accumulate entropy as entropys.
            entropys = F.add2(entropys, entropy)

            skip = F.reshape(skip, (1, layer_id))
            inputs = F.affine(skip, anchors).apply(need_grad=False)  # (1, lstm_size)
            inputs = F.mul_scalar(inputs, (1.0 / (1.0 + (np.sum(skip.d)))))

            # add new row for the next computation
            # (layer_id + 1, lstm_size)
            anchors = F.concatenate(anchors, next_h[-1], axis=0)
            # (layer_id + 1, lstm_size)
            anchors_w_1 = F.concatenate(anchors_w_1, adding_w_1, axis=0)

    return arc_seq, log_probs, entropys, skip_penaltys
def generate_attribute_direction(args, attribute_prediction_model):
    if not os.path.isfile(os.path.join(args.weights_path, 'gen_params.h5')):
        os.makedirs(args.weights_path, exist_ok=True)
        print("Downloading the pretrained tf-converted weights. Please wait...")
        url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/stylegan2/styleGAN2_G_params.h5"
        from nnabla.utils.data_source_loader import download
        download(url, os.path.join(args.weights_path, 'gen_params.h5'), False)
    nn.load_parameters(os.path.join(args.weights_path, 'gen_params.h5'))
    print('Loaded pretrained weights from tensorflow!')

    nn.load_parameters(args.classifier_weight_path)
    print(f'Loaded {args.classifier_weight_path}')

    batches = [
        args.batch_size for _ in range(args.num_images // args.batch_size)
    ]
    if args.num_images % args.batch_size != 0:
        batches.append(args.num_images -
                       (args.num_images // args.batch_size) * args.batch_size)

    w_plus, w_minus = 0.0, 0.0
    w_plus_count, w_minus_count = 0.0, 0.0
    pbar = trange(len(batches))
    for i in pbar:
        batch_size = batches[i]
        z = [F.randn(shape=(batch_size, 512)).data]

        z = [z[0], z[0]]

        for i in range(len(z)):
            z[i] = F.div2(
                z[i],
                F.pow_scalar(F.add_scalar(F.mean(z[i] ** 2., axis=1,
                                                 keepdims=True),
                                          1e-8),
                             0.5, inplace=True))

        # get latent code
        w = [mapping_network(z[0], outmaps=512, num_layers=8)]
        w += [mapping_network(z[1], outmaps=512, num_layers=8)]

        # truncation trick
        dlatent_avg = nn.parameter.get_parameter_or_create(name="dlatent_avg",
                                                           shape=(1, 512))
        w = [lerp(dlatent_avg, _, 0.7) for _ in w]

        constant_bc = nn.parameter.get_parameter_or_create(
            name="G_synthesis/4x4/Const/const", shape=(1, 512, 4, 4))
        constant_bc = F.broadcast(constant_bc,
                                  (batch_size, ) + constant_bc.shape[1:])

        gen = synthesis(w, constant_bc, noise_seed=100, mix_after=7)

        classifier_score = F.softmax(attribute_prediction_model(gen, True))
        confidence, class_pred = F.max(classifier_score, axis=1,
                                       with_index=True, keepdims=True)

        w_plus += np.sum(w[0].data * (class_pred.data == 0) *
                         (confidence.data > 0.65), axis=0, keepdims=True)
        w_minus += np.sum(w[0].data * (class_pred.data == 1) *
                          (confidence.data > 0.65), axis=0, keepdims=True)

        w_plus_count += np.sum(
            (class_pred.data == 0) * (confidence.data > 0.65))
        w_minus_count += np.sum(
            (class_pred.data == 1) * (confidence.data > 0.65))

        pbar.set_description(f'{w_plus_count} {w_minus_count}')

    # save attribute direction
    attribute_variation_direction = (w_plus / w_plus_count) - \
        (w_minus / w_minus_count)
    print(w_plus_count, w_minus_count)
    np.save(f'{args.classifier_weight_path.split("/")[0]}/direction.npy',
            attribute_variation_direction)
def pixel_wise_feature_vector_normalization(h, eps=1e-8):
    mean = F.mean(F.pow_scalar(h, 2), axis=1, keepdims=True)
    deno = F.pow_scalar(mean + eps, 0.5)
    return F.div2(h, F.broadcast(deno, h.shape))
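# Hedged numpy cross-check for pixel_wise_feature_vector_normalization (often
# called "pixel norm"): h / sqrt(mean(h^2, axis=channel) + eps). Assumes nnabla
# is installed and F is imported as in the module above; the shape is
# illustrative.
import numpy as np
import nnabla as nn

h = nn.Variable((2, 8, 4, 4))
h.d = np.random.randn(*h.shape)
y = pixel_wise_feature_vector_normalization(h)
y.forward()

expected = h.d / np.sqrt((h.d ** 2).mean(axis=1, keepdims=True) + 1e-8)
print(np.allclose(y.d, expected, atol=1e-5))  # expected: True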
    len(x_valid), batch_size, shuffle=True, with_file_cache=False)

x = nn.Variable((batch_size, sentence_length))
t = nn.Variable((batch_size, sentence_length, 1))
h = PF.embed(x, vocab_size, embedding_size)
h = LSTM(h, hidden, return_sequences=True)
h = TimeDistributed(PF.affine)(h, hidden, name='hidden')
y = TimeDistributed(PF.affine)(h, vocab_size, name='output')

mask = F.sum(F.sign(t), axis=2)  # do not predict 'pad'.
entropy = TimeDistributedSoftmaxCrossEntropy(y, t) * mask
count = F.sum(mask, axis=1)
loss = F.mean(F.div2(F.sum(entropy, axis=1), count))

# Create solver.
solver = S.Momentum(1e-2, momentum=0.9)
solver.set_parameters(nn.get_parameters())

# Create monitor.
from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
monitor = Monitor('./tmp-lstmlm')
monitor_perplexity = MonitorSeries('perplexity', monitor, interval=1)
monitor_perplexity_valid = MonitorSeries('perplexity_valid', monitor, interval=1)

for epoch in range(max_epoch):
    train_loss_set = []
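# Hedged numpy sketch of the masked-average pattern used above
# (loss = mean(sum(entropy, axis=1) / count)): per-token cross-entropy is
# summed over time and divided by the number of non-pad tokens per sentence.
# The toy values below are illustrative.
import numpy as np

entropy = np.array([[0.5, 0.7, 0.0],   # last token is padding
                    [0.2, 0.4, 0.6]])
mask = np.array([[1., 1., 0.],
                 [1., 1., 1.]])

masked = entropy * mask
count = mask.sum(axis=1)                    # tokens per sentence: [2, 3]
loss = (masked.sum(axis=1) / count).mean()  # average per-token loss
print(loss)  # (1.2/2 + 1.2/3) / 2 = 0.5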
def __call__(self, input):
    out = F.mul_scalar(input, self._scale)
    out = F.sub2(out, self._mean)
    out = F.div2(out, self._std)
    return out