def test_graph_unlink_backward(seed):
    rng = np.random.RandomState(seed)
    x0 = nn.Variable([2, 4], need_grad=True)
    x1 = nn.Variable([2, 4], need_grad=True)
    x0.d = rng.randn(*x0.shape)
    x1.d = rng.randn(*x1.shape)
    x0.grad.zero()
    x1.grad.zero()
    with nn.parameter_scope("fc0"):
        h0 = PF.affine(x0, 2)
    h0.need_grad = False
    with nn.parameter_scope("fc1"):
        h1 = PF.affine(x1, 2)
    h = h0 + h1
    with nn.parameter_scope("fc"):
        y1 = PF.affine(h, 1)
        y2 = PF.affine(h, 1)

    nn.forward_all([y1, y2])

    y1.backward(clear_buffer=True)
    assert np.all(x0.g == 0)
    assert not np.all(x1.g == 0)

    y2.backward(clear_buffer=True)
    assert np.all(x0.g == 0)
    assert not np.all(x1.g == 0)

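# A minimal sketch (added for illustration, not part of the test suite above)
# of the pattern the test exercises: setting `need_grad = False` on an
# intermediate variable detaches that branch from backprop, so gradients flow
# only through the other branch. The helper name `_sketch_blocked_branch` is
# illustrative; it assumes nnabla's public API only.
import numpy as np
import nnabla as nn
import nnabla.functions as F


def _sketch_blocked_branch():
    x0 = nn.Variable([2, 3], need_grad=True)
    x1 = nn.Variable([2, 3], need_grad=True)
    x0.d = np.ones(x0.shape)
    x1.d = np.ones(x1.shape)
    x0.grad.zero()
    x1.grad.zero()
    h0 = F.identity(x0)
    h0.need_grad = False  # block gradients through the x0 branch
    y = h0 + F.identity(x1)
    y.forward()
    y.backward()
    assert np.all(x0.g == 0)        # blocked branch received no gradient
    assert not np.all(x1.g == 0)    # the other branch did
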
def test_stft(window_size, stride, fft_size, window_type):
    # clear all previous STFT conv/deconv kernels
    nn.clear_parameters()

    # Compare to `scipy.signal.stft` - only done if SciPy available
    x = np.random.randn(1, window_size * 10)

    nx = nn.Variable.from_numpy_array(x)
    nyr, nyi = F.stft(nx,
                      window_size=window_size,
                      stride=stride,
                      fft_size=fft_size,
                      window_type=window_type,
                      center=False)
    nn.forward_all([nyr, nyi])

    stft_nnabla = nyr.d + 1j * nyi.d

    _f, _t, stft_scipy = sig.stft(x,
                                  window=window_type,
                                  nperseg=window_size,
                                  noverlap=window_size - stride,
                                  nfft=fft_size,
                                  boundary=None,
                                  padded=False)

    # scipy does a different scaling - take care here
    stft_nnabla /= fft_size // 2

    assert np.allclose(stft_nnabla, stft_scipy, atol=1e-5, rtol=1e-5)

def test_intermediate_outputs(clear_buffer, clear_no_need_grad):
    rng = np.random.RandomState(311)

    # use non-cached arrays so that buffers are cleared immediately
    nn.prefer_cached_array(False)

    x = nn.Variable.from_numpy_array(rng.randn(2, 10))
    h1 = x + 1
    y1 = h1 + 1
    h2 = x + 1
    h2.persistent = True
    y2 = h2 + 1

    nn.forward_all([h1, y1], clear_buffer=clear_buffer,
                   clear_no_need_grad=clear_no_need_grad)
    nn.forward_all([h2, y2], clear_buffer=clear_buffer,
                   clear_no_need_grad=clear_no_need_grad)

    assert_allclose(h1.d, h2.d)
    assert_allclose(y1.d, y2.d)

    # revert the preference (this is also done in conftest.py, but just in case)
    nn.prefer_cached_array(True)

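# A minimal sketch (added for illustration, not from the test suite above) of
# the `persistent` flag used in the test: a persistent intermediate variable
# keeps its data buffer alive even when the graph is run with
# clear_buffer=True, so its `.d` remains readable afterwards. The helper name
# `_sketch_persistent` is illustrative.
import numpy as np
import nnabla as nn


def _sketch_persistent():
    x = nn.Variable.from_numpy_array(np.arange(4.0))
    h = x + 1
    h.persistent = True   # protect h's data from buffer clearing
    y = h * 2
    y.forward(clear_buffer=True)
    # h.d is still valid because h is persistent.
    assert np.allclose(h.d, np.arange(4.0) + 1)
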
def test_graph_more_than_2_outputs(seed, clear_buffer):
    count = 0

    def func_hook(f):
        nonlocal count
        if f.name == 'Split':
            count += 1

    nn.clear_parameters()

    a = nn.Variable.from_numpy_array(np.ones((10,)))
    b = nn.Variable.from_numpy_array(np.ones((10,)))
    c = F.add2(a, b, inplace=True, outputs=[a.data])
    y = F.split(c, axis=0)
    nn.forward_all(y, function_pre_hook=func_hook)
    # With forward_all, `Split` is executed only once for all its outputs.
    assert count == 1

    res = [x.d for x in y]
    assert_allclose(res, [2.0] * 10)

    a = nn.Variable.from_numpy_array(np.ones((10,)))
    b = nn.Variable.from_numpy_array(np.ones((10,)))
    c = F.add2(a, b, inplace=True, outputs=[a.data])
    y = F.split(c, axis=0)
    # Calling forward on each output re-executes the whole graph every time;
    # since add2 writes in-place into a.data, the result accumulates
    # (1 + 10 forward passes = 11).
    for yy in y:
        yy.forward()
    res = [x.d for x in y]
    assert_allclose(res, [11.0] * 10)

def run(self, epoch):
    lr = self._update_learning_rate(epoch)

    # Training loop
    epoch_loss = 0.0
    epoch_error = 0
    pbar = trange(self.num_iter_per_epoch,
                  desc='Train at epoch %d' % epoch,
                  disable=self.comm.rank > 0)
    # pbar = range(self.num_iter_per_epoch)
    self.reporter.reset(epoch, pbar)
    for i in pbar:
        # nvtx.range_push("train_{}".format(i))
        # Wait here until back-prop has finished
        self.stream_event_handler.event_synchronize()

        next_image, next_label = self.data.next()
        self.model.image.data = next_image
        self.model.label.data = next_label

        # Synchronizing null-stream and host here makes update faster. I'm not sure why.
        self.stream_event_handler.default_stream_synchronize()

        self.reporter(lr * self.loss_scaling)
        nn.forward_all([self.model.loss, self.model.error],
                       clear_no_need_grad=True)
        # self.model.loss.forward(clear_no_need_grad=True, function_pre_hook=None)

        comm_callback = None
        if self.comm.n_procs > 1:
            params = [x.grad for x in nn.get_parameters().values()]
            comm_callback = self.comm.comm.all_reduce_callback(
                params, 1024 * 1024 * 2)
        self.solver.zero_grad()
        self.model.loss.backward(
            self.loss_scaling,
            clear_buffer=True,
            communicator_callbacks=comm_callback)

        # Record an event on the default stream
        self.stream_event_handler.add_default_stream_event()

        # Update
        self.solver.weight_decay(self.weight_decay)
        self.solver.update()
        self.reporter.update()

        # if i == 10:
        #     import sys
        # nvtx.range_pop()

    self.reporter(lr * self.loss_scaling, force=True)
    self.reporter.on_epoch_end()

def test_graph_logreg(seed):
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4], need_grad=True)
    w1 = nn.Variable([12, 5], need_grad=True)
    w2 = nn.Variable([12, 5], need_grad=True)
    b1 = nn.Variable([5], need_grad=True)
    b2 = nn.Variable([5], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    w1.d = rng.randn(*w1.shape)
    w2.d = rng.randn(*w2.shape)
    b1.d = rng.randn(*b1.shape)
    b2.d = rng.randn(*b2.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    z1 = F.affine(x, w1, b1, 1)
    z2 = F.affine(x, w2, b2, 1)
    l1 = F.softmax_cross_entropy(z1, t, 1)
    L1 = F.mean(l1)
    l2 = F.softmax_cross_entropy(z2, t, 1)
    L2 = F.mean(l2)
    nn.forward_all([L1, L2])

    # Backprop for z1
    # Diff should be initialized since they are always accumulated
    x.g = 0
    w1.g = 0
    b1.g = 0
    L1.backward(clear_buffer=True)

    inputs = [x, w1, b1]

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L1, inputs, 1e-3, False)
    assert_allclose(ngrad, agrad, atol=1e-2)

    # Backprop for z2
    # Diff should be initialized since they are always accumulated
    x.g = 0
    w2.g = 0
    b2.g = 0
    L2.backward(clear_buffer=True)

    inputs = [x, w2, b2]

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L2, inputs, 1e-3, False)
    assert_allclose(ngrad, agrad, atol=1e-2)

def test_graph_clear_buffer(seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4])
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    # Network definition
    nn.set_default_context(nn.Context())
    nn.clear_parameters()
    x1 = x + 1
    x2 = x1 - 1
    with nn.parameter_scope('conv1'):
        z = PF.convolution(x2, 3, (2, 2))
        z2 = F.relu(z, inplace=True)
    with nn.parameter_scope('fc2'):
        z3 = PF.affine(z2, 5)
        z4 = PF.affine(z2, 5)
    l1 = F.softmax_cross_entropy(z3, t, 1)
    L1 = F.mean(l1)
    l2 = F.softmax_cross_entropy(z4, t, 1)
    L2 = F.mean(l2)

    # Forwardprop
    import tempfile
    import os
    tmpd = tempfile.mkdtemp()
    nn.save_parameters(os.path.join(tmpd, 'parameter.h5'))
    first = False
    for cnng in [False, True]:
        for cb in [False, True]:
            _ = nn.load_parameters(os.path.join(tmpd, 'parameter.h5'))
            for v in nn.get_parameters().values():
                v.grad.zero()
            nn.forward_all([L1, L2], clear_no_need_grad=cnng)

            # for now, the first backward cannot be
            # called with clear_buffer=True
            L1.backward(clear_buffer=False)
            L2.backward(clear_buffer=cb)
            if not first:
                first = True
                g = list(nn.get_parameters().values())[0].g.copy()
            else:
                g2 = list(nn.get_parameters().values())[0].g.copy()

                import platform
                if platform.machine() == 'ppc64le':
                    pytest.skip("This test fails on ppc64le")

                assert np.all(g == g2)

def test_stft(audio, nb_channels, nfft, hop):
    # clear STFT kernels (from previous tests with different frame size)
    nn.clear_parameters()

    # compute STFT using NNabla
    X_real, X_imag = model.STFT(audio, n_fft=nfft, n_hop=hop, center=True)
    # forward both at the same time so that a new random `audio` is not created
    nn.forward_all([X_real, X_imag])
    X = X_real.d + X_imag.d * 1j

    # compute iSTFT using SciPy
    out = test.istft(X, n_fft=nfft, n_hopsize=hop)

    assert np.sqrt(np.mean((audio.d - out)**2)) < 1e-6

def test_stft(ctx, window_size, stride, fft_size, window_type):
    backend = ctx.backend[0].split(":")[0]
    if backend == 'cuda':
        pytest.skip('CUDA Convolution N-D is only supported in CUDNN extension')

    # clear all previous STFT conv/deconv kernels
    nn.clear_parameters()

    # Compare to `scipy.signal.stft` - only done if SciPy available
    x = np.random.randn(1, window_size * 10)

    nx = nn.Variable.from_numpy_array(x)
    with nn.context_scope(ctx):
        nyr, nyi = F.stft(nx,
                          window_size=window_size,
                          stride=stride,
                          fft_size=fft_size,
                          window_type=window_type,
                          center=False)
        nn.forward_all([nyr, nyi])

    stft_nnabla = nyr.d + 1j * nyi.d

    window_type_scipy = window_type
    if window_type == 'rectangular' or window_type is None:
        window_type_scipy = 'boxcar'

    _f, _t, stft_scipy = sig.stft(x,
                                  window=window_type_scipy,
                                  nperseg=window_size,
                                  noverlap=window_size - stride,
                                  nfft=fft_size,
                                  boundary=None,
                                  padded=False)

    # scipy does a different scaling - take care here
    stft_nnabla /= fft_size // 2

    assert np.allclose(stft_nnabla, stft_scipy, atol=1e-5, rtol=1e-5)

def test_function_hook():
    '''
    Testing function hooks in forward and backward
    '''
    x = nn.Variable.from_numpy_array(
        np.zeros((2, 3), dtype=np.float32)).apply(need_grad=True)
    x.grad.zero()

    h = x + 2
    h.data.zero()
    h.grad.zero()
    y = h * 0.5
    y.data.zero()

    def forward_pre_hook(f):
        assert_allclose(f.outputs[0].d, 0)

    def forward_post_hook(f):
        if f.info.type_name == 'AddScalar':
            assert_allclose(f.outputs[0].d, 2)
        if f.info.type_name == 'MulScalar':
            assert_allclose(f.outputs[0].d, 1)

    def backward_pre_hook(f):
        assert_allclose(f.inputs[0].g, 0)

    def backward_post_hook(f):
        # Both h and x grad will be 0.5
        assert_allclose(f.inputs[0].g, 0.5)

    y.forward(function_pre_hook=forward_pre_hook,
              function_post_hook=forward_post_hook)
    y.backward(function_pre_hook=backward_pre_hook,
               function_post_hook=backward_post_hook)

    x.grad.zero()
    z = x * 0.1

    # Just a smoke test that hooks can also be passed to forward_all
    nn.forward_all((y, z),
                   function_pre_hook=lambda f: None,
                   function_post_hook=lambda f: None)

def run(self, epoch):
    pbar = trange(self.num_iter_per_epoch,
                  desc='Val at epoch %d' % epoch,
                  disable=self.comm.rank > 0)
    self.reporter.reset(epoch, pbar)
    for i in pbar:
        # Wait here until forward-prop has finished
        self.stream_event_handler.event_synchronize()

        next_image, next_label = self.data.next()
        self.reporter(0)
        self.model.image.data = next_image
        self.model.label.data = next_label
        nn.forward_all([self.model.loss, self.model.error],
                       clear_buffer=True)
        self.stream_event_handler.add_default_stream_event()
        self.reporter.update()
    self.reporter(0, force=True)
    self.reporter.on_epoch_end()

def test_graph_forward_clear_buffer(seed, clear_buffer):
    nn.clear_parameters()

    x = nn.Variable((2, 10))
    h = PF.affine(x, 10, name='hidden')
    y1 = PF.affine(h, 10, name='out1')
    y2 = PF.affine(h, 10, name='out2')

    # input
    rng = np.random.RandomState(seed)
    data = rng.randn(*x.shape)

    # reference values
    x.d = data
    y1.forward()
    y2.forward()
    ref_y1 = y1.d.copy()
    ref_y2 = y2.d.copy()

    # check
    nn.forward_all([y1, y2], clear_buffer=clear_buffer)
    assert_allclose(y1.d, ref_y1)
    assert_allclose(y2.d, ref_y2)

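# A minimal sketch (added for illustration, not from the test suite above) of
# why forward_all is used for graphs like the one in the test: the subgraph
# shared by several outputs is traversed only once, which a function_pre_hook
# can confirm by counting executions (as test_graph_more_than_2_outputs does
# for `Split`). The helper name `_sketch_forward_all_shared` is illustrative.
import numpy as np
import nnabla as nn
import nnabla.functions as F


def _sketch_forward_all_shared():
    calls = []
    x = nn.Variable.from_numpy_array(np.ones((2, 3)))
    h = F.add_scalar(x, 1)       # shared by both outputs
    y1 = F.mul_scalar(h, 2)
    y2 = F.mul_scalar(h, 3)
    nn.forward_all([y1, y2],
                   function_pre_hook=lambda f: calls.append(f.info.type_name))
    # The shared AddScalar appears only once in the call trace.
    assert calls.count('AddScalar') == 1
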
def reconstruct(args):
    # get context
    ctx = get_extension_context(args.context)
    nn.set_default_context(ctx)
    logger.setLevel(logging.ERROR)  # to suppress minor messages

    config = read_yaml(args.config)
    dataset_params = config.dataset_params
    model_params = config.model_params

    if args.detailed:
        vis_params = config.visualizer_params
        visualizer = Visualizer(**vis_params)

    if not args.params:
        assert "log_dir" in config, \
            "no log_dir found in config, therefore failed to locate pretrained parameters."
        param_file = os.path.join(config.log_dir, config.saved_parameters)
    else:
        param_file = args.params
    nn.load_parameters(param_file)

    bs, h, w, c = [1] + dataset_params.frame_shape
    source = nn.Variable((bs, c, h, w))
    driving_initial = nn.Variable((bs, c, h, w))
    driving = nn.Variable((bs, c, h, w))

    with nn.parameter_scope("kp_detector"):
        kp_source = detect_keypoint(source,
                                    **model_params.kp_detector_params,
                                    **model_params.common_params,
                                    test=True, comm=False)
    persistent_all(kp_source)

    with nn.parameter_scope("kp_detector"):
        kp_driving = detect_keypoint(driving,
                                     **model_params.kp_detector_params,
                                     **model_params.common_params,
                                     test=True, comm=False)
    persistent_all(kp_driving)

    with nn.parameter_scope("generator"):
        generated = occlusion_aware_generator(source,
                                              kp_source=unlink_all(kp_source),
                                              kp_driving=kp_driving,
                                              **model_params.generator_params,
                                              **model_params.common_params,
                                              test=True, comm=False)

    if not args.full and 'sparse_deformed' in generated:
        del generated['sparse_deformed']  # remove needless info
    persistent_all(generated)

    generated['kp_driving'] = kp_driving
    generated['kp_source'] = kp_source

    # generated contains these values;
    # 'mask': <Variable((bs, num_kp+1, h/4, w/4))  when scale_factor=0.25
    # 'sparse_deformed': <Variable((bs, num_kp+1, num_channel, h/4, w/4))  # (bs, num_kp + 1, c, h, w)
    # 'occlusion_map': <Variable((bs, 1, h/4, w/4))
    # 'deformed': <Variable((bs, c, h, w))
    # 'prediction': <Variable((bs, c, h, w))

    mode = "reconstruction"
    if "log_dir" in config:
        result_dir = os.path.join(args.out_dir,
                                  os.path.basename(config.log_dir), f"{mode}")
    else:
        result_dir = os.path.join(args.out_dir, "test_result", f"{mode}")

    # create an empty directory to save generated results
    _ = nm.Monitor(result_dir)
    if args.eval:
        os.makedirs(os.path.join(result_dir, "png"), exist_ok=True)

    # load the header images.
    header = imread("imgs/header_combined.png", channel_first=True)

    filenames = sorted(glob.glob(os.path.join(
        dataset_params.root_dir, "test", "*")))
    recon_loss_list = list()

    for filename in tqdm(filenames):
        # repeat until all the test data is used
        driving_video = read_video(
            filename, dataset_params.frame_shape)  # (#frames, h, w, 3)
        driving_video = np.transpose(
            driving_video, (0, 3, 1, 2))  # (#frames, 3, h, w)

        generated_images = list()
        source_img = driving_video[0]

        source.d = np.expand_dims(source_img, 0)
        driving_initial.d = driving_video[0]

        # compute these in advance and reuse
        nn.forward_all(
            [kp_source["value"], kp_source["jacobian"]], clear_buffer=True)

        num_of_driving_frames = driving_video.shape[0]

        for frame_idx in tqdm(range(num_of_driving_frames)):
            driving.d = driving_video[frame_idx]
            nn.forward_all([generated["prediction"],
                            generated["deformed"]], clear_buffer=True)

            if args.detailed:
                # visualize source w/kp, driving w/kp, deformed source,
                # generated w/kp, generated image, occlusion map
                visualization = visualizer.visualize(
                    source=source.d, driving=driving.d, out=generated)
                if args.full:
                    visualization = reshape_result(visualization)  # (H, W, C)
                combined_image = visualization.transpose(2, 0, 1)  # (C, H, W)

            elif args.only_generated:
                combined_image = np.clip(
                    generated["prediction"].d[0], 0.0, 1.0)
                combined_image = (
                    255 * combined_image).astype(np.uint8)  # (C, H, W)

            else:
                # visualize source, driving, and generated image
                driving_fake = np.concatenate(
                    [np.clip(driving.d[0], 0.0, 1.0),
                     np.clip(generated["prediction"].d[0], 0.0, 1.0)], axis=2)
                header_source = np.concatenate(
                    [np.clip(header / 255., 0.0, 1.0),
                     np.clip(source.d[0], 0.0, 1.0)], axis=2)
                combined_image = np.concatenate(
                    [header_source, driving_fake], axis=1)
                combined_image = (255 * combined_image).astype(np.uint8)

            generated_images.append(combined_image)
            # compute L1 distance per frame.
            recon_loss_list.append(
                np.mean(np.abs(generated["prediction"].d[0] - driving.d[0])))

        # post process only for reconstruction evaluation.
        if args.eval:
            # crop the generated image region only.
            if args.only_generated:
                eval_images = generated_images
            elif args.full:
                eval_images = [_[:, :h, 4*w:5*w] for _ in generated_images]
            elif args.detailed:
                assert generated_images[0].shape == (c, h, 5*w)
                eval_images = [_[:, :, 3*w:4*w] for _ in generated_images]
            else:
                eval_images = [_[:, h:, w:] for _ in generated_images]
            # place them horizontally and save for evaluation.
            image_for_eval = np.concatenate(
                eval_images, axis=2).transpose(1, 2, 0)
            imsave(os.path.join(result_dir, "png",
                                f"{os.path.basename(filename)}.png"),
                   image_for_eval)

        # once each video is generated, save it.
        output_filename = f"{os.path.splitext(os.path.basename(filename))[0]}.mp4"
        if args.output_png:
            monitor_vis = nm.MonitorImage(output_filename,
                                          nm.Monitor(result_dir),
                                          interval=1, num_images=1,
                                          normalize_method=lambda x: x)
            for frame_idx, img in enumerate(generated_images):
                monitor_vis.add(frame_idx, img)
        else:
            generated_images = [_.transpose(1, 2, 0)
                                for _ in generated_images]
            # you might need to change ffmpeg_params according to your environment.
            mimsave(f'{os.path.join(result_dir, output_filename)}',
                    generated_images, fps=args.fps,
                    ffmpeg_params=["-pix_fmt", "yuv420p",
                                   "-vcodec", "libx264",
                                   "-f", "mp4",
                                   "-q", "0"])

    print(f"Reconstruction loss: {np.mean(recon_loss_list)}")
    return

def run(self, epoch):
    # Update epoch counter of lr scheduler
    self.learning_rate_scheduler.set_epoch(epoch)

    # Training loop
    pbar = trange(self.num_iter_per_epoch,
                  desc='Train at epoch %d' % epoch,
                  disable=self.comm.rank > 0)
    # pbar = range(self.num_iter_per_epoch)
    self.reporter.reset(epoch, pbar)
    for i in pbar:
        # nvtx.range_push("train_{}".format(i))
        # Update learning rate
        lr = self.learning_rate_scheduler.get_lr_and_update()
        self.solver.set_learning_rate(lr * self.lr_factor)

        # Wait here until back-prop has finished
        self.stream_event_handler.event_synchronize()

        next_image, next_label = self.data.next()
        self.model.image.data = next_image
        self.model.label.data = next_label

        # Sample mixup ratios
        if self.mixup is not None:
            self.mixup.reset_mixup_ratio()

        # Synchronizing null-stream and host here makes update faster. I'm not sure why.
        self.stream_event_handler.default_stream_synchronize()

        self.reporter(lr)
        nn.forward_all([self.model.loss, self.model.error],
                       clear_no_need_grad=True)
        # self.model.loss.forward(clear_no_need_grad=True, function_pre_hook=None)

        comm_callback = self.comm.get_all_reduce_callback()
        self.solver.zero_grad()
        self.model.loss.backward(self.loss_scaling,
                                 clear_buffer=True,
                                 communicator_callbacks=comm_callback)

        # Record an event on the default stream
        self.stream_event_handler.add_default_stream_event()

        # Update
        self.solver.weight_decay(self.weight_decay)
        self.solver.update()
        self.reporter.update()

        # if i == 10:
        #     import sys
        # nvtx.range_pop()

    self.reporter(lr, force=True)
    self.reporter.on_epoch_end()

def train(args, train_dataset, tokenizer):
    """ Train the model """

    # Load the pretrained model
    nn.load_parameters(args.pretrained_model)
    # Drop the final layer for task-specific fine-tuning
    nn.parameter.pop_parameter('affine_seq_class/affine/W')
    nn.parameter.pop_parameter('affine_seq_class/affine/b')

    train_dataloader = data_iterator(
        train_dataset, batch_size=args.train_batch_size)

    global_step = 0
    train_loss = 0.0
    model = BertForSequenceClassification()

    input_ids = nn.Variable((args.train_batch_size, args.max_seq_length))
    attention_mask = nn.Variable((args.train_batch_size, args.max_seq_length))
    token_type_ids = nn.Variable((args.train_batch_size, args.max_seq_length))
    labels = nn.Variable((args.train_batch_size, ))

    input_ids_eval = nn.Variable((args.eval_batch_size, args.max_seq_length))
    attention_mask_eval = nn.Variable(
        (args.eval_batch_size, args.max_seq_length))
    token_type_ids_eval = nn.Variable(
        (args.eval_batch_size, args.max_seq_length))
    labels_eval = nn.Variable((args.eval_batch_size, ))

    activation = F.gelu
    if args.activation == 'relu':
        activation = F.relu

    loss, _, train_error = model(
        args,
        input_ids=input_ids,
        attention_mask=attention_mask,
        token_type_ids=token_type_ids,
        labels=labels,
        num_labels=args.num_labels,
        vocab_size=args.vocab_size,
        num_embed_dim=args.num_embed_dim,
        num_pos_ids=args.num_position_ids,
        num_attention_layers=args.num_attention_layers,
        num_attention_embed_dim=args.num_attention_embed_dim,
        num_attention_heads=args.num_attention_heads,
        num_attention_dim_feedforward=args.num_attention_dim_feedforward,
        attention_activation=activation,
        pool_outmap=args.num_pool_outmap,
        embed_dropout_prob=args.embed_dropout,
        attention_dropout_prob=args.attention_dropout,
        dropout_prob=args.last_dropout,
        test=False)
    loss.persistent = True

    if args.solver == 'Adam':
        solver = S.Adam(args.learning_rate, eps=args.adam_epsilon)
    else:
        solver = S.AdamW(args.learning_rate, eps=args.adam_epsilon)
    solver.set_parameters(nn.get_parameters())

    monitor = Monitor(args.output_dir)
    monitor_loss = MonitorSeries("Training Loss", monitor, interval=10)
    monitor_eloss = MonitorSeries("Evaluation Loss", monitor, interval=10)
    monitor_train_error = MonitorSeries(
        "Training Error Rate", monitor, interval=10)
    monitor_lr = MonitorSeries("Learning Rate", monitor, interval=10)

    total_steps = train_dataloader.size // args.train_batch_size
    var_linear = total_steps * args.num_train_epochs
    var_warmup = total_steps * (args.num_train_epochs - 1)
    for epoch in range(args.num_train_epochs):
        logger.info("Starting Epoch %d out of %d",
                    epoch + 1, args.num_train_epochs)
        for it in range(total_steps):
            batch = train_dataloader.next()
            input_ids.d = batch[0]
            attention_mask.d = batch[1]
            token_type_ids.d = batch[2]
            labels.d = batch[3]

            learning_rate_linear = lr_linear(global_step, var_linear)
            learning_rate = args.learning_rate * learning_rate_linear
            if epoch == 0:
                learning_rate = args.learning_rate * \
                    (global_step / total_steps)
            if epoch > 0:
                learning_rate_linear = lr_linear(
                    (global_step - total_steps), var_warmup)
                learning_rate = args.learning_rate * learning_rate_linear

            solver.zero_grad()
            nn.forward_all([loss, train_error], clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            solver.weight_decay(args.weight_decay)
            solver.clip_grad_by_norm(args.max_grad_norm)
            solver.set_learning_rate(learning_rate)
            solver.update()

            monitor_loss.add(
                (train_dataloader.size // args.train_batch_size) * epoch + it,
                loss.d.copy())
            monitor_train_error.add(
                (train_dataloader.size // args.train_batch_size) * epoch + it,
                train_error.d.copy())
            monitor_lr.add(global_step, learning_rate)
            global_step += 1
            train_loss += F.mean(loss.data)

    eval_task_names = (
        "mnli", "mnli-mm") if args.task_name == "mnli" else (args.task_name,)
    eval_outputs_dirs = (args.output_dir, args.output_dir + '-MM') \
        if args.task_name == "mnli" else (args.output_dir,)

    results = {}
    for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
        print(eval_task)
        eval_dataset = BERTDataSource(
            args, tokenizer, evaluate=True, shuffle=False)
        if not os.path.exists(eval_output_dir):
            os.makedirs(eval_output_dir)
        eval_dataloader = data_iterator(
            eval_dataset, batch_size=args.eval_batch_size)
        total_eval_steps = eval_dataloader.size // args.eval_batch_size
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None

        tmp_eval_loss, logits, eval_error = model(
            args,
            input_ids=input_ids_eval,
            attention_mask=attention_mask_eval,
            token_type_ids=token_type_ids_eval,
            labels=labels_eval,
            num_labels=args.num_labels,
            vocab_size=args.vocab_size,
            num_embed_dim=args.num_embed_dim,
            num_pos_ids=args.num_position_ids,
            num_attention_layers=args.num_attention_layers,
            num_attention_embed_dim=args.num_attention_embed_dim,
            num_attention_heads=args.num_attention_heads,
            num_attention_dim_feedforward=args.num_attention_dim_feedforward,
            attention_activation=activation,
            pool_outmap=args.num_pool_outmap,
            embed_dropout_prob=args.embed_dropout,
            attention_dropout_prob=args.attention_dropout,
            dropout_prob=args.last_dropout,
            test=True)
        tmp_eval_loss.persistent = True
        eval_loss += F.mean(tmp_eval_loss)

        for it in range(total_eval_steps):
            print(it, " ", total_eval_steps)
            batch_eval = eval_dataloader.next()
            input_ids_eval.d = batch_eval[0]
            attention_mask_eval.d = batch_eval[1]
            token_type_ids_eval.d = batch_eval[2]
            labels_eval.d = batch_eval[3]
            nb_eval_steps += 1
            eval_loss.forward()
            monitor_eloss.add(it, eval_loss.d.copy())
            if preds is None:
                preds = logits.d.copy()
                out_label_ids = labels_eval.d.copy()
            else:
                preds = np.append(preds, logits.d.copy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids, labels_eval.d.copy(), axis=0)

        eval_loss = eval_loss.d / nb_eval_steps
        if args.output_mode == "classification":
            preds = np.argmax(preds, axis=1)
        elif args.output_mode == "regression":
            preds = np.squeeze(preds)
        result = compute_metrics(eval_task, preds, out_label_ids)
        results.update(result)

        output_eval_file = os.path.join(eval_output_dir, "eval_results.txt")
        with open(output_eval_file, "a") as writer:
            logger.info("***** Evaluation results {} *****".format(""))
            for key in sorted(result.keys()):
                logger.info("%d %s = %s\n", epoch + 1, key, str(result[key]))
                writer.write("%d %s = %s\n" %
                             (epoch + 1, key, str(result[key])))

    print("results", results)
    return results

def test_graph_model(model, seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    nn.clear_parameters()
    if model == "mlp":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
            z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
            z4 = PF.affine(z2, 5)
    elif model == "recurrent":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 4)
            z2 = F.relu(z, inplace=True)
        h = z2
        for _ in range(2):
            with nn.parameter_scope('fc2'):
                h = PF.affine(h, 4)
                h = F.relu(h, inplace=True)
        with nn.parameter_scope('fc3'):
            z3 = PF.affine(h, 5)
            z4 = PF.affine(h, 5)
    elif model == "convolution":
        with nn.parameter_scope('conv1'):
            z = PF.convolution(x, 3, (2, 2))
            z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
            z4 = PF.affine(z2, 5)
    else:
        raise ValueError()
    l1 = F.softmax_cross_entropy(z3, t, 1)
    L1 = F.mean(l1)
    l2 = F.softmax_cross_entropy(z4, t, 1)
    L2 = F.mean(l2)

    # Forwardprop
    nn.forward_all([L1, L2])

    parameters = nn.get_parameters()

    # Backprop for L1
    # Diff should be initialized since they are always accumulated
    x.grad.zero()
    initialize_grad(parameters)
    L1.backward(clear_buffer=True)
    inputs = [x] + list(parameters.values())

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L1, inputs, 1e-3, False)
    assert_allclose(ngrad, agrad, atol=1.05e-2)

    # Backprop for L2
    # Diff should be initialized since they are always accumulated
    x.grad.zero()
    initialize_grad(parameters)
    L2.backward(clear_buffer=True)
    inputs = [x] + list(parameters.values())

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L2, inputs, 1e-3, False)
    assert_allclose(ngrad, agrad, atol=1.05e-2)

def colorize_video(conf, ref):
    '''
    Colorize the input frames and save the output as colorized frames and video
    Args:
        conf: conf object
        ref: reference image
    '''
    def load_weights():
        nn.load_parameters(f'{conf.checkpoint.path}/{conf.checkpoint.vgg19}')
        nn.load_parameters(
            f'{conf.checkpoint.path}/{conf.checkpoint.non_local}')
        nn.load_parameters(
            f'{conf.checkpoint.path}/{conf.checkpoint.colornet}')

    reference_file = os.path.join(conf.data.ref_path, ref)
    output_path = os.path.join(conf.data.output_path,
                               'out_' + ref.split(".")[0])
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    filenames = [
        f for f in os.listdir(conf.data.input_path)
        if os.path.isfile(os.path.join(conf.data.input_path, f))
    ]
    print(f"processing the folder: {conf.data.input_path}")

    # sort the frames in the same order as in the video
    filenames.sort(key=lambda f: int("".join(filter(str.isdigit, f) or -1)))

    # read the reference name from the reference input; otherwise use the
    # first frame, assuming it's colorized
    ref_name = conf.data.input_path + \
        filenames[0] if conf.data.frame_propagation else reference_file

    i_last_lab_predict = None

    # Load the weights
    nn.clear_parameters()
    load_weights()

    print(f"reference = {ref_name}")

    # Preprocess reference image
    frame_ref = np.array(Image.open(ref_name))
    ib_lab_large = nn.Variable.from_numpy_array(
        transform(frame_ref, conf.data.image_size))

    for iter_num, frame_name in enumerate(filenames):
        print("input =", frame_name)
        frame = Image.open(os.path.join(conf.data.input_path, frame_name))
        ia_lab_large = nn.Variable.from_numpy_array(
            transform(np.array(frame), conf.data.image_size))
        ia_lab, ib_lab = interpolate_nn(
            ia_lab_large, ib_lab_large, scale=(0.5, 0.5))
        ia_l = ia_lab[:, 0:1, :, :]
        if i_last_lab_predict is None:
            if conf.data.frame_propagation:
                i_last_lab_predict = ib_lab
            else:
                i_last_lab_predict = nn.Variable(ia_lab.shape)

        i_reference_l = ib_lab[:, 0:1, :, :]
        i_reference_ab = ib_lab[:, 1:3, :, :]
        i_reference_rgb = preprocess.lab2rgb(
            F.concatenate(preprocess.uncenter_l(i_reference_l, conf),
                          i_reference_ab, axis=1))
        if type(i_last_lab_predict).__module__ == "numpy":
            i_last_lab_predict_nn = nn.Variable.from_numpy_array(
                i_last_lab_predict)
        else:
            i_last_lab_predict_nn = i_last_lab_predict

        t_start = time.time()
        features_b_nn = vgg_net(i_reference_rgb, pre_process=True, fix=True)
        i_current_ab_predict, _i_current_nonlocal_lab, _features_gray = \
            frame_colorization(ia_lab, ib_lab, i_last_lab_predict_nn,
                               features_b_nn, feature_noise=0,
                               temperature=1e-10)
        # forward the network
        nn.forward_all([i_current_ab_predict])
        i_last_lab_predict = np.concatenate(
            (ia_l.data.data, i_current_ab_predict.data.data), axis=1)
        print(f"Runtime: {time.time() - t_start:.2g} second")
        rgb_frame = get_rgb_frame(ia_lab_large.d, i_current_ab_predict, conf)
        preprocess.save_frames(rgb_frame, output_path, iter_num)

    # save the video
    preprocess.frames2vid(frame_folder=output_path,
                          frame_shape=conf.data.image_size,
                          output_dir=output_path,
                          filename=conf.data.output_video)

def test_graph_rewire(seed, clear_buffer):
    nn.clear_parameters()

    # A. defining graph definition utility
    def mlp2(x, scope):
        with nn.parameter_scope(scope):
            h = F.tanh(PF.affine(x, 10, name='a1'))
            h = F.tanh(PF.affine(h, 10, name='a1'))
            return h

    # A. Create a graph A.
    xa = nn.Variable((2, 10), need_grad=True)
    ya = mlp2(xa, 'a')

    # B. Create a graph B.
    xb = nn.Variable((2, 10), need_grad=True)
    yb1 = mlp2(xb, 'b1')
    yb2 = mlp2(xb, 'b2')

    # C. Create a directly connected graph.
    xc = nn.Variable((2, 10))
    h = mlp2(xc, 'a')
    yc1 = mlp2(h, 'b1')
    yc2 = mlp2(h, 'b2')

    # D. Rewire the graphs A and B.
    xb.rewire_on(ya)

    # E. Check whether the results are the same.
    rng = np.random.RandomState(seed)
    data = rng.randn(*xa.shape)
    xa.d = data
    xc.d = data
    params = nn.get_parameters()

    def zero_grad():
        for p in params.values():
            p.grad.zero()

    def backup_params():
        return [p.g.copy() for p in params.values()]

    # Checking forward
    nn.forward_all([yb1, yb2, yc1, yc2], clear_no_need_grad=clear_buffer)
    assert_allclose(yb1.d, yc1.d)
    assert_allclose(yb2.d, yc2.d)

    # Checking backward for yb1 and yc1
    # for now, the first backward cannot be called with clear_buffer=True
    zero_grad()
    yb1.backward(clear_buffer=False)
    gb = backup_params()
    zero_grad()
    yc1.backward(clear_buffer=False)
    gc = backup_params()
    assert_allclose(xa.d, xc.d)
    for b, c in zip(gb, gc):
        assert_allclose(b, c)

    # Checking backward for yb2 and yc2
    zero_grad()
    yb2.backward(clear_buffer=clear_buffer)
    gb = backup_params()
    zero_grad()
    yc2.backward(clear_buffer=clear_buffer)
    gc = backup_params()
    assert_allclose(xa.d, xc.d)
    for b, c in zip(gb, gc):
        assert_allclose(b, c)

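# A minimal sketch (added for illustration, not from the test suite above) of
# the rewire_on pattern the test exercises: `xb.rewire_on(ya)` splices the
# subgraph that consumes xb onto ya's producer, so the two graphs behave as
# one directly connected network. The helper name `_sketch_rewire_on` is
# illustrative.
import numpy as np
import nnabla as nn
import nnabla.functions as F


def _sketch_rewire_on():
    # graph A: xa -> +1 -> ya
    xa = nn.Variable((2, 3))
    ya = F.add_scalar(xa, 1)
    # graph B: xb -> *2 -> yb
    xb = nn.Variable((2, 3))
    yb = F.mul_scalar(xb, 2)
    # splice B on top of A's output: xa -> +1 -> *2 -> yb
    xb.rewire_on(ya)
    xa.d = np.ones(xa.shape)
    yb.forward()
    assert np.allclose(yb.d, 4)  # (1 + 1) * 2
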
def train(args):
    # Variable size.
    bs, ch, h, w = args.batch_size, 3, args.loadSizeH, args.loadSizeW

    # Determine normalization method.
    if args.norm == "instance":
        norm_layer = functools.partial(PF.instance_normalization,
                                       fix_parameters=True,
                                       no_bias=True, no_scale=True)
    else:
        norm_layer = PF.batch_normalization

    # Prepare Generator and Discriminator based on user config.
    generator = functools.partial(models.generator,
                                  input_nc=args.input_nc,
                                  output_nc=args.output_nc,
                                  ngf=args.ngf, norm_layer=norm_layer,
                                  use_dropout=False, n_blocks=9,
                                  padding_type='reflect')
    discriminator = functools.partial(models.discriminator,
                                      input_nc=args.output_nc,
                                      ndf=args.ndf,
                                      n_layers=args.n_layers_D,
                                      norm_layer=norm_layer,
                                      use_sigmoid=False)

    # --------------------- Computation Graphs --------------------

    # Input images and masks of both source / target domain
    x = nn.Variable([bs, ch, h, w], need_grad=False)
    a = nn.Variable([bs, 1, h, w], need_grad=False)
    y = nn.Variable([bs, ch, h, w], need_grad=False)
    b = nn.Variable([bs, 1, h, w], need_grad=False)

    # Apply image augmentation and get an unlinked variable
    xa_aug = image_augmentation(args, x, a)
    xa_aug.persistent = True
    xa_aug_unlinked = xa_aug.get_unlinked_variable()
    yb_aug = image_augmentation(args, y, b)
    yb_aug.persistent = True
    yb_aug_unlinked = yb_aug.get_unlinked_variable()

    # variables used for Image Pool
    x_history = nn.Variable([bs, ch, h, w])
    a_history = nn.Variable([bs, 1, h, w])
    y_history = nn.Variable([bs, ch, h, w])
    b_history = nn.Variable([bs, 1, h, w])

    # Generate Images (x -> y')
    with nn.parameter_scope("gen_x2y"):
        yb_fake = generator(xa_aug_unlinked)
    yb_fake.persistent = True
    yb_fake_unlinked = yb_fake.get_unlinked_variable()

    # Generate Images (y -> x')
    with nn.parameter_scope("gen_y2x"):
        xa_fake = generator(yb_aug_unlinked)
    xa_fake.persistent = True
    xa_fake_unlinked = xa_fake.get_unlinked_variable()

    # Reconstruct Images (y' -> x)
    with nn.parameter_scope("gen_y2x"):
        xa_recon = generator(yb_fake_unlinked)
    xa_recon.persistent = True

    # Reconstruct Images (x' -> y)
    with nn.parameter_scope("gen_x2y"):
        yb_recon = generator(xa_fake_unlinked)
    yb_recon.persistent = True

    # Use Discriminator on y' and x'
    with nn.parameter_scope("dis_y"):
        d_y_fake = discriminator(yb_fake_unlinked)
    d_y_fake.persistent = True
    with nn.parameter_scope("dis_x"):
        d_x_fake = discriminator(xa_fake_unlinked)
    d_x_fake.persistent = True

    # Use Discriminator on y and x
    with nn.parameter_scope("dis_y"):
        d_y_real = discriminator(yb_aug_unlinked)
    with nn.parameter_scope("dis_x"):
        d_x_real = discriminator(xa_aug_unlinked)

    # Identity Mapping (x -> x)
    with nn.parameter_scope("gen_y2x"):
        xa_idt = generator(xa_aug_unlinked)

    # Identity Mapping (y -> y)
    with nn.parameter_scope("gen_x2y"):
        yb_idt = generator(yb_aug_unlinked)

    # -------------------- Loss --------------------

    # (LS)GAN Loss (for Discriminator)
    loss_dis_x = (loss.lsgan_loss(d_y_fake, False) +
                  loss.lsgan_loss(d_y_real, True)) * 0.5
    loss_dis_y = (loss.lsgan_loss(d_x_fake, False) +
                  loss.lsgan_loss(d_x_real, True)) * 0.5
    loss_dis = loss_dis_x + loss_dis_y

    # Cycle Consistency Loss
    loss_cyc_x = args.lambda_cyc * loss.recon_loss(xa_recon, xa_aug_unlinked)
    loss_cyc_y = args.lambda_cyc * loss.recon_loss(yb_recon, yb_aug_unlinked)
    loss_cyc = loss_cyc_x + loss_cyc_y

    # Identity Mapping Loss
    loss_idt_x = args.lambda_idt * loss.recon_loss(xa_idt, xa_aug_unlinked)
    loss_idt_y = args.lambda_idt * loss.recon_loss(yb_idt, yb_aug_unlinked)
    loss_idt = loss_idt_x + loss_idt_y

    # Context Preserving Loss
    loss_ctx_x = args.lambda_ctx * \
        loss.context_preserving_loss(xa_aug_unlinked, yb_fake_unlinked)
    loss_ctx_y = args.lambda_ctx * \
        loss.context_preserving_loss(yb_aug_unlinked, xa_fake_unlinked)
    loss_ctx = loss_ctx_x + loss_ctx_y

    # (LS)GAN Loss (for Generator)
    d_loss_gen_x = loss.lsgan_loss(d_x_fake, True)
    d_loss_gen_y = loss.lsgan_loss(d_y_fake, True)
    d_loss_gen = d_loss_gen_x + d_loss_gen_y

    # Total Loss for Generator
    loss_gen = loss_cyc + loss_idt + loss_ctx + d_loss_gen

    # --------------------- Solvers --------------------

    # Initial learning rates
    G_lr = args.learning_rate_G
    # D_lr = args.learning_rate_D
    # As opposed to the description in the paper, D_lr is set the same as G_lr.
    D_lr = args.learning_rate_G

    # Define solvers
    solver_gen_x2y = S.Adam(G_lr, args.beta1, args.beta2)
    solver_gen_y2x = S.Adam(G_lr, args.beta1, args.beta2)
    solver_dis_x = S.Adam(D_lr, args.beta1, args.beta2)
    solver_dis_y = S.Adam(D_lr, args.beta1, args.beta2)

    # Set Parameters to each solver
    with nn.parameter_scope("gen_x2y"):
        solver_gen_x2y.set_parameters(nn.get_parameters())
    with nn.parameter_scope("gen_y2x"):
        solver_gen_y2x.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis_x"):
        solver_dis_x.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis_y"):
        solver_dis_y.set_parameters(nn.get_parameters())

    # create convenient functions manipulating Solvers
    def solvers_zero_grad():
        # Zeroing Gradients of all solvers
        solver_gen_x2y.zero_grad()
        solver_gen_y2x.zero_grad()
        solver_dis_x.zero_grad()
        solver_dis_y.zero_grad()

    def solvers_update_parameters(new_D_lr, new_G_lr):
        # Learning rate updater
        solver_gen_x2y.set_learning_rate(new_G_lr)
        solver_gen_y2x.set_learning_rate(new_G_lr)
        solver_dis_x.set_learning_rate(new_D_lr)
        solver_dis_y.set_learning_rate(new_D_lr)

    # -------------------- Data Iterators --------------------
    ds_train_A = insta_gan_data_source(args, train=True,
                                       domain="A", shuffle=True)
    di_train_A = insta_gan_data_iterator(ds_train_A, args.batch_size)
    ds_train_B = insta_gan_data_source(args, train=True,
                                       domain="B", shuffle=True)
    di_train_B = insta_gan_data_iterator(ds_train_B, args.batch_size)

    # -------------------- Monitors --------------------
    monitoring_targets_dis = {
        'discriminator_loss_x': loss_dis_x,
        'discriminator_loss_y': loss_dis_y
    }
    monitors_dis = Monitors(args, monitoring_targets_dis)

    monitoring_targets_gen = {
        'generator_loss_x': d_loss_gen_x,
        'generator_loss_y': d_loss_gen_y,
        'reconstruction_loss_x': loss_cyc_x,
        'reconstruction_loss_y': loss_cyc_y,
        'identity_mapping_loss_x': loss_idt_x,
        'identity_mapping_loss_y': loss_idt_y,
        'content_preserving_loss_x': loss_ctx_x,
        'content_preserving_loss_y': loss_ctx_y
    }
    monitors_gen = Monitors(args, monitoring_targets_gen)

    monitor_time = MonitorTimeElapsed("Training_time",
                                      Monitor(args.monitor_path),
                                      args.log_step)

    # Training loop
    epoch = 0
    n_images = max([ds_train_B.size, ds_train_A.size])
    print("{} images exist.".format(n_images))
    max_iter = args.max_epoch * n_images // args.batch_size
    decay_iter = args.max_epoch - args.lr_decay_start_epoch

    for i in range(max_iter):
        if i % (n_images // args.batch_size) == 0 and i > 0:
            # Learning Rate Decay
            epoch += 1
            print("epoch {}".format(epoch))
            if epoch >= args.lr_decay_start_epoch:
                new_D_lr = D_lr * \
                    (1.0 - max(0, epoch - args.lr_decay_start_epoch - 1) /
                     float(decay_iter - 1))
                new_G_lr = G_lr * \
                    (1.0 - max(0, epoch - args.lr_decay_start_epoch - 1) /
                     float(decay_iter - 1))
                solvers_update_parameters(new_D_lr, new_G_lr)
                print("Current learning rate for Discriminator: {}".format(
                    solver_dis_x.learning_rate()))
                print("Current learning rate for Generator: {}".format(
                    solver_gen_x2y.learning_rate()))

        # Get data
        x_data, a_data = di_train_A.next()
        y_data, b_data = di_train_B.next()
        x.d, a.d = x_data, a_data
        y.d, b.d = y_data, b_data

        solvers_zero_grad()

        # Image Augmentation
        nn.forward_all([xa_aug, yb_aug], clear_buffer=True)

        # Generate fake images
        nn.forward_all([xa_fake, yb_fake], clear_no_need_grad=True)

        # -------- Train Discriminator --------
        loss_dis.forward(clear_no_need_grad=True)
        monitors_dis.add(i)
        loss_dis.backward(clear_buffer=True)
        solver_dis_x.update()
        solver_dis_y.update()

        # -------- Train Generators --------
        # since the gradients computed above remain, reset to zero.
        xa_fake_unlinked.grad.zero()
        yb_fake_unlinked.grad.zero()
        solvers_zero_grad()

        loss_gen.forward(clear_no_need_grad=True)
        monitors_gen.add(i)
        monitor_time.add(i)
        loss_gen.backward(clear_buffer=True)
        xa_fake.backward(grad=None, clear_buffer=True)
        yb_fake.backward(grad=None, clear_buffer=True)
        solver_gen_x2y.update()
        solver_gen_y2x.update()

        if i % (n_images // args.batch_size) == 0:
            # save translation results after every epoch.
            save_images(args, i, xa_aug, yb_fake,
                        domain="x", reconstructed=xa_recon)
            save_images(args, i, yb_aug, xa_fake,
                        domain="y", reconstructed=yb_recon)
            # save pretrained parameters
            nn.save_parameters(os.path.join(args.model_save_path,
                                            'params_%06d.h5' % i))

def test_prohibit_clear_data():
    import nnabla.functions as F
    nn.prefer_cached_array(False)
    shape = (2, 3, 4)
    var_np = np.random.rand(*shape)

    # the case of a root variable
    x1 = nn.Variable.from_numpy_array(var_np)
    y1 = F.reshape(x1, (-1,), inplace=True)
    y1 = F.reshape(y1, shape, inplace=True) * 2

    x2 = nn.Variable.from_numpy_array(var_np)
    y2 = F.reshape(x2, (-1,), inplace=False)
    y2 = F.reshape(y2, shape, inplace=False) * 2

    nn.forward_all([y1, y2], clear_buffer=True)

    assert_allclose(x1.d, x2.d)
    assert_allclose(y1.d, y2.d)

    # the case of a persistent variable
    x1 = nn.Variable.from_numpy_array(var_np)
    p_y1 = F.mul_scalar(x1, 2).apply(persistent=True)
    y1 = F.reshape(p_y1, (-1,), inplace=True)
    y1 = F.reshape(y1, shape, inplace=True) * 2

    x2 = nn.Variable.from_numpy_array(var_np)
    p_y2 = F.mul_scalar(x2, 2).apply(persistent=True)
    y2 = F.reshape(p_y2, (-1,), inplace=False)
    y2 = F.reshape(y2, shape, inplace=False) * 2

    nn.forward_all([y1, y2], clear_buffer=True)

    assert_allclose(p_y1.d, p_y2.d)
    assert_allclose(y1.d, y2.d)

    # the case of rewire_on a root variable
    # graph A: x11 -> f_inplace -> y11
    x11 = nn.Variable.from_numpy_array(var_np)
    y11 = F.reshape(x11, (-1,), inplace=True)

    # graph B: x12 -> f_inplace -> mul_scalar -> y12
    x12 = nn.Variable(shape=y11.shape)
    y12 = F.reshape(x12, shape, inplace=True) * 2

    # graph A->B: x11 -> f_inplace -> f_inplace -> mul_scalar -> y12
    x12.rewire_on(y11)

    x2 = nn.Variable.from_numpy_array(var_np)
    y2 = F.reshape(x2, (-1,), inplace=False)
    y2 = F.reshape(y2, shape, inplace=False) * 2

    nn.forward_all([y12, y2], clear_buffer=True)

    assert_allclose(x11.d, x2.d)
    assert_allclose(y12.d, y2.d)

    # the case of rewire_on a persistent variable
    # graph A: x11 -> mul_scalar -> p_x11 -> f_inplace -> y11
    x11 = nn.Variable.from_numpy_array(var_np)
    p_x11 = F.mul_scalar(x11, 2).apply(persistent=True)
    y11 = F.reshape(p_x11, (-1,), inplace=True)

    # graph B: x12 -> f_inplace -> mul_scalar -> y12
    x12 = nn.Variable(shape=y11.shape)
    y12 = F.reshape(x12, shape, inplace=True) * 2

    # graph A->B: ... -> p_x11 -> f_inplace -> f_inplace -> mul_scalar -> y12
    x12.rewire_on(y11)

    x2 = nn.Variable.from_numpy_array(var_np)
    p_x2 = F.mul_scalar(x2, 2).apply(persistent=True)
    y2 = F.reshape(p_x2, (-1,), inplace=False)
    y2 = F.reshape(y2, shape, inplace=False) * 2

    nn.forward_all([y12, y2], clear_buffer=True)

    assert_allclose(p_x11.d, p_x2.d)
    assert_allclose(y12.d, y2.d)

def animate(args):
    # get context
    ctx = get_extension_context(args.context)
    nn.set_default_context(ctx)
    logger.setLevel(logging.ERROR)  # to suppress minor messages

    if not args.config:
        assert not args.params, \
            "a pretrained weights file is given, but the corresponding config file is not. Please give both."
        download_provided_file(
            "https://nnabla.org/pretrained-models/nnabla-examples/GANs/first-order-model/voxceleb_trained_info.yaml")
        args.config = 'voxceleb_trained_info.yaml'
        download_provided_file(
            "https://nnabla.org/pretrained-models/nnabla-examples/GANs/first-order-model/pretrained_fomm_params.h5")

    config = read_yaml(args.config)
    dataset_params = config.dataset_params
    model_params = config.model_params

    if args.detailed:
        vis_params = config.visualizer_params
        visualizer = Visualizer(**vis_params)

    if not args.params:
        assert "log_dir" in config, \
            "no log_dir found in config, therefore failed to locate pretrained parameters."
        param_file = os.path.join(config.log_dir, config.saved_parameters)
    else:
        param_file = args.params
    print(f"Loading {param_file} for image animation...")
    nn.load_parameters(param_file)

    bs, h, w, c = [1] + dataset_params.frame_shape
    source = nn.Variable((bs, c, h, w))
    driving_initial = nn.Variable((bs, c, h, w))
    driving = nn.Variable((bs, c, h, w))

    filename = args.driving

    # repeat until all the test data is used
    driving_video = read_video(
        filename, dataset_params.frame_shape)  # (#frames, h, w, 3)
    driving_video = np.transpose(
        driving_video, (0, 3, 1, 2))  # (#frames, 3, h, w)

    source_img = imread(args.source, channel_first=True,
                        size=(256, 256)) / 255.
    source_img = source_img[:3]

    source.d = np.expand_dims(source_img, 0)
    driving_initial.d = driving_video[0][:3, ]

    with nn.parameter_scope("kp_detector"):
        kp_source = detect_keypoint(source,
                                    **model_params.kp_detector_params,
                                    **model_params.common_params,
                                    test=True, comm=False)
    persistent_all(kp_source)

    with nn.parameter_scope("kp_detector"):
        kp_driving_initial = detect_keypoint(driving_initial,
                                             **model_params.kp_detector_params,
                                             **model_params.common_params,
                                             test=True, comm=False)
    persistent_all(kp_driving_initial)

    with nn.parameter_scope("kp_detector"):
        kp_driving = detect_keypoint(driving,
                                     **model_params.kp_detector_params,
                                     **model_params.common_params,
                                     test=True, comm=False)
    persistent_all(kp_driving)

    if args.adapt_movement_scale:
        nn.forward_all([kp_source["value"],
                        kp_source["jacobian"],
                        kp_driving_initial["value"],
                        kp_driving_initial["jacobian"]])
        source_area = ConvexHull(kp_source['value'].d[0]).volume
        driving_area = ConvexHull(kp_driving_initial['value'].d[0]).volume
        adapt_movement_scale = np.sqrt(source_area) / np.sqrt(driving_area)
    else:
        adapt_movement_scale = 1

    kp_norm = adjust_kp(kp_source=unlink_all(kp_source),
                        kp_driving=kp_driving,
                        kp_driving_initial=unlink_all(kp_driving_initial),
                        adapt_movement_scale=adapt_movement_scale,
                        use_relative_movement=args.unuse_relative_movement,
                        use_relative_jacobian=args.unuse_relative_jacobian)
    persistent_all(kp_norm)

    with nn.parameter_scope("generator"):
        generated = occlusion_aware_generator(source,
                                              kp_source=unlink_all(kp_source),
                                              kp_driving=kp_norm,
                                              **model_params.generator_params,
                                              **model_params.common_params,
                                              test=True, comm=False)

    if not args.full and 'sparse_deformed' in generated:
        del generated['sparse_deformed']  # remove needless info
    persistent_all(generated)

    generated['kp_driving'] = kp_driving
    generated['kp_source'] = kp_source
    generated['kp_norm'] = kp_norm

    # generated contains these values;
    # 'mask': <Variable((bs, num_kp+1, h/4, w/4))  when scale_factor=0.25
    # 'sparse_deformed': <Variable((bs, num_kp+1, num_channel, h/4, w/4))  # (bs, num_kp + 1, c, h, w)
    # 'occlusion_map': <Variable((bs, 1, h/4, w/4))
    # 'deformed': <Variable((bs, c, h, w))
    # 'prediction': <Variable((bs, c, h, w))

    mode = "arbitrary"
    if "log_dir" in config:
        result_dir = os.path.join(args.out_dir,
                                  os.path.basename(config.log_dir), f"{mode}")
    else:
        result_dir = os.path.join(args.out_dir, "test_result", f"{mode}")

    # create an empty directory to save generated results
    _ = nm.Monitor(result_dir)

    # load the header images.
    header = imread("imgs/header_combined.png", channel_first=True)
    generated_images = list()

    # compute these in advance and reuse
    nn.forward_all([kp_source["value"], kp_source["jacobian"]],
                   clear_buffer=True)
    nn.forward_all([kp_driving_initial["value"],
                    kp_driving_initial["jacobian"]],
                   clear_buffer=True)

    num_of_driving_frames = driving_video.shape[0]

    for frame_idx in tqdm(range(num_of_driving_frames)):
        driving.d = driving_video[frame_idx][:3, ]
        nn.forward_all([generated["prediction"], generated["deformed"]],
                       clear_buffer=True)

        if args.detailed:
            # visualize source w/kp, driving w/kp, deformed source,
            # generated w/kp, generated image, occlusion map
            visualization = visualizer.visualize(source=source.d,
                                                 driving=driving.d,
                                                 out=generated)
            if args.full:
                visualization = reshape_result(visualization)  # (H, W, C)
            combined_image = visualization.transpose(2, 0, 1)  # (C, H, W)

        elif args.only_generated:
            combined_image = np.clip(generated["prediction"].d[0], 0.0, 1.0)
            combined_image = (255 * combined_image).astype(np.uint8)  # (C, H, W)

        else:
            # visualize source, driving, and generated image
            driving_fake = np.concatenate([
                np.clip(driving.d[0], 0.0, 1.0),
                np.clip(generated["prediction"].d[0], 0.0, 1.0)
            ], axis=2)
            header_source = np.concatenate([
                np.clip(header / 255., 0.0, 1.0),
                np.clip(source.d[0], 0.0, 1.0)
            ], axis=2)
            combined_image = np.concatenate([header_source, driving_fake],
                                            axis=1)
            combined_image = (255 * combined_image).astype(np.uint8)

        generated_images.append(combined_image)

    # once the video is generated, save it.
    output_filename = f"{os.path.splitext(os.path.basename(filename))[0]}.mp4"
    output_filename = f"{os.path.basename(args.source)}_by_{output_filename}"
    output_filename = output_filename.replace("#", "_")
    if args.output_png:
        monitor_vis = nm.MonitorImage(output_filename,
                                      nm.Monitor(result_dir),
                                      interval=1, num_images=1,
                                      normalize_method=lambda x: x)
        for frame_idx, img in enumerate(generated_images):
            monitor_vis.add(frame_idx, img)
    else:
        generated_images = [_.transpose(1, 2, 0) for _ in generated_images]
        # you might need to change ffmpeg_params according to your environment.
        mimsave(f'{os.path.join(result_dir, output_filename)}',
                generated_images, fps=args.fps,
                ffmpeg_params=["-pix_fmt", "yuv420p",
                               "-vcodec", "libx264",
                               "-f", "mp4",
                               "-q", "0"])
    return

def CNN_run(args, model):
    data_iterator_train, data_iterator_valid, num_class = \
        get_data_iterator_and_num_class(args)

    channels, image_height, image_width = 3, args.height, args.width
    batch_size = args.batch_size
    initial_model_lr = args.model_lr

    one_epoch = data_iterator_train.size // batch_size
    max_iter = args.epoch * one_epoch
    val_iter = data_iterator_valid.size // batch_size

    # Create monitor.
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=100)
    monitor_err = MonitorSeries("Training error", monitor, interval=100)
    monitor_vloss = MonitorSeries("Test loss", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=100)

    # prepare variables and graph used for validation
    image_valid = nn.Variable(
        (batch_size, channels, image_height, image_width))
    label_valid = nn.Variable((batch_size, 1))
    input_image_valid = {"image": image_valid, "label": label_valid}

    pred_valid = construct_networks(
        args, image_valid, model, num_class, test=True)
    pred_valid.persistent = True
    loss_valid = loss_function(pred_valid, label_valid)
    top_1e_valid = F.mean(F.top_n_error(pred_valid, label_valid))

    # prepare variables and graph used for training
    image_train = nn.Variable(
        (batch_size, channels, image_height, image_width))
    label_train = nn.Variable((batch_size, 1))
    input_image_train = {"image": image_train, "label": label_train}

    pred_train = construct_networks(
        args, image_train, model, num_class, test=False)
    loss_train = loss_function(pred_train, label_train)
    top_1e_train = F.mean(F.top_n_error(pred_train, label_train))

    # prepare solvers
    solver = S.Momentum(initial_model_lr)
    solver.set_parameters(nn.get_parameters())

    # Training loop
    for i in range(max_iter):
        image, label = data_iterator_train.next()
        input_image_train["image"].d = image
        input_image_train["label"].d = label
        nn.forward_all([loss_train, top_1e_train], clear_no_need_grad=True)

        monitor_loss.add(i, loss_train.d.copy())
        monitor_err.add(i, top_1e_train.d.copy())

        if args.lr_control_model:
            new_lr = learning_rate_scheduler(i, max_iter,
                                             initial_model_lr, 0)
            solver.set_learning_rate(new_lr)

        solver.zero_grad()
        loss_train.backward(clear_buffer=True)

        if args.with_grad_clip_model:
            for k, v in nn.get_parameters().items():
                v.grad.copy_from(
                    F.clip_by_norm(v.grad, args.grad_clip_value_model))

        # update parameters
        solver.weight_decay(args.weight_decay_model)
        solver.update()

        if i % args.model_save_interval == 0:
            # Validation during training.
            ve = 0.
            vloss = 0.
            for j in range(val_iter):
                v_image, v_label = data_iterator_valid.next()
                input_image_valid["image"].d = v_image
                input_image_valid["label"].d = v_label
                nn.forward_all([loss_valid, top_1e_valid], clear_buffer=True)
                vloss += loss_valid.d.copy()
                ve += top_1e_valid.d.copy()
            ve /= val_iter
            vloss /= val_iter
            monitor_vloss.add(i, vloss)
            monitor_verr.add(i, ve)
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_{}.h5'.format(i)))

    # Final validation after training.
    ve = 0.
    vloss = 0.
    for j in range(val_iter):
        v_image, v_label = data_iterator_valid.next()
        input_image_valid["image"].d = v_image
        input_image_valid["label"].d = v_label
        nn.forward_all([loss_valid, top_1e_valid], clear_buffer=True)
        vloss += loss_valid.d.copy()
        ve += top_1e_valid.d.copy()
    ve /= val_iter
    vloss /= val_iter
    monitor_vloss.add(i, vloss)
    monitor_verr.add(i, ve)
    nn.save_parameters(
        os.path.join(args.model_save_path, 'params_{}.h5'.format(i)))
    return