def train(args): """ Main script. """ # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Create CNN network for both training and testing. # TRAIN # Fake path z = nn.Variable([args.batch_size, 1000, 1, 1]) fake = generator(z, maxh=1024) fake.persistent = True # Not to clear at backward pred_fake = discriminator(fake) loss_gen = F.mean( F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape))) fake_dis = fake.unlinked() pred_fake_dis = discriminator(fake_dis) loss_dis = F.mean( F.sigmoid_cross_entropy(pred_fake_dis, F.constant(0, pred_fake_dis.shape))) # Real path x = nn.Variable([args.batch_size, 1, 28, 28]) pred_real = discriminator(x) loss_dis += F.mean( F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape))) # Create Solver. solver_gen = S.Adam(args.learning_rate, beta1=0.5) solver_dis = S.Adam(args.learning_rate, beta1=0.5) with nn.parameter_scope("gen"): solver_gen.set_parameters(nn.get_parameters()) with nn.parameter_scope("dis"): solver_dis.set_parameters(nn.get_parameters()) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10) monitor_loss_dis = M.MonitorSeries("Discriminator loss", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100) monitor_fake = M.MonitorImageTile("Fake images", monitor, normalize_method=lambda x: x + 1 / 2.) #data = data_iterator_mnist(args.batch_size, True) data = iterator.simple_data_iterator(load_kanji_data(), args.batch_size, True) # Training loop. for i in range(args.max_iter): if i % args.model_save_interval == 0: with nn.parameter_scope("gen"): nn.save_parameters( os.path.join(args.model_save_path, "generator_param_%06d.h5" % i)) with nn.parameter_scope("dis"): nn.save_parameters( os.path.join(args.model_save_path, "discriminator_param_%06d.h5" % i)) # Training forward image, _ = data.next() x.d = image / 255. - 0.5 # [0, 255] to [-1, 1] z.d = np.random.randn(*z.shape) # Generator update. solver_gen.zero_grad() loss_gen.forward(clear_no_need_grad=True) loss_gen.backward(clear_buffer=True) solver_gen.weight_decay(args.weight_decay) solver_gen.update() monitor_fake.add(i, fake) monitor_loss_gen.add(i, loss_gen.d.copy()) # Discriminator update. solver_dis.zero_grad() loss_dis.forward(clear_no_need_grad=True) loss_dis.backward(clear_buffer=True) solver_dis.weight_decay(args.weight_decay) solver_dis.update() monitor_loss_dis.add(i, loss_dis.d.copy()) monitor_time.add(i) with nn.parameter_scope("gen"): nn.save_parameters( os.path.join(args.model_save_path, "generator_param_%06d.h5" % i)) with nn.parameter_scope("dis"): nn.save_parameters( os.path.join(args.model_save_path, "discriminator_param_%06d.h5" % i))
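# Note on the graph split used in train() above: the discriminator loss is built
# on an unlinked copy of `fake` so that its backward pass never reaches the
# generator parameters. The snippet below is a minimal, self-contained sketch of
# that pattern (toy layer sizes and names, not taken from the example itself);
# it uses get_unlinked_variable(), the current spelling of the older unlinked().
import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF

nn.clear_parameters()
z = nn.Variable([4, 8])
with nn.parameter_scope("gen"):
    fake = PF.affine(z, 16, name="fc")                     # stand-in generator
fake_dis = fake.get_unlinked_variable(need_grad=True)      # cut the graph here
with nn.parameter_scope("dis"):
    pred_fake = PF.affine(fake_dis, 1, name="fc")          # stand-in discriminator
loss_dis = F.mean(F.sigmoid_cross_entropy(pred_fake, F.constant(0, pred_fake.shape)))

z.d = np.random.randn(*z.shape)
fake.forward()            # fill fake.d, whose array is shared with fake_dis
loss_dis.forward()
loss_dis.backward()
# Gradients stop at fake_dis, so "gen" parameters receive nothing from loss_dis;
# this is why the example keeps one solver per parameter scope.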
def train(args): ## Sub-functions ## --------------------------------- ## Save Models def save_models(epoch_num, cle_disout, fake_disout, losses_gen, losses_dis, losses_ae): # save generator parameter with nn.parameter_scope("gen"): nn.save_parameters(os.path.join(args.model_save_path, 'generator_param_{:04}.h5'.format(epoch_num + 1))) # save discriminator parameter with nn.parameter_scope("dis"): nn.save_parameters(os.path.join(args.model_save_path, 'discriminator_param_{:04}.h5'.format(epoch_num + 1))) # save results np.save(os.path.join(args.model_save_path, 'disout_his_{:04}.npy'.format(epoch_num + 1)), np.array([cle_disout, fake_disout])) np.save(os.path.join(args.model_save_path, 'losses_gen_{:04}.npy'.format(epoch_num + 1)), np.array(losses_gen)) np.save(os.path.join(args.model_save_path, 'losses_dis_{:04}.npy'.format(epoch_num + 1)), np.array(losses_dis)) np.save(os.path.join(args.model_save_path, 'losses_ae_{:04}.npy'.format(epoch_num + 1)), np.array(losses_ae)) ## Load Models def load_models(epoch_num, gen=True, dis=True): # load generator parameter with nn.parameter_scope("gen"): nn.load_parameters(os.path.join(args.model_save_path, 'generator_param_{:04}.h5'.format(args.epoch_from))) # load discriminator parameter with nn.parameter_scope("dis"): nn.load_parameters(os.path.join(args.model_save_path, 'discriminator_param_{:04}.h5'.format(args.epoch_from))) ## Update parameters class updating: def __init__(self): self.scale = 8 if args.halfprec else 1 def __call__(self, solver, loss): solver.zero_grad() # initialize loss.forward(clear_no_need_grad=True) # calculate forward loss.backward(self.scale, clear_buffer=True) # calculate backward solver.scale_grad(1. / self.scale) # scaling solver.weight_decay(args.weight_decay * self.scale) # decay solver.update() # update ## Inital Settings ## --------------------------------- ## Create network # Clear nn.clear_parameters() # Variables noisy = nn.Variable([args.batch_size, 1, 16384], need_grad=False) # Input clean = nn.Variable([args.batch_size, 1, 16384], need_grad=False) # Desire z = nn.Variable([args.batch_size, 1024, 8], need_grad=False) # Random Latent Variable # Generator genout = Generator(noisy, z) # Predicted Clean genout.persistent = True # Not to clear at backward loss_gen = Loss_gen(genout, clean, Discriminator(noisy, genout)) loss_ae = F.mean(F.absolute_error(genout, clean)) # Discriminator fake_dis = genout.get_unlinked_variable(need_grad=True) cle_disout = Discriminator(noisy, clean) fake_disout = Discriminator(noisy, fake_dis) loss_dis = Loss_dis(Discriminator(noisy, clean),Discriminator(noisy, fake_dis)) ## Solver # RMSprop. 
# solver_gen = S.RMSprop(args.learning_rate_gen) # solver_dis = S.RMSprop(args.learning_rate_dis) # Adam solver_gen = S.Adam(args.learning_rate_gen) solver_dis = S.Adam(args.learning_rate_dis) # set parameter with nn.parameter_scope("gen"): solver_gen.set_parameters(nn.get_parameters()) with nn.parameter_scope("dis"): solver_dis.set_parameters(nn.get_parameters()) ## Load data & Create batch clean_data, noisy_data = dt.data_loader() batches = dt.create_batch(clean_data, noisy_data, args.batch_size) del clean_data, noisy_data ## Initial settings for sub-functions fig = figout() disp = display(args.epoch_from, args.epoch, batches.batch_num) upd = updating() ## Train ##---------------------------------------------------- print('== Start Training ==') ## Load "Pre-trained" parameters if args.epoch_from > 0: print(' Retrain parameter from pre-trained network') load_models(args.epoch_from, dis=False) losses_gen = np.load(os.path.join(args.model_save_path, 'losses_gen_{:04}.npy'.format(args.epoch_from))) losses_dis = np.load(os.path.join(args.model_save_path, 'losses_dis_{:04}.npy'.format(args.epoch_from))) losses_ae = np.load(os.path.join(args.model_save_path, 'losses_ae_{:04}.npy'.format(args.epoch_from))) else: losses_gen = [] losses_ae = [] losses_dis = [] ## Create loss loggers point = len(losses_gen) loss_len = (args.epoch - args.epoch_from) * ((batches.batch_num+1)//10) losses_gen = np.append(losses_gen, np.zeros(loss_len)) losses_ae = np.append(losses_ae, np.zeros(loss_len)) losses_dis = np.append(losses_dis, np.zeros(loss_len)) ## Training for i in range(args.epoch_from, args.epoch): print('') print(' =========================================================') print(' Epoch :: {0}/{1}'.format(i + 1, args.epoch)) print(' =========================================================') print('') # Batch iteration for j in range(batches.batch_num): print(' Train (Epoch. {0}) - {1}/{2}'.format(i+1, j+1, batches.batch_num)) ## Batch setting clean.d, noisy.d = batches.next(j) #z.d = np.random.randn(*z.shape) z.d = np.zeros(z.shape) ## Updating upd(solver_gen, loss_gen) # update Generator upd(solver_dis, loss_dis) # update Discriminator ## Display if (j+1) % 10 == 0: # Get result for Display cle_disout.forward() fake_disout.forward() loss_ae.forward(clear_no_need_grad=True) # Display text disp(i, j, loss_gen.d, loss_dis.d, loss_ae.d) # Data logger losses_gen[point] = loss_gen.d losses_ae[point] = loss_ae.d losses_dis[point] = loss_dis.d point = point + 1 # Plot fig.waveform(noisy.d[0,0,:], genout.d[0,0,:], clean.d[0,0,:]) fig.loss(losses_gen[0:point-1], losses_ae[0:point-1], losses_dis[0:point-1]) fig.histogram(cle_disout.d, fake_disout.d) pg.QtGui.QApplication.processEvents() ## Save parameters if ((i+1) % args.model_save_cycle) == 0: save_models(i, cle_disout.d, fake_disout.d, losses_gen[0:point-1], losses_dis[0:point-1], losses_ae[0:point-1]) # save model exporter = pg.exporters.ImageExporter(fig.win.scene()) # Call pg.QtGui.QApplication.processEvents() before exporters!! exporter.export(os.path.join(args.model_save_path, 'plot_{:04}.png'.format(i + 1))) # save fig ## Save parameters (Last) save_models(args.epoch-1, cle_disout.d, fake_disout.d, losses_gen, losses_dis, losses_ae)
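# The `updating` helper above is a static loss-scaling step for half-precision
# training: the gradient at the loss is scaled up before backward and the
# parameter gradients are scaled back down before the update. A self-contained
# sketch of that recipe on a toy graph (layer sizes and hyperparameters here are
# made up, not taken from the example):
import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF
import nnabla.solvers as S

nn.clear_parameters()
x = nn.Variable([8, 4])
y = PF.affine(x, 1, name="toy")
loss = F.mean(y ** 2)

solver = S.Adam(1e-3)
solver.set_parameters(nn.get_parameters())

scale = 8.0                                 # as with args.halfprec above
x.d = np.random.randn(*x.shape)
solver.zero_grad()
loss.forward(clear_no_need_grad=True)
loss.backward(scale, clear_buffer=True)     # gradients are multiplied by `scale`
solver.scale_grad(1.0 / scale)              # undo the scaling on parameter grads
solver.weight_decay(1e-4)
solver.update()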
def _create_variable(v, name, shape, rng): # Create and initialize variables class Variable: pass parameter = v.type == "Parameter" variable_instance = None if parameter: if v.initializer.type == 'Normal': initializer = NormalInitializer(v.initializer.multiplier, rng=rng) elif v.initializer.type == 'NormalAffineHe' or v.initializer.type == 'NormalAffineHeForward': initializer = (lambda shape: NormalInitializer( calc_normal_std_he_forward(shape[0], numpy.prod(shape[1:])), rng=rng)(shape) * v.initializer.multiplier) elif v.initializer.type == 'NormalAffineHeBackward': initializer = (lambda shape: NormalInitializer( calc_normal_std_he_backward(shape[0], numpy.prod(shape[1:])), rng=rng)(shape) * v.initializer.multiplier) elif v.initializer.type == 'NormalAffineGlorot': initializer = (lambda shape: NormalInitializer( calc_normal_std_glorot(shape[0], numpy.prod(shape[1:])), rng=rng)(shape) * v.initializer.multiplier) elif v.initializer.type == 'NormalConvolutionHe' or v.initializer.type == 'NormalConvolutionHeForward': initializer = ( lambda shape: NormalInitializer(calc_normal_std_he_forward( shape[-3], shape[0], kernel=shape[-2:]), rng=rng) (shape) * v.initializer.multiplier) elif v.initializer.type == 'NormalConvolutionHeBackward': initializer = ( lambda shape: NormalInitializer(calc_normal_std_he_backward( shape[-3], shape[0], kernel=shape[-2:]), rng=rng) (shape) * v.initializer.multiplier) elif v.initializer.type == 'NormalConvolutionGlorot': initializer = (lambda shape: NormalInitializer( calc_normal_std_glorot(shape[-3], shape[0], kernel=shape[-2:]), rng=rng)(shape) * v.initializer.multiplier) elif v.initializer.type == 'Uniform': initializer = UniformInitializer( lim=[-v.initializer.multiplier, v.initializer.multiplier], rng=rng) elif v.initializer.type == 'UniformAffineGlorot': initializer = (lambda shape: UniformInitializer( calc_uniform_lim_glorot(shape[0], numpy.prod(shape[1:])), rng=rng)(shape) * v.initializer.multiplier) elif v.initializer.type == 'UniformConvolutionGlorot': initializer = ( lambda shape: UniformInitializer(calc_uniform_lim_glorot( shape[-3], shape[0], kernel=shape[-2:]), rng=rng) (shape) * v.initializer.multiplier) elif v.initializer.type == 'Constant': initializer = ConstantInitializer(value=v.initializer.multiplier) else: initializer = None variable_instance = get_parameter_or_create(name, shape, initializer) else: # create empty variable, memory will be allocated in network.setup() # after network optimization variable_instance = nn.Variable() variable = Variable() variable.name = name variable.parameter = parameter variable.shape = shape variable.variable_instance = variable_instance return variable
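# _create_variable() above dispatches on the initializer type recorded in the
# network definition. For reference, its 'UniformAffineGlorot' branch is
# equivalent to using the initializer helpers directly, as in this sketch
# (the shape, multiplier, and parameter name are made-up values):
import numpy as np
from nnabla.initializer import UniformInitializer, calc_uniform_lim_glorot
from nnabla.parameter import get_parameter_or_create

rng = np.random.RandomState(313)
shape = (128, 64)        # affine weight shape, used only for illustration
multiplier = 1.0

initializer = (lambda shape: UniformInitializer(
    calc_uniform_lim_glorot(shape[0], np.prod(shape[1:])), rng=rng)(shape) * multiplier)
w = get_parameter_or_create("illustration/affine/W", shape, initializer)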
def distil(): args = get_args() # Get context. from nnabla.ext_utils import get_extension_context logger.info("Running in %s" % args.context) ctx = get_extension_context(args.context, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) # Create CNN network for both training and testing. if args.net == "cifar10_resnet23_prediction": model_prediction = cifar10_resnet23_prediction data_iterator = data_iterator_cifar10 c = 3 h = w = 32 n_train = 50000 n_valid = 10000 # TRAIN teacher = "teacher" student = "student" maps = args.maps rrate = args.reduction_rate # Create input variables. image = nn.Variable([args.batch_size, c, h, w]) image.persistent = True # not clear the intermediate buffer re-used label = nn.Variable([args.batch_size, 1]) label.persistent = True # not clear the intermediate buffer re-used # Create `teacher` and "student" prediction graph. model_load_path = args.model_load_path nn.load_parameters(model_load_path) pred_label = model_prediction(image, net=teacher, maps=maps, test=not args.use_batch) pred_label.need_grad = False # no need backward through teacher graph pred = model_prediction(image, net=student, maps=int(maps * (1. - rrate)), test=False) pred.persistent = True # not clear the intermediate buffer used loss_ce = F.mean(F.softmax_cross_entropy(pred, label)) loss_ce_soft = ce_soft(pred, pred_label) loss = args.weight_ce * loss_ce + args.weight_ce_soft * loss_ce_soft # TEST # Create input variables. vimage = nn.Variable([args.batch_size, c, h, w]) vlabel = nn.Variable([args.batch_size, 1]) # Create teacher prediction graph. vpred = model_prediction(vimage, net=student, maps=int(maps * (1. - rrate)), test=True) # Create Solver. solver = S.Adam(args.learning_rate) with nn.parameter_scope(student): solver.set_parameters(nn.get_parameters()) # Create monitor. from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed monitor = Monitor(args.monitor_path) monitor_loss = MonitorSeries("Training loss", monitor, interval=10) monitor_err = MonitorSeries("Training error", monitor, interval=10) monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100) monitor_verr = MonitorSeries("Test error", monitor, interval=1) # Initialize DataIterator for MNIST. data = data_iterator(args.batch_size, True) vdata = data_iterator(args.batch_size, False) best_ve = 1.0 # Training loop. for i in range(args.max_iter): if i % args.val_interval == 0: # Validation ve = 0.0 for j in range(int(n_valid / args.batch_size)): vimage.d, vlabel.d = vdata.next() vpred.forward(clear_buffer=True) ve += categorical_error(vpred.d, vlabel.d) ve /= int(n_valid / args.batch_size) monitor_verr.add(i, ve) if ve < best_ve: nn.save_parameters( os.path.join(args.model_save_path, 'params_%06d.h5' % i)) best_ve = ve # Training forward image.d, label.d = data.next() solver.zero_grad() loss.forward(clear_no_need_grad=True) loss.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() e = categorical_error(pred.d, label.d) monitor_loss.add(i, loss.d.copy()) monitor_err.add(i, e) monitor_time.add(i) ve = 0.0 for j in range(int(n_valid / args.batch_size)): vimage.d, vlabel.d = vdata.next() vpred.forward(clear_buffer=True) ve += categorical_error(vpred.d, vlabel.d) ve /= int(n_valid / args.batch_size) monitor_verr.add(i, ve) parameter_file = os.path.join(args.model_save_path, 'params_{:06}.h5'.format(args.max_iter)) nn.save_parameters(parameter_file)
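# distil() uses ce_soft(pred, pred_label) for the soft-target term, but ce_soft
# is not defined in this excerpt. A typical soft cross-entropy between student
# and teacher logits looks like the following sketch; the temperature and the
# mean reduction are assumptions, not the example's actual definition:
import nnabla.functions as F

def ce_soft_sketch(student_logits, teacher_logits, temperature=1.0):
    # Cross entropy between softened teacher and student distributions.
    p_teacher = F.softmax(teacher_logits / temperature, axis=1)
    log_p_student = F.log_softmax(student_logits / temperature, axis=1)
    return -F.mean(F.sum(p_teacher * log_p_student, axis=1))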
def train(): args = get_args() # Get context. from nnabla.ext_utils import get_extension_context logger.info("Running in %s" % args.context) ctx = get_extension_context(args.context, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) # Create CNN network for both training and testing. if args.net == "cifar10_resnet23_prediction": model_prediction = cifar10_resnet23_prediction if args.net == "cifar10_shufflenet_prediction": model_prediction = functools.partial(cifar10_shuffle_prediction, groups=args.groups) data_iterator = data_iterator_cifar10 c = 3 h = w = 32 n_train = 50000 n_valid = 10000 # TRAIN maps = args.maps # Create input variables. image = nn.Variable([args.batch_size, c, h, w]) label = nn.Variable([args.batch_size, 1]) # Create model_prediction graph. pred = model_prediction(image, maps=maps, test=False) pred.persistent = True # Create loss function. loss = F.mean(F.softmax_cross_entropy(pred, label)) # TEST # Create input variables. vimage = nn.Variable([args.batch_size, c, h, w]) vlabel = nn.Variable([args.batch_size, 1]) # Create prediction graph. vpred = model_prediction(vimage, maps=maps, test=True) # Create Solver. solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Create monitor. from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed monitor = Monitor(args.monitor_path) monitor_loss = MonitorSeries("Training loss", monitor, interval=10) monitor_err = MonitorSeries("Training error", monitor, interval=10) monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100) monitor_verr = MonitorSeries("Test error", monitor, interval=1) # Initialize DataIterator data = data_iterator(args.batch_size, True) vdata = data_iterator(args.batch_size, False) best_ve = 1.0 ve = 1.0 # Training loop. for i in range(args.max_iter): if i % args.val_interval == 0: # Validation ve = 0.0 for j in range(int(n_valid / args.batch_size)): vimage.d, vlabel.d = vdata.next() vpred.forward(clear_buffer=True) ve += categorical_error(vpred.d, vlabel.d) ve /= int(n_valid / args.batch_size) monitor_verr.add(i, ve) if ve < best_ve: nn.save_parameters( os.path.join(args.model_save_path, 'params_%06d.h5' % i)) best_ve = ve # Training forward image.d, label.d = data.next() solver.zero_grad() loss.forward(clear_no_need_grad=True) loss.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() e = categorical_error(pred.d, label.d) monitor_loss.add(i, loss.d.copy()) monitor_err.add(i, e) monitor_time.add(i) ve = 0.0 for j in range(int(n_valid / args.batch_size)): vimage.d, vlabel.d = vdata.next() vpred.forward(clear_buffer=True) ve += categorical_error(vpred.d, vlabel.d) ve /= int(n_valid / args.batch_size) monitor_verr.add(i, ve) parameter_file = os.path.join(args.model_save_path, 'params_{:06}.h5'.format(args.max_iter)) nn.save_parameters(parameter_file)
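# categorical_error() is called from every training loop in this file but is not
# defined in the excerpt. The conventional top-1 error over logits and integer
# labels, shown here as an assumed implementation, is:
import numpy as np

def categorical_error_sketch(pred, label):
    # pred: (batch_size, n_classes) array of scores, label: (batch_size, 1) ints
    pred_label = pred.argmax(axis=1)
    return (pred_label != label.flat).mean()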
def main(args):
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = args.n_label
    n_train_data = 73257
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = args.epoch
    act = F.relu
    iter_epoch = n_train_data / batch_size
    n_iter = n_epoch * iter_epoch
    extension_module = args.context
    lambda_ = args.lambda_

    # Model
    ## supervised
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l = nn.Variable((batch_size, m, h, w))
    y_l = nn.Variable((batch_size, 1))
    pred, log_var = cnn_model_003(ctx, x_l)
    one = F.constant(1., log_var.shape)
    loss_ce = ce_loss(ctx, pred, y_l)
    reg_sigma = sigma_regularization(ctx, log_var, one)
    loss_supervised = loss_ce + er_loss(ctx, pred) + lambda_ * reg_sigma

    ## stochastic regularization
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u1 = nn.Variable((batch_size, m, h, w))
    pred_x_u0, log_var0 = cnn_model_003(ctx, x_u0)
    pred_x_u1, log_var1 = cnn_model_003(ctx, x_u1)
    loss_sr = sr_loss_with_uncertainty(ctx, pred_x_u0, pred_x_u1, log_var0, log_var1)
    reg_sigma0 = sigma_regularization(ctx, log_var0, one)
    reg_sigma1 = sigma_regularization(ctx, log_var1, one)
    reg_sigmas = sigmas_regularization(ctx, log_var0, log_var1)
    loss_unsupervised = loss_sr + er_loss(ctx, pred_x_u0) + er_loss(ctx, pred_x_u1) \
        + lambda_ * (reg_sigma0 + reg_sigma1) + lambda_ * reg_sigmas

    ## evaluate
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval, _ = cnn_model_003(ctx, x_eval, test=True)

    # Solver
    with nn.context_scope(ctx):
        solver = S.Adam(alpha=learning_rate)
        solver.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/svhn/train.mat")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/svhn/l_train.mat")
    u_train_path = os.path.join(home, "datasets/svhn/u_train.mat")
    test_path = os.path.join(home, "datasets/svhn/test.mat")

    # data reader
    data_reader = SVHNDataReader(l_train_path, u_train_path, test_path,
                                 batch_size=batch_size,
                                 n_cls=n_cls,
                                 da=False,
                                 shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    acc_prev = 0.
    ve_best = 1.
    save_path_prev = ""
    for i in range(int(n_iter)):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()
        x_l.d, _, y_l.d = x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d = x_u0_data, x_u1_data

        # Train
        loss_supervised.forward(clear_no_need_grad=True)
        loss_unsupervised.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss_supervised.backward(clear_buffer=True)
        loss_unsupervised.backward(clear_buffer=True)
        solver.update()

        # Evaluate
        if int((i + 1) % iter_epoch) == 0:
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            ve /= iter_val

            msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch, time.time() - st, (1. - ve) * 100)
            print(msg)
            if ve < ve_best:
                if not os.path.exists(args.model_save_path):
                    os.makedirs(args.model_save_path)
                if save_path_prev != "":
                    os.remove(save_path_prev)
                save_path = os.path.join(
                    args.model_save_path, 'params_%06d.h5' % epoch)
                nn.save_parameters(save_path)
                save_path_prev = save_path
                ve_best = ve
            st = time.time()
            epoch += 1
def test_save_load_reshape(tmpdir, variable_batch_size, shape):
    x = nn.Variable([10, 1, 28, 28, 10, 10])
    y = F.reshape(x, shape=shape)
    check_save_load(tmpdir, x, y, variable_batch_size)


def test_data_grad_reference():
    v = nn.Variable([2, 3, 4])
    assert v.d.dtype == np.float32
    assert v.g.dtype == np.float32


def test_data_grad():
    v = nn.Variable([2, 3, 4])
    v.d[...] = np.random.randn(*v.shape)
    assert v.d is not v.g
    assert not np.all(v.d == v.g)


def test_name():
    x = nn.Variable([2, 3])
    x.name = "VariableName"
    assert x.name == "VariableName"
def test_prohibit_clear_data(): import nnabla.functions as F nn.prefer_cached_array(False) shape = (2, 3, 4) var_np = np.random.rand(*shape) # the case of root variable x1 = nn.Variable.from_numpy_array(var_np) y1 = F.reshape(x1, (-1, ), inplace=True) y1 = F.reshape(y1, shape, inplace=True) * 2 x2 = nn.Variable.from_numpy_array(var_np) y2 = F.reshape(x2, (-1, ), inplace=False) y2 = F.reshape(y2, shape, inplace=False) * 2 nn.forward_all([y1, y2], clear_buffer=True) assert_allclose(x1.d, x2.d) assert_allclose(y1.d, y2.d) # the case of persistent variable x1 = nn.Variable.from_numpy_array(var_np) p_y1 = F.mul_scalar(x1, 2).apply(persistent=True) y1 = F.reshape(p_y1, (-1, ), inplace=True) y1 = F.reshape(y1, shape, inplace=True) * 2 x2 = nn.Variable.from_numpy_array(var_np) p_y2 = F.mul_scalar(x2, 2).apply(persistent=True) y2 = F.reshape(p_y2, (-1, ), inplace=False) y2 = F.reshape(y2, shape, inplace=False) * 2 nn.forward_all([y1, y2], clear_buffer=True) assert_allclose(p_y1.d, p_y2.d) assert_allclose(y1.d, y2.d) # the case of rewire_on root variable # graph A: x11 -> f_inplace -> y11 x11 = nn.Variable.from_numpy_array(var_np) y11 = F.reshape(x11, (-1, ), inplace=True) # graph B: x12 -> f_inplace -> mul_scalar -> y12 x12 = nn.Variable(shape=y11.shape) y12 = F.reshape(x12, shape, inplace=True) * 2 # graph A->B: x11 -> f_inplace -> f_inplace -> mul_scalar -> y12 x12.rewire_on(y11) x2 = nn.Variable.from_numpy_array(var_np) y2 = F.reshape(x2, (-1, ), inplace=False) y2 = F.reshape(y2, shape, inplace=False) * 2 nn.forward_all([y12, y2], clear_buffer=True) assert_allclose(x11.d, x2.d) assert_allclose(y12.d, y2.d) # the case of rewire_on persistent variable # graph A: x11 -> mul_scalar -> p_x11 -> f_inplace -> y11 x11 = nn.Variable.from_numpy_array(var_np) p_x11 = F.mul_scalar(x11, 2).apply(persistent=True) y11 = F.reshape(p_x11, (-1, ), inplace=True) # graph B: x12 -> f_inplace -> mul_scalar -> y12 x12 = nn.Variable(shape=y11.shape) y12 = F.reshape(x12, shape, inplace=True) * 2 # graph A->B: ... -> p_x11 -> f_inplace -> f_inplace -> mul_scalar -> y12 x12.rewire_on(y11) x2 = nn.Variable.from_numpy_array(var_np) p_x2 = F.mul_scalar(x2, 2).apply(persistent=True) y2 = F.reshape(p_x2, (-1, ), inplace=False) y2 = F.reshape(y2, shape, inplace=False) * 2 nn.forward_all([y12, y2], clear_buffer=True) assert_allclose(p_x11.d, p_x2.d) assert_allclose(y12.d, y2.d)
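# test_prohibit_clear_data() exercises the rule that data referenced by an
# in-place chain must survive clear_buffer. The user-facing side of that rule:
# mark an intermediate Variable persistent (or keep it as a root) when its .d
# must remain valid after a forward with clear_buffer=True. A tiny illustration:
import numpy as np
import nnabla as nn
import nnabla.functions as F

x = nn.Variable.from_numpy_array(np.random.rand(2, 3))   # root: not cleared
h = F.mul_scalar(x, 2).apply(persistent=True)            # persistent: not cleared
y = F.add_scalar(h, 1)
y.forward(clear_buffer=True)
assert np.allclose(h.d, x.d * 2)                          # h.d is still valid here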
def main(args): # Settings device_id = args.device_id batch_size = args.batch_size batch_size_eval = args.batch_size_eval n_l_train_data = 4000 n_train_data = 50000 n_cls = 10 learning_rate = 1. * 1e-3 n_epoch = 300 act = F.relu iter_epoch = n_train_data / batch_size n_iter = n_epoch * iter_epoch extension_module = args.context # Model ## supervised batch_size, m, h, w = batch_size, 3, 32, 32 ctx = extension_context(extension_module, device_id=device_id) x_l = nn.Variable((batch_size, m, h, w)) y_l = nn.Variable((batch_size, 1)) pred, log_var = cnn_model_003(ctx, x_l) loss_ce = ce_loss_with_uncertainty(ctx, pred, y_l, log_var) loss_supervised = loss_ce ## stochastic regularization x_u0 = nn.Variable((batch_size, m, h, w)) x_u1 = nn.Variable((batch_size, m, h, w)) pred_x_u0, log_var0 = cnn_model_003(ctx, x_u0) pred_x_u1, log_var1 = cnn_model_003(ctx, x_u1) loss_sr = sr_loss_with_uncertainty(ctx, pred_x_u0, pred_x_u1, log_var0, log_var1) loss_er0 = er_loss(ctx, pred_x_u0) loss_er1 = er_loss(ctx, pred_x_u1) loss_unsupervised = loss_sr + loss_er0 + loss_er1 ## evaluate batch_size_eval, m, h, w = batch_size, 3, 32, 32 x_eval = nn.Variable((batch_size_eval, m, h, w)) pred_eval, _ = cnn_model_003(ctx, x_eval, test=True) # Solver with nn.context_scope(ctx): solver = S.Adam(alpha=learning_rate) solver.set_parameters(nn.get_parameters()) # Dataset ## separate dataset home = os.environ.get("HOME") fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz") separator = Separator(n_l_train_data) separator.separate_then_save(fpath) l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz") u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") # data reader data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path, batch_size=batch_size, n_cls=n_cls, da=True, shape=True) # Training loop print("# Training loop") epoch = 1 st = time.time() acc_prev = 0. for i in range(n_iter): # Get data and set it to the varaibles x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch() x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch() x_l.d, _, y_l.d = x_l0_data, x_l1_data, y_l_data x_u0.d, x_u1.d = x_u0_data, x_u1_data # Train loss_supervised.forward(clear_no_need_grad=True) loss_unsupervised.forward(clear_no_need_grad=True) solver.zero_grad() loss_supervised.backward(clear_buffer=True) loss_unsupervised.backward(clear_buffer=True) solver.update() # Evaluate if (i + 1) % iter_epoch == 0: # Get data and set it to the varaibles x_data, y_data = data_reader.get_test_batch() # Evaluation loop ve = 0. iter_val = 0 for k in range(0, len(x_data), batch_size_eval): x_eval.d = get_test_data(x_data, k, batch_size_eval) label = get_test_data(y_data, k, batch_size_eval) pred_eval.forward(clear_buffer=True) ve += categorical_error(pred_eval.d, label) iter_val += 1 msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format( epoch, time.time() - st, (1. - ve / iter_val) * 100) print(msg) st = time.time() epoch += 1
def solver_tester(rng, solver, ref_solver, solver_args=[], solver_kwargs={}, num_itr=5, decay=1e-4, clip_norm=0.5, atol=1e-6, ctx=None, solver_name=None): if ctx is None: ctx = nn.Context() # Create params p1 = nn.Variable([2, 3, 4]) p2 = nn.Variable([3, 4, 1, 2]) p3 = nn.Variable([]) params = OrderedDict([('zZzZ', p1), ('bbb', p2), ('asdfadfdasd', p3)]) for p in params.values(): p.d = rng.randn(*p.shape) p.g = rng.randn(*p.shape) with nn.context_scope(ctx): s = solver(*solver_args, **solver_kwargs) s.set_parameters(params) if solver_name is not None: assert s.name == solver_name ref_s = ref_solver(*solver_args, **solver_kwargs) ref_s.set_parameters(params) # Get params (unordered_map is used in C++, thus check in both directions) params_ = s.get_parameters() for k0, v0 in iteritems(ref_s.params): v1 = params_[k0] assert_allclose(v0, v1.d, atol=atol) for k1, v1 in iteritems(params_): v0 = ref_s.params[k1] assert_allclose(v0, v1.d, atol=atol) # Check weight decay. grad_copy = OrderedDict([(k, p.g.copy()) for k, p in iteritems(params)]) s.weight_decay(decay) ref_s.weight_decay(grad_copy, decay) for p, ref_p in zip(params.values(), grad_copy.values()): assert_allclose(ref_p, p.g, atol=atol) # Check clip grad by norm. grad_copy = OrderedDict([(k, p.g.copy()) for k, p in iteritems(params)]) s.clip_grad_by_norm(clip_norm) ref_s.clip_grad_by_norm(grad_copy, clip_norm) for p, ref_p in zip(params.values(), grad_copy.values()): assert np.allclose(ref_p, p.g, atol=atol) # Check solver udpate. for i in range(num_itr): grads = OrderedDict([(k, rng.randn(*p.shape)) for k, p in iteritems(params)]) for k, g in iteritems(grads): params[k].g = g s.update() ref_s.update(grads) # update check for p, ref_p in zip(params.values(), ref_s.params.values()): assert_allclose(ref_p, p.d, atol=atol) # iteration state incrementaion check for state in s.get_states().values(): assert state.t == (i + 1) # Check inf, nan, and inf/nan for v, method in zip([[np.inf], [np.nan], [np.inf, np.nan]], [lambda s: s.check_inf_grad(), lambda s: s.check_nan_grad(), lambda s: s.check_inf_or_nan_grad()]): def set_value(p): p.g[...] = rng.choice(v + [-1, 0, 1], size=int(np.prod(p.shape)), replace=True).reshape(p.shape) if v[0] not in p.g: p.g.flat[rng.choice(np.arange(int(np.prod(p.shape))))] = v[0] for p in params.values(): assert method(s) == False g = p.g.copy() set_value(p) assert method(s) == True p.g[...] = g # Rescale grad scale = 10. ref_grad = [p.g.copy() for p in params.values()] for p in params.values(): p.g *= scale s.scale_grad(1. / scale) for ref, p in zip(ref_grad, params.values()): assert_allclose(ref, p.g, atol=1e-4) # Save/Load Test def test_save_load(s, name): # Save states import tempfile tmpdir = tempfile.mkdtemp("solver-test") tmpfile = os.path.join(tmpdir, name) states0 = s.get_states() s.save_states(tmpfile) # Load states with nn.context_scope(ctx): s1 = solver(*solver_args, **solver_kwargs) s1.set_parameters(params) s1.load_states(tmpfile) # Check save/load states states1 = s1.get_states() for k0, s0 in iteritems(states0): s1 = states1[k0] for sname, vx0 in iteritems(s0.pstate): vx1 = s1.pstate[sname] assert_allclose(vx0.d, vx1.d) assert s1.t == s0.t test_save_load(s, "states.h5") test_save_load(s, "states.protobuf") # Check if remove_state_impl work correctly. s.clear_parameters()
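# solver_tester() above round-trips optimizer state through save_states() /
# load_states(). In a training script the same two calls checkpoint the solver
# (momentum buffers, iteration count) alongside nn.save_parameters(). A minimal
# sketch with a throwaway graph and file name:
import os
import tempfile
import nnabla as nn
import nnabla.parametric_functions as PF
import nnabla.solvers as S

nn.clear_parameters()
x = nn.Variable([4, 2])
y = PF.affine(x, 3, name="ckpt_demo")
solver = S.Adam(1e-3)
solver.set_parameters(nn.get_parameters())

state_file = os.path.join(tempfile.mkdtemp(), "adam_states.h5")
solver.save_states(state_file)       # write per-parameter state and iteration count
# ... later, after rebuilding the same parameter set ...
solver.set_parameters(nn.get_parameters())
solver.load_states(state_file)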
def update_graph(self, key='train'): r"""Builds the graph and update the placeholder. Args: training (bool, optional): Type of the graph. Defaults to `train`. """ assert key in ('train', 'valid') self.gen.training = key == 'train' self.dis.training = key == 'train' hp = self.hp def data_aug(v): v = random_flip(v) v = random_scaling(v, hp.scale_low, hp.scale_high) return v # define input variables input_x = nn.Variable((hp.batch_size, 1, hp.segment_length)) input_y = nn.Variable((hp.batch_size, 1, hp.segment_length)) label_x = nn.Variable((hp.batch_size, 1)) label_y = nn.Variable((hp.batch_size, 1)) x_aug = data_aug(input_x) r_jitter_x = random_jitter(x_aug, hp.max_jitter_steps) x_real_con = self.gen.encode(x_aug) s_real, s_mu, s_logvar = self.gen.embed(data_aug(input_x)) x_real = self.gen.decode(x_real_con, s_real) r_fake = self.gen.embed(data_aug(input_y))[0] x_fake = self.gen.decode(x_real_con, r_fake) x_fake_con = self.gen.encode(random_flip(x_fake)) dis_real_x = self.dis(data_aug(input_x), label_x) dis_fake_x = self.dis(data_aug(x_fake), label_y) # ------------------------------ Discriminator ----------------------- d_loss = (self.dis.adversarial_loss(dis_real_x, 1.0) + self.dis.adversarial_loss(dis_fake_x, 0.0)) # -------------------------------------------------------------------- # -------------------------------- Generator ------------------------- g_loss_avd = self.dis.adversarial_loss(self.dis(x_fake, label_y), 1.0) g_loss_con = self.dis.preservation_loss(x_fake_con, x_real_con) g_loss_kld = self.gen.kl_loss(s_mu, s_logvar) g_loss_rec = (self.dis.perceptual_loss(x_real, r_jitter_x) + self.dis.spectral_loss(x_real, r_jitter_x)) g_loss = (g_loss_avd + hp.lambda_con * g_loss_con + hp.lambda_rec * g_loss_rec + hp.lambda_kld * g_loss_kld) # ------------------------------------------------------------------- set_persistent_all(g_loss_con, g_loss_avd, g_loss, d_loss, x_fake, g_loss_kld, g_loss_rec) self.placeholder[key] = dict( input_x=input_x, label_x=label_x, input_y=input_y, label_y=label_y, x_fake=x_fake, d_loss=d_loss, g_loss_avd=g_loss_avd, g_loss_con=g_loss_con, g_loss_rec=g_loss_rec, g_loss_kld=g_loss_kld, g_loss=g_loss, )
def main(): args = get_args() state_size = args.state_size batch_size = args.batch_size num_steps = args.num_steps num_layers = args.num_layers max_epoch = args.max_epoch max_norm = args.gradient_clipping_max_norm num_words = 10000 lr = args.learning_rate train_data, val_data, test_data = get_data() from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) from nnabla.monitor import Monitor, MonitorSeries monitor = Monitor(args.work_dir) monitor_perplexity = MonitorSeries("Training perplexity", monitor, interval=10) monitor_vperplexity = MonitorSeries("Validation perplexity", monitor, interval=(len(val_data) // (num_steps * batch_size))) monitor_tperplexity = MonitorSeries("Test perplexity", monitor, interval=(len(test_data) // (num_steps * 1))) l1 = LSTMWrapper(batch_size, state_size) l2 = LSTMWrapper(batch_size, state_size) # train graph x = nn.Variable((batch_size, num_steps)) t = nn.Variable((batch_size, num_steps)) w = I.UniformInitializer((-0.1, 0.1)) b = I.ConstantInitializer(1) loss = get_loss(l1, l2, x, t, w, b, num_words, batch_size, state_size, True) l1.share_data() l2.share_data() # validaiton graph vx = nn.Variable((batch_size, num_steps)) vt = nn.Variable((batch_size, num_steps)) vloss = get_loss(l1, l2, vx, vt, w, b, num_words, batch_size, state_size) solver = S.Sgd(lr) solver.set_parameters(nn.get_parameters()) if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) best_val = 10000 for epoch in range(max_epoch): l1.reset_state() l2.reset_state() for i in range(len(train_data) // (num_steps * batch_size)): x.d, t.d = get_batch(train_data, i * num_steps, batch_size, num_steps) solver.zero_grad() loss.forward() loss.backward(clear_buffer=True) solver.weight_decay(1e-5) gradient_clipping(nn.get_parameters().values(), max_norm) solver.update() perp = perplexity(loss.d.copy()) monitor_perplexity.add( (len(train_data) // (num_steps * batch_size)) * (epoch) + i, perp) l1.reset_state() l2.reset_state() vloss_avg = 0 for i in range(len(val_data) // (num_steps * batch_size)): vx.d, vt.d = get_batch(val_data, i * num_steps, batch_size, num_steps) vloss.forward() vloss_avg += vloss.d.copy() vloss_avg /= float((len(val_data) // (num_steps * batch_size))) vper = perplexity(vloss_avg) if vper < best_val: best_val = vper if vper < 200: save_name = "params_epoch_{:02d}.h5".format(epoch) nn.save_parameters(os.path.join(args.save_dir, save_name)) else: solver.set_learning_rate(solver.learning_rate() * 0.25) logger.info("Decreased learning rate to {:05f}".format( solver.learning_rate())) monitor_vperplexity.add( (len(val_data) // (num_steps * batch_size)) * (epoch) + i, vper) # for final test split t_batch_size = 1 tl1 = LSTMWrapper(t_batch_size, state_size) tl2 = LSTMWrapper(t_batch_size, state_size) tloss_avg = 0 tx = nn.Variable((t_batch_size, num_steps)) tt = nn.Variable((t_batch_size, num_steps)) tloss = get_loss(tl1, tl2, tx, tt, w, b, num_words, 1, state_size) tl1.share_data() tl2.share_data() for i in range(len(test_data) // (num_steps * t_batch_size)): tx.d, tt.d = get_batch(test_data, i * num_steps, 1, num_steps) tloss.forward() tloss_avg += tloss.d.copy() tloss_avg /= float((len(test_data) // (num_steps * t_batch_size))) tper = perplexity(tloss_avg) monitor_tperplexity.add( (len(test_data) // (num_steps * t_batch_size)) * (epoch) + i, tper)
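# gradient_clipping() used in main() above is not defined in this excerpt. A
# common global-norm implementation over nnabla parameter Variables (an
# illustrative sketch, not necessarily the author's helper) is:
import numpy as np

def gradient_clipping_sketch(params, max_norm):
    # Rescale all gradients so that their global L2 norm is at most max_norm.
    params = list(params)  # e.g. nn.get_parameters().values()
    total_norm = np.sqrt(sum(float((p.g ** 2).sum()) for p in params))
    rate = max_norm / max(total_norm, max_norm)
    for p in params:
        p.g *= rate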
def _get_variable_or_create(self, v, callback, current_scope): if v.variable is not None: return v.variable v = callback._apply_generate_variable(v) if v.variable is not None: return v.variable pvar = v.proto name = pvar.name shape = list(pvar.shape.dim) if len(shape) > 0 and shape[0] < 0: shape[0] = self.batch_size shape = tuple(shape) assert np.all(np.array(shape) > 0 ), "Shape must be positive. Given {}.".format(shape) if pvar.type != 'Parameter': # Create a new variable and returns. var = nn.Variable(shape) v.variable = var var.name = name return var # Trying to load the parameter from the global scope. try: with nn.parameter_scope('', current_scope): param = get_parameter(name) if param is not None: assert shape == param.shape param = param.get_unlinked_variable(need_grad=v.need_grad) v.variable = param param.name = name return param # Parameter does not exist in the global scope. # Then try to load the parameter from .nnp file. callback.verbose('Loading parameter `{}` from .nnp.'.format(name)) param = get_parameter(name) if param is None: logger.info( 'Parameter `{}` is not found. Initializing.'.format(name)) tmp = _create_variable(pvar, name, shape, self.rng) param = tmp.variable_instance # Register the parameter to the current (global) scope. with nn.parameter_scope('', current_scope): set_parameter(name, param) except: import traceback raise ValueError( 'An error occurs during creation of a variable `{}` as a' ' parameter variable. The error was:\n----\n{}\n----\n' 'The parameters registered was {}'.format( name, traceback.format_exc(), '\n'.join( list(nn.get_parameters(grad_only=False).keys())))) assert shape == param.shape param = param.get_unlinked_variable(need_grad=v.need_grad) v.variable = param param.name = name return param
def animate(args): # get context ctx = get_extension_context(args.context) nn.set_default_context(ctx) logger.setLevel(logging.ERROR) # to supress minor messages if not args.config: assert not args.params, "pretrained weights file is given, but corresponding config file is not. Please give both." download_provided_file( "https://nnabla.org/pretrained-models/nnabla-examples/GANs/first-order-model/voxceleb_trained_info.yaml") args.config = 'voxceleb_trained_info.yaml' download_provided_file( "https://nnabla.org/pretrained-models/nnabla-examples/GANs/first-order-model/pretrained_fomm_params.h5") config = read_yaml(args.config) dataset_params = config.dataset_params model_params = config.model_params if args.detailed: vis_params = config.visualizer_params visualizer = Visualizer(**vis_params) if not args.params: assert "log_dir" in config, "no log_dir found in config. therefore failed to locate pretrained parameters." param_file = os.path.join( config.log_dir, config.saved_parameters) else: param_file = args.params print(f"Loading {param_file} for image animation...") nn.load_parameters(param_file) bs, h, w, c = [1] + dataset_params.frame_shape source = nn.Variable((bs, c, h, w)) driving_initial = nn.Variable((bs, c, h, w)) driving = nn.Variable((bs, c, h, w)) filename = args.driving # process repeated until all the test data is used driving_video = read_video( filename, dataset_params.frame_shape) # (#frames, h, w, 3) driving_video = np.transpose( driving_video, (0, 3, 1, 2)) # (#frames, 3, h, w) source_img = imread(args.source, channel_first=True, size=(256, 256)) / 255. source_img = source_img[:3] source.d = np.expand_dims(source_img, 0) driving_initial.d = driving_video[0][:3, ] with nn.parameter_scope("kp_detector"): kp_source = detect_keypoint(source, **model_params.kp_detector_params, **model_params.common_params, test=True, comm=False) persistent_all(kp_source) with nn.parameter_scope("kp_detector"): kp_driving_initial = detect_keypoint(driving_initial, **model_params.kp_detector_params, **model_params.common_params, test=True, comm=False) persistent_all(kp_driving_initial) with nn.parameter_scope("kp_detector"): kp_driving = detect_keypoint(driving, **model_params.kp_detector_params, **model_params.common_params, test=True, comm=False) persistent_all(kp_driving) if args.adapt_movement_scale: nn.forward_all([kp_source["value"], kp_source["jacobian"], kp_driving_initial["value"], kp_driving_initial["jacobian"]]) source_area = ConvexHull(kp_source['value'][0].d).volume driving_area = ConvexHull(kp_driving_initial['value'][0].d).volume adapt_movement_scale = np.sqrt(source_area) / np.sqrt(driving_area) else: adapt_movement_scale = 1 kp_norm = adjust_kp(kp_source=unlink_all(kp_source), kp_driving=kp_driving, kp_driving_initial=unlink_all(kp_driving_initial), adapt_movement_scale=adapt_movement_scale, use_relative_movement=args.unuse_relative_movement, use_relative_jacobian=args.unuse_relative_jacobian) persistent_all(kp_norm) with nn.parameter_scope("generator"): generated = occlusion_aware_generator(source, kp_source=unlink_all(kp_source), kp_driving=kp_norm, **model_params.generator_params, **model_params.common_params, test=True, comm=False) if not args.full and 'sparse_deformed' in generated: del generated['sparse_deformed'] # remove needless info persistent_all(generated) generated['kp_driving'] = kp_driving generated['kp_source'] = kp_source generated['kp_norm'] = kp_norm # generated contains these values; # 'mask': <Variable((bs, num_kp+1, h/4, w/4)) when scale_factor=0.25 # 
'sparse_deformed': <Variable((bs, num_kp+1, num_channel, h/4, w/4)) # (bs, num_kp + 1, c, h, w) # 'occlusion_map': <Variable((bs, 1, h/4, w/4)) # 'deformed': <Variable((bs, c, h, w)) # 'prediction': <Variable((bs, c, h, w)) mode = "arbitrary" if "log_dir" in config: result_dir = os.path.join(args.out_dir, os.path.basename(config.log_dir), f"{mode}") else: result_dir = os.path.join(args.out_dir, "test_result", f"{mode}") # create an empty directory to save generated results _ = nm.Monitor(result_dir) # load the header images. header = imread("imgs/header_combined.png", channel_first=True) generated_images = list() # compute these in advance and reuse nn.forward_all([kp_source["value"], kp_source["jacobian"]], clear_buffer=True) nn.forward_all([kp_driving_initial["value"], kp_driving_initial["jacobian"]], clear_buffer=True) num_of_driving_frames = driving_video.shape[0] for frame_idx in tqdm(range(num_of_driving_frames)): driving.d = driving_video[frame_idx][:3, ] nn.forward_all([generated["prediction"], generated["deformed"]], clear_buffer=True) if args.detailed: # visualize source w/kp, driving w/kp, deformed source, generated w/kp, generated image, occlusion map visualization = visualizer.visualize( source=source.d, driving=driving.d, out=generated) if args.full: visualization = reshape_result(visualization) # (H, W, C) combined_image = visualization.transpose(2, 0, 1) # (C, H, W) elif args.only_generated: combined_image = np.clip(generated["prediction"].d[0], 0.0, 1.0) combined_image = (255*combined_image).astype(np.uint8) # (C, H, W) else: # visualize source, driving, and generated image driving_fake = np.concatenate([np.clip(driving.d[0], 0.0, 1.0), np.clip(generated["prediction"].d[0], 0.0, 1.0)], axis=2) header_source = np.concatenate([np.clip(header / 255., 0.0, 1.0), np.clip(source.d[0], 0.0, 1.0)], axis=2) combined_image = np.concatenate( [header_source, driving_fake], axis=1) combined_image = (255*combined_image).astype(np.uint8) generated_images.append(combined_image) # once each video is generated, save it. output_filename = f"{os.path.splitext(os.path.basename(filename))[0]}.mp4" output_filename = f"{os.path.basename(args.source)}_by_{output_filename}" output_filename = output_filename.replace("#", "_") if args.output_png: monitor_vis = nm.MonitorImage(output_filename, nm.Monitor(result_dir), interval=1, num_images=1, normalize_method=lambda x: x) for frame_idx, img in enumerate(generated_images): monitor_vis.add(frame_idx, img) else: generated_images = [_.transpose(1, 2, 0) for _ in generated_images] # you might need to change ffmpeg_params according to your environment. mimsave(f'{os.path.join(result_dir, output_filename)}', generated_images, fps=args.fps, ffmpeg_params=["-pix_fmt", "yuv420p", "-vcodec", "libx264", "-f", "mp4", "-q", "0"]) return
def main(): """ main - driver code to run training for Zooming SloMo """ # Check NNabla version if get_nnabla_version_integer() < 11700: raise ValueError( 'This does not work with nnabla version less than v1.17.0 since deformable_conv layer is added in v1.17.0 . Please update the nnabla version.' ) conf = get_config() extension_module = conf.nnabla_context.context ctx = get_extension_context(extension_module, device_id=conf.nnabla_context.device_id) comm = CommunicatorWrapper(ctx) nn.set_default_context(comm.ctx) print("comm rank", comm.rank) # change max_iter, learning_rate and cosine_period when batch-size or no. of gpu devices change. default_batch_size = 12 train_scale_factor = comm.n_procs * \ (conf.train.batch_size / default_batch_size) max_iter = int(conf.train.max_iter // train_scale_factor) learning_rate = conf.train.learning_rate * \ (conf.train.batch_size / default_batch_size) cosine_period = int(conf.train.cosine_period // train_scale_factor) # for single-GPU training data_iterator_train = data_iterator(conf, shuffle=True) # for multi-GPU training if comm.n_procs > 1: data_iterator_train = data_iterator_train.slice( rng=None, num_of_slices=comm.n_procs, slice_pos=comm.rank) # LR-LFR data for ZoomingSloMo input data_lr_lfr = nn.Variable( (conf.train.batch_size, (conf.data.n_frames // 2) + 1, 3, conf.data.lr_size, conf.data.lr_size)) # HR-HFR data for ZoomingSloMo ground truth data_gt = nn.Variable((conf.train.batch_size, conf.data.n_frames, 3, conf.data.gt_size, conf.data.gt_size)) if conf.train.only_slomo: ''' High resolution data as input to only-Slomo network for frame interpolation, hence we use lesser number of frames. ''' # LFR data for SloMo input, slomo_gt = data_gt input_to_slomo = slomo_gt[:, 0:conf.data.n_frames:2, :, :, :] # setting up monitors for logging monitor_path = './nnmonitor' monitor = Monitor(monitor_path) monitor_loss = MonitorSeries('loss', monitor, interval=conf.train.monitor_log_freq) monitor_lr = MonitorSeries('learning rate', monitor, interval=conf.train.monitor_log_freq) monitor_time = MonitorTimeElapsed("training time per iteration", monitor, interval=conf.train.monitor_log_freq) scope_name = "ZoomingSloMo" if not conf.train.only_slomo else "SloMo" with nn.parameter_scope(scope_name): if conf.train.only_slomo: generated_frame = zooming_slo_mo_network(input_to_slomo, conf.train.only_slomo) diff = generated_frame - slomo_gt else: generated_frame = zooming_slo_mo_network(data_lr_lfr, conf.train.only_slomo) diff = generated_frame - data_gt # Charbonnier loss loss = F.sum((diff * diff + conf.train.eps)**0.5) # Define optimizer solver = S.Adam(alpha=learning_rate, beta1=conf.train.beta1, beta2=conf.train.beta2) # Set Parameters with nn.parameter_scope(scope_name): solver.set_parameters(nn.get_parameters()) solver_dict = {scope_name: solver} if comm.rank == 0: print("maximum iterations", max_iter) start_point = 0 if conf.train.checkpoint: # Load optimizer/solver information and model weights from checkpoint print("Loading weights from checkpoint:", conf.train.checkpoint) with nn.parameter_scope(scope_name): start_point = load_checkpoint(conf.train.checkpoint, solver_dict) if not os.path.isdir(conf.data.output_dir): os.makedirs(conf.data.output_dir) # Training loop. 
for i in range(start_point, max_iter): # Get Training Data if conf.train.only_slomo: _, data_gt.d = data_iterator_train.next() else: data_lr_lfr.d, data_gt.d = data_iterator_train.next() l_rate = get_repeated_cosine_annealing_learning_rate( i, learning_rate, conf.train.eta_min, cosine_period, conf.train.cosine_num_period) # Update solver.zero_grad() solver.set_learning_rate(l_rate) loss.forward(clear_no_need_grad=True) if comm.n_procs > 1: all_reduce_callback = comm.get_all_reduce_callback() loss.backward(clear_buffer=True, communicator_callbacks=all_reduce_callback) else: loss.backward(clear_buffer=True) solver.update() if comm.rank == 0: monitor_loss.add(i, loss.d.copy()) monitor_lr.add(i, l_rate) monitor_time.add(i) if (i % conf.train.save_checkpoint_freq) == 0: # Save intermediate check_points with nn.parameter_scope(scope_name): save_checkpoint(conf.data.output_dir, i, solver_dict) # Save final model parameters if comm.rank == 0: with nn.parameter_scope(scope_name): nn.save_parameters( os.path.join(conf.data.output_dir, "final_model.h5"))
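# get_repeated_cosine_annealing_learning_rate() is not shown in this excerpt.
# The usual form of a cosine schedule with warm restarts, given purely as an
# illustrative sketch of what such a helper computes, is:
import numpy as np

def repeated_cosine_annealing_lr_sketch(cur_iter, base_lr, eta_min,
                                        period, num_period):
    # Cosine decay from base_lr to eta_min, restarted every `period` iterations;
    # after `num_period` restarts the schedule stays at eta_min.
    if cur_iter >= period * num_period:
        return eta_min
    t = cur_iter % period
    return eta_min + 0.5 * (base_lr - eta_min) * (1.0 + np.cos(np.pi * t / period))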
def build_static_graph(self): real_img = nn.Variable(shape=(self.batch_size, 3, self.img_size, self.img_size)) noises = [ F.randn(shape=(self.batch_size, self.config['latent_dim'])) for _ in range(2) ] if self.few_shot_config['common']['type'] == 'cdc': NT_class = NoiseTop(n_train=self.train_loader.size, latent_dim=self.config['latent_dim'], batch_size=self.batch_size) noises = NT_class() self.PD_switch_var = NT_class.PD_switch_var if self.config['regularize_gen']: fake_img, dlatents = self.generator(self.batch_size, noises, return_latent=True) else: fake_img = self.generator(self.batch_size, noises) fake_img_test = self.generator_ema(self.batch_size, noises) if self.few_shot_config['common']['type'] != 'cdc': fake_disc_out = self.discriminator(fake_img) real_disc_out = self.discriminator(real_img) disc_loss = disc_logistic_loss(real_disc_out, fake_disc_out) gen_loss = 0 if self.few_shot_config['common']['type'] == 'cdc': fake_img_s = self.generator_s(self.batch_size, noises) cdc_loss = CrossDomainCorrespondence( fake_img, fake_img_s, _choice_num=self.few_shot_config['cdc']['feature_num'], _layer_fix_switch=self.few_shot_config['cdc']['layer_fix']) gen_loss += self.few_shot_config['cdc']['lambda'] * cdc_loss # --- PatchDiscriminator --- fake_disc_out, fake_feature_var = self.discriminator( fake_img, patch_switch=True, index=0) real_disc_out, real_feature_var = self.discriminator( real_img, patch_switch=True, index=0) disc_loss = disc_logistic_loss(real_disc_out, fake_disc_out) disc_loss_patch = disc_logistic_loss(fake_feature_var, real_feature_var) disc_loss += self.PD_switch_var * disc_loss_patch gen_loss += gen_nonsaturating_loss(fake_disc_out) var_name_list = [ 'real_img', 'noises', 'fake_img', 'gen_loss', 'disc_loss', 'fake_disc_out', 'real_disc_out', 'fake_img_test' ] var_list = [ real_img, noises, fake_img, gen_loss, disc_loss, fake_disc_out, real_disc_out, fake_img_test ] if self.config['regularize_gen']: dlatents.need_grad = True mean_path_length = nn.Variable() pl_reg, path_mean, _ = gen_path_regularize( fake_img=fake_img, latents=dlatents, mean_path_length=mean_path_length) path_mean_update = F.assign(mean_path_length, path_mean) path_mean_update.name = 'path_mean_update' pl_reg += 0 * path_mean_update gen_loss_reg = gen_loss + pl_reg var_name_list.append('gen_loss_reg') var_list.append(gen_loss_reg) if self.config['regularize_disc']: real_img.need_grad = True real_disc_out = self.discriminator(real_img) disc_loss_reg = disc_loss + self.config[ 'r1_coeff'] * 0.5 * disc_r1_loss( real_disc_out, real_img) * self.config['disc_reg_step'] real_img.need_grad = False var_name_list.append('disc_loss_reg') var_list.append(disc_loss_reg) Parameters = namedtuple('Parameters', var_name_list) self.parameters = Parameters(*var_list)
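# disc_logistic_loss() and gen_nonsaturating_loss() are referenced above but not
# defined in this excerpt. The standard StyleGAN2-style formulations, written
# here as an illustrative sketch rather than the project's actual helpers, are:
import nnabla.functions as F

def disc_logistic_loss_sketch(real_out, fake_out):
    # E[softplus(-D(x))] + E[softplus(D(G(z)))]
    return F.mean(F.softplus(-real_out)) + F.mean(F.softplus(fake_out))

def gen_nonsaturating_loss_sketch(fake_out):
    # E[softplus(-D(G(z)))]
    return F.mean(F.softplus(-fake_out))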
def __call__(self, outputs, inputs, grad_outputs=None, persistent_outputs=[],
             bind_grad_output=False):
    """
    The logic of this method is almost the same as that of
    visit_function_backward in the C++ layer.
    """
    # TODO: address test in the dynamic graph mode
    # TODO: address inplace-Function and its test
    # TODO: address auto_forward is very slow. It may be python overhead since small diff when BS is large.
    # TODO: address auto_forward consumes lots of memory, need to call v.get_unlinked_variable()?
    # TODO: address auto_forward consumes lots of memory, need to use NdArray as inputs?
    # TODO: NHWC format
    # TODO: Half
    # TODO: address `set default context`

    # Check outputs/inputs
    outputs = self._force_list(outputs)
    if not all([isinstance(o, nn.Variable) for o in outputs]):
        raise ValueError("Element of outputs must be `nnabla.Variable`.")
    inputs = self._force_list(inputs)
    if not all([isinstance(i, nn.Variable) for i in inputs]):
        raise ValueError("Element of inputs must be `nnabla.Variable`.")

    # Check grad_outputs
    if grad_outputs is None:
        grad_outputs = [None] * len(outputs)
    elif isinstance(grad_outputs, (int, float, np.ndarray, nn.NdArray)):
        grad_outputs = self._force_list(grad_outputs)
    elif isinstance(grad_outputs, list):
        if len(outputs) != len(grad_outputs):
            raise ValueError(
                "Length of `grad_outputs` and length of `outputs` must be the same.")
        for i in range(len(outputs)):
            o = outputs[i]
            go = grad_outputs[i]
            if not isinstance(go, (type(None), int, float, np.ndarray, nn.NdArray)):
                raise ValueError(
                    "Element of `grad_outputs` must be "
                    "in (`None`, `int`, `float`, `numpy.ndarray`, `nnabla.NdArray`) or "
                    "list of (`None`, `int`, `float`, `numpy.ndarray`, `nnabla.NdArray`)\n"
                    "type(grad_outputs[{}] = {}".format(i, type(go)))
            elif isinstance(go, np.ndarray) and go.shape != o.shape:
                raise ValueError(
                    "Shape of each of outputs and grad_outputs must be same.\n"
                    "output[{}]({}) != grad_output[{}]({})".format(
                        i, o.shape, i, go.shape))
            elif isinstance(go, nn.NdArray) and go.shape != o.shape:
                raise ValueError(
                    "Shape of each of outputs and grad_outputs must be same.\n"
                    "output[{}]({}) != grad_output[{}]({})".format(
                        i, o.shape, i, go.shape))

    # Check persistent_outputs
    if len(persistent_outputs) != 0 and len(outputs) != len(persistent_outputs):
        raise ValueError(
            "Length of outputs and persistent_outputs "
            "must be the same except for "
            "the case that the length of the persistent_outputs is 0.")

    # Persistent outputs since outputs are basically losses to be monitored
    persistent_outputs = [True] * \
        len(outputs) if persistent_outputs == [] else persistent_outputs
    for o, p in zip(outputs, persistent_outputs):
        o.persistent = p

    # Set grad_outputs
    for i in range(len(outputs)):
        o = outputs[i]
        go = grad_outputs[i]
        if go is None:
            pass
        elif isinstance(go, (int, float)):
            grad_output = nn.Variable(o.shape).apply(d=go, need_grad=False)
            outputs[i] = o * grad_output
        elif isinstance(go, np.ndarray):
            grad_output = nn.Variable(o.shape).apply(d=go, need_grad=False)
            outputs[i] = o * grad_output
        elif isinstance(go, nn.NdArray):
            grad_output = nn.Variable(o.shape).apply(data=go, need_grad=False)
            outputs[i] = o * grad_output

    # Coerce to sum if there are multiple outputs
    output = sum(outputs) if len(outputs) != 1 else outputs[0]

    # Connect the forward and backward graph
    grad_output = GradEndFunction()(output).apply(need_grad=False)

    # Open list of next search candidates
    ids = {}

    def get_id(func):
        if func not in ids.keys():
            size = len(ids)
            ids[func] = size
            return size
        return ids[func]

    open = set()
    func = output.parent
    open.add((-output.rank, get_id(func), func))

    # Map for grad_variables consumed on the backward graph
    grad_vars = OrderedDict()  # {F_fwd: {VO_fwd: [VI_bwd]}}
    grad_vars[func] = OrderedDict({output: [grad_output]})

    # Return grads
    wrt_inputs = inputs
    grads = [None] * len(wrt_inputs)

    # Expand the forward graph to the backward graph
    while len(open) != 0:
        open = sorted(open)      # python set is NOT a sorted set.
        rank_func = open.pop(0)  # 0 is necessary
        open = set(open)
        f = rank_func[2]
        if not f.need_grad:
            continue
        # Connect variables on the backward graph
        grad_outputs = self._connect_on_backward_graph(grad_vars, f)

        # Check grads w.r.t. inputs
        for inp, grad_out in zip(f.inputs, grad_outputs):
            if inp not in wrt_inputs or inp.need_grad == False:
                continue
            idx = wrt_inputs.index(inp)
            grads[idx] = grad_out
            if bind_grad_output:
                inp.grad = grad_out.data

        # Propagate down
        for inp in f.inputs:
            if not inp.need_grad:
                continue
            p_i = inp.parent
            if not p_i:
                continue
            open.add((-p_i.rank, get_id(p_i), p_i))

    return grads
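# The __call__ above expands the forward graph into a backward graph whose
# outputs are ordinary Variables, which is what makes double backpropagation
# possible. A typical use of this kind of API, through nnabla's nn.grad
# (shown as a usage sketch, independent of the class above):
import numpy as np
import nnabla as nn
import nnabla.functions as F

x = nn.Variable.from_numpy_array(np.random.randn(2, 3)).apply(need_grad=True)
y = F.sum(F.sin(x))
gx = nn.grad([y], [x])[0]    # dy/dx, itself a graph output
loss = F.sum(gx ** 2)        # penalize the gradient (e.g. a gradient penalty)
loss.forward()
loss.backward()              # second-order gradients accumulate into x.g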
def test_save_load_broadcast(tmpdir, variable_batch_size):
    x = nn.Variable([10, 1, 4, 1, 8])
    y = F.broadcast(x, shape=[10, 1, 4, 3, 8])
    check_save_load(tmpdir, x, y, variable_batch_size)


def test_no_value():
    a = nn.Variable(())
    b = nn.Variable(())
    with pytest.raises(RuntimeError):
        F.concatenate(*[a, b], axis=0)
def train(): """ Main script. Steps: * Parse command line arguments. * Specify contexts for computation. * Initialize DataIterator. * Construct a computation graph for training and one for validation. * Initialize solver and set parameter variables to that. * Create monitor instances for saving and displaying training stats. * Training loop * Computate error rate for validation data (periodically) * Get a next minibatch. * Execute forwardprop * Set parameter gradients zero * Execute backprop. * Solver updates parameters by using gradients computed by backprop. * Compute training error """ # Parse args args = get_args() n_train_samples = 50000 bs_valid = args.batch_size extension_module = args.context ctx = get_extension_context( extension_module, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) if args.net == "cifar10_resnet23": prediction = functools.partial( resnet23_prediction, ncls=10, nmaps=64, act=F.relu) data_iterator = data_iterator_cifar10 if args.net == "cifar100_resnet23": prediction = functools.partial( resnet23_prediction, ncls=100, nmaps=384, act=F.elu) data_iterator = data_iterator_cifar100 # Create training graphs test = False image_train = nn.Variable((args.batch_size, 3, 32, 32)) label_train = nn.Variable((args.batch_size, 1)) pred_train = prediction(image_train, test) loss_train = loss_function(pred_train, label_train) input_image_train = {"image": image_train, "label": label_train} # Create validation graph test = True image_valid = nn.Variable((bs_valid, 3, 32, 32)) pred_valid = prediction(image_valid, test) input_image_valid = {"image": image_valid} # Solvers solver = S.Adam() solver.set_parameters(nn.get_parameters()) start_point = 0 if args.checkpoint is not None: # load weights and solver state info from specified checkpoint file. start_point = load_checkpoint(args.checkpoint, solver) # Create monitor from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed monitor = Monitor(args.monitor_path) monitor_loss = MonitorSeries("Training loss", monitor, interval=10) monitor_err = MonitorSeries("Training error", monitor, interval=10) monitor_time = MonitorTimeElapsed("Training time", monitor, interval=10) monitor_verr = MonitorSeries("Test error", monitor, interval=1) # Data Iterator tdata = data_iterator(args.batch_size, True) vdata = data_iterator(args.batch_size, False) # save_nnp contents = save_nnp({'x': image_valid}, {'y': pred_valid}, args.batch_size) save.save(os.path.join(args.model_save_path, '{}_epoch0_result.nnp'.format(args.net)), contents) # Training-loop for i in range(start_point, args.max_iter): # Validation if i % int(n_train_samples / args.batch_size) == 0: ve = 0. 
            for j in range(args.val_iter):
                image, label = vdata.next()
                input_image_valid["image"].d = image
                pred_valid.forward()
                ve += categorical_error(pred_valid.d, label)
            ve /= args.val_iter
            monitor_verr.add(i, ve)
        if int(i % args.model_save_interval) == 0:
            # save checkpoint file
            save_checkpoint(args.model_save_path, i, solver)

        # Forward/Zerograd/Backward
        image, label = tdata.next()
        input_image_train["image"].d = image
        input_image_train["label"].d = label
        loss_train.forward()
        solver.zero_grad()
        loss_train.backward()

        # Solver update
        solver.update()

        e = categorical_error(pred_train.d, input_image_train["label"].d)
        monitor_loss.add(i, loss_train.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    nn.save_parameters(os.path.join(args.model_save_path,
                                    'params_%06d.h5' % (args.max_iter)))

    # save_nnp at the last epoch
    contents = save_nnp({'x': image_valid}, {'y': pred_valid}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           '{}_result.nnp'.format(args.net)), contents)
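# A minimal inference sketch (not part of the original script): reload the
# parameters saved above and classify a single input with the same
# `prediction` network in test mode. It reuses `args` and `prediction` from
# train(); the random input is a stand-in for a real CIFAR image.
import numpy as np

nn.clear_parameters()
nn.load_parameters(os.path.join(args.model_save_path,
                                'params_%06d.h5' % args.max_iter))
x_infer = nn.Variable((1, 3, 32, 32))
y_infer = prediction(x_infer, True)        # test=True graph
x_infer.d = np.random.rand(1, 3, 32, 32)
y_infer.forward(clear_buffer=True)
print("predicted class:", int(y_infer.d.argmax(axis=1)[0]))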
                                       shuffle=True, with_file_cache=False)
valid_data_iter = data_iterator_simple(load_valid_func, len(x_valid), batch_size,
                                       shuffle=True, with_file_cache=False)

char_embedding_dim = 16
lstm_size = 650
filters = [50, 150, 200, 200]
filter_sizes = [1, 3, 5, 7]
# filters = [50, 100, 150, 200, 200, 200, 200]
# filter_sizes = [1, 2, 3, 4, 5, 6, 7]

x = nn.Variable((batch_size, sentence_length, word_length))
h = PF.embed(x, char_vocab_size, char_embedding_dim)  # (B, S, W, char_embedding_dim)
h = F.transpose(h, (0, 3, 1, 2))                      # (B, char_embedding_dim, S, W)

output = []
for f, f_size in zip(filters, filter_sizes):
    _h = PF.convolution(h, f, kernel=(1, f_size), pad=(0, f_size // 2),
                        name='conv_{}'.format(f_size))
    _h = F.max_pooling(_h, kernel=(1, word_length))   # pool over the word length
    output.append(_h)
h = F.concatenate(*output, axis=1)                    # (B, sum(filters), S, 1)

h = F.transpose(h, (0, 2, 1, 3))
h = F.reshape(h, (batch_size, sentence_length, sum(filters)))
# h = PF.batch_normalization(h, axes=[2])
def test_global_avgpool_module():
    shape = (10, 3, 32, 32)
    input = nn.Variable(shape)
    inp_module = smo.Input(value=input)
    pool = smo.GlobalAvgPool(parents=[inp_module])
    assert pool.shape == (10, 3, 1, 1)
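# The module above should agree with nnabla's functional API; a quick
# cross-check sketch using F.global_average_pooling:
import numpy as np
import nnabla as nn
import nnabla.functions as F

x = nn.Variable.from_numpy_array(np.random.randn(10, 3, 32, 32))
y = F.global_average_pooling(x)
y.forward()
print(y.shape)  # (10, 3, 1, 1)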
def main():

    # Get arguments
    args = get_args()
    data_file = "https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.train.txt"
    model_file = args.work_dir + "model.h5"

    # Load Dataset
    itow, wtoi, dataset = load_ptbset(data_file)

    # Computation environment settings
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create data provider
    n_word = len(wtoi)
    n_dim = args.embed_dim
    batchsize = args.batchsize
    half_window = args.half_window_length
    n_negative = args.n_negative_sample

    di = DataIteratorForEmbeddingLearning(
        batchsize=batchsize,
        half_window=half_window,
        n_negative=n_negative,
        dataset=dataset)

    # Create model
    # - Real batch size including context samples and negative samples
    size = batchsize * (1 + n_negative) * (2 * (half_window - 1))

    # Model for learning
    # - input variables
    xl = nn.Variable((size,))  # variable for word
    yl = nn.Variable((size,))  # variable for context

    # Embed layers for the word embedding function
    # - f_embed: maps a word index x to an n_dim feature vector y
    # -- applied to each sample in the minibatch
    hx = PF.embed(xl, n_word, n_dim, name="e1")  # feature vector for word
    hy = PF.embed(yl, n_word, n_dim, name="e1")  # feature vector for context
    hl = F.sum(hx * hy, axis=1)

    # -- Approximate likelihood of context prediction
    # - target: 1 for context (positive) samples, 0 for negative samples
    tl = nn.Variable([size, ], need_grad=False)
    loss = F.sigmoid_cross_entropy(hl, tl)
    loss = F.mean(loss)

    # Model for test of searching similar words
    xr = nn.Variable((size,), need_grad=False)
    hr = PF.embed(xr, n_word, n_dim, name="e1")  # feature vector for test

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    monitor = M.Monitor(args.work_dir)
    monitor_loss = M.MonitorSeries(
        "Training loss", monitor, interval=args.monitor_interval)
    monitor_time = M.MonitorTimeElapsed(
        "Training time", monitor, interval=args.monitor_interval)

    # Do training
    max_epoch = args.max_epoch
    for epoch in range(max_epoch):

        # iteration per epoch
        for i in range(di.n_batch):

            # get minibatch
            xi, yi, ti = di.next()

            # learn
            solver.zero_grad()
            xl.d, yl.d, tl.d = xi, yi, ti
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            solver.update()

            # monitor
            itr = epoch * di.n_batch + i
            monitor_loss.add(itr, loss.d)
            monitor_time.add(itr)

    # Save model
    nn.save_parameters(model_file)

    nnp_file = os.path.join(
        args.work_dir, 'wtov_%06d.nnp' % (args.max_epoch))
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': size,
             'outputs': {'e': hr},
             'names': {'w': xr}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['w'],
             'output': ['e']}]}
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.work_dir, [xi], [xr], hr, nnp_file)

    # NOTE: exit() stops the script here, so the similarity evaluation below
    # never runs unless this call is removed.
    exit()

    # Evaluate by similarity
    max_check_words = args.max_check_words
    for i in range(max_check_words):

        # prediction
        xr.d = i
        hr.forward(clear_buffer=True)
        h = hr.d

        # similarity calculation
        w = nn.get_parameters()['e1/embed/W'].d
        s = np.sqrt((w * w).sum(1))
        w /= s.reshape((s.shape[0], 1))
        similarity = w.dot(h[0]) / s[i]

        # for understanding
        output_similar_words(itow, i, similarity)
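# A small post-training sketch: reload the embedding matrix saved to
# model_file and list nearest neighbours of one word by cosine similarity.
# It reuses model_file, wtoi and itow from main(); the query word "bank" is
# purely hypothetical.
import numpy as np
import nnabla as nn

nn.clear_parameters()
nn.load_parameters(model_file)
w = nn.get_parameters()['e1/embed/W'].d            # (n_word, n_dim)
w = w / np.linalg.norm(w, axis=1, keepdims=True)   # L2-normalize rows

query = wtoi['bank']
sims = w.dot(w[query])
top = np.argsort(-sims)[1:6]                       # skip the query word itself
print([itow[i] for i in top])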
def prod_sum(inputs0, inputs1):
    out = 0.0
    for inp0, inp1 in zip(inputs0, inputs1):
        out += inp0 * nn.Variable(inp1.shape).apply(data=inp1)
    return out
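# A usage sketch for prod_sum: inputs0 are graph Variables and inputs1 are
# NdArrays, each wrapped into a fresh Variable; the result is the sum of
# their elementwise products (the values here are arbitrary).
import numpy as np
import nnabla as nn

v0 = [nn.Variable.from_numpy_array(np.full((2, 2), 2.0)) for _ in range(3)]
a1 = [nn.NdArray.from_numpy_array(np.full((2, 2), 3.0)) for _ in range(3)]

out = prod_sum(v0, a1)
out.forward()
print(out.d)  # every element is 2.0 * 3.0 * 3 = 18.0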
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    margin = 1.0  # Margin for contrastive loss.

    # TRAIN
    # Create input variables.
    image0 = nn.Variable([args.batch_size, 1, 28, 28])
    image1 = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size])
    # Create prediction graph.
    pred = mnist_lenet_siamese(image0, image1, test=False)
    # Create loss function.
    loss = F.mean(contrastive_loss(pred, label, margin))

    # TEST
    # Create input variables.
    vimage0 = nn.Variable([args.batch_size, 1, 28, 28])
    vimage1 = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size])
    # Create prediction graph.
    vpred = mnist_lenet_siamese(vimage0, vimage1, test=True)
    vloss = F.mean(contrastive_loss(vpred, vlabel, margin))

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_vloss = M.MonitorSeries("Test loss", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    rng = np.random.RandomState(313)
    data = siamese_data_iterator(args.batch_size, True, rng)
    vdata = siamese_data_iterator(args.batch_size, False, rng)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage0.d, vimage1.d, vlabel.d = vdata.next()
                vloss.forward(clear_buffer=True)
                ve += vloss.d
            monitor_vloss.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
        image0.d, image1.d, label.d = data.next()
        solver.zero_grad()
        # Training forward, backward and update
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, loss.d.copy())
        monitor_time.add(i)

    nn.save_parameters(os.path.join(
        args.model_save_path, 'params_%06d.h5' % args.max_iter))
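# For reference only: a standard contrastive-loss sketch in the spirit of
# Hadsell et al., not necessarily identical to the contrastive_loss imported
# by this script. `sd` is assumed to be a squared distance per pair and
# `label` is 1 for similar pairs, 0 for dissimilar ones.
import nnabla.functions as F

def contrastive_loss_sketch(sd, label, margin=1.0, eps=1e-4):
    sim = label * sd
    dissim = (1 - label) * (F.maximum_scalar(margin - (sd + eps) ** 0.5, 0) ** 2)
    return sim + dissim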
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN
    # Fake path
    #nn.load_parameters("/home/mizuochi/programing/font/dcgan_model_0220/generator_param_522000.h5")
    #z.d = np.random.randn(*z.shape)
    #gen.forward()
    #for i in range(40):
    #    Image.fromarray(np.uint8((gen.d[i][0]+1)*255/2.0)).save("./test/"+str(i)+".png")

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    x_ref = nn.Variable([args.batch_size, 1, 28, 28])
    #vec = nn.Variable([args.batch_size, 100])

    pred_vec = vectorizer(x, maxh=1024, test=False)
    print(pred_vec.shape)
    #z = pred_vec.reshape((args.batch_size, 100, 1, 1))
    #gen = generator(z, test=True)
    gen = generator(pred_vec, maxh=1024, test=False)
    gen.persistent = True
    with nn.parameter_scope("gen"):
        #nn.load_parameters("/home/mizuochi/programing/font/dcgan_model_0220/generator_param_290000.h5")
        nn.load_parameters(
            "/home/mizuochi/programing/font/tmp.monitor.dcgan1000/generator_param_458000.h5")

    #loss_dis = F.mean(F.sigmoid_cross_entropy(pred_vec, vec))
    print("x_ref shape", x_ref.shape)
    print("gen shape", gen.shape)
    #loss_dis = F.mean(F.squared_error(x_ref.reshape((64, 28*28)), gen.reshape((64, 28*28))))
    loss_dis = F.mean(F.squared_error(x_ref, gen))
    print(loss_dis.d)

    # Create Solver.
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)

    #data = data_iterator_mnist(args.batch_size, True)
    data = iterator.simple_data_iterator(load_kanji_data(), args.batch_size, True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("dis"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "vectorizer_param_%06d.h5" % i))

        # Training forward
        buf, buf2 = data.next()
        x.d = buf * 2.0 / 255. - 1.0      # [0, 255] to [-1, 1]
        x_ref.d = buf * 2.0 / 255. - 1.0

        # Vectorizer update (the solver holds the parameters collected under
        # the "dis" scope).
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    with nn.parameter_scope("dis"):
        nn.save_parameters(os.path.join(
            args.model_save_path, "vectorizer_param_%06d.h5" % i))
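# A quick reconstruction check that could be run right after the loop above,
# in the same process (a sketch: it reuses `data`, `x`, and `gen` from this
# script, and assumes PIL's Image is available as in the commented-out code).
import numpy as np
from PIL import Image

buf, _ = data.next()
x.d = buf * 2.0 / 255. - 1.0
gen.forward(clear_buffer=True)
rec = np.uint8((gen.d[0][0] + 1) * 255 / 2.0)    # back to [0, 255]
Image.fromarray(rec).save("reconstruction.png")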
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN
    # Fake path
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    fake_dis = fake.get_unlinked_variable(need_grad=True)
    fake_dis.need_grad = True  # TODO: Workaround until v1.0.2
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis, F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())
    start_point = 0

    if args.checkpoint is not None:
        # load weights and solver state info from the specified checkpoint files.
        start_point = load_checkpoint(
            args.checkpoint, {"gen": solver_gen, "dis": solver_dis})

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile(
        "Fake images", monitor, normalize_method=lambda x: (x + 1) / 2.)

    data = data_iterator_mnist(args.batch_size, True)

    # Save_nnp
    contents = save_nnp({'x': z}, {'y': fake}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           'Generator_result_epoch0.nnp'), contents)
    contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           'Discriminator_result_epoch0.nnp'), contents)

    # Training loop.
    for i in range(start_point, args.max_iter):
        if i % args.model_save_interval == 0:
            save_checkpoint(args.model_save_path, i,
                            {"gen": solver_gen, "dis": solver_dis})

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    with nn.parameter_scope("gen"):
        nn.save_parameters(os.path.join(
            args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(os.path.join(
            args.model_save_path, "discriminator_param_%06d.h5" % i))

    # Save_nnp
    contents = save_nnp({'x': z}, {'y': fake}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           'Generator_result.nnp'), contents)
    contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           'Discriminator_result.nnp'), contents)
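# A sampling sketch after training: reload the saved generator parameters and
# draw a batch of fake digits. It assumes the same generator() used above,
# which creates its parameters under the "gen" scope (that is why
# set_parameters and save_parameters above are wrapped in that scope).
import os
import numpy as np
import nnabla as nn

nn.clear_parameters()
with nn.parameter_scope("gen"):
    nn.load_parameters(os.path.join(args.model_save_path,
                                    "generator_param_%06d.h5" % (args.max_iter - 1)))

z_sample = nn.Variable([16, 100, 1, 1])
samples = generator(z_sample)          # rebuilds the graph on the loaded parameters
z_sample.d = np.random.randn(*z_sample.shape)
samples.forward(clear_buffer=True)
imgs = (samples.d + 1) / 2.            # map from [-1, 1] back to [0, 1] for viewing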