def train(generator, discriminator, patch_gan, solver_gen, solver_dis, weight_l1, train_iterator, val_iterator, epoch, monitor, interval):
    """Train a pix2pix generator/discriminator pair.

    Parameters
    ----------
    generator : callable
        Builds the generator graph: ``generator(x, test=False)``.
    discriminator : callable
        Builds the discriminator graph:
        ``discriminator(x, img, patch_gan=..., test=False)``.
    patch_gan :
        Forwarded to ``discriminator`` (presumably enables PatchGAN output
        — TODO confirm against the discriminator definition).
    solver_gen, solver_dis :
        Solvers updating the 'generator' / 'discriminator' parameter scopes.
    weight_l1 : float
        Weight of the L1 reconstruction term in the generator loss.
    train_iterator, val_iterator :
        Data iterators yielding (image, label) batches.
    epoch : int
        Number of epochs to run.
    monitor :
        nnabla Monitor instance used for all series/image monitors.
    interval : int
        Monitor flush interval.

    Returns
    -------
    str
        Path of the generator parameter file saved after training.
    """
    # Create Network Graph
    # for training
    im, la = train_iterator.next()  # for checking image shape
    real = nn.Variable(im.shape)  # real
    x = nn.Variable(la.shape)  # x
    # for validation
    real_val = nn.Variable(im.shape)  # real
    x_val = nn.Variable(la.shape)  # x

    # Generator
    fake = generator(x, test=False)
    # pix2pix infers just like training mode.
    fake_val = generator(x_val, test=False)
    fake_val.persistent = True  # Keep to visualize

    # Discriminator
    fake_y = discriminator(x, fake, patch_gan=patch_gan, test=False)
    real_y = discriminator(x, real, patch_gan=patch_gan, test=False)
    # Constant targets: 1 for "real", 0 for "fake".
    real_target = nn.Variable(fake_y.shape)
    real_target.data.fill(1)
    fake_target = nn.Variable(real_y.shape)
    fake_target.data.zero()

    # Generator loss = weighted L1 reconstruction + adversarial term.
    loss_gen = F.mean(weight_l1 * F.abs(real - fake)) + \
        F.mean(F.sigmoid_cross_entropy(fake_y, real_target))
    # Discriminator loss = real-as-real + fake-as-fake.
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(real_y, real_target) +
        F.sigmoid_cross_entropy(fake_y, fake_target))

    # Setting Solvers
    with nn.parameter_scope('generator'):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope('discriminator'):
        solver_dis.set_parameters(nn.get_parameters())

    # Create Monitors
    monitors = {
        'loss_gen': nm.MonitorSeries("Generator loss", monitor,
                                     interval=interval),
        'loss_dis': nm.MonitorSeries("Discriminator loss", monitor,
                                     interval=interval),
        'time': nm.MonitorTimeElapsed("Training time", monitor,
                                      interval=interval),
        'fake': nm.MonitorImageTile(
            "Fake images", monitor, interval=interval, num_images=2,
            # Pixel values are assumed to be in [0, 255] here — TODO confirm.
            normalize_method=lambda x: np.clip(np.divide(x, 255.0), 0.0, 1.0)),
    }

    i = 0
    for e in range(epoch):
        logger.info('Epoch = {}'.format(e))
        # Training: iterate until the data iterator rolls to the next epoch.
        while e == train_iterator.epoch:
            # forward / backward process
            real.d, x.d = train_iterator.next()
            solver_dis.zero_grad()
            solver_gen.zero_grad()
            # Discriminator
            loss_dis.forward(clear_no_need_grad=True)
            loss_dis.backward(clear_buffer=True)
            solver_dis.update()
            # Generator
            loss_gen.forward(clear_no_need_grad=True)
            loss_gen.backward(clear_buffer=True)
            solver_gen.update()
            monitors['time'].add(i)
            monitors['loss_gen'].add(i, loss_gen.d.copy())
            monitors['loss_dis'].add(i, loss_dis.d.copy())
            # Validation: visualize label/generated pairs.
            real_val.d, x_val.d = val_iterator.next()
            fake_val.forward()
            # Interleave label images and generated images per sample.
            pix2pix_vis = np.stack(
                [label_to_image(x_val.d), normalize_image(fake_val.d)],
                axis=1).reshape((-1, ) + fake.shape[1:])
            monitors['fake'].add(i, pix2pix_vis)
            i += 1

    # save parameters of generator
    # NOTE(review): relies on the private Monitor attribute `_save_path`.
    save_path = os.path.join(monitor._save_path,
                             'generator_model_{}.h5'.format(i))
    with nn.parameter_scope('generator'):
        nn.save_parameters(save_path)
    return save_path
def train(args):
    """
    Main script.

    Train a GAN plus an encoder ("vectorizer") on the kanji dataset:
      * loss_vec: L2 reconstruction of x1 through vectorizer -> generator,
      * loss_gen: adversarial loss for the generator,
      * loss_dis: discriminator loss on generated vs. real images.

    Uses module-level ``vectorizer``, ``generator``, ``discriminator``,
    ``load_kanji_data``, ``iterator`` and ``logger``.
    """
    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path: encode a real image, then decode it with the generator.
    x1 = nn.Variable([args.batch_size, 1, 28, 28])
    z = vectorizer(x1)
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    # Reconstruction loss driving the vectorizer (and generator) parameters.
    loss_vec = F.mean(F.squared_error(fake, x1))
    # Unlink so the discriminator loss does not backprop into the generator.
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    solver_vec = S.Adam(args.learning_rate, beta1=0.5)
    # The vectorizer solver must update BOTH the "vec" and "gen" parameters.
    # BUG FIX: the second set_parameters() call needs reset=False — the
    # default reset=True would throw away the "vec" parameters registered
    # just above, so the vectorizer network would never be trained.
    with nn.parameter_scope("vec"):
        solver_vec.set_parameters(nn.get_parameters())
    with nn.parameter_scope("gen"):
        solver_vec.set_parameters(nn.get_parameters(), reset=False)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss", monitor,
                                       interval=10)
    monitor_loss_vec = M.MonitorSeries("Vectorizer loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    # NOTE(review): `x + 1 / 2.` parses as `x + 0.5`, not `(x + 1) / 2.`.
    # With inputs scaled to [-0.5, 0.5] below, `x + 0.5` happens to map the
    # data to [0, 1], so the behavior is kept; confirm the intended range of
    # the generator output before "fixing" the precedence.
    monitor_fake = M.MonitorImageTile("Fake images", monitor,
                                      normalize_method=lambda x: x + 1 / 2.)
    monitor_vec1 = M.MonitorImageTile("vec images1", monitor,
                                      normalize_method=lambda x: x + 1 / 2.)
    monitor_vec2 = M.MonitorImageTile("vec images2", monitor,
                                      normalize_method=lambda x: x + 1 / 2.)

    data = iterator.simple_data_iterator(load_kanji_data(), args.batch_size,
                                         True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x1.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]

        # Vectorizer (reconstruction) update.
        solver_vec.zero_grad()
        loss_vec.forward(clear_no_need_grad=True)
        loss_vec.backward(clear_buffer=True)
        solver_vec.weight_decay(args.weight_decay)
        solver_vec.update()
        # `fake` is persistent, so its data is still valid after the forward.
        monitor_vec1.add(i, fake)
        monitor_vec2.add(i, x1)
        monitor_loss_vec.add(i, loss_vec.d.copy())

        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        # NOTE(review): `z` is the vectorizer's OUTPUT, not an unlinked input;
        # loss_gen.forward() below recomputes it from x1 and likely overwrites
        # this noise. The later revision of this script unlinks z — confirm.
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    # Save the final parameters (i holds the last loop index).
    with nn.parameter_scope("gen"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "generator_param_%06d.h5" % i))
    with nn.parameter_scope("dis"):
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         "discriminator_param_%06d.h5" % i))
# パラメータスコープの使い方を見ておく。 print(len(nn.get_parameters())) with nn.parameter_scope("gen"): print(len(nn.get_parameters())) # パラメータスコープ内では、`get_parameters()`で取得できるパラメータがフィルタリングされ # る。 # モニターの設定 path = cache_dir(os.path.join(I.name, "monitor")) monitor = M.Monitor(path) monitor_loss_gen = M.MonitorSeries("generator_loss", monitor, interval=100) monitor_loss_dis = M.MonitorSeries("discriminator_loss", monitor, interval=100) monitor_time = M.MonitorTimeElapsed("time", monitor, interval=100) monitor_fake = M.MonitorImageTile("Fake images", monitor, normalize_method=lambda x: (x + 1) / 2.0) # パラメータ保存関数の定義 def save_parameters(i): with nn.parameter_scope("gen"): nn.save_parameters(os.path.join(path, "generator_param_%06d.h5" % i)) with nn.parameter_scope("dis"): nn.save_parameters( os.path.join(path, "discriminator_param_%06d.h5" % i)) # 訓練の実行 def train(max_iter): data = I.data_iterator_mnist(batch_size, True)
def train(args):
    """
    Main script.

    Revised GAN + vectorizer training on the kanji dataset. Compared to the
    earlier variant, the latent ``z`` is an unlinked copy of the vectorizer
    output so sampled noise is not overwritten by the reconstruction path,
    and the reconstruction uses a separate generator graph (``fake2``).

    Uses module-level ``vectorizer``, ``generator``, ``discriminator``,
    ``load_kanji_data``, ``iterator`` and ``logger``.
    """
    x1 = nn.Variable([args.batch_size, 1, 28, 28])
    z_vec = vectorizer(x1)
    # Unlinked latent: feeding noise into `z` does not touch the
    # vectorizer graph, and vice versa.
    z = z_vec.unlinked()
    fake2 = generator(z_vec)  # reconstruction path (vectorizer -> generator)
    fake = generator(z)       # sampling path (noise -> generator)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    loss_vec = F.mean(F.squared_error(fake2, x1))
    # Unlink so the discriminator loss does not backprop into the generator.
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    solver_vec = S.Adam(args.learning_rate, beta1=0.5)
    # The vectorizer solver must update BOTH the "vec" and "gen" parameters.
    # BUG FIX: the second set_parameters() call needs reset=False — the
    # default reset=True would throw away the "vec" parameters registered
    # just above, so the vectorizer network would never be trained.
    with nn.parameter_scope("vec"):
        solver_vec.set_parameters(nn.get_parameters())
    with nn.parameter_scope("gen"):
        solver_vec.set_parameters(nn.get_parameters(), reset=False)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss", monitor,
                                       interval=10)
    monitor_loss_vec = M.MonitorSeries("Vectorizer loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    # BUG FIX: `x + 1 / 2.` parses as `x + 0.5`. The data below is scaled to
    # [-1, 1], so the intended mapping to [0, 1] is `(x + 1) / 2.`.
    monitor_fake = M.MonitorImageTile(
        "Fake images", monitor, normalize_method=lambda x: (x + 1) / 2.)
    monitor_vec1 = M.MonitorImageTile(
        "vec images1", monitor, normalize_method=lambda x: (x + 1) / 2.)
    monitor_vec2 = M.MonitorImageTile(
        "vec images2", monitor, normalize_method=lambda x: (x + 1) / 2.)

    data = iterator.simple_data_iterator(load_kanji_data(), args.batch_size,
                                         True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))
            with nn.parameter_scope("vec"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "vectorizer_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x1.d = image / 255. * 2 - 1.0  # [0, 255] to [-1, 1]

        # Vectorizer (reconstruction) update.
        solver_vec.zero_grad()
        loss_vec.forward(clear_no_need_grad=True)
        loss_vec.backward(clear_buffer=True)
        solver_vec.weight_decay(args.weight_decay)
        solver_vec.update()
        # Re-run the reconstruction forward to get fresh image data
        # (buffers were cleared during backward).
        fake2.forward()
        monitor_vec1.add(i, fake2)
        monitor_vec2.add(i, x1)
        monitor_loss_vec.add(i, loss_vec.d.copy())

        x.d = image / 255. * 2 - 1.0  # [0, 255] to [-1, 1]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)
def train(args): """ Main script. """ # Get context. from nnabla.ext_utils import get_extension_context logger.info("Running in %s" % args.context) ctx = get_extension_context(args.context, device_id=args.device_id, type_config=args.type_config) nn.set_default_context(ctx) # Create CNN network for both training and testing. # TRAIN # Fake path z = nn.Variable([args.batch_size, 100, 1, 1]) fake = generator(z) fake.persistent = True # Not to clear at backward pred_fake = discriminator(fake) loss_gen = F.mean( F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape))) fake_dis = fake.get_unlinked_variable(need_grad=True) fake_dis.need_grad = True # TODO: Workaround until v1.0.2 pred_fake_dis = discriminator(fake_dis) loss_dis = F.mean( F.sigmoid_cross_entropy(pred_fake_dis, F.constant(0, pred_fake_dis.shape))) # Real path x = nn.Variable([args.batch_size, 1, 28, 28]) pred_real = discriminator(x) loss_dis += F.mean( F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape))) # Create Solver. solver_gen = S.Adam(args.learning_rate, beta1=0.5) solver_dis = S.Adam(args.learning_rate, beta1=0.5) with nn.parameter_scope("gen"): solver_gen.set_parameters(nn.get_parameters()) with nn.parameter_scope("dis"): solver_dis.set_parameters(nn.get_parameters()) start_point = 0 if args.checkpoint is not None: # load weights and solver state info from specified checkpoint files. start_point = load_checkpoint(args.checkpoint, { "gen": solver_gen, "dis": solver_dis }) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10) monitor_loss_dis = M.MonitorSeries("Discriminator loss", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100) monitor_fake = M.MonitorImageTile("Fake images", monitor, normalize_method=lambda x: (x + 1) / 2.) 
data = data_iterator_mnist(args.batch_size, True) # Save_nnp contents = save_nnp({'x': z}, {'y': fake}, args.batch_size) save.save( os.path.join(args.model_save_path, 'Generator_result_epoch0.nnp'), contents) contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size) save.save( os.path.join(args.model_save_path, 'Discriminator_result_epoch0.nnp'), contents) # Training loop. for i in range(start_point, args.max_iter): if i % args.model_save_interval == 0: save_checkpoint(args.model_save_path, i, { "gen": solver_gen, "dis": solver_dis }) # Training forward image, _ = data.next() x.d = image / 255. - 0.5 # [0, 255] to [-1, 1] z.d = np.random.randn(*z.shape) # Generator update. solver_gen.zero_grad() loss_gen.forward(clear_no_need_grad=True) loss_gen.backward(clear_buffer=True) solver_gen.weight_decay(args.weight_decay) solver_gen.update() monitor_fake.add(i, fake) monitor_loss_gen.add(i, loss_gen.d.copy()) # Discriminator update. solver_dis.zero_grad() loss_dis.forward(clear_no_need_grad=True) loss_dis.backward(clear_buffer=True) solver_dis.weight_decay(args.weight_decay) solver_dis.update() monitor_loss_dis.add(i, loss_dis.d.copy()) monitor_time.add(i) with nn.parameter_scope("gen"): nn.save_parameters( os.path.join(args.model_save_path, "generator_param_%06d.h5" % i)) with nn.parameter_scope("dis"): nn.save_parameters( os.path.join(args.model_save_path, "discriminator_param_%06d.h5" % i)) # Save_nnp contents = save_nnp({'x': z}, {'y': fake}, args.batch_size) save.save(os.path.join(args.model_save_path, 'Generator_result.nnp'), contents) contents = save_nnp({'x': x}, {'y': pred_real}, args.batch_size) save.save(os.path.join(args.model_save_path, 'Discriminator_result.nnp'), contents)
# Monitors for the cycle-consistency training loop below.
monitor = M.Monitor(opt.monitor_path)
monitor_loss_cyc = M.MonitorSeries('Cycle loss', monitor,
                                   interval=opt.monitor_interval)
monitor_loss_gen = M.MonitorSeries('Generator loss', monitor,
                                   interval=opt.monitor_interval)
monitor_loss_dis = M.MonitorSeries('Discriminator loss', monitor,
                                   interval=opt.monitor_interval)
monitor_time = M.MonitorTimeElapsed('Time', monitor,
                                    interval=opt.monitor_interval)
# NOTE(review): `x + 1 / 2.` parses as `x + 0.5`, not `(x + 1) / 2.`;
# presumably the latter was intended to map [-1, 1] outputs to [0, 1]
# — confirm the image range before changing.
monitor_A = M.MonitorImageTile('Fake images_A', monitor,
                               normalize_method=lambda x: x + 1 / 2.,
                               interval=opt.generate_interval)
monitor_B = M.MonitorImageTile('Fake images_B', monitor,
                               normalize_method=lambda x: x + 1 / 2.,
                               interval=opt.generate_interval)

# training loop
for i in range(opt.max_iter):
    # `updater.update` performs one optimization step and returns the
    # translated/reconstructed images plus the three loss values.
    (x_A, x_AB, x_ABA, x_B, x_BA, x_BAB, loss_cyc, loss_gen,
     loss_dis) = updater.update(i)
    # Tile original / translated / reconstructed side by side (width axis).
    As = np.concatenate((x_A.d, x_AB.d, x_ABA.d), axis=3)
    Bs = np.concatenate((x_B.d, x_BA.d, x_BAB.d), axis=3)
    monitor_A.add(i, As)
def train(args):
    """Overfit `network` so its output reproduces a single target image.

    Builds the graph once, re-initializes all parameters with Gaussian
    noise, then repeatedly minimizes the squared error between the network
    output and the target image loaded via ``makeOutput("test.png")``,
    periodically dumping PNG snapshots and parameter files.

    Uses module-level ``network``, ``makeInput``, ``makeOutput``,
    ``makePng``, ``SIZE`` and ``logger``.
    """
    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Build the network graph on a fixed-size input.
    x = nn.Variable([1, 3, SIZE, SIZE])
    y = network(x)

    # Dump the untrained network's output once for reference.
    dataIn = makeInput()
    x.d = dataIn.copy()
    y.forward()
    img = makePng(y.d)
    img.save(os.path.join(args.model_save_path, "first.png"))

    # Target image.
    output = nn.Variable([1, 3, SIZE, SIZE])
    dataOut = makeOutput("test.png")
    output.d = dataOut
    loss = F.mean(F.squared_error(y, output))

    # Re-initialize all parameters with Gaussian noise.
    param = nn.get_parameters()
    for _, p in param.items():
        p.d = np.random.randn(*p.d.shape)

    solver = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("net"):
        solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_gen = M.MonitorImageTile("gen images", monitor)

    # Peek at one weight tensor as a debugging aid.
    with nn.parameter_scope("net"):
        param = nn.get_parameters()
    # BUG FIX: was a Python 2 print statement (`print param...`), which is a
    # SyntaxError under Python 3; the rest of this file uses print().
    print(param.get("conv0/conv/W").d.reshape((16, 16))[:10, :10])

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("net"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))

        # Training forward
        x.d = dataIn.copy()

        # Update step.
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        if i % 10 == 0:
            # Snapshot the current network output.
            img = makePng(y.d)
            img.save(
                os.path.join(args.model_save_path, "output_%06d.png" % i))
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_gen.add(i, y)
        monitor_loss_gen.add(i, loss.d.copy())
        monitor_time.add(i)

    # Save the final parameters (i holds the last loop index).
    with nn.parameter_scope("net"):
        nn.save_parameters(
            os.path.join(args.model_save_path, "generator_param_%06d.h5" % i))
    return
def train(args):
    """
    Main script.

    Train a DCGAN on MNIST, periodically save generator/discriminator
    parameters, then export both networks into one NNP archive and verify
    the generator against the C++ forward implementation.
    """
    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN
    # Fake path: 100-dim noise -> generated image.
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(
        F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
    # Unlink so the discriminator loss does not backprop into the generator.
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(
        F.sigmoid_cross_entropy(pred_fake_dis,
                                F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(
        F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries("Discriminator loss", monitor,
                                       interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    # BUG FIX: `x + 1 / 2.` parses as `x + 0.5`. Parenthesize so the
    # generator output (presumably tanh range [-1, 1] — confirm) is mapped
    # to [0, 1] for image dumping, matching the corrected DCGAN example.
    monitor_fake = M.MonitorImageTile("Fake images", monitor,
                                      normalize_method=lambda x: (x + 1) / 2.)

    data = data_iterator_mnist(args.batch_size, True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(
                    os.path.join(args.model_save_path,
                                 "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    # Export both networks into a single NNP file for the runtime.
    nnp = os.path.join(args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [{
            'name': 'Generator',
            'batch_size': args.batch_size,
            'outputs': {
                'G': fake
            },
            'names': {
                'z': z
            }
        }, {
            'name': 'Discriminator',
            'batch_size': args.batch_size,
            'outputs': {
                'D': pred_real
            },
            'names': {
                'x': x
            }
        }],
        'executors': [{
            'name': 'Generator',
            'network': 'Generator',
            'data': ['z'],
            'output': ['G']
        }, {
            'name': 'Discriminator',
            'network': 'Discriminator',
            'data': ['x'],
            'output': ['D']
        }]
    }
    save.save(nnp, runtime_contents)

    # Cross-check the exported generator against the C++ forward path.
    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp, "Generator")