def train(args):
    """Train CycleGAN in PaddlePaddle dygraph (imperative) mode.

    Per image pair, three optimization steps are alternated:
      1. both generators jointly (combined ``g_loss``),
      2. discriminator d_A,
      3. discriminator d_B,
    each with its own optimizer restricted to the matching parameter
    subset (selected by parameter-name prefix).

    Per-batch losses are printed and appended to ``logging_train.txt``;
    when ``args.save_checkpoints`` is set, persistables are saved once
    per epoch under ``args.output + "/checkpoints/<epoch>"``.
    """
    with fluid.dygraph.guard():
        max_images_num = data_reader.max_images_num()
        shuffle = True
        data_shape = [-1] + data_reader.image_shape()
        print(data_shape)

        if args.ce:
            # Continuous-evaluation mode: fix every RNG seed and disable
            # shuffling so runs are bit-for-bit reproducible.
            print("ce mode")
            seed = 33
            random.seed(seed)
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            shuffle = False

        A_pool = ImagePool()
        B_pool = ImagePool()
        A_reader = paddle.batch(
            data_reader.a_reader(shuffle=shuffle), args.batch_size)()
        B_reader = paddle.batch(
            data_reader.b_reader(shuffle=shuffle), args.batch_size)()
        A_test_reader = data_reader.a_test_reader()
        B_test_reader = data_reader.b_test_reader()

        cycle_gan = Cycle_Gan("cycle_gan", istrain=True)

        losses = [[], []]
        t_time = 0
        # One optimizer instance each for: generators, d_A, d_B.
        optimizer1 = optimizer_setting()
        optimizer2 = optimizer_setting()
        optimizer3 = optimizer_setting()

        for epoch in range(args.epoch):
            batch_id = 0
            for i in range(max_images_num):
                data_A = next(A_reader)
                data_B = next(B_reader)
                s_time = time.time()
                # NOTE(review): input is hard-coded to 3x256x256 and only
                # the first sample of each batch is used — effectively
                # batch size 1 regardless of args.batch_size.
                data_A = np.array(
                    [data_A[0].reshape(3, 256, 256)]).astype("float32")
                data_B = np.array(
                    [data_B[0].reshape(3, 256, 256)]).astype("float32")
                data_A = to_variable(data_A)
                data_B = to_variable(data_B)

                # optimize the g_A network (joint generator step)
                fake_A, fake_B, cyc_A, cyc_B, g_A_loss, g_B_loss, \
                    idt_loss_A, idt_loss_B, cyc_A_loss, cyc_B_loss, \
                    g_loss = cycle_gan(data_A, data_B, True, False, False)
                g_loss_out = g_loss.numpy()
                g_loss.backward()
                # Restrict the update to generator parameters, selected by
                # their name prefix.
                vars_G = []
                for param in cycle_gan.parameters():
                    if param.name[:52] == \
                            "cycle_gan/Cycle_Gan_0/build_generator_resnet_9blocks":
                        vars_G.append(param)
                optimizer1.minimize(g_loss, parameter_list=vars_G)
                cycle_gan.clear_gradients()

                # Discriminators are fed from history image pools to
                # stabilize training.
                fake_pool_B = B_pool.pool_image(fake_B).numpy()
                fake_pool_B = np.array(
                    [fake_pool_B[0].reshape(3, 256, 256)]).astype("float32")
                fake_pool_B = to_variable(fake_pool_B)
                fake_pool_A = A_pool.pool_image(fake_A).numpy()
                fake_pool_A = np.array(
                    [fake_pool_A[0].reshape(3, 256, 256)]).astype("float32")
                fake_pool_A = to_variable(fake_pool_A)

                # optimize the d_A network
                rec_B, fake_pool_rec_B = cycle_gan(
                    data_B, fake_pool_B, False, True, False)
                # LSGAN discriminator loss: pooled fakes toward 0,
                # real predictions toward 1.
                d_loss_A = (fluid.layers.square(fake_pool_rec_B) +
                            fluid.layers.square(rec_B - 1)) / 2.0
                d_loss_A = fluid.layers.reduce_mean(d_loss_A)
                d_loss_A.backward()
                vars_da = []
                for param in cycle_gan.parameters():
                    if param.name[:47] == \
                            "cycle_gan/Cycle_Gan_0/build_gen_discriminator_0":
                        vars_da.append(param)
                optimizer2.minimize(d_loss_A, parameter_list=vars_da)
                cycle_gan.clear_gradients()

                # optimize the d_B network
                rec_A, fake_pool_rec_A = cycle_gan(
                    data_A, fake_pool_A, False, False, True)
                d_loss_B = (fluid.layers.square(fake_pool_rec_A) +
                            fluid.layers.square(rec_A - 1)) / 2.0
                d_loss_B = fluid.layers.reduce_mean(d_loss_B)
                d_loss_B.backward()
                vars_db = []
                for param in cycle_gan.parameters():
                    if param.name[:47] == \
                            "cycle_gan/Cycle_Gan_0/build_gen_discriminator_1":
                        vars_db.append(param)
                optimizer3.minimize(d_loss_B, parameter_list=vars_db)
                cycle_gan.clear_gradients()

                batch_time = time.time() - s_time
                t_time += batch_time
                print(
                    "epoch{}; batch{}; g_loss:{}; d_A_loss: {}; d_B_loss:{} ; \n g_A_loss: {}; g_A_cyc_loss: {}; g_A_idt_loss: {}; g_B_loss: {}; g_B_cyc_loss: {}; g_B_idt_loss: {};Batch_time_cost: {:.2f}"
                    .format(epoch, batch_id, g_loss_out[0],
                            d_loss_A.numpy()[0], d_loss_B.numpy()[0],
                            g_A_loss.numpy()[0], cyc_A_loss.numpy()[0],
                            idt_loss_A.numpy()[0], g_B_loss.numpy()[0],
                            cyc_B_loss.numpy()[0], idt_loss_B.numpy()[0],
                            batch_time))
                with open('logging_train.txt', 'a') as log_file:
                    now = time.strftime("%c")
                    # BUG FIX: g_A_loss[0] was passed twice — the value for
                    # the "g_B_loss" field now really is g_B_loss[0].
                    log_file.write(
                        "time: {}; epoch{}; batch{}; d_A_loss: {}; g_A_loss: {}; \
g_A_cyc_loss: {}; g_A_idt_loss: {}; d_B_loss: {}; \
g_B_loss: {}; g_B_cyc_loss: {}; g_B_idt_loss: {}; \
Batch_time_cost: {:.2f}\n".format(
                            now, epoch, batch_id, d_loss_A[0], g_A_loss[0],
                            cyc_A_loss[0], idt_loss_A[0], d_loss_B[0],
                            g_B_loss[0], cyc_B_loss[0], idt_loss_B[0],
                            batch_time))
                losses[0].append(g_A_loss[0])
                losses[1].append(d_loss_A[0])
                sys.stdout.flush()
                batch_id += 1
                # CE mode: stop each epoch after 500 batches and emit KPIs.
                if args.ce and batch_id == 500:
                    print("kpis\tg_loss\t%0.3f" % g_loss_out[0])
                    print("kpis\tg_A_loss\t%0.3f" % g_A_loss.numpy()[0])
                    print("kpis\tg_B_loss\t%0.3f" % g_B_loss.numpy()[0])
                    print("kpis\td_A_loss\t%0.3f" % d_loss_A.numpy()[0])
                    print("kpis\td_B_loss\t%0.3f" % d_loss_B.numpy()[0])
                    break
            if args.save_checkpoints:
                fluid.dygraph.save_persistables(
                    cycle_gan.state_dict(),
                    args.output + "/checkpoints/{}".format(epoch))
def train(args):
    """Train CycleGAN with the static-graph (Executor) API.

    Builds four trainer programs — g_A, g_B, d_A, d_B — over shared data
    layers, compiles each with data parallelism, then alternates one
    update of each per image: g_A step, d_B step, g_B step, d_A step.
    Discriminators are fed from history image pools. After each epoch the
    test set is optionally rendered and checkpoints optionally saved; in
    CE mode KPI lines are printed at the end.
    """
    max_images_num = data_reader.max_images_num()
    shuffle = True
    if args.run_ce:
        # Continuous-evaluation mode: deterministic, one image, no shuffle.
        np.random.seed(10)
        fluid.default_startup_program().random_seed = 90
        max_images_num = 1
        shuffle = False
    data_shape = [-1] + data_reader.image_shape()

    input_A = fluid.layers.data(
        name='input_A', shape=data_shape, dtype='float32')
    input_B = fluid.layers.data(
        name='input_B', shape=data_shape, dtype='float32')
    fake_pool_A = fluid.layers.data(
        name='fake_pool_A', shape=data_shape, dtype='float32')
    fake_pool_B = fluid.layers.data(
        name='fake_pool_B', shape=data_shape, dtype='float32')

    g_A_trainer = GATrainer(input_A, input_B)
    g_B_trainer = GBTrainer(input_A, input_B)
    d_A_trainer = DATrainer(input_A, fake_pool_A)
    d_B_trainer = DBTrainer(input_B, fake_pool_B)

    # prepare environment
    place = fluid.CPUPlace()
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    A_pool = ImagePool()
    B_pool = ImagePool()

    A_reader = paddle.batch(
        data_reader.a_reader(shuffle=shuffle), args.batch_size)()
    B_reader = paddle.batch(
        data_reader.b_reader(shuffle=shuffle), args.batch_size)()
    if not args.run_ce:
        A_test_reader = data_reader.a_test_reader()
        B_test_reader = data_reader.b_test_reader()

    def test(epoch):
        # Run inference over the paired test readers and write fake/cyc/input
        # images (de-normalized from [-1, 1] to uint8) to <output>/test.
        out_path = args.output + "/test"
        if not os.path.exists(out_path):
            os.makedirs(out_path)
        i = 0
        for data_A, data_B in zip(A_test_reader(), B_test_reader()):
            A_name = data_A[1]
            B_name = data_B[1]
            tensor_A = fluid.LoDTensor()
            tensor_B = fluid.LoDTensor()
            tensor_A.set(data_A[0], place)
            tensor_B.set(data_B[0], place)
            fake_A_temp, fake_B_temp, cyc_A_temp, cyc_B_temp = exe.run(
                g_A_trainer.infer_program,
                fetch_list=[
                    g_A_trainer.fake_A, g_A_trainer.fake_B,
                    g_A_trainer.cyc_A, g_A_trainer.cyc_B
                ],
                feed={"input_A": tensor_A,
                      "input_B": tensor_B})
            # CHW -> HWC for image writing.
            fake_A_temp = np.squeeze(fake_A_temp[0]).transpose([1, 2, 0])
            fake_B_temp = np.squeeze(fake_B_temp[0]).transpose([1, 2, 0])
            cyc_A_temp = np.squeeze(cyc_A_temp[0]).transpose([1, 2, 0])
            cyc_B_temp = np.squeeze(cyc_B_temp[0]).transpose([1, 2, 0])
            input_A_temp = np.squeeze(data_A[0]).transpose([1, 2, 0])
            input_B_temp = np.squeeze(data_B[0]).transpose([1, 2, 0])

            imsave(out_path + "/fakeB_" + str(epoch) + "_" + A_name,
                   ((fake_B_temp + 1) * 127.5).astype(np.uint8))
            imsave(out_path + "/fakeA_" + str(epoch) + "_" + B_name,
                   ((fake_A_temp + 1) * 127.5).astype(np.uint8))
            imsave(out_path + "/cycA_" + str(epoch) + "_" + A_name,
                   ((cyc_A_temp + 1) * 127.5).astype(np.uint8))
            imsave(out_path + "/cycB_" + str(epoch) + "_" + B_name,
                   ((cyc_B_temp + 1) * 127.5).astype(np.uint8))
            imsave(out_path + "/inputA_" + str(epoch) + "_" + A_name,
                   ((input_A_temp + 1) * 127.5).astype(np.uint8))
            imsave(out_path + "/inputB_" + str(epoch) + "_" + B_name,
                   ((input_B_temp + 1) * 127.5).astype(np.uint8))
            i += 1

    def checkpoints(epoch):
        # Save the persistables of all four trainer programs under one
        # per-epoch directory.
        out_path = args.output + "/checkpoints/" + str(epoch)
        if not os.path.exists(out_path):
            os.makedirs(out_path)
        fluid.io.save_persistables(
            exe, out_path + "/g_a", main_program=g_A_trainer.program)
        fluid.io.save_persistables(
            exe, out_path + "/g_b", main_program=g_B_trainer.program)
        fluid.io.save_persistables(
            exe, out_path + "/d_a", main_program=d_A_trainer.program)
        fluid.io.save_persistables(
            exe, out_path + "/d_b", main_program=d_B_trainer.program)
        print("saved checkpoint to {}".format(out_path))
        sys.stdout.flush()

    def init_model():
        # BUG FIX: the assertion message referenced args.init_mode, a
        # nonexistent attribute, so a missing checkpoint path raised
        # AttributeError instead of the intended AssertionError.
        assert os.path.exists(
            args.init_model), "[%s] can't be found." % args.init_model
        fluid.io.load_persistables(
            exe, args.init_model + "/g_a", main_program=g_A_trainer.program)
        fluid.io.load_persistables(
            exe, args.init_model + "/g_b", main_program=g_B_trainer.program)
        fluid.io.load_persistables(
            exe, args.init_model + "/d_a", main_program=d_A_trainer.program)
        fluid.io.load_persistables(
            exe, args.init_model + "/d_b", main_program=d_B_trainer.program)
        print("Load model from {}".format(args.init_model))

    if args.init_model:
        init_model()
    losses = [[], []]
    t_time = 0

    # Compile each trainer program with data parallelism; in-place and
    # memory-optimize passes are disabled deliberately.
    build_strategy = fluid.BuildStrategy()
    build_strategy.enable_inplace = False
    build_strategy.memory_optimize = False
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 1
    exec_strategy.use_experimental_executor = True
    g_A_trainer_program = fluid.CompiledProgram(
        g_A_trainer.program).with_data_parallel(
            loss_name=g_A_trainer.g_loss_A.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    g_B_trainer_program = fluid.CompiledProgram(
        g_B_trainer.program).with_data_parallel(
            loss_name=g_B_trainer.g_loss_B.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    d_B_trainer_program = fluid.CompiledProgram(
        d_B_trainer.program).with_data_parallel(
            loss_name=d_B_trainer.d_loss_B.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    d_A_trainer_program = fluid.CompiledProgram(
        d_A_trainer.program).with_data_parallel(
            loss_name=d_A_trainer.d_loss_A.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)

    for epoch in range(args.epoch):
        batch_id = 0
        for i in range(max_images_num):
            data_A = next(A_reader)
            data_B = next(B_reader)
            tensor_A = fluid.LoDTensor()
            tensor_B = fluid.LoDTensor()
            tensor_A.set(data_A, place)
            tensor_B.set(data_B, place)
            s_time = time.time()

            # optimize the g_A network
            g_A_loss, fake_B_tmp = exe.run(
                g_A_trainer_program,
                fetch_list=[g_A_trainer.g_loss_A, g_A_trainer.fake_B],
                feed={"input_A": tensor_A,
                      "input_B": tensor_B})
            fake_pool_B = B_pool.pool_image(fake_B_tmp)

            # optimize the d_B network
            d_B_loss = exe.run(
                d_B_trainer_program,
                fetch_list=[d_B_trainer.d_loss_B],
                feed={"input_B": tensor_B,
                      "fake_pool_B": fake_pool_B})[0]

            # optimize the g_B network
            g_B_loss, fake_A_tmp = exe.run(
                g_B_trainer_program,
                fetch_list=[g_B_trainer.g_loss_B, g_B_trainer.fake_A],
                feed={"input_A": tensor_A,
                      "input_B": tensor_B})
            fake_pool_A = A_pool.pool_image(fake_A_tmp)

            # optimize the d_A network
            d_A_loss = exe.run(
                d_A_trainer_program,
                fetch_list=[d_A_trainer.d_loss_A],
                feed={"input_A": tensor_A,
                      "fake_pool_A": fake_pool_A})[0]

            batch_time = time.time() - s_time
            t_time += batch_time
            print(
                "epoch{}; batch{}; g_A_loss: {}; d_B_loss: {}; g_B_loss: {}; d_A_loss: {}; "
                "Batch_time_cost: {}".format(epoch, batch_id, g_A_loss[0],
                                             d_B_loss[0], g_B_loss[0],
                                             d_A_loss[0], batch_time))
            losses[0].append(g_A_loss[0])
            losses[1].append(d_A_loss[0])
            sys.stdout.flush()
            batch_id += 1

        if args.run_test and not args.run_ce:
            test(epoch)
        if args.save_checkpoints and not args.run_ce:
            checkpoints(epoch)
    if args.run_ce:
        print("kpis,g_train_cost,{}".format(np.mean(losses[0])))
        print("kpis,d_train_cost,{}".format(np.mean(losses[1])))
        print("kpis,duration,{}".format(t_time / args.epoch))
def train(args):
    """Train CycleGAN in dygraph mode with throughput profiling.

    Same three-step update scheme as the plain dygraph trainer (joint
    generator step, then d_A, then d_B, each with its own optimizer over
    a name-prefix-selected parameter subset), but additionally tracks
    data-loading and per-batch wall time via AverageMeter/ProgressMeter
    and stops every epoch after 500 batches.
    """
    with fluid.dygraph.guard():
        max_images_num = data_reader.max_images_num()
        shuffle = True
        data_shape = [-1] + data_reader.image_shape()

        A_pool = ImagePool()
        B_pool = ImagePool()
        A_reader = paddle.batch(
            data_reader.a_reader(shuffle=shuffle), args.batch_size)()
        B_reader = paddle.batch(
            data_reader.b_reader(shuffle=shuffle), args.batch_size)()
        A_test_reader = data_reader.a_test_reader()
        B_test_reader = data_reader.b_test_reader()

        cycle_gan = Cycle_Gan("cycle_gan", istrain=True)

        losses = [[], []]
        t_time = 0
        # One optimizer instance each for: generators, d_A, d_B.
        optimizer1 = optimizer_setting()
        optimizer2 = optimizer_setting()
        optimizer3 = optimizer_setting()

        for epoch in range(args.epoch):
            pro_batch_time = AverageMeter('Time', ':6.3f')
            pro_data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(
                max_images_num,
                pro_batch_time,
                pro_data_time,
                prefix="epoch: [{}]".format(epoch))
            batch_id = 0
            end = Tools.time()
            for i in range(max_images_num):
                data_A = next(A_reader)
                data_B = next(B_reader)
                # Time spent waiting on the data readers.
                pro_data_time.update(Tools.time() - end)
                s_time = time.time()
                # NOTE(review): input is hard-coded to 3x256x256 and only
                # the first sample of each batch is used.
                data_A = np.array(
                    [data_A[0].reshape(3, 256, 256)]).astype("float32")
                data_B = np.array(
                    [data_B[0].reshape(3, 256, 256)]).astype("float32")
                data_A = to_variable(data_A)
                data_B = to_variable(data_B)

                # optimize the g_A network (joint generator step)
                fake_A, fake_B, cyc_A, cyc_B, g_A_loss, g_B_loss, \
                    idt_loss_A, idt_loss_B, cyc_A_loss, cyc_B_loss, \
                    g_loss = cycle_gan(data_A, data_B, True, False, False)
                g_loss_out = g_loss.numpy()
                g_loss.backward()
                # Restrict the update to generator parameters (name prefix).
                vars_G = []
                for param in cycle_gan.parameters():
                    if param.name[:52] == \
                            "cycle_gan/Cycle_Gan_0/build_generator_resnet_9blocks":
                        vars_G.append(param)
                optimizer1.minimize(g_loss, parameter_list=vars_G)
                cycle_gan.clear_gradients()

                # Discriminators are fed from history image pools.
                fake_pool_B = B_pool.pool_image(fake_B).numpy()
                fake_pool_B = np.array(
                    [fake_pool_B[0].reshape(3, 256, 256)]).astype("float32")
                fake_pool_B = to_variable(fake_pool_B)
                fake_pool_A = A_pool.pool_image(fake_A).numpy()
                fake_pool_A = np.array(
                    [fake_pool_A[0].reshape(3, 256, 256)]).astype("float32")
                fake_pool_A = to_variable(fake_pool_A)

                # optimize the d_A network
                rec_B, fake_pool_rec_B = cycle_gan(
                    data_B, fake_pool_B, False, True, False)
                # LSGAN discriminator loss: pooled fakes toward 0,
                # real predictions toward 1.
                d_loss_A = (fluid.layers.square(fake_pool_rec_B) +
                            fluid.layers.square(rec_B - 1)) / 2.0
                d_loss_A = fluid.layers.reduce_mean(d_loss_A)
                d_loss_A.backward()
                vars_da = []
                for param in cycle_gan.parameters():
                    if param.name[:47] == \
                            "cycle_gan/Cycle_Gan_0/build_gen_discriminator_0":
                        vars_da.append(param)
                optimizer2.minimize(d_loss_A, parameter_list=vars_da)
                cycle_gan.clear_gradients()

                # optimize the d_B network
                rec_A, fake_pool_rec_A = cycle_gan(
                    data_A, fake_pool_A, False, False, True)
                d_loss_B = (fluid.layers.square(fake_pool_rec_A) +
                            fluid.layers.square(rec_A - 1)) / 2.0
                d_loss_B = fluid.layers.reduce_mean(d_loss_B)
                d_loss_B.backward()
                vars_db = []
                for param in cycle_gan.parameters():
                    if param.name[:47] == \
                            "cycle_gan/Cycle_Gan_0/build_gen_discriminator_1":
                        vars_db.append(param)
                optimizer3.minimize(d_loss_B, parameter_list=vars_db)
                cycle_gan.clear_gradients()

                pro_batch_time.update(Tools.time() - end)
                batch_time = time.time() - s_time
                t_time += batch_time
                with open('logging_train.txt', 'a') as log_file:
                    now = time.strftime("%c")
                    # BUG FIX: g_A_loss[0] was passed twice — the value for
                    # the "g_B_loss" field now really is g_B_loss[0].
                    log_file.write(
                        "time: {}; epoch{}; batch{}; d_A_loss: {}; g_A_loss: {}; \
g_A_cyc_loss: {}; g_A_idt_loss: {}; d_B_loss: {}; \
g_B_loss: {}; g_B_cyc_loss: {}; g_B_idt_loss: {}; \
Batch_time_cost: {:.2f}\n".format(
                            now, epoch, batch_id, d_loss_A[0], g_A_loss[0],
                            cyc_A_loss[0], idt_loss_A[0], d_loss_B[0],
                            g_B_loss[0], cyc_B_loss[0], idt_loss_B[0],
                            batch_time))
                losses[0].append(g_A_loss[0])
                losses[1].append(d_loss_A[0])
                sys.stdout.flush()
                batch_id += 1
                if batch_id % 10 == 0:
                    progress.print(batch_id)
                    print("epoch{}; | batch step{}; g_A_loss:{}; d_A_loss:{}"
                          .format(epoch, batch_id, g_A_loss.numpy()[0],
                                  d_loss_A.numpy()[0]))
                # Profiling run: cap each epoch at 500 batches.
                if batch_id == 500:
                    break
                end = Tools.time()