def test_error(self):
    startup_program = Program()
    main_program = Program()
    use_cuda = core.is_compiled_with_cuda()
    with program_guard(main_program, startup_program):

        def fn_1(opt, avg_loss):
            opt.minimize(avg_loss)

        def fn_2(opt, avg_loss):
            opt.minimize(avg_loss)

        x = fluid.layers.data("X", [10], 'float32')
        hidden = layers.fc(x, 5)
        avg_loss = layers.mean(hidden)

        adam = optimizer.Adam(learning_rate=LR)
        sgd = optimizer.SGD(learning_rate=LR)

        cond = layers.fill_constant([1], 'bool', True)

        layers.case([(cond, lambda: fn_1(adam, avg_loss))],
                    lambda: fn_2(sgd, avg_loss))

    cpu_place = fluid.CPUPlace()
    cuda_place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    for place in [cpu_place, cuda_place]:
        exe = fluid.Executor(place)
        exe.run(startup_program)
        np.random.seed(SEED)

        # NOTE(liym27):
        # This test needs to run on multiple cards to trigger NotImplementedError.
        # It was moved out of RUN_TYPE=DIST in tests/unittests/CMakeList.txt so
        # that multiple cards are used **only on CPU**, not GPU, to reduce CI time.
        os.environ['CPU_NUM'] = str(2)

        pe_exe = fluid.ParallelExecutor(use_cuda=use_cuda,
                                        main_program=main_program,
                                        loss_name=avg_loss.name)
        num_devices = pe_exe.device_count

        def not_implemented_error():
            pe_exe.run(feed={
                'X': np.random.random(size=[64, 10]).astype('float32'),
            },
                       fetch_list=[avg_loss.name])

        if num_devices > 1:
            self.assertRaises(NotImplementedError, not_implemented_error)
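# --- Hedged sketch (not part of the test above): the layers.case contract the
# test relies on. The first (pred, fn) pair whose predicate is True runs its
# callable; otherwise the default callable runs. All names below are
# illustrative only.
import paddle.fluid as fluid
import paddle.fluid.layers as layers

def case_semantics_sketch():
    pred = layers.fill_constant(shape=[1], dtype='bool', value=True)
    # Evaluates to 1.0 because pred is True; the default branch would yield 2.0.
    return layers.case(
        pred_fn_pairs=[(pred, lambda: layers.fill_constant([1], 'float32', 1.0))],
        default=lambda: layers.fill_constant([1], 'float32', 2.0))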
def test_optimizer_in_case(self):
    BATCH_SIZE = 1
    INPUT_SIZE = 784
    EPOCH_NUM = 2

    x = fluid.data(name='x', shape=[BATCH_SIZE, INPUT_SIZE], dtype='float32')
    y = fluid.data(name='y', shape=[BATCH_SIZE, INPUT_SIZE], dtype='float32')

    switch_id = fluid.data(name='switch_id', shape=[1], dtype='int32')

    one = layers.fill_constant(shape=[1], dtype='int32', value=1)
    adam = optimizer.Adam(learning_rate=0.001)
    adagrad = optimizer.Adagrad(learning_rate=0.001)

    def fn_1():
        sum = layers.elementwise_mul(x, y)
        loss = layers.mean(sum, name="f_1_loss")
        adam.minimize(loss)

    def fn_2():
        sum = layers.elementwise_mul(x, y)
        loss = layers.mean(sum, name="f_2_loss")
        adagrad.minimize(loss)

    layers.case(pred_fn_pairs=[(switch_id == one, fn_1)], default=fn_2)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    for epoch in range(EPOCH_NUM):
        np.random.seed(epoch)
        feed_image = np.random.random(
            size=[BATCH_SIZE, INPUT_SIZE]).astype('float32')
        main_program = fluid.default_main_program()
        out = exe.run(main_program,
                      feed={
                          'x': feed_image,
                          'y': feed_image,
                          'switch_id': np.array([epoch]).astype('int32')
                      },
                      fetch_list=[])
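# --- Hedged aside (illustrative, not part of the test): `switch_id == one`
# above builds a boolean tensor through operator overloading; it is equivalent
# to the explicit layers.equal call, so the case predicate can also be built
# like this:
def make_switch_pred(switch_id_var, one_var):
    # Element-wise equality; yields a bool tensor usable as a case predicate.
    return layers.equal(switch_id_var, one_var)

# layers.case(pred_fn_pairs=[(make_switch_pred(switch_id, one), fn_1)], default=fn_2)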
def build_model(self):
    """ DataLoader """
    # TODO: RandomCrop was temporarily removed here because the APIs differ.
    train_transform = [
        transforms.RandomHorizontalFlip(),
        transforms.Resize((self.img_size + 30, self.img_size + 30)),
        transforms.RandomCrop(self.img_size),
        transforms.ToArray(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        transforms.ToTensor()
    ]
    test_transform = [
        transforms.Resize((self.img_size, self.img_size)),
        transforms.ToArray(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        transforms.ToTensor()
    ]

    self.trainA = os.path.join('dataset', self.dataset, 'trainA')
    self.trainB = os.path.join('dataset', self.dataset, 'trainB')
    self.testA = os.path.join('dataset', self.dataset, 'testA')
    self.testB = os.path.join('dataset', self.dataset, 'testB')
    self.trainA_loader = DataLoader(self.trainA,
                                    batch_size=self.batch_size,
                                    transforms=train_transform,
                                    shuffle=True)
    self.trainB_loader = DataLoader(self.trainB,
                                    batch_size=self.batch_size,
                                    transforms=train_transform,
                                    shuffle=True)
    self.testA_loader = DataLoader(self.testA,
                                   batch_size=1,
                                   transforms=test_transform,
                                   shuffle=False)
    self.testB_loader = DataLoader(self.testB,
                                   batch_size=1,
                                   transforms=test_transform,
                                   shuffle=False)

    """ Define Generator, Discriminator """
    self.genA2B = ResnetGenerator(input_nc=3, output_nc=3, ngf=self.ch,
                                  n_blocks=self.n_res, img_size=self.img_size,
                                  light=self.light)
    self.genB2A = ResnetGenerator(input_nc=3, output_nc=3, ngf=self.ch,
                                  n_blocks=self.n_res, img_size=self.img_size,
                                  light=self.light)
    self.disGA = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
    self.disGB = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
    self.disLA = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)
    self.disLB = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)

    """ Define Loss """
    self.L1_loss = dygraph.L1Loss()
    self.MSE_loss = layers.mse_loss
    # BCELoss should be called with Normalize=True; use it separately.
    self.BCELoss = bce_Loss

    """ Trainer """
    self.G_optim = optimizer.Adam(
        learning_rate=self.lr,
        beta1=0.5,
        beta2=0.999,
        parameter_list=itertools.chain(self.genA2B.parameters(),
                                       self.genB2A.parameters()),
        regularization=fluid.regularizer.L1Decay(self.weight_decay))
    # self.G_optim = torch.optim.Adam(itertools.chain(self.genA2B.parameters(), self.genB2A.parameters()), lr=self.lr, betas=(0.5, 0.999), weight_decay=self.weight_decay)
    # NOTE: chain all four discriminators, matching the original torch code
    # below; the port previously chained only disGA and disLB.
    self.D_optim = optimizer.Adam(
        learning_rate=self.lr,
        beta1=0.5,
        beta2=0.999,
        parameter_list=itertools.chain(self.disGA.parameters(),
                                       self.disGB.parameters(),
                                       self.disLA.parameters(),
                                       self.disLB.parameters()),
        regularization=fluid.regularizer.L1Decay(self.weight_decay))
    # self.D_optim = torch.optim.Adam(itertools.chain(self.disGA.parameters(), self.disGB.parameters(), self.disLA.parameters(), self.disLB.parameters()), lr=self.lr, betas=(0.5, 0.999), weight_decay=self.weight_decay)

    """ Define Rho clipper to constrain the value of rho in AdaILN and ILN """
    self.Rho_clipper = RhoClipper(0, 1)
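# --- Hypothetical sketch: RhoClipper(0, 1) above is defined elsewhere in the
# repo. This illustrative version clamps every parameter whose name contains
# "rho" back into [low, high] after an optimizer step; the real implementation
# may differ.
import numpy as np

class RhoClipperSketch(object):
    def __init__(self, low, high):
        self.low = low
        self.high = high

    def __call__(self, net):
        # Clamp AdaILN/ILN rho parameters in dygraph mode.
        for param in net.parameters():
            if 'rho' in param.name:
                param.set_value(np.clip(param.numpy(), self.low, self.high))

# Assumed call site after each update step: self.Rho_clipper(self.genA2B)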
def main(opts):
    # Create the data loader
    # loader = sunnerData.DataLoader(sunnerData.ImageDataset(
    #     root=[[opts.path]],
    #     transforms=transforms.Compose([
    #         sunnertransforms.Resize((1024, 1024)),
    #         sunnertransforms.ToTensor(),
    #         sunnertransforms.ToFloat(),
    #         # sunnertransforms.Transpose(sunnertransforms.BHWC2BCHW),
    #         sunnertransforms.Normalize(),
    #     ])),
    #     batch_size=opts.batch_size,
    #     shuffle=True,
    # )
    loader = data_loader(opts.path)

    # fluid.CPUPlace() takes no device index, unlike fluid.CUDAPlace(0).
    device = fluid.CUDAPlace(0) if opts.device == 'GPU' else fluid.CPUPlace()
    with fluid.dygraph.guard(device):
        # Create the model
        start_epoch = 0
        G = StyleGenerator()
        D = StyleDiscriminator()

        # Load the pre-trained weight
        if os.path.exists(opts.resume):
            INFO("Load the pre-trained weight!")
            # state = fluid.dygraph.load_dygraph(opts.resume)
            state = load_checkpoint(opts.resume)
            G.load_dict(state['G'])
            D.load_dict(state['D'])
            start_epoch = state['start_epoch']
        else:
            INFO("Pre-trained weight cannot be loaded; training from scratch!")

        # # Multi-GPU support
        # if torch.cuda.device_count() > 1:
        #     INFO("Multiple GPU:" + str(torch.cuda.device_count()) + "\t GPUs")
        #     G = nn.DataParallel(G)
        #     D = nn.DataParallel(D)

        scheduler_D = exponential_decay(learning_rate=0.00001,
                                        decay_steps=1000,
                                        decay_rate=0.99)
        scheduler_G = exponential_decay(learning_rate=0.00001,
                                        decay_steps=1000,
                                        decay_rate=0.99)
        optim_D = optim.Adam(parameter_list=D.parameters(),
                             learning_rate=scheduler_D)
        optim_G = optim.Adam(parameter_list=G.parameters(),
                             learning_rate=scheduler_G)

        # Train
        # Cast to float32 to match the network dtype (randn returns float64).
        fix_z = np.random.randn(opts.batch_size, 512).astype('float32')
        fix_z = dygraph.to_variable(fix_z)
        softplus = SoftPlus()
        Loss_D_list = [0.0]
        Loss_G_list = [0.0]
        D.train()
        G.train()
        for ep in range(start_epoch, opts.epoch):
            bar = tqdm(loader())
            loss_D_list = []
            loss_G_list = []
            for i, data in enumerate(bar):
                # ===========================================================
                # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
                # ===========================================================
                # Compute adversarial loss toward discriminator
                real_img = np.array([item for item in data],
                                    dtype='float32').reshape(
                                        (-1, 3, 1024, 1024))
                D.clear_gradients()
                real_img = dygraph.to_variable(real_img)
                real_logit = D(real_img)

                z = np.float32(np.random.randn(real_img.shape[0], 512))
                fake_img = G(dygraph.to_variable(z))
                fake_logit = D(fake_img)

                d_loss = layers.mean(softplus(fake_logit))
                d_loss = d_loss + layers.mean(softplus(-real_logit))

                if opts.r1_gamma != 0.0:
                    r1_penalty = R1Penalty(real_img, D)
                    d_loss = d_loss + r1_penalty * (opts.r1_gamma * 0.5)
                if opts.r2_gamma != 0.0:
                    r2_penalty = R2Penalty(fake_img, D)
                    d_loss = d_loss + r2_penalty * (opts.r2_gamma * 0.5)
                loss_D_list.append(d_loss.numpy())

                # Update discriminator
                d_loss.backward()
                optim_D.minimize(d_loss)

                # ===========================================================
                # (2) Update G network: maximize log(D(G(z)))
                # ===========================================================
                if i % CRITIC_ITER == 0:
                    G.clear_gradients()
                    # Do not detach fake_img here: gradients must flow back
                    # through G for the generator update to take effect.
                    fake_logit = D(fake_img)
                    g_loss = layers.mean(softplus(-fake_logit))
                    # print("g_loss", g_loss)
                    loss_G_list.append(g_loss.numpy())

                    # Update generator
                    g_loss.backward()
                    optim_G.minimize(g_loss)

                # Output training stats
                bar.set_description(
                    "Epoch {} [{}, {}] [G]: {} [D]: {}".format(
                        ep, i + 1, 52000, loss_G_list[-1], loss_D_list[-1]))

            # Save the result
            Loss_G_list.append(np.mean(loss_G_list))
            Loss_D_list.append(np.mean(loss_D_list))

            # Check how the generator is doing by saving G's output on fixed noise
            G.eval()
            # fake_img = G(fix_z).detach().cpu()
            fake_img = G(fix_z).numpy().squeeze()
            log(f"fake_img.shape: {fake_img.shape}")
            save_image(fake_img,
                       os.path.join(opts.det, 'images', str(ep) + '.png'))
            G.train()

            # Save model
            # print("type:", type(G.state_dict()).__name__)
            # print("type:", type(D.state_dict()).__name__)
            states = {
                'G': G.state_dict(),
                'D': D.state_dict(),
                'Loss_G': Loss_G_list,
                'Loss_D': Loss_D_list,
                'start_epoch': ep,
            }
            # dygraph.save_dygraph(state, os.path.join(opts.det, 'models', 'latest'))
            save_checkpoint(states,
                            os.path.join(opts.det, 'models', 'latest.pp'))
            # scheduler_D.step()
            # scheduler_G.step()

    # Plot the total loss curve
    Loss_D_list = Loss_D_list[1:]
    Loss_G_list = Loss_G_list[1:]
    plotLossCurve(opts, Loss_D_list, Loss_G_list)
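# --- Hedged invocation sketch: main(opts) expects an argparse-style namespace.
# The field names mirror the attributes used above (opts.path, opts.device,
# opts.resume, opts.batch_size, opts.epoch, opts.r1_gamma, opts.r2_gamma,
# opts.det); every default value below is an assumption, not repo
# configuration.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--path', type=str, required=True)
    parser.add_argument('--device', type=str, default='GPU')
    parser.add_argument('--resume', type=str,
                        default='train_result/models/latest.pp')
    parser.add_argument('--batch_size', type=int, default=2)
    parser.add_argument('--epoch', type=int, default=100)
    parser.add_argument('--r1_gamma', type=float, default=10.0)
    parser.add_argument('--r2_gamma', type=float, default=0.0)
    parser.add_argument('--det', type=str, default='train_result')
    main(parser.parse_args())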
def build_model(self):
    """ DataLoader """
    train_transform = [
        transforms.RandomHorizontalFlip(),
        transforms.Resize((self.img_size + 30, self.img_size + 30)),
        transforms.RandomCrop(self.img_size),
        transforms.ToArray(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        transforms.ToTensor()
    ]
    test_transform = [
        transforms.Resize((self.img_size, self.img_size)),
        transforms.ToArray(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        transforms.ToTensor()
    ]

    self.trainA = os.path.join('dataset', self.dataset, 'trainA')
    self.trainB = os.path.join('dataset', self.dataset, 'trainB')
    self.testA = os.path.join('dataset', self.dataset, 'testA')
    self.testB = os.path.join('dataset', self.dataset, 'testB')
    self.trainA_loader = DataLoader(self.trainA,
                                    batch_size=self.batch_size,
                                    transforms=train_transform,
                                    shuffle=True)
    self.trainB_loader = DataLoader(self.trainB,
                                    batch_size=self.batch_size,
                                    transforms=train_transform,
                                    shuffle=True)
    self.testA_loader = DataLoader(self.testA,
                                   batch_size=1,
                                   transforms=test_transform,
                                   shuffle=False)
    self.testB_loader = DataLoader(self.testB,
                                   batch_size=1,
                                   transforms=test_transform,
                                   shuffle=False)

    """ Define Generator, Discriminator """
    self.genA2B = ResnetGenerator(input_nc=3, output_nc=3, ngf=self.ch,
                                  n_blocks=self.n_res, img_size=self.img_size,
                                  light=self.light)
    self.genB2A = ResnetGenerator(input_nc=3, output_nc=3, ngf=self.ch,
                                  n_blocks=self.n_res, img_size=self.img_size,
                                  light=self.light)
    self.disGA = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
    self.disGB = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
    self.disLA = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)
    self.disLB = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)

    """ Define Loss """
    self.L1_loss = dygraph.L1Loss()
    self.MSE_loss = layers.mse_loss
    # BCELoss should be called with Normalize=True; use it separately.
    self.BCELoss = bce_loss

    """ Trainer """
    self.G_optim = optimizer.Adam(learning_rate=self.lr,
                                  beta1=0.5,
                                  beta2=0.999,
                                  parameter_list=self.genA2B.parameters() +
                                  self.genB2A.parameters())
    # NOTE: include all four discriminators; the port previously listed only
    # disGA and disLB.
    self.D_optim = optimizer.Adam(learning_rate=self.lr,
                                  beta1=0.5,
                                  beta2=0.999,
                                  parameter_list=self.disGA.parameters() +
                                  self.disGB.parameters() +
                                  self.disLA.parameters() +
                                  self.disLB.parameters())
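# --- Illustrative aside (an assumption, not repo code): dygraph
# Layer.parameters() returns a plain Python list, which is why the `+`
# concatenation above works where torch code needs itertools.chain. A small
# helper makes the same joint parameter_list explicit:
def joint_parameter_list(*nets):
    params = []
    for net in nets:
        params += net.parameters()  # list concatenation, preserves order
    return params

# e.g. optimizer.Adam(learning_rate=lr,
#                     parameter_list=joint_parameter_list(genA2B, genB2A))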
def static(train_data,
           loss_in_switch=True,
           use_cuda=False,
           use_parallel_exe=False):
    startup_program = Program()
    main_program = Program()
    startup_program.random_seed = SEED
    main_program.random_seed = SEED

    with program_guard(main_program, startup_program):

        def double_fc_net(image):
            hidden = layers.fc(
                image,
                size=FC_SIZE,
                act='relu',
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(value=0.99)),
                bias_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(value=0.5)),
                name="hidden")

            prediction = layers.fc(
                hidden,
                size=CLASS_NUM,
                act='softmax',
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(value=1.2)),
                bias_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(value=0.8)),
                name="prediction")
            return hidden, prediction

        def fn_1(opt, avg_loss=None, pred=None, label=None):
            if avg_loss is None:
                loss = layers.cross_entropy(input=pred, label=label)
                avg_loss = layers.mean(loss, name='mean_cross_entropy_loss')
            opt.minimize(avg_loss)
            return avg_loss

        def fn_2(opt, avg_loss=None, pred=None, label=None):
            if avg_loss is None:
                loss = layers.softmax_with_cross_entropy(logits=pred,
                                                         label=label)
                avg_loss = layers.mean(loss, name='mean_softmax_loss')
            opt.minimize(avg_loss)
            return avg_loss

        image = fluid.data('image', [BATCH_SIZE, INPUT_SIZE], 'float32')
        label = fluid.data('label', [BATCH_SIZE, 1], 'int64')
        hidden, prediction = double_fc_net(image)

        adam = optimizer.Adam(learning_rate=LR)
        sgd = optimizer.SGD(learning_rate=LR)

        id = fluid.data('id', [1], 'int32')
        two = layers.fill_constant([1], 'int32', 2)
        mod_two = layers.elementwise_mod(id, two) == 0

        if loss_in_switch:
            avg_loss = layers.case(
                [(mod_two, lambda: fn_1(adam, None, prediction, label))],
                lambda: fn_2(sgd, None, prediction, label))
        else:
            loss_1 = layers.cross_entropy(input=prediction, label=label)
            avg_loss_1 = layers.mean(loss_1)
            loss_2 = layers.softmax_with_cross_entropy(logits=prediction,
                                                       label=label)
            avg_loss_2 = layers.mean(loss_2)
            avg_loss = layers.case(
                [(mod_two, lambda: fn_1(adam, avg_loss_1))],
                lambda: fn_2(sgd, avg_loss_2))

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)

    for epoch in range(EPOCH_NUM):
        feed_image, feed_label = train_data[epoch]
        fetch_list = [hidden, prediction, avg_loss]
        feed = {
            'image': feed_image,
            'label': feed_label,
            'id': np.array([epoch]).astype('int32')
        }
        out = exe.run(main_program, feed=feed, fetch_list=fetch_list)
        out_hidden, out_pred, loss = out

    return out_hidden, out_pred, loss
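# --- Hedged driver sketch for static(): train_data is indexed by epoch and
# must yield (image_batch, label_batch) pairs shaped to the feed variables
# above. SEED, EPOCH_NUM, BATCH_SIZE, INPUT_SIZE, and CLASS_NUM are the module
# constants this test file already defines; the helper itself is illustrative.
import numpy as np

def make_random_train_data():
    np.random.seed(SEED)
    data = []
    for _ in range(EPOCH_NUM):
        image = np.random.random(
            size=[BATCH_SIZE, INPUT_SIZE]).astype('float32')
        label = np.random.randint(
            0, CLASS_NUM, size=[BATCH_SIZE, 1]).astype('int64')
        data.append((image, label))
    return data

# hidden, pred, loss = static(make_random_train_data(), loss_in_switch=True)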