def main():
    """Build the dataset, networks, optimizers and losses, then run training.

    Relies on module-level project helpers (MyImageFolder, Generator,
    Discriminator, VGGLoss, train_fn, checkpoint utilities) and the global
    `config` module for all hyperparameters.
    """
    training_data = MyImageFolder(root_dir="data/")
    train_loader = DataLoader(
        training_data,
        batch_size=config.BATCH_SIZE,
        shuffle=True,
        pin_memory=True,
        num_workers=config.NUM_WORKERS,
    )

    # NOTE(review): only the generator gets explicit weight initialization
    # here, exactly as in the original — confirm the discriminator is meant
    # to keep its default init.
    generator = Generator(in_channels=3).to(config.DEVICE)
    discriminator = Discriminator(in_channels=3).to(config.DEVICE)
    initialize_weights(generator)

    # WGAN-GP-style Adam betas (0.0, 0.9) for both players.
    optimizer_g = optim.Adam(generator.parameters(), lr=config.LEARNING_RATE, betas=(0.0, 0.9))
    optimizer_d = optim.Adam(discriminator.parameters(), lr=config.LEARNING_RATE, betas=(0.0, 0.9))

    writer = SummaryWriter("logs")
    tensorboard_step = 0

    # Pixel-wise L1 plus perceptual (VGG feature) loss for the generator.
    l1_loss = nn.L1Loss()
    perceptual_loss = VGGLoss()

    generator.train()
    discriminator.train()

    # Separate AMP gradient scalers, one per optimizer.
    scaler_g = torch.cuda.amp.GradScaler()
    scaler_d = torch.cuda.amp.GradScaler()

    if config.LOAD_MODEL:
        load_checkpoint(
            config.CHECKPOINT_GEN,
            generator,
            optimizer_g,
            config.LEARNING_RATE,
        )
        load_checkpoint(
            config.CHECKPOINT_DISC,
            discriminator,
            optimizer_d,
            config.LEARNING_RATE,
        )

    for _ in range(config.NUM_EPOCHS):
        # train_fn returns the advanced tensorboard step counter.
        tensorboard_step = train_fn(
            train_loader,
            discriminator,
            generator,
            optimizer_g,
            optimizer_d,
            l1_loss,
            perceptual_loss,
            scaler_g,
            scaler_d,
            writer,
            tensorboard_step,
        )

        if config.SAVE_MODEL:
            save_checkpoint(generator, optimizer_g, filename=config.CHECKPOINT_GEN)
            save_checkpoint(discriminator, optimizer_d, filename=config.CHECKPOINT_DISC)
def train_model_scenario_2(n, model_fname, opponet_model_fname, alpha=0.1, iterations=5000):
    """Train a network by generating games against an opponent model.

    Parameters:
        n: network layer-size description, passed through to `model` helpers.
        model_fname: file the trained weights are saved to at the end.
        opponet_model_fname: file the opponent's weights are loaded from.
            When equal to `model_fname`, the network trains by self-play on
            the loaded weights instead of starting fresh.
        alpha: initial learning rate; decayed as alpha0 / (1 + 0.01 * i).
        iterations: number of gradient-update iterations.
    """
    alpha0 = alpha
    decay_rate = 0.01

    # Opponent weights always come from disk.
    modelInstance = model.load(opponet_model_fname)
    W0 = modelInstance['W']
    b0 = modelInstance['b']

    if model_fname == opponet_model_fname:
        # Self-play: keep training the loaded weights.
        W = W0
        b = b0
    else:
        # Fixed opponent; the trainee starts from freshly initialized weights.
        make_movement_fn = lambda x: model.predict2(W0, b0, x)
        (W, b) = model.initialize_weights(n)

    for i in range(iterations):
        if model_fname == opponet_model_fname:
            # Rebound every iteration so the opponent always plays with the
            # current weights, even if update_weights rebinds rather than
            # mutates W/b in place.
            make_movement_fn = lambda x: model.predict2(W, b, x)

        # Fresh training examples generated by playing with the opponent.
        ex = training.make_training_examples(make_movement_fn)
        X = ex['X']
        Y = ex['Y']

        (dW, db, _) = model.back_propagation(n, W, b, X, Y)

        # 1/t learning-rate decay.
        alpha = alpha0 / (1 + decay_rate * i)
        model.update_weights(W, dW, b, db, alpha)

        if i % 100 == 0:
            print('iteration ' + str(i))
            (aL, _) = model.forward_propagation(W, b, X)
            cost = model.cost_function(Y, aL)
            print('cost')
            print(cost)
            print('alpha')
            print(alpha)

    print('------ end -------')
    model.save(n, W, b, model_fname)
def train_model_scenario_1(n, model_fname, training_examples_fname, m=0, alpha=0.001, iterations=10000):
    """Train a network from a file of pre-recorded training examples.

    Parameters:
        n: network layer-size description (e.g. a 9x18x18x9 model).
        model_fname: file the trained weights are saved to at the end.
        training_examples_fname: file the examples are read from.
        m: number of examples to read, forwarded to
            training.read_training_examples (0 presumably means "all" —
            TODO confirm against that helper).
        alpha: fixed learning rate (no decay in this scenario).
        iterations: number of gradient-update iterations.
    """
    # Flip on to periodically verify gradients via the numerical check below.
    debug = False

    (W, b) = model.initialize_weights(n)

    ex = training.read_training_examples(m, fname=training_examples_fname)
    # X and Y are example matrices; assumed (9, m)-shaped per the original
    # notes — confirm against training.read_training_examples.
    X = ex['X']
    Y = ex['Y']

    # L is the number of NN layers (L = 3 for a 9x18x18x9 model).
    L = len(n) - 1
    assert len(W) == L

    for i in range(iterations):
        (dW, db, _) = model.back_propagation(n, W, b, X, Y)
        model.update_weights(W, dW, b, db, alpha)

        if i % 300 == 0:
            print('iteration ' + str(i))
            (aL, _) = model.forward_propagation(W, b, X)
            cost = model.cost_function(Y, aL)
            print('cost')
            print(cost)

        if debug:
            if i > 0 and i % 3000 == 0:
                # Numerical gradient check; abort if backprop disagrees.
                is_back_prop_correct = model.check_back_propagation(n, W, b, X, Y)
                if not is_back_prop_correct:
                    print("BP is not correct")
                    exit()

    print('------ end -------')
    model.save(n, W, b, model_fname)
])  # closes a transforms composition that begins above this chunk

# Dataset: MNIST by default; transform pipeline is defined above.
dataset = datasets.MNIST(root="dataset/", transform=transforms, download=True)
# comment mnist above and uncomment below for training on CelebA
#dataset = datasets.ImageFolder(root="celeb_dataset", transform=transforms)
loader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# initialize gen and disc, note: discriminator should be called critic,
# according to WGAN paper (since it no longer outputs between [0, 1])
gen = Generator(Z_DIM, CHANNELS_IMG, FEATURES_GEN).to(device)
critic = Discriminator(CHANNELS_IMG, FEATURES_CRITIC).to(device)
initialize_weights(gen)
initialize_weights(critic)

# initializate optimizer — betas (0.0, 0.9) as used for WGAN-GP training
opt_gen = optim.Adam(gen.parameters(), lr=LEARNING_RATE, betas=(0.0, 0.9))
opt_critic = optim.Adam(critic.parameters(), lr=LEARNING_RATE, betas=(0.0, 0.9))

# for tensorboard plotting: a fixed noise batch so generated samples are
# comparable across steps
fixed_noise = torch.randn(32, Z_DIM, 1, 1).to(device)
writer_real = SummaryWriter(f"logs/GAN_MNIST/real")
writer_fake = SummaryWriter(f"logs/GAN_MNIST/fake")
step = 0

gen.train()
    # Normalize each channel to roughly [-1, 1] (mean 0.5, std 0.5 per channel).
    transforms.Normalize(
        [0.5 for _ in range(CHANNELS_IMG)],
        [0.5 for _ in range(CHANNELS_IMG)],
    ),
])  # closes a transforms composition that begins above this chunk

# If you train on MNIST, remember to set channels_img to 1
dataset = datasets.MNIST(root="dataset/", train=True, transform=transforms, download=True)
# comment mnist above and uncomment below if train on CelebA
#dataset = datasets.ImageFolder(root="celeb_dataset", transform=transforms)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

gen = Generator(NOISE_DIM, CHANNELS_IMG, FEATURES_GEN).to(device)
disc = Discriminator(CHANNELS_IMG, FEATURES_DISC).to(device)
initialize_weights(gen)
initialize_weights(disc)

# Standard DCGAN optimizer settings: Adam with betas (0.5, 0.999).
opt_gen = optim.Adam(gen.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))
opt_disc = optim.Adam(disc.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))
criterion = nn.BCELoss()

# Fixed noise batch so generated samples are comparable across steps.
fixed_noise = torch.randn(32, NOISE_DIM, 1, 1).to(device)
writer_real = SummaryWriter(f"logs/real")
writer_fake = SummaryWriter(f"logs/fake")
step = 0

gen.train()
disc.train()

# Loop body continues beyond this chunk.
for epoch in range(NUM_EPOCHS):
        # This chunk starts mid-expression: the transforms composition opens
        # above this view.
        transforms.ToTensor(),
        # Normalize each channel to roughly [-1, 1] (mean 0.5, std 0.5).
        transforms.Normalize(
            [0.5 for _ in range(CHANNELS_IMG)],
            [0.5 for _ in range(CHANNELS_IMG)],
        ),
    ]
)

dataset = datasets.MNIST(root='dataset/', transform=transforms, download=True)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True,
                        num_workers=4, pin_memory=True)

generator = Generator(NOISE_DIM, CHANNELS_IMG, FEATURES_GEN).to(device)
discriminator = Discriminator(CHANNELS_IMG, FEATURES_DISC).to(device)
initialize_weights(generator)
initialize_weights(discriminator)

# Standard DCGAN optimizer settings: Adam with betas (0.5, 0.999).
optimizer_g = torch.optim.Adam(
    generator.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))
optimizer_d = torch.optim.Adam(
    discriminator.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))
criterion = nn.BCELoss()

# Fixed noise batch so generated samples are comparable across steps.
fixed_random_noise = torch.randn(32, NOISE_DIM, 1, 1).to(device)
writer_real = SummaryWriter(f"logs/DCGAN_MNIST/real")
writer_fake = SummaryWriter(f"logs/DCGAN_MNIST/fake")
step = 0

generator.train()
discriminator.train()