def __init__(self, context: DeepSpeedTrialContext) -> None:
    """Set up the GAN trial: build G/D, wrap them in DeepSpeed engines,
    and prepare fixed noise, loss, and gradient-accumulation state."""
    self.context = context
    self.hparams = AttrDict(self.context.get_hparams())
    self.data_config = AttrDict(self.context.get_data_config())
    self.logger = TorchWriter()

    # Channel count depends on the configured dataset.
    num_channels = data.CHANNELS_BY_DATASET[self.data_config.dataset]

    g_model = Generator(
        self.hparams.generator_width_base, num_channels, self.hparams.noise_length
    )
    g_model.apply(weights_init)

    d_model = Discriminator(self.hparams.discriminator_width_base, num_channels)
    d_model.apply(weights_init)

    # Only hand trainable parameters to DeepSpeed.
    g_trainable = filter(lambda p: p.requires_grad, g_model.parameters())
    d_trainable = filter(lambda p: p.requires_grad, d_model.parameters())

    # Base DeepSpeed config, optionally patched by hparam overrides.
    deepspeed_cfg = overwrite_deepspeed_config(
        self.hparams.deepspeed_config, self.hparams.get("overwrite_deepspeed_args", {})
    )
    g_engine, _, _, _ = deepspeed.initialize(
        model=g_model, model_parameters=g_trainable, config=deepspeed_cfg
    )
    d_engine, _, _, _ = deepspeed.initialize(
        model=d_model, model_parameters=d_trainable, config=deepspeed_cfg
    )

    self.generator = self.context.wrap_model_engine(g_engine)
    self.discriminator = self.context.wrap_model_engine(d_engine)

    # Fixed latent batch so generated samples are comparable across steps.
    self.fixed_noise = self.context.to_device(
        torch.randn(
            self.context.train_micro_batch_size_per_gpu, self.hparams.noise_length, 1, 1
        )
    )
    self.criterion = nn.BCELoss()

    # TODO: Test fp16
    self.fp16 = g_engine.fp16_enabled()
    self.gradient_accumulation_steps = g_engine.gradient_accumulation_steps()
    # Gradient accumulation is handled manually by this trial.
    if self.gradient_accumulation_steps > 1:
        logging.info("Disabling automatic gradient accumulation.")
        self.context.disable_auto_grad_accumulation()
def __init__(self, corpus, config, action2name):
    """GAN-based RNN dialogue agent: a context encoder feeding a
    generator/discriminator pair, with an action-id -> name lookup."""
    super(GanRnnAgent, self).__init__()
    self.use_gpu = config.use_gpu

    # Vocabulary and the RNN context encoder are only needed for the
    # 'rnn' state representation.
    # NOTE(review): original formatting was collapsed; this nesting of the
    # state_type guard is reconstructed — confirm against the original file.
    if config.state_type == 'rnn':
        self.vocab = corpus.vocab
        self.rev_vocab = corpus.rev_vocab
        self.vocab_size = len(self.vocab)
        self.go_id = self.rev_vocab[BOS]
        self.eos_id = self.rev_vocab[EOS]
        self.context_encoder = ContEncoder(corpus, config)

    self.action2name = action2name
    self.lookupProb_ = LookupProb(action2name, config)

    self.discriminator = Discriminator(config)
    self.generator = Generator(config)

    self.loss_BCE = nn.BCELoss()
    self.config = config
def __init__(self, corpus, config, action2name):
    """GAN-based RNN dialogue agent with an extra FNN head that maps the
    encoded context to the discrete latent Y."""
    super(GanRnnAgent, self).__init__()
    self.use_gpu = config.use_gpu

    # Vocabulary bookkeeping.
    self.vocab = corpus.vocab
    self.rev_vocab = corpus.rev_vocab
    self.vocab_size = len(self.vocab)
    self.go_id = self.rev_vocab[BOS]
    self.eos_id = self.rev_vocab[EOS]

    # Action-id -> name mapping and its probability lookup helper.
    self.action2name = action2name
    self.lookupProb_ = LookupProb(action2name, config)

    # Dialogue-context encoder plus the adversarial pair.
    self.context_encoder = ContEncoder(corpus, config)
    self.discriminator = Discriminator(config)
    self.generator = Generator(config)

    # FNN head producing Y (y_size groups of k categories).
    self.p_fc1 = nn.Linear(config.ctx_cell_size, config.ctx_cell_size)
    self.p_y = nn.Linear(config.ctx_cell_size, config.y_size * config.k)

    self.loss_BCE = nn.BCELoss()
    self.config = config
def anomaly_test():
    """Run GAN-based anomaly detection on a single request image.

    Loads pretrained G/D, reconstructs the image through encoder+generator,
    and derives an anomaly heatmap/contours from the reconstruction error.
    Writes annotated output images under ``output_files/`` and returns 'done'.

    Fixes vs. original: removed two dead, immediately-overwritten
    assignments (the first ``transforms = transform(...)`` and the first
    ``device = torch.device(...)``).
    """
    print('Anomaly')
    category = 'hand'
    # NOTE(review): relies on module-level requestFolderName — confirm it is set.
    img_file = requestFolderName + '/image-16.jpg'
    print('saved ', img_file, ' category: ', category)
    count = 16

    # The dataloader expects a dataframe of file paths; repeat the single image.
    files = [img_file for _ in range(65)]
    data = {'0': np.array(files)}
    mura_valid_df = pd.DataFrame(data)
    print(mura_valid_df.head())

    # Flags: rotation, hflip, resize, totensor, normalize, centercrop, to_pil, gray
    transforms = inverse_transform(False, True, True, True, True, True, True, False)
    # Keep original resolution (no resize) to construct the output image.
    noresize_transform = transform(False, False, False, True, True, True, True, False)

    img = cv2.imread(img_file)
    print(img.shape)
    img = noresize_transform(img)
    print(img.shape)

    # 256x256 version of the input for blending with the contour overlay.
    transforms1 = transform(False, True, False, False, False, False, True, False)
    resized_input_img = transforms1(img)

    valid_dataset = MURA_dataset(mura_valid_df, '', transforms)
    valid_dataloader = torch.utils.data.DataLoader(dataset=valid_dataset,
                                                   batch_size=64,
                                                   shuffle=True,
                                                   num_workers=0,
                                                   drop_last=False)

    if category == 'hand':
        out = 'models/XR_HAND/'
    else:
        out = 'models/XR_ELBOW/'

    max_auc = 0
    latent_dim = 128
    channels = 3
    batch_size = 64

    generator = Generator(dim=64, zdim=latent_dim, nc=channels)
    discriminator = Discriminator(dim=64, zdim=latent_dim, nc=channels, out_feat=True)
    encoder = Encoder(dim=64, zdim=latent_dim, nc=channels)

    # Inference is forced to CPU (dead CUDA device selection removed).
    device = 'cpu'

    generator.load_state_dict(
        torch.load(out + 'G_epoch5000.pt', map_location=torch.device('cpu')))
    discriminator.load_state_dict(
        torch.load(out + 'D_epoch5000.pt', map_location=torch.device('cpu')))
    # TODO(review): encoder weights are never loaded — it runs with random
    # init. Confirm whether an E_epoch checkpoint should be restored here.
    generator.to(device)
    encoder.to(device)
    discriminator.to(device)

    with torch.no_grad():
        labels = torch.zeros(size=(len(valid_dataloader.dataset), ),
                             dtype=torch.long, device=device)
        scores = torch.empty(size=(len(valid_dataloader.dataset), ),
                             dtype=torch.float32, device=device)
        for i, (imgs, lbls) in enumerate(valid_dataloader):
            print('imgs. shape ', imgs.shape)
            imgs = imgs.to(device)
            lbls = lbls.to(device)
            labels[i * batch_size:(i + 1) * batch_size].copy_(lbls)

            # Encode -> reconstruct -> re-encode; D yields feature embeddings.
            emb_query = encoder(imgs)
            print('emb_query. shape ', emb_query.shape)
            fake_imgs = generator(emb_query)
            emb_fake = encoder(fake_imgs)
            image_feats = discriminator(imgs)
            recon_feats = discriminator(fake_imgs)

            # Pixel-space residual drives the anomaly visualization.
            diff = imgs - fake_imgs
            image1_tensor = diff[0]
            im = tensor2im(imgs)
            im2 = tensor2im(fake_imgs)
            print(lbls)
            im3 = tensor2im(diff)

            # Threshold the blurred residual (Otsu) into a binary mask.
            img = cv2.GaussianBlur(im3, (5, 5), 0)
            img_gray = rgb2gray(img)
            thresh = threshold_otsu(img_gray)
            binary = img_gray > thresh
            im_rgb = np.array(Image.fromarray(binary).convert('RGB'))

            mask = binary.copy()
            mask[mask > 0.5] = 1
            mask[mask <= 0.5] = 0
            mask3 = np.stack((mask, mask, mask), axis=2)

            # Connected components -> red overlay of anomalous regions.
            all_labels = measure.label(mask)
            all_labels[all_labels >= 1] = 255
            all_labels[all_labels < 1] = 0
            all_labels3 = np.stack((all_labels, all_labels, all_labels), axis=2)
            black_pixels_mask = np.all(mask3 == 1, axis=2)
            non_black_pixels_mask = np.any(mask3 > [0, 0, 0], axis=-1)
            all_labels3[non_black_pixels_mask] = [255, 0, 0]

            # First contour pass: Canny edges of the residual image.
            gray = cv2.cvtColor(im3, cv2.COLOR_BGR2GRAY)
            edged = cv2.Canny(gray, 30, 200)
            contours, hierarchy = cv2.findContours(edged, cv2.RETR_EXTERNAL,
                                                   cv2.CHAIN_APPROX_NONE)
            print("Number of Contours found = " + str(len(contours)))
            print('im3: ', im3.shape)
            backtorgb = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
            print('contours: ', len(contours))
            img_contours = np.zeros(backtorgb.shape)
            cv2.drawContours(img_contours, contours, -1, (220, 0, 0), 1)
            resized_output_image = cv2.resize(img_contours, (256, 256))
            cv2.imshow('output blue', resized_output_image)
            cv2.waitKey(0)
            cv2.imwrite('output_files/output-image-' + str(count) + '.jpg',
                        resized_output_image)
            print('resize: ', resized_output_image.shape,
                  np.asarray(resized_input_img).shape)

            # Blend contour overlay with the resized input.
            mix_img = cv2.addWeighted(np.asarray(resized_input_img), 0.3,
                                      resized_output_image, 0.7, 0,
                                      dtype=cv2.CV_32F)
            cv2.imwrite('output_files/mix-image-' + str(count) + '.jpg', mix_img)

            # Second contour pass: fixed binary threshold.
            thresh = 50
            ret, thresh_img = cv2.threshold(gray, thresh, 255, cv2.THRESH_BINARY)
            contours, hierarchy = cv2.findContours(thresh_img, cv2.RETR_TREE,
                                                   cv2.CHAIN_APPROX_SIMPLE)
            print('contours second time : ', len(contours))
            backtorgb1 = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
            cv2.drawContours(backtorgb1, contours, -1, (0, 255, 0), 1)
            cv2.imshow('output', backtorgb1)
            cv2.waitKey(0)

            # Anomaly scores: pixel / discriminator-feature / latent distances.
            image_distance = torch.mean(torch.pow(imgs - fake_imgs, 2),
                                        dim=[1, 2, 3])
            feat_distance = torch.mean(torch.pow(image_feats - recon_feats, 2),
                                       dim=1)
            print(emb_query.shape, emb_fake.shape)
            z_distance = mse_loss(emb_query, emb_fake)
            print('z_distance=', z_distance)
            scores[i * batch_size:(i + 1) * batch_size].copy_(feat_distance)
            print('feat_distance ', feat_distance[0])
            break  # single-image request: only the first batch is needed

    output = {}
    output['status'] = 'done'
    return 'done'
def main(args):
    """Caption-GAN training entry point.

    Phase 1 (``args.pretraining == 1``): MLE-pretrain the generator and
    pretrain the discriminator on real/wrong caption pairs. Phase 2:
    adversarial training where per-timestep rewards from the discriminator
    (via rollouts) drive the generator, REINFORCE-style.
    """
    # Output directories.
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)
    if not os.path.exists(args.figure_path):
        os.makedirs(args.figure_path)

    # Image preprocessing; normalization constants per
    # https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size, shuffle=True,
                             num_workers=args.num_workers)

    # Models.
    generator = Generator(args.embed_size, args.hidden_size, len(vocab),
                          args.num_layers)
    discriminator = Discriminator(args.embed_size, args.hidden_size,
                                  len(vocab), args.num_layers)
    if torch.cuda.is_available():
        generator.cuda()
        discriminator.cuda()

    # Losses and optimizers.
    mle_criterion = nn.CrossEntropyLoss()
    params_gen = list(generator.parameters())
    optimizer_gen = torch.optim.Adam(params_gen)
    params_disc = list(discriminator.parameters())
    optimizer_disc = torch.optim.Adam(params_disc)

    if int(args.pretraining) == 1:
        # --- Pretraining: G with MLE, D with real + wrong losses ---
        total_steps = len(data_loader)
        print(total_steps)
        disc_losses = []
        gen_losses = []
        print('pre-training')
        generator.load_state_dict(torch.load(args.pretrained_gen_path))
        discriminator.load_state_dict(torch.load(args.pretrained_disc_path))

        num_pretrain_epochs = max([int(args.gen_pretrain_num_epochs),
                                   int(args.disc_pretrain_num_epochs)])
        for epoch in range(num_pretrain_epochs):
            if epoch < 5:
                continue  # resume point: skip already-completed epochs
            for i, (images, captions, lengths, wrong_captions,
                    wrong_lengths) in enumerate(data_loader):
                images = to_var(images, volatile=True)
                captions = to_var(captions)
                wrong_captions = to_var(wrong_captions)
                targets = pack_padded_sequence(captions, lengths,
                                               batch_first=True)[0]

                if epoch < int(args.gen_pretrain_num_epochs):
                    # Generator MLE step.
                    generator.zero_grad()
                    outputs, _ = generator(images, captions, lengths)
                    loss_gen = mle_criterion(outputs, targets)
                    loss_gen.backward()
                    optimizer_gen.step()

                if epoch < int(args.disc_pretrain_num_epochs):
                    # Discriminator: real captions up, wrong captions down.
                    # (No fake loss during pretraining.)
                    discriminator.zero_grad()
                    rewards_real = discriminator(images, captions, lengths)
                    rewards_wrong = discriminator(images, wrong_captions,
                                                  wrong_lengths)
                    real_loss = -torch.mean(torch.log(rewards_real))
                    wrong_loss = -torch.mean(
                        torch.clamp(torch.log(1 - rewards_wrong), min=-1000))
                    loss_disc = real_loss + wrong_loss
                    loss_disc.backward()
                    optimizer_disc.step()

                if (i + 1) % args.log_step == 0:
                    print(
                        'Epoch [%d], Step [%d], Disc Loss: %.4f, Gen Loss: %.4f'
                        % (epoch + 1, i + 1, loss_disc, loss_gen))
                if (i + 1) % 500 == 0:
                    torch.save(
                        discriminator.state_dict(),
                        os.path.join(
                            args.model_path,
                            'pretrained-discriminator-%d-%d.pkl' %
                            (int(epoch) + 1, i + 1)))
                    torch.save(
                        generator.state_dict(),
                        os.path.join(
                            args.model_path,
                            'pretrained-generator-%d-%d.pkl' %
                            (int(epoch) + 1, i + 1)))

        # Final pretrained checkpoints.
        torch.save(
            discriminator.state_dict(),
            os.path.join(
                args.model_path, 'pretrained-discriminator-%d.pkl' %
                int(args.disc_pretrain_num_epochs)))
        torch.save(
            generator.state_dict(),
            os.path.join(
                args.model_path, 'pretrained-generator-%d.pkl' %
                int(args.gen_pretrain_num_epochs)))
    else:
        # Skip pretraining: restore previously pretrained weights.
        generator.load_state_dict(torch.load(args.pretrained_gen_path))
        discriminator.load_state_dict(torch.load(args.pretrained_disc_path))

    # --- Adversarial training ---
    total_step = len(data_loader)
    disc_gan_losses = []
    gen_gan_losses = []
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths, wrong_captions,
                wrong_lengths) in enumerate(data_loader):
            # NOTE(review): volatile=True on a training input is old-PyTorch
            # semantics (disables grad through the image branch) — confirm.
            images = to_var(images, volatile=True)
            captions = to_var(captions)
            wrong_captions = to_var(wrong_captions)

            generator.zero_grad()
            outputs, packed_lengths = generator(images, captions, lengths)
            outputs = PackedSequence(outputs, packed_lengths)
            outputs = pad_packed_sequence(outputs, batch_first=True)  # (b, T, V)
            Tmax = outputs[0].size(1)
            if torch.cuda.is_available():
                rewards = torch.zeros_like(outputs[0]).type(
                    torch.cuda.FloatTensor)
            else:
                rewards = torch.zeros_like(outputs[0]).type(torch.FloatTensor)

            # Collect per-timestep rewards from the discriminator.
            for t in range(2, Tmax, 2):
                # TODO: min(lengths) keeps indexing safe but may cut short.
                if t >= min(lengths):
                    break
                gen_samples = to_var(torch.zeros(
                    (captions.size(0), Tmax)).type(torch.FloatTensor),
                                     volatile=True)
                # Prefix from the real caption...
                gen_samples[:, :t] = captions[:, :t].data
                predicted_ids, saved_states = generator.pre_compute(
                    gen_samples, t)
                v = predicted_ids
                # ...current token from the generator's prediction...
                gen_samples[:, t] = v
                # ...suffix from a rollout.
                gen_samples[:, t:] = generator.rollout(gen_samples, t,
                                                       saved_states)

                # Derive per-sample lengths (token id 2 == <end>).
                sampled_lengths = []
                for batch in range(int(captions.size(0))):
                    for b_t in range(Tmax):
                        if gen_samples[batch, b_t].cpu().data.numpy() == 2:
                            sampled_lengths.append(b_t + 1)
                            break
                        elif b_t == Tmax - 1:
                            sampled_lengths.append(Tmax)
                sampled_lengths = np.array(sampled_lengths)
                sampled_lengths[::-1].sort()  # descending, as packing requires
                sampled_lengths = sampled_lengths.tolist()

                rewards[:, t, v] = discriminator(images, gen_samples.detach(),
                                                 sampled_lengths)

            # Policy-gradient generator update with detached rewards.
            rewards_detached = to_var(rewards.data)
            loss_gen = torch.dot(outputs[0], -rewards_detached)
            loss_gen.backward()
            optimizer_gen.step()

            # Sample full captions for the discriminator's fake loss.
            sampled_ids = generator.sample(images)
            sampled_lengths = []
            for batch in range(int(captions.size(0))):
                for b_t in range(20):
                    if sampled_ids[batch, b_t].cpu().data.numpy() == 2:  # <end>
                        sampled_lengths.append(b_t + 1)
                        break
                    elif b_t == 20 - 1:
                        sampled_lengths.append(20)
            sampled_lengths = np.array(sampled_lengths)
            sampled_lengths[::-1].sort()
            sampled_lengths = sampled_lengths.tolist()

            # Discriminator update: real vs fake vs wrong captions.
            discriminator.zero_grad()
            images.volatile = False
            captions.volatile = False
            wrong_captions.volatile = False
            rewards_real = discriminator(images, captions, lengths)
            rewards_fake = discriminator(images, sampled_ids, sampled_lengths)
            rewards_wrong = discriminator(images, wrong_captions,
                                          wrong_lengths)
            real_loss = -torch.mean(torch.log(rewards_real))
            fake_loss = -torch.mean(
                torch.clamp(torch.log(1 - rewards_fake), min=-1000))
            wrong_loss = -torch.mean(
                torch.clamp(torch.log(1 - rewards_wrong), min=-1000))
            loss_disc = real_loss + fake_loss + wrong_loss
            loss_disc.backward()
            optimizer_disc.step()

            if i % args.log_step == 0:
                print(
                    'Epoch [%d/%d], Step [%d/%d], Disc Loss: %.4f, Gen Loss: %.4f'
                    % (epoch, args.num_epochs, i, total_step, loss_disc,
                       loss_gen))

            # Checkpointing (at log_step granularity).
            if (i + 1) % args.log_step == 0:
                torch.save(
                    generator.state_dict(),
                    os.path.join(args.model_path,
                                 'generator-gan-%d-%d.pkl' %
                                 (epoch + 1, i + 1)))
                torch.save(
                    discriminator.state_dict(),
                    os.path.join(args.model_path,
                                 'discriminator-gan-%d-%d.pkl' %
                                 (epoch + 1, i + 1)))
def model(lines_ch, lines):
    """Build and train a character-level WGAN-GP (TensorFlow 1.x).

    Monitors JS divergence between true and generated n-gram
    distributions (n = 2..5) and periodically dumps samples/checkpoints.
    """
    # Graph inputs: real one-hot sequences and generated ones.
    real_inputs = tf.placeholder(tf.float32,
                                 shape=[BATCH_SIZE, SEQ_LEN, len(charmap)])
    fake_inputs = Generator(BATCH_SIZE)

    disc_real = Discriminator(real_inputs)
    disc_fake = Discriminator(fake_inputs)

    # WGAN critic/generator costs.
    disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
    gen_cost = -tf.reduce_mean(disc_fake)

    # Gradient penalty (WGAN-GP Lipschitz constraint).
    alpha = tf.random_uniform(shape=[BATCH_SIZE, 1, 1], minval=0., maxval=1.)
    differences = fake_inputs - real_inputs
    interpolates = real_inputs + (alpha * differences)
    gradients = tf.gradients(Discriminator(interpolates), [interpolates])[0]
    slopes = tf.sqrt(
        tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2]))
    gradient_penalty = tf.reduce_mean((slopes - 1.)**2)
    disc_cost += LAMBDA * gradient_penalty

    # Separate variable sets for the two optimizers.
    gen_params = lib.params_with_name('Generator')
    disc_params = lib.params_with_name('Discriminator')
    gen_train_op = tf.train.AdamOptimizer(
        learning_rate=1e-4, beta1=0.5,
        beta2=0.9).minimize(gen_cost, var_list=gen_params)
    disc_train_op = tf.train.AdamOptimizer(
        learning_rate=1e-4, beta1=0.5,
        beta2=0.9).minimize(disc_cost, var_list=disc_params)

    # N-gram LMs (n = 2..5) for JS-divergence monitoring; baseline JSD is
    # computed on a held-out slice first.
    true_char_ngram_lms = [
        language_helpers.NgramLanguageModel(i, lines_ch[100 * BATCH_SIZE:],
                                            tokenize=False)
        for i in range(2, 6)
    ]
    validation_char_ngram_lms = [
        language_helpers.NgramLanguageModel(i, lines_ch[:100 * BATCH_SIZE],
                                            tokenize=False)
        for i in range(2, 6)
    ]
    for i in range(0, 4):
        print("validation set JSD for n={}: {}".format(
            i + 2, true_char_ngram_lms[i].js_with(
                validation_char_ngram_lms[i])))
    # From here on, compare against LMs built on the full corpus.
    true_char_ngram_lms = [
        language_helpers.NgramLanguageModel(i, lines_ch[:], tokenize=False)
        for i in range(2, 6)
    ]

    saver = tf.train.Saver()
    with tf.Session() as session:
        session.run(tf.initialize_all_variables())

        def generate_samples():
            """Decode one generator batch into strings via argmax."""
            samples = session.run(fake_inputs)
            samples = np.argmax(samples, axis=2)
            decoded_samples = []
            for row in range(len(samples)):
                decoded = ''
                for j in samples[row]:
                    decoded += inv_charmap[j]
                decoded_samples.append(decoded)
            return decoded_samples

        gen = data_real_gen(lines)

        for iteration in range(ITERS):
            start_time = time.time()

            # Generator step (skipped on the very first iteration).
            _gen_cost = 0
            if iteration > 0:
                _gen_cost, _ = session.run([gen_cost, gen_train_op])

            # Critic steps.
            _disc_cost = 0
            for _ in range(CRITIC_ITERS):
                _data = next(gen)
                _disc_cost, _ = session.run([disc_cost, disc_train_op],
                                            feed_dict={real_inputs: _data})

            lib.plot.plot('time', time.time() - start_time)
            lib.plot.plot('train gen cost', _gen_cost)
            lib.plot.plot('train disc cost', _disc_cost)

            # Every 100 iterations: checkpoint, sample, and log JSD.
            if iteration % 100 == 99:
                saver.save(session, 'checkpoint/Mymodel_' + str(iteration))
                samples = []
                for _ in range(20):
                    samples.extend(generate_samples())
                for i in range(0, 4):
                    lm = language_helpers.NgramLanguageModel(i + 2, samples,
                                                             tokenize=False)
                    lib.plot.plot('js{}'.format(i + 2),
                                  true_char_ngram_lms[i].js_with(lm))
                with open('samples_{}.txt'.format(iteration), 'w',
                          encoding="utf-8") as f:
                    for s in samples:
                        s = "".join(s)
                        f.write(s + "\n")

            if iteration % 100 == 99:
                lib.plot.flush()
            lib.plot.tick()
def train(args):
    """DCGAN training loop driven by two DeepSpeed engines (one per net).

    Standard alternating scheme: the discriminator sees real then fake
    batches, the generator is updated against real labels. Progress is
    logged to stdout and TensorBoard; sample grids are dumped periodically.
    """
    writer = SummaryWriter(log_dir=args.tensorboard_path)
    create_folder(args.outf)
    set_seed(args.manualSeed)
    cudnn.benchmark = True

    dataset, nc = get_dataset(args)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batchSize,
                                             shuffle=True,
                                             num_workers=int(args.workers))

    torch.cuda.set_device(args.local_rank)
    device = torch.device("cuda", args.local_rank)

    ngpu = 0
    nz = int(args.nz)
    ngf = int(args.ngf)
    ndf = int(args.ndf)

    netG = Generator(ngpu, ngf, nc, nz).to(device)
    netG.apply(weights_init)
    if args.netG != '':
        netG.load_state_dict(torch.load(args.netG))
    netD = Discriminator(ngpu, ndf, nc).to(device)
    netD.apply(weights_init)
    if args.netD != '':
        netD.load_state_dict(torch.load(args.netD))

    criterion = nn.BCELoss()
    # Fixed latent batch for comparable sample grids across epochs.
    fixed_noise = torch.randn(args.batchSize, nz, 1, 1, device=device)
    real_label = 1
    fake_label = 0

    # Optimizers are handed to DeepSpeed, which returns wrapped versions.
    optimizerD = torch.optim.Adam(netD.parameters(),
                                  lr=args.lr,
                                  betas=(args.beta1, 0.999))
    optimizerG = torch.optim.Adam(netG.parameters(),
                                  lr=args.lr,
                                  betas=(args.beta1, 0.999))
    model_engineD, optimizerD, _, _ = deepspeed.initialize(
        args=args,
        model=netD,
        model_parameters=netD.parameters(),
        optimizer=optimizerD)
    model_engineG, optimizerG, _, _ = deepspeed.initialize(
        args=args,
        model=netG,
        model_parameters=netG.parameters(),
        optimizer=optimizerG)

    torch.cuda.synchronize()
    start = time()
    for epoch in range(args.epochs):
        for i, data in enumerate(dataloader, 0):
            ############################
            # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
            ###########################
            # Real batch.
            netD.zero_grad()
            real = data[0].to(device)
            batch_size = real.size(0)
            label = torch.full((batch_size, ),
                               real_label,
                               dtype=real.dtype,
                               device=device)
            output = netD(real)
            errD_real = criterion(output, label)
            model_engineD.backward(errD_real)
            D_x = output.mean().item()

            # Fake batch (generator output detached for the D update).
            noise = torch.randn(batch_size, nz, 1, 1, device=device)
            fake = netG(noise)
            label.fill_(fake_label)
            output = netD(fake.detach())
            errD_fake = criterion(output, label)
            model_engineD.backward(errD_fake)
            D_G_z1 = output.mean().item()
            errD = errD_real + errD_fake
            # DeepSpeed engine step replaces optimizerD.step().
            model_engineD.step()

            ############################
            # (2) Update G network: maximize log(D(G(z)))
            ###########################
            netG.zero_grad()
            label.fill_(real_label)  # fake labels are real for generator cost
            output = netD(fake)
            errG = criterion(output, label)
            model_engineG.backward(errG)
            D_G_z2 = output.mean().item()
            # DeepSpeed engine step replaces optimizerG.step().
            model_engineG.step()

            print(
                '[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f'
                % (epoch, args.epochs, i, len(dataloader), errD.item(),
                   errG.item(), D_x, D_G_z1, D_G_z2))
            writer.add_scalar("Loss_D", errD.item(),
                              epoch * len(dataloader) + i)
            writer.add_scalar("Loss_G", errG.item(),
                              epoch * len(dataloader) + i)

            if i % 100 == 0:
                vutils.save_image(real,
                                  '%s/real_samples.png' % args.outf,
                                  normalize=True)
                fake = netG(fixed_noise)
                vutils.save_image(fake.detach(),
                                  '%s/fake_samples_epoch_%03d.png' %
                                  (args.outf, epoch),
                                  normalize=True)

    torch.cuda.synchronize()
    stop = time()
    print(
        f"total wall clock time for {args.epochs} epochs is {stop-start} secs")
def train_gan():
    """Train a WGAN-GP: critic updated every batch, generator every 5th.

    Saves a full checkpoint (models, optimizers, running losses) after
    every epoch under ``upsample/``.

    Fix vs. original: the per-epoch running losses were accumulated as
    ``disc_loss += loss`` on the live autograd tensors, which kept every
    iteration's computation graph alive for the whole epoch (unbounded
    memory growth). They are now accumulated as plain floats via
    ``loss.item()``; the checkpoint therefore stores floats instead of
    graph-attached tensors, and the epoch-summary prints are unchanged.
    """
    batch_size = 64
    epochs = 100
    disc_update = 1   # critic update period (every batch)
    gen_update = 5    # generator update period
    latent_dimension = 100
    lambduh = 10      # gradient-penalty coefficient

    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')

    train_loader, valid_loader, test_loader = get_data_loader(
        'data', batch_size)

    disc_model = Discriminator().to(device)
    gen_model = Generator(latent_dimension).to(device)
    disc_optim = Adam(disc_model.parameters(), lr=1e-4, betas=(0.5, 0.9))
    gen_optim = Adam(gen_model.parameters(), lr=1e-4, betas=(0.5, 0.9))

    for e in range(epochs):
        disc_loss = 0.0
        gen_loss = 0.0
        for i, (images, _) in enumerate(train_loader):
            images = images.to(device)
            b_size = images.shape[0]
            step = i + 1

            if step % disc_update == 0:
                # Critic: fake mean - real mean + gradient penalty.
                disc_model.zero_grad()
                noise = torch.randn((b_size, latent_dimension), device=device)
                inputs = gen_model(noise).detach()
                f_outputs = disc_model(inputs)
                loss = f_outputs.mean()
                r_outputs = disc_model(images)
                loss -= r_outputs.mean()
                loss += lambduh * gradient_penalty(disc_model, images, inputs,
                                                   device)
                # Accumulate a float, not the graph-attached tensor.
                disc_loss += loss.item()
                loss.backward()
                disc_optim.step()

            if step % gen_update == 0:
                # Generator: maximize critic score on fresh fakes.
                gen_model.zero_grad()
                noise = torch.randn((b_size, latent_dimension)).to(device)
                inputs = gen_model(noise)
                outputs = disc_model(inputs)
                loss = -outputs.mean()
                gen_loss += loss.item()
                loss.backward()
                gen_optim.step()

        # Per-epoch checkpoint (losses stored as floats, see docstring).
        torch.save(
            {
                'epoch': e,
                'disc_model': disc_model.state_dict(),
                'gen_model': gen_model.state_dict(),
                'disc_loss': disc_loss,
                'gen_loss': gen_loss,
                'disc_optim': disc_optim.state_dict(),
                'gen_optim': gen_optim.state_dict()
            }, "upsample/checkpoint_{}.pth".format(e))
        print("Epoch: {} Disc loss: {}".format(
            e + 1, disc_loss / len(train_loader)))
        print("Epoch: {} Gen loss: {}".format(
            e + 1, gen_loss / len(train_loader)))
print('batch {:>3}/{:>3}, validation disc cost : {:.4f}'.format( iteration, ITERS, costs_avg)) if __name__ == "__main__": args = get_args() device_D = torch.device(args.deviceD) device_G = torch.device(args.deviceG) # load generator and discriminator model netG = Generator() summary(netG, input_size=(INPUT_LATENT, ), device='cpu') netD = Discriminator() summary(netD, input_size=(3, 32, 32), device='cpu') # set folder to save model checkpoints model_folder = os.path.abspath('./defensive_models') if not os.path.exists(model_folder): os.mkdir(model_folder) check_point_path = './defensive_models/snapshots.pth' if os.path.exists(check_point_path): checkpoint = torch.load(check_point_path) inital_epoch = checkpoint['epoch'] netG.load_state_dict(checkpoint['netG_state_dict'])
# --- Face-GAN setup: data pipeline, networks, losses, optimizers ---
d = 64              # base width of G/D feature maps
latent_size = 256   # generator latent dimensionality
mode = 'lat256'

# Map pixels to [-1, 1] to match a tanh generator output range.
transform2 = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Train on the union of the train and test splits.
train_faceDataset = FaceDataset(data_path + 'train', data_path + 'train.csv',
                                transform2)
test_faceDataset = FaceDataset(data_path + 'test', data_path + 'test.csv',
                               transform2)
train_dataloader = DataLoader(
    ConcatDataset([train_faceDataset, test_faceDataset]),
    batch_size=batch_size,
    num_workers=1)

netG = Generator(d, latent_size)
netD = Discriminator(d)
if cuda:
    netG = netG.cuda()
    netD = netD.cuda()

criterion = nn.BCELoss()
optimizerG = optim.Adam(netG.parameters(), lr=0.002, betas=(0.5, 0.999))
optimizerD = optim.Adam(netD.parameters(), lr=0.002, betas=(0.5, 0.999))

# Fixed latent batch for monitoring sample quality across epochs.
# NOTE(review): .cuda() here is unconditional while the models are guarded
# by the `cuda` flag — this crashes on CPU-only machines; confirm intent.
fix_noise = torch.randn(batch_size, latent_size, 1, 1).cuda()
def main(args):
    """Jointly train an encoder-decoder captioner (MLE) and a caption
    discriminator scored on real / sampled / mismatched captions.

    Saves model checkpoints at the end of each epoch and plots the
    running discriminator loss to ``disc_losses.png``.
    """
    # Model output directory.
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing; normalization constants per
    # https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size, shuffle=True,
                             num_workers=args.num_workers)

    # Generator side (TODO: fold encoder+decoder into a Generator class).
    encoder = EncoderCNN(args.embed_size)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers)
    # Discriminator side.
    discriminator = Discriminator(args.embed_size, args.hidden_size,
                                  len(vocab), args.num_layers)
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()
        discriminator.cuda()

    # Captioner loss/optimizer: only decoder + encoder head are trained.
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Discriminator loss/optimizer.
    params_disc = list(discriminator.parameters())
    optimizer_disc = torch.optim.Adam(params_disc)

    total_step = len(data_loader)
    disc_losses = []
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths, wrong_captions,
                wrong_lengths) in enumerate(data_loader):
            # TODO: train disc before gen
            images = to_var(images, volatile=True)
            captions = to_var(captions)
            wrong_captions = to_var(wrong_captions)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Captioner forward pass + sampling for the fake loss.
            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            sampled_captions = decoder.sample(features)

            # Measure each sampled caption's length (up to '<end>').
            sampled_lengths = []
            for row in range(sampled_captions.size(0)):
                for index, word_id in enumerate(sampled_captions[row, :]):
                    word = vocab.idx2word[word_id.cpu().data.numpy()[0]]
                    if word == '<end>':
                        sampled_lengths.append(index + 1)
                        break
                    elif index == sampled_captions.size(1) - 1:
                        sampled_lengths.append(sampled_captions.size(1))
                        break
            sampled_lengths = np.array(sampled_lengths)
            sampled_lengths[::-1].sort()  # descending, as packing requires
            sampled_lengths = sampled_lengths.tolist()

            # MLE step for the captioner.
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Discriminator step: real vs sampled vs mismatched captions.
            discriminator.zero_grad()
            rewards_real = discriminator(images, captions, lengths)
            rewards_fake = discriminator(images, sampled_captions,
                                         sampled_lengths)
            rewards_wrong = discriminator(images, wrong_captions,
                                          wrong_lengths)
            real_loss = -torch.mean(torch.log(rewards_real))
            fake_loss = -torch.mean(
                torch.clamp(torch.log(1 - rewards_fake), min=-1000))
            wrong_loss = -torch.mean(
                torch.clamp(torch.log(1 - rewards_wrong), min=-1000))
            loss_disc = real_loss + fake_loss + wrong_loss
            disc_losses.append(loss_disc.cpu().data.numpy()[0])
            loss_disc.backward()
            optimizer_disc.step()

            if i % args.log_step == 0:
                print(
                    'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                    % (epoch, args.num_epochs, i, total_step, loss.data[0],
                       np.exp(loss.data[0])))

            # Save at the last iteration of the epoch.
            if (i + 1) % total_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    discriminator.state_dict(),
                    os.path.join(
                        args.model_path,
                        'discriminator-%d-%d.pkl' % (epoch + 1, i + 1)))

        # Plot the running discriminator loss at the end of every epoch.
        plt.plot(disc_losses, label='disc loss')
        plt.savefig('disc_losses.png')
        plt.clf()