def main(config):
    """Entry point: prepare directories and loaders, then run the StarGAN solver."""
    # cudnn autotuner speeds up training when input sizes are fixed.
    cudnn.benchmark = True

    # Make sure every output directory exists.
    for directory in (config.log_dir, config.model_save_dir,
                      config.sample_dir, config.result_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Build only the loaders required by the chosen dataset.
    celeba_loader = None
    rafd_loader = None
    if config.dataset in ('CelebA', 'Both'):
        celeba_loader = get_loader(config.celeba_image_dir, config.attr_path,
                                   config.selected_attrs,
                                   config.celeba_crop_size, config.image_size,
                                   config.batch_size, 'CelebA', config.mode,
                                   config.num_workers)
    if config.dataset in ('RaFD', 'Both'):
        rafd_loader = get_loader(config.rafd_image_dir, None, None,
                                 config.rafd_crop_size, config.image_size,
                                 config.batch_size, 'RaFD', config.mode,
                                 config.num_workers)

    # Solver handles both training and testing of StarGAN.
    solver = Solver(celeba_loader, rafd_loader, config)

    if config.mode == 'train':
        if config.dataset in ('CelebA', 'RaFD'):
            solver.train()
        elif config.dataset == 'Both':
            solver.train_multi()
    elif config.mode == 'test':
        if config.dataset in ('CelebA', 'RaFD'):
            solver.test()
        elif config.dataset == 'Both':
            solver.test_multi()
def train(model, elogger, train_set, eval_set):
    """Train `model` over the files in `train_set`, evaluating on `eval_set`
    and saving a checkpoint after every epoch.

    Args:
        model: network exposing eval_on_batch(attr, traj, config) -> (pred, loss).
        elogger: experiment logger with a .log(str) method.
        train_set: iterable of training file paths.
        eval_set: iterable of evaluation file paths (passed to `evaluate`).
    """
    # Record the experiment setting.
    elogger.log(str(model))
    elogger.log(str(args._get_kwargs()))

    if torch.cuda.is_available():
        model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    for epoch in range(args.epochs):
        model.train()
        print('Training on epoch {}'.format(epoch))
        for input_file in train_set:
            print('Train on file {}'.format(input_file))
            # Data loader returns two dictionaries per batch: attr and traj.
            data_iter = data_loader.get_loader(input_file, args.batch_size)
            running_loss = 0.0
            for idx, (attr, traj) in enumerate(data_iter):
                # Move the inputs onto the model's device.
                attr, traj = utils.to_var(attr), utils.to_var(traj)
                _, loss = model.eval_on_batch(attr, traj, config)
                # Update the model.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                running_loss += loss.data.item()
                print('\r Progress {:.2f}%, average loss {}'.format(
                    (idx + 1) * 100.0 / len(data_iter),
                    running_loss / (idx + 1.0)), end=' ')
            print()
            elogger.log('Training Epoch {}, File {}, Loss {}'.format(
                epoch, input_file, running_loss / (idx + 1.0)))
        # Evaluate the model after each epoch.
        evaluate(model, elogger, eval_set, save_result=False)
        # Save the weight file after each epoch.
        # BUG FIX: the original saved to the fixed path './saved_weights/weights'
        # while *logging* `weight_name`, so every epoch silently overwrote the
        # previous checkpoint. Save under the logged name instead (filesystem-
        # safe timestamp, no ':' or spaces).
        weight_name = '{}_{}'.format(
            args.log_file, datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
        elogger.log('Save weight file {}'.format(weight_name))
        torch.save(model.state_dict(),
                   os.path.join('./saved_weights', weight_name))
def main(config):
    """Seed RNGs, pick stage-dependent sizes, and train or sample StackGAN.

    Args:
        config: parsed CLI namespace (outf, cuda, training, stage, dataset,
            dataroot, workers, ...). Mutated in place with the derived
            manual_seed, batch_size and image_size.
    """
    if config.outf is None:
        config.outf = 'samples'
    # FIX: create the output directory with os.makedirs instead of shelling
    # out to `mkdir` — portable, no shell involved, and idempotent when the
    # directory already exists.
    os.makedirs(config.outf, exist_ok=True)

    # Fresh random seed per run, applied to every RNG in play.
    config.manual_seed = random.randint(1, 10000)
    print("Random Seed: ", config.manual_seed)
    random.seed(config.manual_seed)
    torch.manual_seed(config.manual_seed)
    if config.cuda:
        torch.cuda.manual_seed_all(config.manual_seed)

    cudnn.benchmark = True

    if torch.cuda.is_available() and not config.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # Sampling always uses stage 2; stage choice fixes batch/image sizes.
    if not config.training:
        config.stage = 2
    if config.stage == 1:
        config.batch_size = 128
        config.image_size = 64
    else:
        config.batch_size = 40
        config.image_size = 256

    if config.training:
        data_loader = get_loader(_dataset=config.dataset,
                                 dataroot=config.dataroot,
                                 batch_size=config.batch_size,
                                 num_workers=int(config.workers),
                                 image_size=config.image_size)
        trainer = Trainer(config, data_loader, None)
        trainer.train()
    else:
        datapath = '%s/test/val_captions.t7' % (config.dataroot)
        trainer = Trainer(config, None, datapath)
        trainer.sample()
def main():
    """Score a saved captioning checkpoint on the COCO val split with BLEU-4."""
    checkpoint_path = './models/checkpoint_2_7340.pt'
    vocab_path = './data/vocab.pkl'
    caption_dir = 'data/annotations/'
    val_dir = 'data/val2014'
    batch_size = 32
    num_workers = 2

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # FIX: map_location lets a GPU-trained checkpoint load on a CPU-only host.
    state = torch.load(checkpoint_path, map_location=device)

    with open(vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Checkpoint stores the whole modules, not just state dicts.
    encoder = state['encoder'].to(device)
    decoder = state['decoder'].to(device)
    encoder.eval()
    decoder.eval()

    # FIX: evaluation must be deterministic — RandomResizedCrop injected
    # run-to-run variance into the BLEU score. Use the standard deterministic
    # Resize + CenterCrop pipeline instead.
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    val_annotations = os.path.join(
        caption_dir, "captions_{}.json".format(os.path.basename(val_dir)))
    val_loader = get_loader(val_dir,
                            val_annotations,
                            vocab,
                            transform,
                            batch_size,
                            shuffle=True,
                            num_workers=num_workers)

    # ids = val_loader.dataset.ids
    # val_loader.dataset.ids = np.random.choice(val_loader.dataset.ids, 2000, replace=False)

    print("Scoring model...")
    score = bleu_score(val_loader, encoder, decoder, device)
    print("BLEU-4 SCORE: {:.4f}".format(score * 100))
def main(config):
    """Set up output directories and run the CelebA-only StarGAN solver."""
    # cudnn autotuner is faster for fixed-size inputs.
    cudnn.benchmark = True

    # Ensure every output directory exists.
    for path in (config.log_dir, config.model_save_dir,
                 config.sample_dir, config.result_dir):
        if not os.path.exists(path):
            os.makedirs(path)

    # This variant uses only the CelebA loader (RaFD support stripped out).
    celeba_loader = get_loader(config.celeba_image_dir, config.attr_path,
                               config.selected_attrs, config.celeba_crop_size,
                               config.image_size, config.batch_size, 'CelebA',
                               config.mode, config.num_workers)

    # Solver for training and testing StarGAN.
    solver = Solver(celeba_loader, config)

    if config.mode == 'train':
        solver.train()
    elif config.mode == 'test':
        solver.test()
def __init__(self, config): self.image_size = 299 #Inception net condition self.lr = 0.0001 self.log_step = 100 self.selected_attrs = config.selected_attrs self.device = torch.device( 'cuda' if torch.cuda.is_available() else 'cpu') self.buildIncNet() self.save_incDir = config.inc_net_dir self.pretrained_incNet = config.pretrained_incNet self.dataset = config.dataset self.test_dataset = get_loader(config.celeba_image_dir, config.attr_path, config.selected_attrs, image_size=self.image_size, num_workers=config.num_workers, dataset=config.dataset, mode='test')
def train(model):
    """Train `model` for args.epochs epochs over the shared data loader,
    printing a running average loss and evaluating after each epoch.

    Args:
        model: network exposing run_on_batch(data, optimizer, epoch) -> dict
            containing at least a 'loss' tensor.
    """
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    data_iter = data_loader.get_loader(batch_size=args.batch_size)

    for epoch in range(args.epochs):
        model.train()
        run_loss = 0.0
        for idx, data in enumerate(data_iter):
            data = utils.to_var(data)
            ret = model.run_on_batch(data, optimizer, epoch)
            run_loss += ret['loss'].item()
            # FIX: Python 2 print statement converted to the print() function
            # for consistency with the rest of the codebase; end='' preserves
            # the carriage-return progress-line behavior of the trailing comma.
            print('\r Progress epoch {}, {:.2f}%, average loss {}'.format(
                epoch, (idx + 1) * 100.0 / len(data_iter),
                run_loss / (idx + 1.0)), end='')
        # Evaluate after every epoch on the same loader.
        evaluate(model, data_iter)
def main(config):
    """Prepare output directories and train the voice-conversion solver."""
    cudnn.benchmark = True  # faster conv algorithms for fixed input sizes

    # Create any missing output directories.
    for directory in (config.log_dir, config.model_save_dir, config.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Data loader is built from the module-level hparams.
    vcc_loader = get_loader(hparams)

    # Solver for training.
    solver = Solver(vcc_loader, config, hparams)
    solver.train()
def main(config):
    """Build SVHN/MNIST loaders, set up logging and dirs, and train."""
    (svhn_loader, mnist_loader,
     svhn_test_loader, mnist_test_loader) = get_loader(config)

    solver = Solver(config, svhn_loader, mnist_loader)
    cudnn.benchmark = True

    # Create output directories if they do not exist yet.
    for path in (config.model_path, config.sample_path):
        if not os.path.exists(path):
            os.makedirs(path)

    # Log file lives under config.log_path, named after the item cap.
    base = config.log_path
    filename = os.path.join(base, str(config.max_items))
    if not os.path.isdir(base):
        os.mkdir(base)
    logging.basicConfig(filename=filename, level=logging.DEBUG)

    if config.mode == 'train':
        solver.train()
def main():
    """Build the deterministic test-split loader and evaluate with `test`."""
    # NOTE(review): `normalize` is created but never added to `val_transform`
    # below, and the compose also lacks ToTensor(). Presumably the dataset or
    # `test` applies tensor conversion and normalization itself — confirm,
    # otherwise the model receives unnormalized PIL images.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    val_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224)])

    # shuffle=False keeps evaluation order reproducible.
    val_loader = get_loader(opts.img_path,
                            val_transform,
                            vocab,
                            opts.data_path,
                            partition='test',
                            batch_size=opts.batch_size,
                            shuffle=False,
                            num_workers=opts.workers,
                            pin_memory=True)
    print('Validation loader prepared.')

    test(val_loader)
def main(config):
    """Create output dirs, build the PascalVOC loader, and run the solver."""
    cudnn.benchmark = True  # speed up fixed-size training

    # Output directories.
    mkdir(config.log_path)
    mkdir(config.model_save_path)
    mkdir(config.sample_path)
    mkdir(config.result_path)

    data_loader = get_loader(config.data_path,
                             config.image_size,
                             config.crop_size,
                             config.batch_size,
                             transform=True,
                             dataset='PascalVOC2012',
                             mode=config.mode)

    solver = Solver(data_loader, vars(config))
    if config.mode == 'train':
        solver.train()
    elif config.mode == 'test':
        solver.test()
def initialize_for_test(params):
    """Build the test data loader and load the trained encoder/decoder.

    Args:
        params: namespace with encoder_save, decoder_save, num_epochs, device
            and whatever get_loader/Encoder/Decoder need.

    Returns:
        (data_loader, encoder, decoder) with both models in eval mode on
        params.device.
    """
    data_loader = get_loader(params, mode='test')
    encoder_file = os.path.join(params.encoder_save,
                                'epoch-%d.pkl' % params.num_epochs)
    decoder_file = os.path.join(params.decoder_save,
                                'epoch-%d.pkl' % params.num_epochs)
    vocab_size = len(data_loader.dataset.vocab)

    # Initialize the encoder and decoder, and set each to inference mode.
    encoder = Encoder(params)
    decoder = Decoder(params, vocab_size)
    encoder.eval()
    decoder.eval()

    # Load the trained weights.
    # FIX: map_location so checkpoints trained on GPU also load on CPU-only
    # machines (and land directly on the requested device).
    encoder.load_state_dict(
        torch.load(encoder_file, map_location=params.device))
    decoder.load_state_dict(
        torch.load(decoder_file, map_location=params.device))
    encoder.to(params.device)
    decoder.to(params.device)

    return data_loader, encoder, decoder
def main(config):
    """Build the image loader, create output dirs, then train or sample."""
    cudnn.benchmark = True

    data_loader = get_loader(image_path=config.image_path,
                             image_size=config.image_size,
                             batch_size=config.batch_size,
                             num_workers=config.num_workers)
    solver = Solver(config, data_loader)

    # Create directories if they do not exist.
    for path in (config.model_path, config.sample_path):
        if not os.path.exists(path):
            os.makedirs(path)

    # Train, or sample images from a trained model.
    if config.mode == 'train':
        solver.train()
    elif config.mode == 'sample':
        solver.sample()
def main():
    """Walk image pairs from the loader, drawing each and printing whether
    the pair belongs to the same class."""
    transform = transforms.Compose(
        [transforms.Resize(30), transforms.ToTensor()])
    loader = get_loader("./data/images_background", 1, 10, True, transform)

    for i, (images_1, images_2, label) in enumerate(loader):
        print("example %d" % i)
        draw_image(images_1.numpy())
        draw_image(images_2.numpy())
        label = label.numpy()
        # A label of 1 marks the two images as coming from the same class.
        if label == 1:
            print("same")
        else:
            print("different")
        images_1 = images_1.to(device)
        # Pause so each pair can be inspected before the next one.
        time.sleep(3)
        print("\n\n")
def main(config):
    """Create output dirs, build train/test loaders, and train StarGAN-VC."""
    cudnn.benchmark = True  # fixed input sizes -> let cudnn autotune

    # Create any missing output directories.
    for directory in (config.log_dir, config.model_save_dir, config.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Training loader plus a fixed speaker-pair conversion test set.
    train_loader = get_loader(config.train_data_dir, config.batch_size,
                              'train', num_workers=config.num_workers)
    test_loader = TestDataset(config.test_data_dir, config.wav_dir,
                              src_spk='p262', trg_spk='p272')

    # Solver for training and testing StarGAN.
    solver = Solver(train_loader, test_loader, config)
    if config.mode == 'train':
        solver.train()
def extract_features(root, files, transform, batch_size, shuffle, num_workers,
                     model):
    """Run `model` over every image in the loader and collect its features.

    Args:
        root, files, transform, batch_size, shuffle, num_workers: forwarded
            verbatim to get_loader.
        model: feature-extractor network; moved to GPU and put in eval mode.

    Returns:
        (features, imnames): a stacked feature tensor and the matching list
        of image names.
    """
    dataloader = get_loader(root, files, transform, batch_size, shuffle,
                            num_workers)
    model = model.cuda()
    model.eval()

    features = []
    imnames = []
    n_iters = len(dataloader)
    for i, (images, names) in enumerate(dataloader):
        images = Variable(images).cuda()
        feas = model(images).cpu()
        features.append(feas.data)
        imnames.extend(names)
        if (i + 1) % 100 == 0:
            # FIX: Python 2 print statement -> print() function, and fixed
            # the "finsihed" typo in the progress message.
            print('iter [%d/%d] finished.' % (i, n_iters))

    return torch.cat(features, 0), imnames
def main(config):
    """Build NCF data loaders and either train or run inference."""
    cudnn.benchmark = True

    # Create any missing output directories.
    for path in (config.save_path, config.infer_path):
        if not os.path.exists(path):
            os.makedirs(path)

    (num_users, num_items, train_loader, test_loader, infer_loader,
     num_to_user_id, num_to_item_id) = get_loader(
        data_path=config.data_path,
        train_negs=config.train_negs,
        test_negs=config.test_negs,
        batch_size=config.batch_size,
        num_workers=config.num_workers)

    solver = Solver(config, num_users, num_items)
    if config.mode == 'train':
        solver.train(train_loader, test_loader)
    elif config.mode == 'infer':
        solver.infer(infer_loader, num_to_user_id, num_to_item_id)
def main(config):
    """Prepare directories and data, run the solver, and return it."""
    cudnn.benchmark = True  # fast training for fixed-size inputs

    # Output directories.
    mkdir(config.log_path)
    mkdir(config.model_save_path)

    data_loader = get_loader(config.data_path,
                             batch_size=config.batch_size,
                             mode=config.mode)

    solver = Solver(data_loader, vars(config))
    if config.mode == 'train':
        solver.train()
    elif config.mode == 'test':
        solver.test()
    return solver
def model_setup(config):
    """Set up the directories and the data before creating the whole model.

    :param config: (Hyperparams) Dictionary of the hyperparameters as a class
    :return: (nn.Module) The model of the DAGMM
    """
    # Good when the input size doesn't change (bad otherwise).
    cudnn.benchmark = True

    # Create the output directories if they don't exist.
    make_directory(config.model_save_path)
    make_directory(config.fig_save_path)

    # Data loader, split according to the configured train ratio.
    loader = get_loader(config.data_path,
                        batch_size=config.batch_size,
                        train_ratio=config.train_ratio)

    # Create the model.
    return Model(loader, vars(config))
def train(self, config):
    """Train the Inception classifier on CelebA attributes, checkpointing
    whenever the periodic test accuracy improves on the best seen so far.

    Args:
        config: namespace with celeba_image_dir, attr_path, selected_attrs,
            num_workers and dataset (used by both the loader and the loss).
    """
    train_dataset = get_loader(config.celeba_image_dir,
                               config.attr_path,
                               config.selected_attrs,
                               image_size=self.image_size,
                               num_workers=config.num_workers,
                               dataset=config.dataset)
    print('Start Training...')
    start_time = time.time()
    max_acc, epochs = 0, 50  # fixed 50-epoch budget
    for p in range(epochs):
        for i, data in enumerate(train_dataset):
            img, label = data
            img = img.to(self.device)
            label = label.to(self.device)
            batch_pred = self.inc_net(img)
            loss = self.classification_loss(batch_pred, label, config.dataset)
            self.opt.zero_grad()
            loss.backward()
            self.opt.step()
            if i % self.log_step == 0:
                # Every log_step iterations: measure test accuracy and keep
                # only checkpoints that improve on the best accuracy so far.
                et = time.time() - start_time
                et = str(datetime.timedelta(seconds=et))[:-7]  # strip microseconds
                acc = self.test()
                print("Test Accuracy: ", acc)
                if acc > max_acc:
                    path = os.path.join(self.save_incDir,
                                        '{}-{}-incNet.ckpt'.format(p, i))
                    torch.save(self.inc_net.state_dict(), path)
                    max_acc = acc
                log = "Elapsed [{}], Epoch[{}] - Iteration [{}/{}] , loss [{}], max_acc[{}]".format(
                    et, p, i + 1, len(train_dataset), loss.item(), max_acc)
                print(log)
def evaluate(args, device, model, test_dataset, test_dataset_name):
    """Evaluate `model` on `test_dataset`, reporting mean BCE loss and accuracy."""
    tqdm.write(f'evaluating for {test_dataset_name}')
    tqdm.write('test data size: {}'.format(len(test_dataset)))
    # Build data loader
    test_loader = get_loader(test_dataset,
                             args.batch_size,
                             shuffle=False,
                             num_workers=args.num_workers,
                             drop_last=False)
    criterion = nn.BCEWithLogitsLoss()
    with torch.no_grad():
        loss_values = []
        all_predictions = []
        all_targets = []
        for video_ids, frame_ids, images, targets in tqdm(
                test_loader, desc=test_dataset_name):
            images = images.to(device)
            targets = targets.to(device)
            outputs = model(images)
            loss = criterion(outputs, targets)
            loss_values.append(loss.item())
            # Logits > 0 correspond to sigmoid probability > 0.5.
            predictions = outputs > 0.0
            all_predictions.append(predictions)
            all_targets.append(targets)
        val_loss = sum(loss_values) / len(loss_values)
        all_predictions = torch.cat(all_predictions).int()
        all_targets = torch.cat(all_targets).int()
        # NOTE(review): the denominator is the sample count
        # (all_targets.shape[0]) while the numerator sums element-wise
        # matches. If targets are multi-label (N, C), this "accuracy" can
        # exceed 1 — confirm targets are 1-D per sample.
        test_accuracy = (all_predictions == all_targets
                         ).sum().float().item() / all_targets.shape[0]
        tqdm.write('Testing results - Loss: {:.3f}, Acc: {:.3f}'.format(
            val_loss, test_accuracy))
def main(args):
    """Load the vocab, build the caption loader, and print batch shapes."""
    vocab_file = os.path.join(args.root_dir, 'vocab.pkl')
    with open(vocab_file, 'rb') as f:
        vocab = pickle.load(f)

    data_type = args.data_type
    loader = get_loader(args.root_dir,
                        vocab,
                        args.batch_size,
                        data_type,
                        shuffle=True,
                        num_workers=args.num_workers,
                        debug=args.debug)

    print('Iterating the dataset')
    print("Length of data loader: " + str(len(loader)))
    for i, (features, captions, lengths) in enumerate(loader):
        print("Index: " + str(i))
        #print("Features shape: ")
        #print(features.shape)
        print("Captions shape: ")
        print(captions.shape)
        print("Lengths: ")
        print(len(lengths))
def eval(nb_test):
    """Caption up to nb_test Flickr8k images, plotting each with its BLEU score.

    NOTE: the name shadows the builtin `eval`; kept unchanged for callers.
    """
    train_loader, dataset = get_loader(
        "dataset/flickr8k/images/",
        annotation_file="dataset/flickr8k/captions.txt",
        transform=transform,
        num_workers=2)

    loop = tqdm(enumerate(train_loader), total=nb_test, leave=False)

    # Model hyper-parameters must match the saved checkpoint.
    embed_size = 256
    hidden_size = 256
    vocab_size = len(dataset.vocab)
    num_layers = 1
    model = Img2Text(embed_size, hidden_size, vocab_size,
                     num_layers).to(device)
    checkpoint = torch.load("my_checkpoint.pth.tar")
    model.load_state_dict(checkpoint["state_dict"])

    fig = plt.figure(figsize=(10, 10))
    model.eval()
    with torch.no_grad():
        for idx, (imgs, captions) in loop:
            if idx + 1 == nb_test:
                break
            ax = fig.add_subplot(2, 2, idx + 1)
            predicted_str, predicted_int = model.caption_image(
                imgs.to(device), dataset.vocab)
            #[dataset.vocab.itos[idx] for idx in result_caption]
            # Convert reference token ids back to words.
            captions = [
                dataset.vocab.itos[token]
                for token in captions.squeeze(-1).tolist()
            ]
            # [1:-1] drops the first and last tokens (presumably the
            # start/end markers) before scoring.
            score = bleu_score([predicted_str[1:-1]], [captions])
            ax.imshow(imgs.squeeze(0).permute(1, 2, 0))
            text = f"CORRECT:{captions[1:-1]}\nPREDICTED:{predicted_str[1:-1]}\nBleu score:{score}"
            ax.title.set_text(text)
    plt.show()
def test_multi(self, sample_dir, result_dir): """Translate images using StarGAN trained on multiple datasets.""" # Load the trained generator. # self.restore_model(self.test_iters) config = self.config test_loader = get_loader(config.celeba_image_dir, config.attr_path, config.selected_attrs, config.celeba_crop_size, config.image_size, config.batch_size, 'CelebA', config.mode, sample_dir, config.num_workers) with torch.no_grad(): for i, (x_real, c_org) in enumerate(test_loader): # Prepare input images and target domain labels. x_real = x_real.to(self.device) c_rafd_list = self.create_labels(c_org, self.c2_dim, 'RaFD') zero_celeba = torch.zeros(x_real.size(0), self.c_dim).to( self.device) # Zero vector for CelebA. mask_rafd = self.label2onehot(torch.ones( x_real.size(0)), 2).to(self.device) # Mask vector: [0, 1]. # Translate images. x_fake_list = [x_real] for c_rafd in c_rafd_list: c_trg = torch.cat([zero_celeba, c_rafd, mask_rafd], dim=1) x_fake_list.append(self.G(x_real, c_trg)) x_concat = torch.cat(x_fake_list, dim=3) ii = 0 for l in x_fake_list: result_path = os.path.join(result_dir, '{}-images.jpg'.format(ii + 1)) save_image(self.denorm(l.data.cpu()), result_path, nrow=1, padding=0) ii = ii + 1 print('Saved real and fake images into {}...'.format( result_path))
def main(config):
    """Create output dirs, load the speaker list, and train StarGAN-VC."""
    cudnn.benchmark = True  # autotune convolutions for fixed-size inputs

    # Create any missing output directories.
    for directory in (config.log_dir, config.model_save_dir, config.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # The speaker list is mandatory; fail fast if it's missing.
    if not os.path.exists(config.speaker_path):
        raise Exception(f"speaker list {config.speaker_path} does not exist")
    with open(config.speaker_path) as f:
        speakers = json.load(f)
    print(f"load speakers {speakers}", flush=True)

    # Training loader plus a fixed src->trg conversion test set.
    train_loader = get_loader(
        config.train_data_dir,
        config.batch_size,
        config.min_length,
        'train',
        speakers,
        num_workers=config.num_workers,
    )
    test_loader = TestDataset(config.test_data_dir,
                              config.wav_dir,
                              speakers,
                              src_spk=config.test_src_spk,
                              trg_spk=config.test_trg_spk)

    # Solver for training and testing StarGAN.
    solver = Solver(train_loader, test_loader, config)
    if config.mode == 'train':
        solver.train()
def main(config):
    """Seed RNGs, choose the data path, build paired loaders, and run."""
    prepare_dirs_and_logger(config)

    torch.manual_seed(config.random_seed)
    if config.num_gpu > 0:
        torch.cuda.manual_seed(config.random_seed)

    # Pick data path and batch size for the current phase.
    if config.is_train:
        data_path = config.data_path
        batch_size = config.batch_size
    else:
        if config.test_data_path is None:
            data_path = config.data_path
        else:
            data_path = config.test_data_path
        batch_size = config.sample_per_image

    # celebA gets its own loader with attribute-style constraints.
    if config.dataset == 'celebA':
        a_data_loader, b_data_loader = get_celebA_loader(
            data_path, batch_size, config.input_scale_size, config.style_A,
            config.style_B, config.constraint, config.constraint_type,
            config.num_worker, config.skip_pix2pix_processing)
    else:
        a_data_loader, b_data_loader = get_loader(
            data_path, batch_size, config.input_scale_size,
            config.num_worker, config.skip_pix2pix_processing)

    trainer = Trainer(config, a_data_loader, b_data_loader)
    if config.is_train:
        save_config(config)
        trainer.train()
    else:
        if not config.load_path:
            raise Exception(
                "[!] You should specify `load_path` to load a pretrained model"
            )
        trainer.test()
def main():
    """Parse args, prepare load/summary paths, and train EV-FlowNet."""
    args = configs()
    if args.training_instance:
        args.load_path = os.path.join(args.load_path, args.training_instance)
        args.summary_path = os.path.join(args.summary_path,
                                         args.training_instance)
    else:
        # FIX: take the timestamp once — the original called datetime.now()
        # separately for each path, which could straddle a second boundary
        # and give the load and summary directories different names.
        instance = "evflownet_{}".format(
            datetime.now().strftime("%m%d_%H%M%S"))
        args.load_path = os.path.join(args.load_path, instance)
        args.summary_path = os.path.join(args.summary_path, instance)

    if not os.path.exists(args.load_path):
        os.makedirs(args.load_path)
    if not os.path.exists(args.summary_path):
        os.makedirs(args.summary_path)

    # Fix the random seed for reproducibility.
    # Remove this if you are using this code for something else!
    tf.set_random_seed(12345)

    event_img_loader, prev_img_loader, next_img_loader, _, n_ima = get_loader(
        args.data_path, args.batch_size, args.image_width, args.image_height,
        split='train', shuffle=True)
    print("Number of images: {}".format(n_ima))

    trainer = EVFlowNet(args, event_img_loader, prev_img_loader,
                        next_img_loader, n_ima, is_training=True)
    trainer.train()
def evaluate(model, elogger, files, save_result=False):
    """Run the model over each evaluation file and log the average loss.

    When save_result is True, per-batch predictions are written to
    args.result_file.
    """
    model.eval()
    if save_result:
        fs = open('%s' % args.result_file, 'w')

    for input_file in files:
        running_loss = 0.0
        data_iter = data_loader.get_loader(input_file, args.batch_size)
        for idx, (attr, traj) in enumerate(data_iter):
            attr, traj = utils.to_var(attr), utils.to_var(traj)
            pred_dict, loss = model.eval_on_batch(attr, traj, config)
            if save_result:
                write_result(fs, pred_dict, attr)
            running_loss += loss.data.item()
        mean_loss = running_loss / (idx + 1.0)
        print('Evaluate on file {}, loss {}'.format(input_file, mean_loss))
        elogger.log('Evaluate File {}, Loss {}'.format(input_file, mean_loss))

    if save_result:
        fs.close()
def main(config):
    """Build CASIA emotion-conversion loaders and train the StarGAN solver."""
    cudnn.benchmark = True  # faster convolutions for fixed input sizes

    # Training loader for the selected speaker/emotion pair, plus a test set.
    train_loader_casia = get_loader(config.train_data_dir_casia,
                                    config.target_speaker,
                                    config.source_emotion,
                                    config.target_emotion,
                                    config.batch_size,
                                    'train',
                                    num_workers=config.num_workers)
    test_loader = TestDataset(config.test_data_dir,
                              config.src_wav_dir,
                              config.target_speaker,
                              config.source_emotion,
                              config.target_emotion)

    # Solver for training and testing StarGAN.
    solver = Solver(train_loader_casia, test_loader, config, log)
    if config.mode == 'train':
        solver.train()
def main(config):
    """Build the two-stage pose generators and discriminator, then train."""
    if config.gpu > -1:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu)

    # Stage-1 pose-conditioned generator, stage-2 refiner, discriminator.
    generator_one = GeneratorCNN_Pose_UAEAfterResidual_256(
        21, config.z_num, config.repeat_num, config.hidden_num)
    generator_two = UAE_noFC_AfterNoise(6, config.repeat_num - 2,
                                        config.hidden_num)
    discriminator = DCGANDiscriminator_256(use_gpu=config.use_gpu)

    if config.use_gpu:
        generator_one.cuda()
        generator_two.cuda()
        discriminator.cuda()

    # Reconstruction and adversarial criteria.
    L1_criterion = nn.L1Loss()
    BCE_criterion = nn.BCELoss()

    # One Adam optimizer per network.
    gen_train_op1 = optim.Adam(generator_one.parameters(), lr=config.g_lr,
                               betas=(config.beta1, config.beta2))
    gen_train_op2 = optim.Adam(generator_two.parameters(), lr=config.g_lr,
                               betas=(config.beta1, config.beta2))
    dis_train_op1 = optim.Adam(discriminator.parameters(), lr=config.d_lr,
                               betas=(config.beta1, config.beta2))

    pose_loader = data_loader.get_loader(
        os.path.join(config.data_dir, 'DF_img_pose'), config.batch_size)

    train(generator_one, generator_two, discriminator, L1_criterion,
          BCE_criterion, gen_train_op1, gen_train_op2, dis_train_op1,
          pose_loader, config)
def main(config):
    """Prepare output directories, build a test loader, and run StarGAN test."""
    cudnn.benchmark = True  # speed up fixed-size inference

    # Ensure every output directory exists.
    for directory in (config.log_dir, config.model_save_dir,
                      config.sample_dir, config.result_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Test-mode data loader.
    data_loader = get_loader(config.image_dir, config.crop_size,
                             config.image_size, config.batch_size,
                             'test', config.num_workers)

    # Solver runs the test pass directly.
    solver = Solver(data_loader, config)
    solver.test()
def main(args):
    """Train the CNN encoder + RNN decoder captioning models.

    Uses the legacy (pre-0.4) PyTorch API: Variable/volatile via to_var and
    loss.data[0] — kept as-is for compatibility with the installed version.
    """
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader
    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)

    # Build the models
    encoder = EncoderCNN(args.embed_size)
    decoder = DecoderRNN(args.embed_size, args.hidden_size,
                         len(vocab), args.num_layers)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    # The optimizer updates only the decoder plus the encoder's final
    # linear and batch-norm layers.
    params = list(decoder.parameters()) + list(encoder.linear.parameters()) \
        + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):
            # Set mini-batch dataset (volatile=True: no grads through images)
            images = to_var(images, volatile=True)
            captions = to_var(captions)
            # Pack targets to match the decoder's packed output.
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, Backward and Optimize
            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                      % (epoch, args.num_epochs, i, total_step,
                         loss.data[0], np.exp(loss.data[0])))

            # Save the models
            if (i+1) % args.save_step == 0:
                torch.save(decoder.state_dict(),
                           os.path.join(args.model_path,
                                        'decoder-%d-%d.pkl' % (epoch+1, i+1)))
                torch.save(encoder.state_dict(),
                           os.path.join(args.model_path,
                                        'encoder-%d-%d.pkl' % (epoch+1, i+1)))
def main(args):
    """Train the image-captioning encoder/decoder, checkpointing periodically.

    Args:
        args: parsed CLI namespace (model_path, crop_size, vocab_path,
            image_dir, caption_path, batch sizes, learning rate, etc.).
    """
    # Make sure the checkpoint directory exists.
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing, normalization for the pretrained resnet.
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader.
    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)

    # Build the models.
    encoder = EncoderCNN(args.embed_size).to(device)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers).to(device)

    # Loss and optimizer: only the decoder and the encoder's final
    # linear/batch-norm layers are updated.
    criterion = nn.CrossEntropyLoss()
    params = (list(decoder.parameters())
              + list(encoder.linear.parameters())
              + list(encoder.bn.parameters()))
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Training loop.
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for step, (images, captions, lengths) in enumerate(data_loader):
            # Move the mini-batch to the training device and pack the
            # targets to line up with the decoder's packed output.
            images = images.to(device)
            captions = captions.to(device)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, backward and optimize.
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Periodic console logging.
            if step % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                      .format(epoch, args.num_epochs, step, total_step,
                              loss.item(), np.exp(loss.item())))

            # Periodic checkpointing.
            if (step + 1) % args.save_step == 0:
                torch.save(decoder.state_dict(), os.path.join(
                    args.model_path,
                    'decoder-{}-{}.ckpt'.format(epoch + 1, step + 1)))
                torch.save(encoder.state_dict(), os.path.join(
                    args.model_path,
                    'encoder-{}-{}.ckpt'.format(epoch + 1, step + 1)))
# (Optional) TODO #2: Amend the image transform below. transform_train = transforms.Compose([ transforms.Resize(256), # smaller edge of image resized to 256 transforms.RandomRotation(5.0), # Rotate the image randomly transforms.RandomCrop(224), # get 224x224 crop from random location transforms.RandomHorizontalFlip(), # horizontally flip image with probability=0.5 transforms.ColorJitter(0.05, 0.05, 0.05), # Jitter the color a little transforms.ToTensor(), # convert the PIL Image to a tensor transforms.Normalize((0.485, 0.456, 0.406), # normalize image for pre-trained model (0.229, 0.224, 0.225))]) # Build data loader. data_loader = get_loader(transform=transform_train, mode='train', batch_size=batch_size, vocab_threshold=vocab_threshold, vocab_from_file=vocab_from_file, cocoapi_loc=COCOPATH) # The size of the vocabulary. vocab_size = len(data_loader.dataset.vocab) # Initialize the encoder and decoder. encoder = EncoderCNN(embed_size) decoder = DecoderRNN(embed_size, hidden_size, vocab_size) # Move models to GPU if CUDA is available. device = torch.device("cuda" if torch.cuda.is_available() else "cpu") encoder.to(device) decoder.to(device)