def main(imgurl):
    """
    Caption a single image and visualize the attention of the best sequence.

    The word map and the best encoder/decoder weights are read from fixed
    paths under ``input_files/`` and ``output_files/``.

    :param imgurl: image reference, passed unchanged to
        ``caption_image_beam_search`` and ``visualize_att``
    :return: the generated caption as a space-joined string, without the
        first and last token of the decoded sequence (presumably the
        start/end markers -- TODO confirm against the word map)
    """
    # Load word map (word2ix) and build its inverse (ix2word)
    with open('input_files/WORDMAP.json', 'r') as j:
        word_map = json.load(j)
    rev_word_map = {v: k for k, v in word_map.items()}

    # Build the models. This is inference only, so no optimizers are created.
    decoder = DecoderWithAttention(attention_dim=attention_dim,
                                   embed_dim=emb_dim,
                                   decoder_dim=decoder_dim,
                                   vocab_size=len(word_map),
                                   dropout=dropout)
    encoder = Encoder()
    encoder.fine_tune(fine_tune_encoder)

    # map_location remaps the stored tensors onto the active device, so
    # weights that were saved on a GPU can still be restored on a CPU host.
    decoder.load_state_dict(
        torch.load('output_files/BEST_checkpoint_decoder.pth.tar',
                   map_location=device))
    encoder.load_state_dict(
        torch.load('output_files/BEST_checkpoint_encoder.pth.tar',
                   map_location=device))

    decoder = decoder.to(device)
    decoder.eval()
    encoder = encoder.to(device)
    encoder.eval()

    # Encode, decode with attention and beam search
    seq, alphas = caption_image_beam_search(encoder,
                                            decoder,
                                            imgurl,
                                            word_map,
                                            beam_size=5)
    alphas = torch.FloatTensor(alphas)

    # Translate indices back to words; the first and last entries are dropped
    words = [rev_word_map[ind] for ind in seq]
    caption = ' '.join(words[1:-1])

    # Visualize caption and attention of best sequence
    visualize_att(imgurl, seq, alphas, rev_word_map)

    return caption
def fit(t_params, checkpoint=None, m_params=None):
    """
    Train and validate the captioning model, checkpointing after every epoch.

    :param t_params: dict of training settings; the keys read here are
        ``data_name``, ``imgs_path``, ``df_path``, ``vocab``, ``epochs``,
        ``batch_size``, ``workers``, ``encoder_lr``, ``decoder_lr`` and
        ``fine_tune_encoder``
    :param checkpoint: path of a checkpoint to resume from, or ``None`` to
        start from freshly initialised models
    :param m_params: dict of model hyperparameters (``attention_dim``,
        ``embed_dim``, ``decoder_dim``, ``encoder_dim``, ``dropout``);
        required when ``checkpoint`` is ``None``, ignored otherwise
    :raises ValueError: if neither ``checkpoint`` nor ``m_params`` is given
    """
    # info
    data_name = t_params['data_name']
    imgs_path = t_params['imgs_path']
    df_path = t_params['df_path']
    vocab = t_params['vocab']

    start_epoch = 0
    epochs_since_improvement = 0
    best_bleu4 = 0
    epochs = t_params['epochs']
    batch_size = t_params['batch_size']
    workers = t_params['workers']
    encoder_lr = t_params['encoder_lr']
    decoder_lr = t_params['decoder_lr']
    fine_tune_encoder = t_params['fine_tune_encoder']

    # init / load checkpoint
    if checkpoint is None:
        if m_params is None:
            raise ValueError(
                'm_params is required when no checkpoint is given')

        # getting hyperparameters
        attention_dim = m_params['attention_dim']
        embed_dim = m_params['embed_dim']
        decoder_dim = m_params['decoder_dim']
        encoder_dim = m_params['encoder_dim']
        dropout = m_params['dropout']

        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=embed_dim,
                                       decoder_dim=decoder_dim,
                                       encoder_dim=encoder_dim,
                                       vocab_size=len(vocab),
                                       dropout=dropout)
        decoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, decoder.parameters()),
            lr=decoder_lr)

        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        # The encoder only gets an optimizer when it is being fine-tuned
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None
    else:
        # load checkpoint
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        # Fine-tuning was switched on after the checkpoint was written
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(
                params=filter(lambda p: p.requires_grad,
                              encoder.parameters()),
                lr=encoder_lr)

    # move to gpu, if available
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    # loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # dataloaders
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    print('Loading Data')
    train_loader, val_loader = get_loaders(batch_size, imgs_path, df_path,
                                           transform, vocab, workers)

    print('_' * 50)
    print('-' * 20, 'Fitting', '-' * 20)
    for epoch in range(start_epoch, epochs):
        # decay lr if there is no improvement for 8 consecutive epochs and
        # terminate after 20
        if epochs_since_improvement == 20:
            print('No improvement for 20 consecutive epochs, terminating...')
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        print('_' * 50)
        print('-' * 20, 'Training', '-' * 20)
        # one epoch of training
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # one epoch of validation
        print('-' * 20, 'Validation', '-' * 20)
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        # check for improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            # No trailing comma inside the braces: that would format a
            # one-element tuple and print "(3,)" instead of "3".
            print(
                f'\nEpochs since last improvement: {epochs_since_improvement}'
            )
        else:
            # reset
            epochs_since_improvement = 0

        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
def main():
    """
    Train the attention decoder and validate it after every epoch.

    Reads its configuration from module-level names and updates the
    bookkeeping globals (best BLEU-4, stall counter, start epoch) as it runs.
    A checkpoint is written at the end of each epoch.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, data_name, word_map

    # Word map
    word_map_path = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_path, 'r') as handle:
        word_map = json.load(handle)

    # Resume from a checkpoint when one is configured, otherwise start fresh
    if checkpoint is not None:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
    else:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        trainable = filter(lambda p: p.requires_grad, decoder.parameters())
        decoder_optimizer = torch.optim.Adamax(params=trainable)

    # Model and both losses live on the selected device
    decoder = decoder.to(device)
    criterion_ce = nn.CrossEntropyLoss().to(device)
    criterion_dis = nn.MultiLabelMarginLoss().to(device)

    def build_loader(split):
        # Both splits use identical loader settings
        return torch.utils.data.DataLoader(
            CaptionDataset(data_folder, data_name, split),
            batch_size=batch_size,
            shuffle=True,
            num_workers=workers,
            pin_memory=True)

    train_loader = build_loader('TRAIN')
    val_loader = build_loader('VAL')

    for epoch in range(start_epoch, epochs):
        # Stop after 20 stalled epochs; adjust the learning rate on every
        # multiple of 8 stalled epochs
        if epochs_since_improvement == 20:
            break
        stalled_for_eight = (epochs_since_improvement > 0
                             and epochs_since_improvement % 8 == 0)
        if stalled_for_eight:
            adjust_learning_rate(decoder_optimizer, 0.8)

        # Training pass
        train(train_loader=train_loader,
              decoder=decoder,
              criterion_ce=criterion_ce,
              criterion_dis=criterion_dis,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # Validation pass
        recent_bleu4 = validate(val_loader=val_loader,
                                decoder=decoder,
                                criterion_ce=criterion_ce,
                                criterion_dis=criterion_dis)

        # Track the best score and the number of stalled epochs
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if is_best:
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))

        # Persist the state of this epoch
        save_checkpoint(data_name, epoch, epochs_since_improvement, decoder,
                        decoder_optimizer, recent_bleu4, is_best)
def main():
    """
    Train the encoder/decoder pair and validate it after every epoch.

    Configuration comes from module-level names. The names listed in the
    ``global`` statement are rebound here, so they must be declared global
    for the assignments to reach module scope.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    # Word map
    word_map_path = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_path, 'r') as handle:
        word_map = json.load(handle)

    def adam_for(model, learning_rate):
        # filter() keeps only the parameters that still require gradients
        trainable = filter(lambda p: p.requires_grad, model.parameters())
        return torch.optim.Adam(params=trainable, lr=learning_rate)

    # Resume from a checkpoint when one is configured, otherwise start fresh
    if checkpoint is not None:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        # Fine-tuning requested but the checkpoint carries no encoder optimizer
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = adam_for(encoder, encoder_lr)
    else:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        decoder_optimizer = adam_for(decoder, decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        if fine_tune_encoder:
            encoder_optimizer = adam_for(encoder, encoder_lr)
        else:
            encoder_optimizer = None

    # Models and loss live on the selected device
    decoder = decoder.to(device)
    encoder = encoder.to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    # Data loaders; both splits share the same settings
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    def build_loader(split):
        dataset = CaptionDataset(data_folder,
                                 data_name,
                                 split,
                                 transform=transforms.Compose([normalize]))
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=workers,
                                           pin_memory=True)

    train_loader = build_loader('TRAIN')
    val_loader = build_loader('VAL')

    for epoch in range(start_epoch, epochs):
        # Stop once the BLEU score has not improved for 20 epochs
        if epochs_since_improvement == 20:
            break
        # Adjust the learning rates on every multiple of 8 stalled epochs
        stalled_for_eight = (epochs_since_improvement > 0
                             and epochs_since_improvement % 8 == 0)
        if stalled_for_eight:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # Training pass
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # Validation pass
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        # Track the best score and the number of stalled epochs
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if is_best:
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement, ))

        # Persist the state of this epoch
        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
def main():
    """
    Train and validate the captioning model on the FastText caption datasets.

    Configuration comes from module-level names; the bookkeeping globals are
    updated in place. A checkpoint is written and a timestamp is printed at
    the end of every epoch.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    # Word map
    word_map_path = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_path, 'r') as handle:
        word_map = json.load(handle)

    def adam_for(model, learning_rate):
        # Optimize only the parameters that still require gradients
        trainable = filter(lambda p: p.requires_grad, model.parameters())
        return torch.optim.Adam(params=trainable, lr=learning_rate)

    # Resume from a checkpoint when one is configured, otherwise start fresh
    if checkpoint is not None:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        # Fine-tuning requested but the checkpoint carries no encoder optimizer
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = adam_for(encoder, encoder_lr)
    else:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        decoder_optimizer = adam_for(decoder, decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        if fine_tune_encoder:
            encoder_optimizer = adam_for(encoder, encoder_lr)
        else:
            encoder_optimizer = None

    # Models and loss live on the selected device
    decoder = decoder.to(device)
    encoder = encoder.to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    # COCO instance annotations, handed to both datasets
    annotations_dir = os.path.join('dataset', 'annotations')
    train_annotations = COCO(
        os.path.join(annotations_dir, 'instances_train2014.json'))
    val_annotations = COCO(
        os.path.join(annotations_dir, 'instances_val2014.json'))

    # Data loaders; the two splits use different dataset classes
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    def build_loader(dataset_cls, split):
        dataset = dataset_cls(data_folder,
                              data_name,
                              split,
                              transform=transforms.Compose([normalize]),
                              train_annotations=train_annotations,
                              val_annotations=val_annotations)
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=workers,
                                           pin_memory=True,
                                           collate_fn=my_collate)

    train_loader = build_loader(CaptionDatasetFastTextWithReplacementCV,
                                'TRAIN')
    val_loader = build_loader(CaptionDatasetFastText, 'VAL')

    for epoch in range(start_epoch, epochs):
        # Stop after 5 stalled epochs.
        # NOTE(review): an earlier comment here said "terminate after 20".
        # With the limit at 5, the adjustment at multiples of 8 below cannot
        # trigger in a fresh run -- confirm which threshold is intended.
        if epochs_since_improvement == 5:
            break
        stalled_for_eight = (epochs_since_improvement > 0
                             and epochs_since_improvement % 8 == 0)
        if stalled_for_eight:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # Training pass
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # Validation pass
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        # Track the best score and the number of stalled epochs
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if is_best:
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement, ))

        # Persist the state of this epoch
        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)

        # Timestamp marking the end of the epoch (dd/mm/YY H:M:S)
        now = datetime.now()
        print("now =", now)
        dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
        print("date and time = ", dt_string)
def main():
    """
    Train and validate the captioning model, optionally through Apex amp.

    Configuration comes from module-level names; the bookkeeping globals are
    updated in place. When ``use_amp`` is set, each model that has an
    optimizer is passed through ``amp.initialize`` together with it.
    """
    global best_bleu4, use_amp, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    # Word map
    word_map_path = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_path, 'r') as handle:
        word_map = json.load(handle)

    def adam_for(model, learning_rate):
        # Optimize only the parameters that still require gradients
        trainable = filter(lambda p: p.requires_grad, model.parameters())
        return torch.optim.Adam(params=trainable, lr=learning_rate)

    def with_amp(model, optimizer):
        # Shared amp settings for encoder and decoder
        return amp.initialize(model,
                              optimizer,
                              opt_level="O2",
                              keep_batchnorm_fp32=True,
                              loss_scale="dynamic")

    # Resume from a checkpoint when one is configured, otherwise start fresh
    if checkpoint is not None:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        # Fine-tuning requested but the checkpoint carries no encoder optimizer
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = adam_for(encoder, encoder_lr)
    else:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        decoder_optimizer = adam_for(decoder, decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        if fine_tune_encoder:
            encoder_optimizer = adam_for(encoder, encoder_lr)
        else:
            encoder_optimizer = None

    # Decoder to the device, then mixed precision if enabled
    decoder = decoder.to(device)
    if use_amp:
        decoder, decoder_optimizer = with_amp(decoder, decoder_optimizer)

    # Encoder to the device; amp is only applied when it has an optimizer
    encoder = encoder.to(device)
    if encoder_optimizer:
        if use_amp:
            encoder, encoder_optimizer = with_amp(encoder, encoder_optimizer)
    else:
        print("Encoder is not being optimized")

    criterion = nn.CrossEntropyLoss().to(device)

    # Data loaders; both splits share the same settings
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    def build_loader(split):
        dataset = CaptionDataset(data_folder,
                                 data_name,
                                 split,
                                 transform=transforms.Compose([normalize]))
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=workers,
                                           pin_memory=True)

    train_loader = build_loader('TRAIN')
    val_loader = build_loader('VAL')

    for epoch in range(start_epoch, epochs):
        # Stop after 20 stalled epochs; adjust the learning rates on every
        # multiple of 8 stalled epochs
        if epochs_since_improvement == 20:
            break
        stalled_for_eight = (epochs_since_improvement > 0
                             and epochs_since_improvement % 8 == 0)
        if stalled_for_eight:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # Training pass
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # Validation pass
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        # Track the best score and the number of stalled epochs
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if is_best:
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))

        # Persist the state of this epoch
        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
# Read word map word_map_file = os.path.join('/scratch/scratch2/adsue/caption_dataset', 'WORDMAP_' + data_name + '.json') with open(word_map_file, 'r') as j: word_map = json.load(j) decoder = DecoderWithAttention(attention_dim=attention_dim, embed_dim=emb_dim, decoder_dim=decoder_dim, vocab_size=len(word_map), dropout=dropout) decoder.load_state_dict( torch.load('/scratch/scratch2/adsue/pretrained/decoder_dict.pkl')) decoder = decoder.to(device) decoder.eval() encoder = Encoder() encoder.load_state_dict( torch.load('/scratch/scratch2/adsue/pretrained/encoder_dict.pkl')) encoder = encoder.to(device) encoder.eval() ########################################################################################################################## imsize = 256 image_transform = transforms.Compose( [transforms.Scale(int(imsize * 76 / 64)), transforms.RandomCrop(imsize)]) norm = transforms.Compose([
def _load_glove_embeddings(word_map, glove_path, emb_dim):
    """Build a (len(word_map), emb_dim) float tensor of GloVe vectors.

    Row ``word_map[word]`` holds the GloVe vector of ``word``; words missing
    from GloVe get a random normal vector (scale 0.6).
    """
    vectors = bcolz.open(f'{glove_path}/6B.300.dat')[:]
    # NOTE: pickle.load can execute arbitrary code; only point glove_path at
    # cache files you created yourself.
    with open(f'{glove_path}/6B.300_words.pkl', 'rb') as words_file:
        words = pickle.load(words_file)
    with open(f'{glove_path}/6B.300_idx.pkl', 'rb') as index_file:
        word2idx = pickle.load(index_file)
    glove = {w: vectors[word2idx[w]] for w in words}

    weights_matrix = np.zeros((len(word_map), emb_dim))
    # Index rows by the word's id from the word map, not by iteration order:
    # the embedding layer looks rows up by that id, so filling by enumeration
    # position misaligns the vectors whenever the two differ.
    for word, index in word_map.items():
        try:
            weights_matrix[index] = glove[word]
        except KeyError:
            weights_matrix[index] = np.random.normal(scale=0.6,
                                                     size=(emb_dim, ))
    return torch.FloatTensor(weights_matrix)


def main():
    """
    Training and validation with GloVe-initialised decoder embeddings.

    Configuration comes from module-level names; the bookkeeping globals
    (and ``rev_word_map``) are updated in place. A checkpoint is written at
    the end of every epoch.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map, glove_path, emb_dim, rev_word_map

    # Read word map
    word_map_file = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_file, 'r') as j:
        word_map = json.load(j)

    rev_word_map = {v: k for k, v in word_map.items()}

    # Get GloVe vectors aligned with the word map
    pretrained_embedding = _load_glove_embeddings(word_map, glove_path,
                                                  emb_dim)

    # Initialize / load checkpoint
    if checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        # pretrained_embedding has dimensions (len(word_map), emb_dim)
        decoder.load_pretrained_embeddings(pretrained_embedding)
        decoder.fine_tune_embeddings(True)  # or False
        decoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, decoder.parameters()),
            lr=decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        # Fine-tuning requested but the checkpoint carries no encoder optimizer
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(
                params=filter(lambda p: p.requires_grad,
                              encoder.parameters()),
                lr=encoder_lr)

    # Move to GPU, if available
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder,
                       data_name,
                       'TRAIN',
                       transform=transforms.Compose([normalize])),
        batch_size=batch_size,
        shuffle=True,
        num_workers=workers,
        pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder,
                       data_name,
                       'VAL',
                       transform=transforms.Compose([normalize])),
        batch_size=batch_size,
        shuffle=True,
        num_workers=workers,
        pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, epochs):

        # Decay learning rate if there is no improvement for 8 consecutive
        # epochs, and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # NOTE(review): the training call is disabled, so this loop only
        # validates and checkpoints. Kept as found; re-enable to train.
        # train(train_loader=train_loader,
        #       encoder=encoder,
        #       decoder=decoder,
        #       criterion=criterion,
        #       encoder_optimizer=encoder_optimizer,
        #       decoder_optimizer=decoder_optimizer,
        #       epoch=epoch)

        # One epoch's validation
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        # Check if there was an improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
def main():
    """
    Train and validate the captioning model, logging validation metrics.

    Configuration comes from module-level names; the bookkeeping globals are
    updated in place. Validation loss, accuracy and BLEU-4 are written to the
    module-level ``writer`` every epoch, and a checkpoint named from
    ``ckpt_name`` is saved.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    # Word map
    word_map_path = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_path, 'r') as handle:
        word_map = json.load(handle)

    def adam_for(model, learning_rate):
        # Optimize only the parameters that still require gradients
        trainable = filter(lambda p: p.requires_grad, model.parameters())
        return torch.optim.Adam(params=trainable, lr=learning_rate)

    # Resume from a checkpoint when one is configured, otherwise start fresh
    if checkpoint is not None:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        # Fine-tuning requested but the checkpoint carries no encoder optimizer
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = adam_for(encoder, encoder_lr)
    else:
        # Fixed embedding size used with the pretrained embeddings; remove
        # when not using a pretrained model
        embedding_size = 100
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=embedding_size,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        # The embedding matrix is built by the decoder from the word map
        embedding_matrix = decoder.create_pretrained_embedding_matrix(word_map)
        decoder.load_pretrained_embeddings(embedding_matrix)
        decoder.fine_tune_embeddings(True)
        decoder_optimizer = adam_for(decoder, decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        if fine_tune_encoder:
            encoder_optimizer = adam_for(encoder, encoder_lr)
        else:
            encoder_optimizer = None

    # Models and loss live on the selected device
    decoder = decoder.to(device)
    encoder = encoder.to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    # Data loaders; both splits share the same settings
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    def build_loader(split):
        dataset = CaptionDataset(data_folder,
                                 data_name,
                                 split,
                                 transform=transforms.Compose([normalize]))
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=workers,
                                           pin_memory=True)

    train_loader = build_loader('TRAIN')
    val_loader = build_loader('VAL')

    for epoch in range(start_epoch, epochs):
        # Stop after 20 stalled epochs; adjust the learning rates on every
        # multiple of 8 stalled epochs
        if epochs_since_improvement == 20:
            break
        stalled_for_eight = (epochs_since_improvement > 0
                             and epochs_since_improvement % 8 == 0)
        if stalled_for_eight:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # Training pass
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # Validation pass returns BLEU-4 plus average loss and accuracy
        recent_bleu4, val_loss_avg, val_accu_avg = validate(
            val_loader=val_loader,
            encoder=encoder,
            decoder=decoder,
            criterion=criterion)

        # Tensorboard scalars
        for tag, value in (('validation_loss', val_loss_avg),
                           ('validation_accuracy', val_accu_avg),
                           ('validation_bleu4', recent_bleu4)):
            writer.add_scalar(tag, value, epoch)

        # Track the best score and the number of stalled epochs
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if is_best:
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))

        # Persist the state of this epoch under a metric-stamped name
        checkpoint_label = ckpt_name.format(epoch,
                                            bleu=recent_bleu4,
                                            loss=val_loss_avg,
                                            acc=val_accu_avg)
        print("Saving model to file", checkpoint_label)
        save_checkpoint(checkpoint_label, epoch, epochs_since_improvement,
                        encoder, decoder, encoder_optimizer,
                        decoder_optimizer, recent_bleu4, is_best)

    # Close the tensorboard writer once training is over
    writer.close()
def main(args):
    """
    Train and validate the captioning model for the dataset named in ``args``.

    :param args: object providing ``dataset`` (used to derive the data folder
        and the base name of the data files) and ``adaptive`` (selects the
        adaptive encoder and is forwarded to the decoder as ``adaptive_att``)
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    # Folder with data files saved by create_input_files.py, and the base
    # name shared by those files
    data_folder = '{:s}_folder'.format(args.dataset)
    data_name = '{:s}_5_cap_per_img_5_min_word_freq'.format(args.dataset)

    # Word map
    word_map_path = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_path, 'r') as handle:
        word_map = json.load(handle)

    def adam_for(model, learning_rate):
        # Optimize only the parameters that still require gradients
        trainable = filter(lambda p: p.requires_grad, model.parameters())
        return torch.optim.Adam(params=trainable, lr=learning_rate)

    # Resume from a checkpoint when one is configured, otherwise start fresh
    if checkpoint is not None:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        # Fine-tuning requested but the checkpoint carries no encoder optimizer
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = adam_for(encoder, encoder_lr)
    else:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout,
                                       adaptive_att=args.adaptive)
        decoder_optimizer = adam_for(decoder, decoder_lr)
        encoder = (Adaptive_Encoder(encoded_image_size=14,
                                    embed_dim=emb_dim,
                                    decoder_dim=decoder_dim)
                   if args.adaptive else Encoder())
        encoder.fine_tune(fine_tune_encoder)
        if fine_tune_encoder:
            encoder_optimizer = adam_for(encoder, encoder_lr)
        else:
            encoder_optimizer = None

    # Models and loss live on the selected device
    decoder = decoder.to(device)
    encoder = encoder.to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    # Data loaders; both splits share the same settings
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    def build_loader(split):
        dataset = CaptionDataset(data_folder,
                                 data_name,
                                 split,
                                 transform=transforms.Compose([normalize]))
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           pin_memory=False)

    train_loader = build_loader('TRAIN')
    val_loader = build_loader('VAL')

    for epoch in range(start_epoch, epochs):
        # Stop after 20 stalled epochs; adjust the learning rates on every
        # multiple of 8 stalled epochs
        if epochs_since_improvement == 20:
            break
        stalled_for_eight = (epochs_since_improvement > 0
                             and epochs_since_improvement % 8 == 0)
        if stalled_for_eight:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # Training pass
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # Validation pass
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        # Track the best score and the number of stalled epochs
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if is_best:
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))

        # Persist the state of this epoch
        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
def main():
    """
    Training and validation.

    Loads the token and role vocabularies, builds the encoder/decoder (or
    restores them from the global ``checkpoint``), creates the TRAIN loader and
    trains for the configured number of epochs. No validation, early stopping
    or checkpoint saving is performed in this function.
    """
    global epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map, role_map

    def trainable(module):
        # Optimizers only receive parameters that require gradients
        return filter(lambda p: p.requires_grad, module.parameters())

    # Token vocabulary
    word_map_path = os.path.join(data_folder, 'token2id' + '.json')
    with open(word_map_path, 'r') as handle:
        word_map = json.load(handle)

    # Role vocabulary
    role_map_path = os.path.join(data_folder, 'roles2id' + '.json')
    with open(role_map_path, 'r') as handle:
        role_map = json.load(handle)

    if checkpoint is not None:
        # Resume: models and optimizers are stored as whole objects in the checkpoint
        # (the stored 'bleu-4' value is deliberately not restored here)
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder is True and encoder_optimizer is None:
            # Fine-tuning was switched on after the checkpoint was written
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(params=trainable(encoder), lr=encoder_lr)
    else:
        # Fresh start
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       role_vocab_size=len(role_map),
                                       role_embed_dim=role_dim,
                                       dropout=dropout)
        decoder_optimizer = torch.optim.Adam(params=trainable(decoder), lr=decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        if fine_tune_encoder:
            encoder_optimizer = torch.optim.Adam(params=trainable(encoder), lr=encoder_lr)
        else:
            encoder_optimizer = None

    # Move to GPU, if available
    decoder = decoder.to(device)
    encoder = encoder.to(device)
    # Multi-GPU wrapping is currently disabled:
    # encoder = nn.DataParallel(encoder, device_ids=[0, 1])
    # decoder = nn.DataParallel(decoder, device_ids=[0, 1])

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = FrameDataset(data_folder, 'TRAIN',
                                 transform=transforms.Compose([normalize]))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=workers,
                                               pin_memory=True)
    # Validation loader is currently disabled:
    # val_loader = torch.utils.data.DataLoader(
    #     FrameDataset(data_folder, 'VAL', transform=transforms.Compose([normalize])),
    #     batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True)

    for epoch in range(start_epoch, epochs):
        # TODO: decay learning rate somehow

        # One epoch's training
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # NOTE(review): only the banner is printed; no validation call follows it
        print('start validation..')
def main():
    """
    Training and validation.

    Decoder-only variant: reads the word map, builds the decoder (or restores
    it from the global ``checkpoint``), then alternates training and validation.
    The score returned by ``validate`` is tracked as the best CIDEr value and
    decides the "best" flag passed to ``save_checkpoint``. Training stops after
    200 consecutive epochs without improvement.
    """
    global best_cidr, epochs_since_improvement, checkpoint, start_epoch, data_name, word_map

    def trainable(module):
        # Optimizers only receive parameters that require gradients
        return filter(lambda p: p.requires_grad, module.parameters())

    def make_loader(split):
        # Both splits share batch size, shuffling and worker settings
        return torch.utils.data.DataLoader(CaptionDataset(data_folder, data_name, split),
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=workers,
                                           pin_memory=True)

    # Word map (word -> index) and its inverse (index -> word)
    with open(word_map_file, 'r') as handle:
        word_map = json.load(handle)
    rev_word_map = {index: word for word, index in word_map.items()}

    if checkpoint is not None:
        # Resume: the decoder object comes from the checkpoint, but the best score
        # is reset and a fresh Adam optimizer with a fixed learning rate is created
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_cidr = 0
        decoder = checkpoint['decoder']
        decoder_optimizer = torch.optim.Adam(params=trainable(decoder), lr=5 * 1e-5)
    else:
        # Fresh start: Adamax with its default learning rate
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        decoder_optimizer = torch.optim.Adamax(params=trainable(decoder))

    # Move to GPU, if available
    decoder = decoder.to(device)

    # Custom dataloaders
    train_loader = make_loader('TRAIN')
    val_loader = make_loader('VAL')

    for epoch in range(start_epoch, epochs):
        # Terminate after 200 consecutive epochs without improvement
        # (learning-rate decay is disabled in this variant)
        if epochs_since_improvement == 200:
            break

        # One epoch's training
        train(train_loader=train_loader,
              decoder=decoder,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch,
              rev_word_map=rev_word_map)

        # One epoch's validation
        recent_cidr = validate(val_loader=val_loader,
                               decoder=decoder,
                               rev_word_map=rev_word_map)

        # Track improvement
        is_best = recent_cidr > best_cidr
        best_cidr = max(recent_cidr, best_cidr)
        if is_best:
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))

        # Save checkpoint
        # NOTE(review): the epoch is always stored as 0, so a resumed run restarts
        # at epoch 1 - confirm this is intended
        save_checkpoint(data_name, 0, epochs_since_improvement, decoder,
                        decoder_optimizer, recent_cidr, is_best)
def main():
    """
    Training and validation.

    CVAE variant: trains a caption decoder together with a VAE encoder/decoder
    pair. The global ``checkpoint`` is overridden with a hard-coded file name,
    so the function always resumes from that checkpoint; the fresh-start branch
    is kept for when the override is removed.

    The score returned by ``validate`` drives learning-rate decay of the
    caption decoder, early stopping (20 epochs without improvement) and the
    "best" flag passed to ``save_checkpoint``.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    # Hard-coded override: always resume from this checkpoint file
    checkpoint = 'BEST_checkpoint_coco_5_cap_per_img_5_min_word_freq.pth.tar'

    # Read word map
    word_map_file = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_file, 'r') as j:
        word_map = json.load(j)

    # Initialize / load checkpoint
    if checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        decoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, decoder.parameters()),
            lr=decoder_lr)

        # CVAE: each module gets an optimizer over ITS OWN parameters
        # (previously both VAE optimizers were built over decoder.parameters())
        encoderVae = EncodeVAE()
        encoderVae_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoderVae.parameters()),
            lr=decoder_lr)
        decoderVae = DecodeVAE()
        decoderVae_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, decoderVae.parameters()),
            lr=decoder_lr)
    else:
        print('load ' + checkpoint)
        # Models and optimizers are stored as whole objects in the checkpoint
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoderVae = checkpoint['encoderVae']
        decoderVae = checkpoint['decoderVae']
        encoderVae_optimizer = checkpoint['encoderVae_optimizer']
        decoderVae_optimizer = checkpoint['decoderVae_optimizer']

    # Move to GPU, if available
    decoder = decoder.to(device)
    encoderVae = encoderVae.to(device)
    decoderVae = decoderVae.to(device)

    # Loss functions
    criterion = nn.CrossEntropyLoss().to(device)
    criterion_VAE = nn.BCELoss().to(device)

    # Custom dataloaders (num_workers is intentionally left at its default)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder, data_name, 'TRAIN',
                       transform=transforms.Compose([normalize])),
        batch_size=batch_size, shuffle=True, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder, data_name, 'VAL',
                       transform=transforms.Compose([normalize])),
        batch_size=batch_size, shuffle=True, pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, epochs):

        # Terminate after 20 consecutive epochs without improvement; decay the
        # learning rate every 8th consecutive epoch without improvement
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                # NOTE(review): this function never defines `encoder_optimizer`;
                # unless a module-level one exists this raises NameError. Decide
                # whether the VAE optimizers should be decayed here instead.
                adjust_learning_rate(encoder_optimizer, 0.8)

        # One epoch's training
        train(train_loader=train_loader,
              decoder=decoder,
              encoderVae=encoderVae,
              decoderVae=decoderVae,
              criterion=criterion,
              criterion_VAE=criterion_VAE,
              decoder_optimizer=decoder_optimizer,
              encoderVae_optimizer=encoderVae_optimizer,
              decoderVae_optimizer=decoderVae_optimizer,
              epoch=epoch)

        # One epoch's validation
        recent_bleu4 = validate(val_loader=val_loader,
                                decoder=decoder,
                                criterion=criterion)

        # Check if there was an improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(data_name, epoch, epochs_since_improvement, decoder,
                        encoderVae, decoderVae, decoder_optimizer,
                        encoderVae_optimizer, decoderVae_optimizer,
                        recent_bleu4, is_best)
def main():
    """
    Training and validation of an ensemble of models.

    Trains ``args.num_models`` models one after another. Model ``n`` is saved
    under ``<args.model>/model_<n>``. Only the first model may resume from the
    global ``checkpoint``; afterwards the training state (best score, start
    epoch, checkpoint, encoder fine-tuning) is reset so every following model
    starts from scratch.

    The BLEU-4 value returned by ``validate`` drives learning-rate decay, early
    stopping (50 epochs without improvement) and the "best" flag passed to
    ``save_checkpoint_with_dir``.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map, lowest_loss_val

    # Read word map
    word_map_file = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_file, 'r') as j:
        word_map = json.load(j)

    # Train N models and save them to each directory
    for n in range(1, args.num_models + 1):

        # Directory where the model will be saved; an existing directory is fine,
        # any other failure (e.g. permissions) now surfaces instead of being swallowed
        model_out = os.path.join(args.model, "model_{}".format(n))
        os.makedirs(model_out, exist_ok=True)

        # Initialize / load checkpoint
        if checkpoint is None:
            decoder = DecoderWithAttention(attention_dim=attention_dim,
                                           embed_dim=emb_dim,
                                           decoder_dim=decoder_dim,
                                           vocab_size=len(word_map),
                                           dropout=dropout)
            decoder_optimizer = torch.optim.Adam(
                params=filter(lambda p: p.requires_grad, decoder.parameters()),
                lr=decoder_lr)
            encoder = Encoder()
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(
                params=filter(lambda p: p.requires_grad, encoder.parameters()),
                lr=encoder_lr) if fine_tune_encoder else None
        else:
            # Models and optimizers are stored as whole objects in the checkpoint
            checkpoint = torch.load(checkpoint)
            start_epoch = checkpoint['epoch'] + 1
            epochs_since_improvement = checkpoint['epochs_since_improvement']
            best_bleu4 = checkpoint['bleu-4']
            decoder = checkpoint['decoder']
            decoder_optimizer = checkpoint['decoder_optimizer']
            encoder = checkpoint['encoder']
            encoder_optimizer = checkpoint['encoder_optimizer']
            if fine_tune_encoder is True and encoder_optimizer is None:
                # Fine-tuning was switched on after the checkpoint was written
                encoder.fine_tune(fine_tune_encoder)
                encoder_optimizer = torch.optim.Adam(
                    params=filter(lambda p: p.requires_grad, encoder.parameters()),
                    lr=encoder_lr)

        # Move to GPU, if available
        decoder = decoder.to(device)
        encoder = encoder.to(device)

        # Loss function
        criterion = nn.CrossEntropyLoss().to(device)

        # Custom dataloaders
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        train_loader = torch.utils.data.DataLoader(
            CaptionDataset(data_folder, data_name, 'TRAIN',
                           transform=transforms.Compose([normalize])),
            batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True)
        val_loader = torch.utils.data.DataLoader(
            CaptionDataset(data_folder, data_name, 'VAL',
                           transform=transforms.Compose([normalize])),
            batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True)

        # Epochs
        for epoch in range(start_epoch, epochs):

            # Terminate after 50 consecutive epochs without improvement; decay the
            # learning rate every 8th consecutive epoch without improvement
            if epochs_since_improvement == 50:
                break
            if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
                adjust_learning_rate(decoder_optimizer, 0.8)
                if fine_tune_encoder:
                    adjust_learning_rate(encoder_optimizer, 0.8)

            # One epoch's training
            train(train_loader=train_loader,
                  encoder=encoder,
                  decoder=decoder,
                  criterion=criterion,
                  encoder_optimizer=encoder_optimizer,
                  decoder_optimizer=decoder_optimizer,
                  epoch=epoch)

            # One epoch's validation
            recent_loss, recent_bleu4 = validate(val_loader=val_loader,
                                                 encoder=encoder,
                                                 decoder=decoder,
                                                 criterion=criterion)

            # Check if there was an improvement using bleu
            is_best = recent_bleu4 > best_bleu4
            best_bleu4 = max(recent_bleu4, best_bleu4)

            # Alternative (disabled): select the best model by validation loss
            # is_best = recent_loss < lowest_loss_val
            # lowest_loss_val = min(recent_loss, lowest_loss_val)

            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement, ))
            else:
                epochs_since_improvement = 0

            # Save checkpoint into this model's directory
            save_checkpoint_with_dir(model_out, data_name, epoch,
                                     epochs_since_improvement, encoder, decoder,
                                     encoder_optimizer, decoder_optimizer,
                                     recent_bleu4, is_best)

        # Delete encoder & decoder objects and release cached GPU memory
        del decoder
        del encoder
        torch.cuda.empty_cache()

        # Reset the training state for a new round of training. `checkpoint` must
        # be cleared (it was previously misspelled `check_point`), otherwise the
        # next model would call torch.load() on the already-loaded dict.
        epochs_since_improvement = 0
        best_bleu4, start_epoch = 0, 0
        checkpoint = None
        fine_tune_encoder = False
def main():
    """
    Training and validation.

    Reads the word map, optionally builds a pretrained-embedding matrix from a
    text embedding file, builds the encoder/decoder (or restores them from the
    global ``checkpoint``) and runs the epoch loop. The score returned by
    ``validate`` is logged to TensorBoard each epoch and drives learning-rate
    decay, early stopping (15 epochs without improvement) and the "best" flag
    passed to ``save_checkpoint``.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    # Read word map
    word_map_file = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_file, 'r') as j:
        word_map = json.load(j)

    # Load pretrained embeddings and align them with the word map
    if reload_pretrained_embed:
        embeddings_index = {}
        # Context manager guarantees the file is closed even if parsing fails
        with open(pretrained_embeddings_file, encoding="utf8") as fid:
            for line in fid:
                values = line.split()
                embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')

        pretrained_embeddings = torch.zeros((len(word_map) + 1, emb_dim))
        for word, idx in word_map.items():
            embed_vector = embeddings_index.get(word)
            if embed_vector is not None:
                pretrained_embeddings[idx] = torch.from_numpy(embed_vector)
            else:
                # Words missing from the embedding file get a random vector in [-1, 1)
                pretrained_embeddings[idx] = torch.from_numpy(
                    np.random.uniform(-1, 1, emb_dim))
        # NOTE(review): the matrix is currently unused because
        # decoder.load_pretrained_embeddings(...) is disabled below. It also has
        # len(word_map) + 1 rows - confirm against the decoder's vocab_size
        # before re-enabling.

    # Initialize / load checkpoint
    if checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        # Disabled: initialise the decoder embedding layer from the matrix above
        # decoder.load_pretrained_embeddings(pretrained_embeddings)
        # decoder.fine_tune_embeddings(True)
        decoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, decoder.parameters()),
            lr=args['decoder_lr'])

        # encoder = vgg_face_dag()  # VGG Face alternative (disabled)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=args['encoder_lr']) if fine_tune_encoder else None
    else:
        # Models and optimizers are stored as whole objects in the checkpoint
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder is True and encoder_optimizer is None:
            # Fine-tuning was switched on after the checkpoint was written
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(
                params=filter(lambda p: p.requires_grad, encoder.parameters()),
                lr=args['encoder_lr'])

    # Move to GPU, if available
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # VGG Face alternative (disabled):
    # normalize = transforms.Normalize(
    #     mean=[129.186279296875, 104.76238250732422, 93.59396362304688], std=[1, 1, 1])
    train_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder, data_name, 'TRAIN',
                       transform=transforms.Compose([normalize])),
        batch_size=args['batch_size'], shuffle=True, num_workers=workers, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder, data_name, 'VAL',
                       transform=transforms.Compose([normalize])),
        batch_size=args['batch_size'], shuffle=True, num_workers=workers, pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, args['epochs']):

        # Terminate after 15 consecutive epochs without improvement; decay the
        # learning rate every 8th consecutive epoch without improvement
        if epochs_since_improvement == 15:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # One epoch's training
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # One epoch's validation
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion,
                                epoch=epoch)

        # Check if there was an improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        tensorboard_writer.add_scalar('BLEU-4/epoch', recent_bleu4, epoch)

        # Save checkpoint
        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)

    tensorboard_writer.close()
    print('Task ID number is: {}'.format(task.id))
def main():
    """
    Training and validation.

    Supports two architectures, selected by the module-level ``dual_encoder``
    flag:

    * dual encoder - always initialised from a saved checkpoint, either a full
      dual-encoder checkpoint or a single-encoder checkpoint whose ResNet is
      transplanted into a new ``DualEncoder``;
    * single encoder - built from scratch, or restored from ``checkpoint``
      with a fresh decoder optimizer (epoch counter and best score are
      deliberately not restored).

    Every epoch is trained, validated and checkpointed; the last state is saved
    once more after the loop.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    def trainable(module):
        # Optimizers only receive parameters that require gradients
        return filter(lambda p: p.requires_grad, module.parameters())

    if dual_encoder:
        # Dual encoder: always initialised from pre-trained models
        print("DUAL ENCODER")
        if dual_encoder_checkpoint is not None:
            print('Loaded Dual Encoder Checkpoint')
            # NOTE(review): this loads `checkpoint`, not `dual_encoder_checkpoint`
            # - confirm which path is intended
            dual_branch_checkpoint = torch.load(checkpoint, map_location='cuda:0')
            encoder = dual_branch_checkpoint['encoder']
            decoder = dual_branch_checkpoint['decoder']
            decoder_optimizer = torch.optim.Adam(params=trainable(decoder), lr=decoder_lr)
        else:
            main_branch_checkpoint = torch.load(checkpoint, map_location='cuda:0')
            encoder = DualEncoder(sketch_resnet=sketch_encoder_resnet)
            # Reuse the pre-trained ResNet of the single-encoder model as main branch
            encoder.m_resnet = main_branch_checkpoint['encoder'].resnet
            print("Use pre-trained resnet")
            decoder = main_branch_checkpoint['decoder']
            decoder_optimizer = torch.optim.Adam(params=trainable(decoder), lr=decoder_lr)

        if fine_tune_encoder is True:
            print("!!! Will fine tune Encoder !!!")
            encoder.fine_tune(fine_tune_encoder)
        # The encoder optimizer is created whether or not fine-tuning is enabled
        encoder_optimizer = torch.optim.Adam(params=trainable(encoder), lr=encoder_lr)

    else:
        # Single encoder architecture: initialize / load checkpoint
        if checkpoint is None:
            decoder = DecoderWithAttention(attention_dim=attention_dim,
                                           embed_dim=emb_dim,
                                           decoder_dim=decoder_dim,
                                           vocab_size=len(word_map),
                                           dropout=dropout)
            decoder_optimizer = torch.optim.Adam(params=trainable(decoder), lr=decoder_lr)
            encoder = Encoder(specify_resnet=main_encoder_resnet)
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(
                params=trainable(encoder),
                lr=encoder_lr) if fine_tune_encoder else None
        else:
            checkpoint = torch.load(checkpoint, map_location='cuda:0')
            # Epoch counter, epochs-since-improvement and best BLEU-4 are NOT
            # restored: the metric is unfair when switching to a different domain
            decoder = checkpoint['decoder']
            decoder_optimizer = torch.optim.Adam(params=trainable(decoder), lr=decoder_lr)

            if main_encoder_resnet is not None:
                # Build a new encoder around the given ResNet (the Encoder removes
                # its last 2 layers) and keep the checkpoint's pooling layer
                encoder = Encoder(specify_resnet=main_encoder_resnet)
                encoder.adaptive_pool = checkpoint['encoder'].adaptive_pool
            else:
                encoder = checkpoint['encoder']

            if fine_tune_encoder is True:
                print("Will fine tune Encoder")
                encoder.fine_tune(fine_tune_encoder)
            # The encoder optimizer is created whether or not fine-tuning is enabled
            encoder_optimizer = torch.optim.Adam(params=trainable(encoder), lr=encoder_lr)

    # Move to GPU, if available
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # Data augmentation for the nycc dataset (training split only)
    augment = transforms.Compose([
        transforms.RandomAffine(20, (0.1, 0.1), (0.8, 1.2)),
        transforms.RandomHorizontalFlip(p=0.5)
    ])
    train_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder, data_name, 'TRAIN',
                       transform=transforms.Compose([augment, normalize])),
        batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder, data_name, 'VAL',
                       transform=transforms.Compose([normalize])),
        batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True)

    # Epochs (learning-rate decay and early stopping are disabled in this variant)
    for epoch in range(start_epoch, epochs):

        # One epoch's training
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # One epoch's validation
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion,
                                epoch=epoch)

        # Check if there was an improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        # NOTE(review): confirm whether this per-epoch save was meant to run only
        # when the score improved
        print(" *** saving model with bleu score: ", recent_bleu4)
        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)

    # Final save of the last epoch. Skipped when no epoch ran at all, because
    # `epoch`, `recent_bleu4` and `is_best` would be undefined in that case.
    if start_epoch < epochs:
        print(" *** LAST EPOCH saving model with bleu score: ", recent_bleu4)
        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
def main():
    """
    Training and validation.

    Reads the word map, builds the encoder/decoder (or restores them from the
    global ``checkpoint``), creates the TRAIN and VAL loaders and runs the
    epoch loop. After epoch 15 the learning rate is adjusted every epoch;
    training stops early after 6 consecutive epochs without improvement of the
    score returned by ``validate``.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    def make_loader(split):
        # pin_memory=True keeps the batches in page-locked host memory
        dataset = CaptionDataset(data_folder, data_name, split,
                                 transform=transforms.Compose([normalize]))
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=workers,
                                           pin_memory=True)

    # Read the vocabulary
    word_map_path = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_path, 'r') as handle:
        word_map = json.load(handle)

    # Initialise / load the model
    if checkpoint is not None:
        # Load checkpoint: models and optimizers are stored as whole objects
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder is True and encoder_optimizer is None:
            # Fine-tuning starts now, so an optimizer has to be defined
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(
                params=filter(lambda p: p.requires_grad, encoder.parameters()),
                lr=encoder_lr,
                betas=(0.8, 0.999))
    else:
        decoder = DecoderWithAttention(hidden_size=hidden_size,
                                       vocab_size=len(word_map),
                                       attention_dim=attention_dim,
                                       embed_size=emb_dim,
                                       dropout=dropout)
        decoder_optimizer = torch.optim.Adam(params=decoder.parameters(),
                                             lr=decoder_lr,
                                             betas=(0.8, 0.999))
        encoder = Encoder(hidden_size=hidden_size,
                          embed_size=emb_dim,
                          dropout=dropout)
        # Whether to fine-tune the encoder
        encoder.fine_tune(fine_tune_encoder)
        if fine_tune_encoder:
            encoder_optimizer = torch.optim.Adam(
                params=filter(lambda p: p.requires_grad, encoder.parameters()),
                lr=encoder_lr,
                betas=(0.8, 0.999))
        else:
            encoder_optimizer = None

    # Move to GPU
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # Dataloaders (ImageNet normalisation statistics)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_loader = make_loader('TRAIN')
    val_loader = make_loader('VAL')

    for epoch in range(start_epoch, epochs):
        # From epoch 16 on, adjust the learning rate every epoch
        if epoch > 15:
            adjust_learning_rate(decoder_optimizer, epoch)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, epoch)

        # Early stopping if the validation score does not improve for 6 consecutive epochs
        if epochs_since_improvement == 6:
            break

        # One epoch's training
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch,
              vocab_size=len(word_map))

        # One epoch's validation
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        # Check whether there was an improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if is_best:
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement, ))

        # Save the model
        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
def main():
    """
    Training and validation.

    Fine-tuning variant: a ``Fine_Tune_DecoderWithAttention`` is trained (self
    guided or supervised, depending on the module-level ``supervised_training``
    flag) while a plain ``DecoderWithAttention`` receives its parameters via
    ``load_parameter`` each epoch and is used for validation. Models are always
    constructed first; an optional ``checkpoint`` then supplies their
    parameters through ``load_parameter``.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    def trainable(module):
        # Optimizers only receive parameters that require gradients
        return filter(lambda p: p.requires_grad, module.parameters())

    # Read word map
    word_map_path = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_path, 'r') as handle:
        word_map = json.load(handle)

    # Build the models: trainable decoder, validation decoder, encoder
    decoder = Fine_Tune_DecoderWithAttention(attention_dim=attention_dim,
                                             embed_dim=emb_dim,
                                             decoder_dim=decoder_dim,
                                             vocab_size=len(word_map),
                                             dropout=dropout)
    val_decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
    decoder_optimizer = torch.optim.Adam(params=trainable(decoder), lr=decoder_lr)
    encoder = Encoder()
    encoder.fine_tune(fine_tune_encoder)
    if fine_tune_encoder:
        encoder_optimizer = torch.optim.Adam(params=trainable(encoder), lr=encoder_lr)
    else:
        encoder_optimizer = None
    g_remover = RemoveGenderRegion()

    if checkpoint is not None:
        if is_cpu:
            checkpoint = torch.load(checkpoint, map_location='cpu')
        else:
            checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = load_parameter(checkpoint['decoder'], decoder)
        encoder = load_parameter(checkpoint['encoder'], encoder)
        decoder_optimizer = load_parameter(checkpoint['decoder_optimizer'], decoder_optimizer)

        # NOTE(review): encoder_optimizer is non-None whenever fine_tune_encoder is
        # true (see above), so neither of the next two branches can run and the
        # checkpoint's encoder optimizer state is never loaded - confirm whether
        # the first condition was meant to be `is not None`.
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder_optimizer = load_parameter(checkpoint['encoder_optimizer'], encoder_optimizer)
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(params=trainable(encoder), lr=encoder_lr)

    if freeze_decoder_lstm:
        decoder.freeze_LSTM(freeze=True)

    # Move to GPU, if available
    decoder = decoder.to(device)
    encoder = encoder.to(device)
    g_remover = g_remover.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Fix CUDA bug: move tensors held in the decoder optimizer state to the GPU
    if not is_cpu:
        for state in decoder_optimizer.state.values():
            for key, value in state.items():
                if isinstance(value, torch.Tensor):
                    state[key] = value.cuda()

    # Supervised training uses the dataset variant that also provides masks;
    # the training split is deliberately not shuffled
    if supervised_training:
        train_dataset_cls = Fine_Tune_CaptionDataset_With_Mask
        train_one_epoch = supervised_guided_fine_tune_train
    else:
        train_dataset_cls = Fine_Tune_CaptionDataset
        train_one_epoch = self_guided_fine_tune_train

    train_loader = torch.utils.data.DataLoader(
        train_dataset_cls(data_folder, data_name, 'TRAIN',
                          transform=transforms.Compose([normalize])),
        batch_size=batch_size, shuffle=False, num_workers=workers, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder, data_name, 'VAL',
                       transform=transforms.Compose([normalize])),
        batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True)

    for epoch in range(start_epoch, epochs):
        # Terminate after 20 consecutive epochs without improvement; decay the
        # learning rate every 8th consecutive epoch without improvement
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # One epoch's training
        train_one_epoch(train_loader=train_loader,
                        encoder=encoder,
                        decoder=decoder,
                        criterion=criterion,
                        encoder_optimizer=encoder_optimizer,
                        decoder_optimizer=decoder_optimizer,
                        g_remover=g_remover,
                        epoch=epoch)

        # One epoch's validation, run on the plain decoder with the current parameters
        val_decoder = load_parameter(decoder, val_decoder)
        val_decoder = val_decoder.to(device)
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=val_decoder,
                                criterion=criterion)

        # Track improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if is_best:
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))

        # Save checkpoint
        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder, decoder,
                        encoder_optimizer, decoder_optimizer, recent_bleu4, is_best,
                        checkpoint_savepath)
def main():
    """
    Training and validation.

    Builds the encoder/decoder pair and their optimizers (or restores them
    from the module-level ``checkpoint`` path), writes one checkpoint before
    training starts, then trains from ``start_epoch`` to ``epochs`` and writes
    a checkpoint after every epoch.

    All hyper-parameters (learning rates, dimensions, dataset folders,
    ``device``, ``transform``, ...) are read from module-level globals.
    """
    global checkpoint, start_epoch, fine_tune_encoder

    # Initialize / load checkpoint
    if checkpoint is None:
        encoder = Encoder()
        print(encoder)
        encoder.fine_tune(fine_tune_encoder)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None

        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=vocab_size,
                                       encoder_dim=encoder_dim,
                                       dropout=dropout)
        decoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, decoder.parameters()),
            lr=decoder_lr)

        # Nothing has been trained yet: the initial snapshot is epoch 0.
        snapshot_epoch = 0
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(
                params=filter(lambda p: p.requires_grad,
                              encoder.parameters()),
                lr=encoder_lr)

        # Keep the epoch label of the restored state. Re-saving it as epoch 0
        # (as was done before) would make a later resume restart from epoch 1
        # if training is interrupted before the next per-epoch checkpoint.
        snapshot_epoch = checkpoint['epoch']

    # Move to GPU, if available
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # Customized dataloader
    train_dataset = DualLoadDatasets(imgsz, txt_folder, img_folder,
                                     bin_folder, split, Gfiltersz, Gblursigma)
    # Drop the last batch since it is not divisible by the batch size
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=workers,
                                               pin_memory=True,
                                               drop_last=True)

    # Save a checkpoint of the starting state before any training happens
    save_checkpoint(snapshot_epoch, encoder, decoder, encoder_optimizer,
                    decoder_optimizer)
    print('saving models to models/checkpoint')

    # Epochs
    for epoch in range(start_epoch, epochs):
        # One epoch's training
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              transform=transform,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # Save checkpoint
        save_checkpoint(epoch, encoder, decoder, encoder_optimizer,
                        decoder_optimizer)
        print('saving models to models/checkpoint')
def _checkpoint_entry(checkpoint, *keys):
    """Return the value stored under the first of ``keys`` found in ``checkpoint``.

    Raises ``KeyError`` (for the first, preferred key) when none is present.
    """
    for key in keys:
        if key in checkpoint:
            return checkpoint[key]
    raise KeyError(keys[0])


def fit(t_params, checkpoint=None, m_params=None, logger=None):
    """
    Train and validate the encoder/decoder pair, checkpointing every epoch.

    Args:
        t_params: dict of training settings. Keys read here: ``data_name``,
            ``imgs_path``, ``df_path``, ``vocab``, ``epochs``, ``batch_size``,
            ``workers``, ``encoder_lr``, ``decoder_lr``, ``fine_tune_encoder``,
            ``pretrained_embeddings`` and, when that is truthy and no
            checkpoint is given, ``fine_tune_embeddings``.
        checkpoint: path handed to ``torch.load``; ``None`` starts from
            scratch.
        m_params: dict of model hyper-parameters, only read when
            ``checkpoint`` is ``None``. Keys: ``attention_dim``,
            ``embed_dim``, ``decoder_dim``, ``encoder_dim``, ``dropout`` and,
            with pretrained embeddings, ``embeddings_matrix``.
        logger: object exposing ``add_scalar(tag, value, step)``; it is also
            forwarded to ``train`` and ``validate``. When ``None`` the
            validation scalars are simply not logged here.

    Training stops early after 5 consecutive epochs without a BLEU-4
    improvement.
    """
    # info
    data_name = t_params['data_name']
    imgs_path = t_params['imgs_path']
    df_path = t_params['df_path']
    vocab = t_params['vocab']

    start_epoch = 0
    epochs_since_improvement = 0
    best_bleu4 = 0
    epochs = t_params['epochs']
    batch_size = t_params['batch_size']
    workers = t_params['workers']
    encoder_lr = t_params['encoder_lr']
    decoder_lr = t_params['decoder_lr']
    fine_tune_encoder = t_params['fine_tune_encoder']

    # pretrained word embeddings
    pretrained_embeddings = t_params['pretrained_embeddings']

    # init / load checkpoint
    if checkpoint is None:
        # getting hyperparameters
        attention_dim = m_params['attention_dim']
        embed_dim = m_params['embed_dim']
        decoder_dim = m_params['decoder_dim']
        encoder_dim = m_params['encoder_dim']
        dropout = m_params['dropout']

        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=embed_dim,
                                       decoder_dim=decoder_dim,
                                       encoder_dim=encoder_dim,
                                       vocab_size=len(vocab),
                                       dropout=dropout)

        if pretrained_embeddings:
            # Only a fresh model needs these; reading them here keeps a
            # resume with m_params=None from failing on the lookup.
            fine_tune_embeddings = t_params['fine_tune_embeddings']
            embeddings_matrix = m_params['embeddings_matrix']
            decoder.load_pretrained_embeddings(
                torch.tensor(embeddings_matrix, dtype=torch.float32))
            decoder.fine_tune_embeddings(fine_tune=fine_tune_embeddings)

        decoder_optimizer = torch.optim.RMSprop(
            params=filter(lambda p: p.requires_grad, decoder.parameters()),
            lr=decoder_lr)

        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        encoder_optimizer = torch.optim.RMSprop(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None

    # load checkpoint
    else:
        checkpoint = torch.load(checkpoint)
        print('Loaded Checkpoint!!')
        start_epoch = checkpoint['epoch'] + 1
        print(f"Starting Epoch: {start_epoch}")
        # The correctly spelt keys are tried first; the misspelt variants
        # this function used to read are kept as a fallback so checkpoints
        # written with either spelling still load.
        epochs_since_improvement = _checkpoint_entry(
            checkpoint, 'epochs_since_improvement', 'epochs_since_imrovment')
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = _checkpoint_entry(
            checkpoint, 'decoder_optimizer', 'deocder_optimizer')
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.RMSprop(
                params=filter(lambda p: p.requires_grad,
                              encoder.parameters()),
                lr=encoder_lr)

    # Schedulers
    # The monitored quantity is BLEU-4, where higher is better, so the
    # scheduler must run in 'max' mode. The default 'min' mode would cut the
    # learning rate whenever the score keeps improving.
    decoder_scheduler = ReduceLROnPlateau(decoder_optimizer,
                                          mode='max',
                                          patience=2,
                                          verbose=True)
    if fine_tune_encoder:
        encoder_scheduler = ReduceLROnPlateau(encoder_optimizer,
                                              mode='max',
                                              patience=2,
                                              verbose=True)

    # move to gpu, if available
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    # loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # dataloaders
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    print('Loading Data')
    train_loader, val_loader = get_loaders(batch_size, imgs_path, df_path,
                                           transform, vocab, False, workers)

    print('_' * 50)
    print('-' * 20, 'Fitting', '-' * 20)
    for epoch in range(start_epoch, epochs):
        print('_' * 50)
        print('-' * 20, 'Training', '-' * 20)

        # one epoch of training
        epoch_time = AverageMeter()
        start_time = time.time()
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch,
              logger=logger)
        epoch_time.update(time.time() - start_time)
        print(f"Epoch train time {epoch_time.val:.3f} "
              f"({epoch_time.avg:.3f})")

        # one epoch of validation
        epoch_time = AverageMeter()
        start_time = time.time()
        print('-' * 20, 'Validation', '-' * 20)
        b1, b2, b3, recent_bleu4 = validate(val_loader=val_loader,
                                            encoder=encoder,
                                            decoder=decoder,
                                            criterion=criterion,
                                            vocab=vocab,
                                            epoch=epoch,
                                            logger=logger)
        epoch_time.update(time.time() - start_time)

        # tensorboard (logger is optional, so guard against None)
        if logger is not None:
            logger.add_scalar('b-1/valid', b1, epoch)
            logger.add_scalar('b-2/valid', b2, epoch)
            logger.add_scalar('b-3/valid', b3, epoch)
            logger.add_scalar('b-4/valid', recent_bleu4, epoch)

        print(f"Epoch validation time {epoch_time.val:.3f} "
              f"({epoch_time.avg:.3f})")

        # check for improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print(
                f'\nEpochs since last improvement: {epochs_since_improvement}'
            )
        else:
            # reset
            epochs_since_improvement = 0

        # stop training if no improvement for 5 epochs
        # (the break happens before this epoch's checkpoint is written)
        if epochs_since_improvement == 5:
            print('No improvement for 5 consecutive epochs, terminating...')
            break

        # learning rate scheduler
        decoder_scheduler.step(recent_bleu4)
        if fine_tune_encoder:
            encoder_scheduler.step(recent_bleu4)

        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
def main():
    """
    Training and validation.

    Loads the pickled vocabulary, builds (or restores from the module-level
    ``checkpoint`` path) the encoder/decoder and their optimizers, then
    alternates one epoch of training and one of validation on the Flickr8k
    splits, checkpointing after every epoch.

    The decoder (and encoder, when fine-tuned) learning rate is multiplied by
    0.8 whenever the number of epochs without BLEU-4 improvement is a
    positive multiple of 8; training stops when it reaches 20.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    # Read word map from the pickled vocabulary object.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point this at a vocabulary file you produced yourself.
    with open("/content/image_captioning/Image-Captioning-Codebase/vocab.pkl",
              "rb") as f:
        vocab = pickle.load(f)
    word_map = vocab.word2idx

    # Initialize / load checkpoint
    if checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        decoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, decoder.parameters()),
            lr=decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(
                params=filter(lambda p: p.requires_grad,
                              encoder.parameters()),
                lr=encoder_lr)

    # Move to GPU, if available
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    # Normalization statistics expected by the pre-trained image model
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform_train = transforms.Compose([
        transforms.Resize((224, 224)),  # resize the whole image to 224x224
        transforms.RandomHorizontalFlip(),  # flip with probability 0.5
        transforms.ToTensor(),  # convert the PIL Image to a tensor
        normalize,
    ])
    # Validation uses the same preprocessing without the random flip, so the
    # validation score does not depend on random augmentation.
    transform_val = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])

    train_loader = torch.utils.data.DataLoader(
        Flickr8kDataset(annot_path="/content/",
                        img_path="/content/Flicker8k_Dataset/",
                        split="train",
                        transform=transform_train),
        batch_size=batch_size,
        shuffle=True,
        num_workers=workers,
        pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        Flickr8kDataset(annot_path="/content/",
                        img_path="/content/Flicker8k_Dataset/",
                        split="dev",
                        transform=transform_val),
        batch_size=batch_size,
        shuffle=True,
        num_workers=workers,
        pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, epochs):

        # Decay learning rate if there is no improvement for 8 consecutive
        # epochs, and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # One epoch's training
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # One epoch's validation
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        # Check if there was an improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
def main():
    """
    Parse command-line options, build the data loaders, models and
    optimizers, and hand everything to ``train``.

    The parsed arguments are written to ``<save_dir>/log.txt`` (the file is
    overwritten), and that path is passed on to ``train`` as ``log_path``.
    """
    parser = argparse.ArgumentParser(description='caption model')
    parser.add_argument('--save_dir',
                        type=str,
                        default='logs/tmp',
                        help='directory of model save')
    # Dataset parameters
    parser.add_argument('--data_folder',
                        type=str,
                        default='./datasets/caption_data',
                        help='caption dataset folder')
    parser.add_argument('--data_name',
                        type=str,
                        default='flickr8k_5_cap_per_img_5_min_word_freq',
                        help='dataset name [coco, flickr8k, flickr30k]')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='training batch size')
    parser.add_argument('--print_freq',
                        type=int,
                        default=100,
                        help='print training state every n times')
    parser.add_argument('--num_workers',
                        type=int,
                        default=0,  # 8
                        help='number of data loader workers ')
    parser.add_argument('--epochs',
                        type=int,
                        default=120,
                        help='total training epochs')
    parser.add_argument('--grad_clip',
                        type=float,
                        default=5.,
                        help='number of gradient clip')
    parser.add_argument('--alpha_c',
                        type=float,
                        default=1.,
                        help='ratio of attention matrix')
    parser.add_argument('--encoder_lr',
                        type=float,
                        default=1e-4,
                        help='encoder learning rate')
    parser.add_argument('--decoder_lr',
                        type=float,
                        default=4e-4,
                        help='decoder learning rate')
    # Model parameters
    # Layer sizes must be integers: with type=float any value given on the
    # command line would reach the model as a float (e.g. 512.0) instead of
    # the int the defaults provide.
    parser.add_argument('--attention_dim',
                        type=int,
                        default=512,
                        help='dimension of attention')
    parser.add_argument('--embed_dim',
                        type=int,
                        default=512,
                        help='dimension of word embedding')
    parser.add_argument('--decoder_dim',
                        type=int,
                        default=512,
                        help='dimension of decoder')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.5,
                        help='rate of dropout')
    parser.add_argument('-frz',
                        '--freeze_encoder',
                        action='store_true',
                        help='whether freeze encoder parameters')
    args = parser.parse_args()

    mkdir_if_missing(args.save_dir)
    log_path = os.path.join(args.save_dir, 'log.txt')
    with open(log_path, 'w') as f:
        f.write('{}\n'.format(args))

    # Transforms for the training and validation sets.
    # The images have already been resized to 256 x 256, so both sets only
    # need to be converted to tensors and normalized with the ImageNet mean
    # and std.
    tfms = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    train_dataset = CaptionDataset(args.data_folder,
                                   args.data_name,
                                   split='TRAIN',
                                   transform=tfms)
    val_dataset = CaptionDataset(args.data_folder,
                                 args.data_name,
                                 split='VAL',
                                 transform=tfms)

    # shuffle: randomize sample order;
    # num_workers: number of data-loading subprocesses
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.num_workers)

    word_map_file = os.path.join(args.data_folder,
                                 'WORDMAP_' + args.data_name + '.json')
    with open(word_map_file, 'r') as f:
        word_map = json.load(f)

    # Initialize the models
    encoder = Encoder()
    encoder.freeze_params(args.freeze_encoder)
    decoder = DecoderWithAttention(attention_dim=args.attention_dim,
                                   embed_dim=args.embed_dim,
                                   decoder_dim=args.decoder_dim,
                                   vocab_size=len(word_map),
                                   dropout=args.dropout)

    # Optimizers for the encoder and the decoder
    # NOTE(review): if freeze_params leaves the encoder without any trainable
    # parameter, Adam is given an empty parameter list — confirm that
    # freeze_params keeps at least part of the encoder trainable.
    encoder_optimizer = torch.optim.Adam(
        params=filter(lambda p: p.requires_grad, encoder.parameters()),
        lr=args.encoder_lr)
    decoder_optimizer = torch.optim.Adam(
        params=filter(lambda p: p.requires_grad, decoder.parameters()),
        lr=args.decoder_lr)

    # Move the models to the GPU
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    criterion = nn.CrossEntropyLoss()

    train(args=args,
          train_loader=train_loader,
          val_loader=val_loader,
          encoder=encoder,
          decoder=decoder,
          criterion=criterion,
          encoder_optimizer=encoder_optimizer,
          decoder_optimizer=decoder_optimizer,
          log_path=log_path)
def main():
    """
    Training and validation.

    Reads the word map, builds (or restores from the module-level
    ``checkpoint`` path) the decoder and the ``encoder_opt`` encoder with
    their optimizers, then alternates one epoch of training and one of
    validation, checkpointing after every epoch.

    The learning rate is multiplied by 0.8 every 4 epochs, and training stops
    after 10 consecutive epochs without a BLEU-4 improvement.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map, rev_word_map

    # Read word map
    word_map_file = os.path.join(data_folder,
                                 'WORDMAP_' + data_name + '.json')
    with open(word_map_file, 'r') as j:
        word_map = json.load(j)

    rev_word_map = {v: k for k, v in word_map.items()}

    # Initialize / load checkpoint
    if checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)

        pretrained_embs, pretrained_embs_dim = load_embeddings(
            '/home/Iwamura/datasets/datasets/GloVe/glove.6B.300d.txt',
            word_map)
        assert pretrained_embs_dim == decoder.embed_dim
        decoder.load_pretrained_embeddings(pretrained_embs)
        decoder.fine_tune_embeddings(True)

        decoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, decoder.parameters()),
            lr=decoder_lr)

        # Only encoder_opt is trained, validated and checkpointed below. The
        # second Encoder (and its optimizer) that used to be built here was
        # never used, and did not exist at all on the resume path.
        encoder_opt = Encoder()
        encoder_opt.fine_tune(fine_tune_encoder_opt)
        encoder_optimizer_opt = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad,
                          encoder_opt.parameters()),
            lr=encoder_opt_lr) if fine_tune_encoder_opt else None

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder_opt = checkpoint['encoder_opt']
        encoder_optimizer_opt = checkpoint['encoder_optimizer_opt']
        if fine_tune_encoder_opt is True and encoder_optimizer_opt is None:
            encoder_opt.fine_tune(fine_tune_encoder_opt)
            encoder_optimizer_opt = torch.optim.Adam(
                params=filter(lambda p: p.requires_grad,
                              encoder_opt.parameters()),
                lr=encoder_opt_lr)

    # Move to GPU, if available
    decoder = decoder.to(device)
    encoder_opt = encoder_opt.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    normalize_opt = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

    train_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder,
                       data_name,
                       'TRAIN',
                       transform=transforms.Compose([normalize_opt])),
        batch_size=batch_size,
        shuffle=True,
        num_workers=workers,
        pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        CaptionDataset(data_folder,
                       data_name,
                       'VAL',
                       transform=transforms.Compose([normalize_opt])),
        batch_size=batch_size,
        shuffle=True,
        num_workers=workers,
        pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, epochs):

        # Terminate training after 10 consecutive epochs without improvement
        if epochs_since_improvement == 10:
            break
        # Decay the learning rate on a fixed schedule: every 4th epoch,
        # regardless of whether the score improved
        if epoch > 0 and epoch % 4 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder_opt:
                adjust_learning_rate(encoder_optimizer_opt, 0.8)

        # One epoch's training
        train(train_loader=train_loader,
              encoder_opt=encoder_opt,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer_opt=encoder_optimizer_opt,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        # One epoch's validation
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder_opt=encoder_opt,
                                decoder=decoder,
                                criterion=criterion)

        # Check if there was an improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(data_name, epoch, epochs_since_improvement,
                        encoder_opt, decoder, encoder_optimizer_opt,
                        decoder_optimizer, recent_bleu4, is_best)
def main():
    """
    Run the full training/validation schedule.

    The word map is read from ``WORDMAP_<data_name>.json``. The models and
    optimizers are either created from the module-level hyper-parameters or
    restored from the module-level ``checkpoint`` path. Each epoch trains,
    validates, updates the early-stopping bookkeeping and saves a checkpoint.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    # Read word map
    with open(os.path.join(data_folder, "WORDMAP_" + data_name + ".json"),
              "r") as handle:
        word_map = json.load(handle)

    def trainable(model):
        """Lazily yield the parameters of ``model`` that require gradients."""
        return filter(lambda p: p.requires_grad, model.parameters())

    # Restore from a checkpoint when one is configured, else start fresh
    if checkpoint is not None:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint["epoch"] + 1
        epochs_since_improvement = checkpoint["epochs_since_improvement"]
        best_bleu4 = checkpoint["bleu-4"]
        decoder = checkpoint["decoder"]
        decoder_optimizer = checkpoint["decoder_optimizer"]
        encoder = checkpoint["encoder"]
        encoder_optimizer = checkpoint["encoder_optimizer"]
        # Fine-tuning was requested but the checkpoint carries no encoder
        # optimizer: unfreeze the encoder and create one now
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(params=trainable(encoder),
                                                 lr=encoder_lr)
    else:
        decoder = DecoderWithAttention(
            attention_dim=attention_dim,
            embed_dim=emb_dim,
            decoder_dim=decoder_dim,
            vocab_size=len(word_map),
            dropout=dropout,
        )
        decoder_optimizer = torch.optim.Adam(params=trainable(decoder),
                                             lr=decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        if fine_tune_encoder:
            encoder_optimizer = torch.optim.Adam(params=trainable(encoder),
                                                 lr=encoder_lr)
        else:
            encoder_optimizer = None

    # Move to GPU, if available
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    def make_loader(split_name):
        """Build a shuffled loader over the given split of the caption data."""
        dataset = CaptionDataset(data_folder,
                                 data_name,
                                 split_name,
                                 transform=transforms.Compose([normalize]))
        return torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=workers,
            pin_memory=True,
        )

    train_loader = make_loader("TRAIN")
    val_loader = make_loader("VAL")

    # Epochs
    for epoch in range(start_epoch, epochs):

        # Stop after 20 epochs without improvement
        if epochs_since_improvement == 20:
            break

        # Decay the learning rate each time another 8 epochs have passed
        # without improvement
        stalled = epochs_since_improvement > 0
        if stalled and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # One epoch's training
        train(
            train_loader=train_loader,
            encoder=encoder,
            decoder=decoder,
            criterion=criterion,
            encoder_optimizer=encoder_optimizer,
            decoder_optimizer=decoder_optimizer,
            epoch=epoch,
        )

        # One epoch's validation
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        # Update the best score and the no-improvement counter
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if is_best:
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))

        # Save checkpoint
        save_checkpoint(
            data_name,
            epoch,
            epochs_since_improvement,
            encoder,
            decoder,
            encoder_optimizer,
            decoder_optimizer,
            recent_bleu4,
            is_best,
        )
def main():
    """
    Training and validation.

    Parses the command-line options, builds (or restores from
    ``--checkpoint``) the encoder/decoder and their optimizers, then
    alternates one epoch of training and one of validation, saving a
    checkpoint into ``--output_dir`` after every epoch.

    The learning rate is multiplied by 0.8 whenever the number of epochs
    without BLEU-4 improvement is a positive multiple of 8; training stops
    when it reaches 20.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_folder",
        default='data/',
        type=str,
        help="folder with data files saved by create_input_files.py")
    parser.add_argument("--data_name",
                        default='coco_5_cap_per_img_5_min_word_freq',
                        type=str,
                        help="base name shared by data files")
    parser.add_argument("--output_dir",
                        default='saved_models/',
                        type=str,
                        help="path to save checkpoints")
    parser.add_argument("--checkpoint",
                        default=None,
                        type=str,
                        help="path to checkpoint")
    parser.add_argument("--emb_dim",
                        default=512,
                        type=int,
                        help="dimension of word embeddings")
    parser.add_argument("--attention_dim",
                        default=512,
                        type=int,
                        help="dimension of attention linear layers")
    parser.add_argument("--decoder_dim",
                        default=512,
                        type=int,
                        help="dimension of decoder RNN")
    # The help text used to be a copy of the --emb_dim one
    parser.add_argument("--dropout",
                        default=0.5,
                        type=float,
                        help="dropout rate")
    parser.add_argument("--start_epoch", default=0, type=int)
    parser.add_argument(
        "--epochs",
        default=120,
        type=int,
        help=
        "number of epochs to train for (if early stopping is not triggered)")
    parser.add_argument("--batch_size",
                        default=128,
                        type=int,
                        help="batch size for training and testing")
    parser.add_argument("--workers",
                        default=8,
                        type=int,
                        help="num of workers for data-loading")
    parser.add_argument("--encoder_lr", default=1e-4, type=float)
    parser.add_argument("--decoder_lr", default=5e-4, type=float)
    parser.add_argument("--grad_clip",
                        default=5,
                        type=float,
                        help="clip gradients at an absolute value of")
    # A regularization weight is a real number: type=int rejected values
    # such as 0.5 on the command line.
    parser.add_argument(
        "--alpha_c",
        default=1.0,
        type=float,
        help=
        "regularization parameter for 'doubly stochastic attention', as in the paper"
    )
    parser.add_argument(
        "--print_freq",
        default=100,
        type=int,
        help="print training/validation stats every __ batches")
    parser.add_argument("--fine_tune_encoder",
                        action='store_true',
                        help="Whether to finetune the encoder")

    args = parser.parse_args()

    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(args.output_dir, exist_ok=True)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    args.device = device

    best_bleu4 = 0
    epochs_since_improvement = 0

    # Read word map
    word_map_file = os.path.join(args.data_folder,
                                 'WORDMAP_' + args.data_name + '.json')
    with open(word_map_file, 'r') as j:
        word_map = json.load(j)

    # Initialize / load checkpoint
    if args.checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=args.attention_dim,
                                       embed_dim=args.emb_dim,
                                       decoder_dim=args.decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=args.dropout)
        decoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, decoder.parameters()),
            lr=args.decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(args.fine_tune_encoder)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=args.encoder_lr) if args.fine_tune_encoder else None
    else:
        checkpoint = torch.load(args.checkpoint)
        args.start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if args.fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(args.fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(
                params=filter(lambda p: p.requires_grad,
                              encoder.parameters()),
                lr=args.encoder_lr)

    # Move to GPU, if available
    decoder = decoder.to(args.device)
    encoder = encoder.to(args.device)

    # Loss function
    criterion = nn.CrossEntropyLoss(ignore_index=0).to(args.device)

    # Custom dataloaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_loader = torch.utils.data.DataLoader(
        CaptionDataset(args.data_folder,
                       args.data_name,
                       'TRAIN',
                       transform=transforms.Compose([normalize])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)
    # len() of a DataLoader is the number of batches, not of samples
    print(f'train dataset length {len(train_loader)}')
    val_loader = torch.utils.data.DataLoader(
        CaptionDataset(args.data_folder,
                       args.data_name,
                       'VAL',
                       transform=transforms.Compose([normalize])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)
    print(f'val dataset length {len(val_loader)}')

    # Epochs
    for epoch in range(args.start_epoch, args.epochs):

        # Decay learning rate if there is no improvement for 8 consecutive
        # epochs, and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if args.fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # One epoch's training
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch,
              args=args)

        # One epoch's validation
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion,
                                word_map=word_map,
                                args=args)

        # Check if there was an improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(args.data_name, args.output_dir, epoch,
                        epochs_since_improvement, encoder, decoder,
                        encoder_optimizer, decoder_optimizer, recent_bleu4,
                        is_best)
def main():
    """
    Training and validation.

    Builds (or restores from the module-level ``checkpoint`` path) the
    encoder, the attention decoder and the ``MultiTask`` decoder with their
    optimizers, then alternates one epoch of training and one of validation,
    checkpointing after every epoch.

    The validation value is treated as lower-is-better: an epoch counts as an
    improvement when ``recent < best``. The decoder (and encoder, when
    fine-tuned) learning rate is multiplied by 0.8 whenever the number of
    epochs without improvement is a positive multiple of 8; training stops
    when it reaches 20.
    """
    global best, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder

    if checkpoint is None:
        decoder = DecoderWithAttention(
            attention_dim=attention_dim,
            embed_dim=emb_dim,
            decoder_dim=decoder_dim,
            vocab_size=2,  # X, Y coordinates and use it for regression
            dropout=dropout)

        decoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, decoder.parameters()),
            lr=decoder_lr)

        decoderMulti = network.__dict__['MultiTask'](output_size)
        decoderMulti_optimizer = torch.optim.Adam(
            decoderMulti.parameters(),
            lr=decoderMulti_lr,
            weight_decay=decoderMulti_lr_weight_decay)

        # NOTE(review): unlike the resume path, encoder.fine_tune() is not
        # called here, so the encoder keeps whatever trainability Encoder()
        # sets up by default — confirm this is intended.
        encoder = Encoder()
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best = checkpoint['b4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        decoderMulti = checkpoint['decoderMulti']
        decoderMulti_optimizer = checkpoint['decoderMulti_optimizer']

        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(
                params=filter(lambda p: p.requires_grad,
                              encoder.parameters()),
                lr=encoder_lr)

    # Move to GPU, if available
    if multiGpu:
        decoder = torch.nn.DataParallel(decoder).to(device)
        encoder = torch.nn.DataParallel(encoder).to(device)
        decoderMulti = torch.nn.DataParallel(decoderMulti).to(device)
    else:
        decoder = decoder.to(device)
        encoder = encoder.to(device)
        decoderMulti = decoderMulti.to(device)

    # Loss functions, passed to train/validate as [binary, mse]
    criterionBinary = nn.BCELoss().to(device)
    criterionMse = nn.MSELoss().to(device)
    criterion = [criterionBinary, criterionMse]

    # Custom dataloaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_loader_p = torch.utils.data.DataLoader(
        fiberDataset_COCO(data_folder_p, jason_file_p, image_folder,
                          offset_folder,
                          transforms.Compose(
                              [transforms.ToTensor(), normalize]), True),
        batch_size=batch_size,
        shuffle=True,
        num_workers=workers,
        pin_memory=True,
        drop_last=True)

    val_loader_p = torch.utils.data.DataLoader(
        fiberDataset_COCO(data_folder_val, jason_file_val, image_folder_val,
                          offset_folder_val,
                          transforms.Compose(
                              [transforms.ToTensor(), normalize]), True),
        batch_size=batch_size,
        shuffle=True,
        num_workers=workers,
        pin_memory=True,
        drop_last=True)

    # Epochs
    for epoch in range(start_epoch, epochs):

        # Decay learning rate if there is no improvement for 8 consecutive
        # epochs, and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        train(train_loader=train_loader_p,
              encoder=encoder,
              decoder=decoder,
              decoderMulti=decoderMulti,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              decoderMulti_optimizer=decoderMulti_optimizer,
              epoch=epoch)

        recent = validate(val_loader=val_loader_p,
                          encoder=encoder,
                          decoder=decoder,
                          decoderMulti=decoderMulti,
                          criterion=criterion)

        # Check if there was an improvement (lower is better)
        is_best = recent < best
        best = min(recent, best)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            # The counter is reset only here. A second, duplicated reset used
            # to follow this branch; executed unconditionally it would zero
            # the counter every epoch, so neither the learning-rate decay nor
            # the early stop above could ever trigger.
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(save_weights_name, epoch, epochs_since_improvement,
                        encoder, decoder, encoder_optimizer,
                        decoder_optimizer, decoderMulti,
                        decoderMulti_optimizer, best, is_best)
def main(checkpoint, tienet,
         models_root='/data/medg/misc/liuguanx/TieNet/models'):
    """
    Training and validation.

    Args:
        checkpoint: Directory of a previous run to resume from, or a falsy
            value to start a new run.  When given, the file
            ``checkpoint_mimiccxr_1_cap_per_img_5_min_word_freq.pth.tar``
            inside it is loaded and the same directory is reused as the
            output directory.
        tienet: When truthy, a ``JointLearning`` module and its optimizer are
            created (or restored), moved to ``device`` and passed on to
            ``train`` / ``validate``; otherwise both are ``None`` for a new
            run.
        models_root: Directory under which a new, timestamp-named run
            directory is created when no checkpoint is given.  Defaults to
            the location that used to be hard-coded here.

    The BLEU-4 score returned by ``validate`` is treated as higher-is-better.
    Training stops after 20 epochs without improvement, and
    ``adjust_learning_rate(..., 0.8)`` is applied to the active optimizers
    whenever the stagnation counter is a positive multiple of 8.
    """
    global best_bleu4, epochs_since_improvement, start_epoch, fine_tune_encoder, data_name, word_map

    if checkpoint:
        # Resume: reuse the run directory and point at its checkpoint file.
        dest_dir = checkpoint
        checkpoint = os.path.join(
            dest_dir,
            'checkpoint_mimiccxr_1_cap_per_img_5_min_word_freq.pth.tar'
        )  # path to checkpoint, None if none
    else:
        # New run: one directory per start time (microsecond resolution).
        dest_dir = os.path.join(
            models_root,
            datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S-%f'))
        os.makedirs(dest_dir)
        checkpoint = None

    # Read word map
    word_map_file = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_file, 'r') as j:
        word_map = json.load(j)

    # Initialize / load checkpoint
    if checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        decoder_optimizer = torch.optim.Adam(params=filter(
            lambda p: p.requires_grad, decoder.parameters()),
                                             lr=decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None
        if tienet:
            jointlearner = JointLearning(num_global_att=num_global_att,
                                         s=s,
                                         decoder_dim=decoder_dim,
                                         label_size=label_size)
            jointlearner_optimizer = torch.optim.Adam(params=filter(
                lambda p: p.requires_grad, jointlearner.parameters()),
                                                      lr=jointlearning_lr)
        else:
            jointlearner = None
            jointlearner_optimizer = None
    else:
        # The checkpoint stores whole module/optimizer objects, not state
        # dicts, so torch.load unpickles them: only load trusted files.
        checkpoint = torch.load(checkpoint)
        print('checkpoint loaded')
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['best_bleu']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        jointlearner = checkpoint['jointlearner']
        jointlearner_optimizer = checkpoint['jointlearner_optimizer']
        # The saved run may not have fine-tuned the encoder; start doing so now.
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(params=filter(
                lambda p: p.requires_grad, encoder.parameters()),
                                                 lr=encoder_lr)

    # Move to GPU, if available
    if torch.cuda.device_count() > 1:
        print('Using', torch.cuda.device_count(), 'GPUs')
        # NOTE(review): device_ids is pinned to GPU 1 and the decoder is left
        # unwrapped; confirm this matches how ``device`` is configured.
        encoder = nn.DataParallel(encoder, device_ids=[1])
        if tienet:
            jointlearner = nn.DataParallel(jointlearner, device_ids=[1])
    decoder = decoder.to(device)
    encoder = encoder.to(device)
    if tienet:
        jointlearner = jointlearner.to(device)

    # Loss functions: criterion_R is cross-entropy, criterion_C is
    # binary cross-entropy on logits.
    criterion_R = nn.CrossEntropyLoss().to(device)
    criterion_C = nn.BCEWithLogitsLoss().to(device)

    # Custom dataloaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_loader = torch.utils.data.DataLoader(CaptionDataset(
        data_folder,
        data_name,
        'TRAIN',
        transform=transforms.Compose([normalize])),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(CaptionDataset(
        data_folder,
        data_name,
        'VAL',
        transform=transforms.Compose([normalize])),
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=workers,
                                             pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, epochs):

        # Decay learning rate if there is no improvement for 8 consecutive
        # epochs, and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if tienet:
                adjust_learning_rate(jointlearner_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # One epoch's training
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              jointlearner=jointlearner,
              criterion_R=criterion_R,
              criterion_C=criterion_C,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              jointlearner_optimizer=jointlearner_optimizer,
              epoch=epoch,
              dest_dir=dest_dir,
              tienet=tienet)

        # One epoch's validation
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                jointlearner=jointlearner,
                                criterion_R=criterion_R,
                                criterion_C=criterion_C,
                                tienet=tienet)

        # Check if there was an improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, jointlearner, encoder_optimizer,
                        decoder_optimizer, jointlearner_optimizer,
                        recent_bleu4, best_bleu4, is_best, dest_dir)
# Path of the trained weights; the name is rebound below to the loaded dict.
checkpoint = 'logs/tmp/BEST_MODEL.pth.tar'
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Vocabulary: word -> index, plus the inverse index -> word lookup.
with open(word_map_file, 'r') as f:
    word_map = json.load(f)
vocab_size = len(word_map)
rev_word_map = dict((index, word) for word, index in word_map.items())

# Load the model: build both networks, then restore each one's weights from
# the checkpoint and switch it to evaluation mode on the target device.
encoder = Encoder()
decoder = DecoderWithAttention(512, 512, 512, vocab_size)
checkpoint = torch.load(checkpoint)

encoder.load_state_dict(checkpoint['encoder'])
encoder.to(device)
encoder.eval()

decoder.load_state_dict(checkpoint['decoder'])
decoder.to(device)
decoder.eval()

# Input pipeline: resize to 256x256, convert to a tensor, then normalize
# each channel with the given mean and standard deviation.
preprocess = T.Compose([
    T.Resize(size=(256, 256)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


def get_image_caption(ori_img):
    """
    Preprocess ``ori_img`` into a single-image batch on ``device``.

    NOTE(review): the visible body ends after the device transfer and
    returns nothing; the captioning steps are presumably missing from this
    excerpt - confirm against the full file.
    """
    batch = preprocess(ori_img).unsqueeze(0)  # add the leading batch dimension
    batch = batch.to(device)
def main():
    """
    Train the captioning model and validate it once per epoch.

    Loads the word map from ``data_folder``, then either builds a new decoder
    and encoder (module-level ``checkpoint`` is ``None``) or restores them,
    their optimizers and the epoch/early-stopping counters from the
    checkpoint file.

    The BLEU-4 score returned by ``validate`` is treated as higher-is-better.
    Training stops after 20 epochs without improvement, and
    ``adjust_learning_rate(..., 0.8)`` is applied to the decoder optimizer
    (and the encoder optimizer when ``fine_tune_encoder`` is set) whenever
    the stagnation counter is a positive multiple of 8.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    def _trainable(module):
        """Return only the parameters of ``module`` that require gradients."""
        return filter(lambda p: p.requires_grad, module.parameters())

    def _make_loader(split):
        """Build the shuffled DataLoader for the given dataset split name."""
        dataset = CaptionDataset(data_folder,
                                 data_name,
                                 split,
                                 transform=transforms.Compose([normalize]))
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=workers,
                                           pin_memory=True)

    # Read word map (w2i)
    word_map_file = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_file, 'r') as j:
        word_map = json.load(j)

    if checkpoint is not None:
        # Resume: models, optimizers and counters all come from the saved file.
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        # The saved run may not have fine-tuned the encoder; start doing so now.
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(params=_trainable(encoder),
                                                 lr=encoder_lr)
    else:
        # Fresh start.
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        decoder_optimizer = torch.optim.Adam(params=_trainable(decoder),
                                             lr=decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        if fine_tune_encoder:
            encoder_optimizer = torch.optim.Adam(params=_trainable(encoder),
                                                 lr=encoder_lr)
        else:
            encoder_optimizer = None

    # Move to GPU, if available
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders.  The only transform applied here is per-channel
    # normalization with the ImageNet RGB mean and standard deviation; the
    # original note says pixel values must already be in the range [0, 1].
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_loader = _make_loader('TRAIN')
    val_loader = _make_loader('VAL')

    initial_time = time.time()
    print("Initial time", initial_time)

    # Epochs
    for epoch in range(start_epoch, epochs):
        print("Starting epoch ", epoch)

        # Give up after 20 stagnant epochs.
        if epochs_since_improvement == 20:
            break

        # Adjust the learning rates on every 8th stagnant epoch.
        stagnating = epochs_since_improvement > 0
        if stagnating and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        # One epoch's training
        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch,
              initial_time=initial_time)

        # One epoch's validation
        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        # Higher is better: track the maximum seen so far.
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if is_best:
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))

        # Save checkpoint
        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
def main():
    """
    Training and validation.

    Loads the word map from ``data_folder`` and then sets up the models in
    one of three ways:

    * ``use_sam`` is set: a new decoder is trained with a ``SAM`` optimizer
      (SGD base, momentum 0.9) while the encoder is taken from the
      checkpoint.  A checkpoint is therefore required in this mode.
    * ``checkpoint`` is ``None``: decoder and encoder are created from
      scratch with Adam optimizers.
    * otherwise: models, optimizers and the epoch/early-stopping counters are
      restored from the checkpoint.

    In every mode, if ``fine_tune_encoder`` is ``True`` but no encoder
    optimizer exists yet, the encoder is switched to fine-tuning and given
    one (``SAM`` when ``use_sam`` is set, Adam otherwise).

    Validation loss, top-5 accuracy and BLEU-4 are written to TensorBoard
    after each epoch.  BLEU-4 is treated as higher-is-better; training stops
    after 20 epochs without improvement, and
    ``adjust_learning_rate(..., 0.8)`` is applied whenever the stagnation
    counter is a positive multiple of 8.

    Raises:
        ValueError: if ``use_sam`` is set but ``checkpoint`` is ``None``.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    # Read word map
    word_map_file = os.path.join(data_folder, 'WORDMAP_' + data_name + '.json')
    with open(word_map_file, 'r') as j:
        word_map = json.load(j)

    # Initialize / load checkpoint
    if use_sam:
        # This mode reads the encoder from the checkpoint, so fail early with
        # a clear message instead of letting torch.load(None) blow up.
        if checkpoint is None:
            raise ValueError(
                "use_sam requires a checkpoint to load the encoder from")
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout,
                                       use_glove=use_glove,
                                       word_map=word_map)
        base_optimizer = torch.optim.SGD
        decoder_optimizer = SAM(filter(lambda p: p.requires_grad,
                                       decoder.parameters()),
                                base_optimizer,
                                lr=decoder_lr,
                                momentum=0.9)
        # The checkpoint stores whole pickled modules: load trusted files only.
        checkpoint = torch.load(checkpoint)
        encoder = checkpoint['encoder']
        encoder_optimizer = None
        print("Loading best encoder but random decoder and using SAM...")
    elif checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout,
                                       use_glove=use_glove,
                                       word_map=word_map)
        decoder_optimizer = torch.optim.Adam(params=filter(
            lambda p: p.requires_grad, decoder.parameters()),
                                             lr=decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None
    else:
        # Only reached when use_sam is falsy, so the saved optimizers are
        # reused as they are.
        # NOTE(review): with this branch layout a SAM run can never be
        # resumed with its counters and learning rates - confirm that is
        # intended.
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        print(f"Continuing training from epoch {start_epoch}...")
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']

    # Applies to every mode: the SAM branch always leaves encoder_optimizer
    # as None, and the learning-rate decay below would otherwise be handed
    # None when fine_tune_encoder is set.
    if fine_tune_encoder is True and encoder_optimizer is None:
        encoder.fine_tune(fine_tune_encoder)
        if use_sam:
            base_optimizer = torch.optim.SGD
            encoder_optimizer = SAM(filter(lambda p: p.requires_grad,
                                           encoder.parameters()),
                                    base_optimizer,
                                    lr=encoder_lr,
                                    momentum=0.9)
        else:
            encoder_optimizer = torch.optim.Adam(params=filter(
                lambda p: p.requires_grad, encoder.parameters()),
                                                 lr=encoder_lr)

    # Move to GPU, if available
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    # Loss function
    criterion = nn.CrossEntropyLoss().to(device)

    # Initialize dataloaders
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_loader = torch.utils.data.DataLoader(CocoCaptionDataset(
        data_folder,
        data_name,
        'TRAIN',
        transforms=transforms.Compose([normalize])),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(CocoCaptionDataset(
        data_folder,
        data_name,
        'VAL',
        transforms=transforms.Compose([normalize])),
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=workers,
                                             pin_memory=True)
    print(f"Train dataloader len: {len(train_loader)}")
    print(f"Val dataloader len: {len(val_loader)}")

    # Set up TensorBoard
    train_writer = SummaryWriter(
        os.path.join(log_directory, f"{log_name}/train"))
    val_writer = SummaryWriter(os.path.join(log_directory, f"{log_name}/val"))

    try:
        # Epochs
        for epoch in tqdm(range(start_epoch, epochs)):

            # Decay learning rate if there is no improvement for 8
            # consecutive epochs, and terminate training after 20
            if epochs_since_improvement == 20:
                break
            if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
                adjust_learning_rate(decoder_optimizer, 0.8)
                if fine_tune_encoder:
                    adjust_learning_rate(encoder_optimizer, 0.8)

            # One epoch's training
            train(train_loader=train_loader,
                  encoder=encoder,
                  decoder=decoder,
                  criterion=criterion,
                  encoder_optimizer=encoder_optimizer,
                  decoder_optimizer=decoder_optimizer,
                  epoch=epoch,
                  train_writer=train_writer)

            # One epoch's validation
            recent_bleu4, val_loss, val_top5_acc = validate(
                val_loader=val_loader,
                encoder=encoder,
                decoder=decoder,
                criterion=criterion)

            val_writer.add_scalar('Epoch loss', val_loss, epoch + 1)
            val_writer.add_scalar('Epoch top-5 accuracy', val_top5_acc,
                                  epoch + 1)
            val_writer.add_scalar('BLEU-4', recent_bleu4, epoch + 1)

            # Check if there was an improvement
            is_best = recent_bleu4 > best_bleu4
            best_bleu4 = max(recent_bleu4, best_bleu4)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" %
                      (epochs_since_improvement, ))
            else:
                epochs_since_improvement = 0

            # Save checkpoint; the name records which options were active.
            checkpoint_name = data_name
            if use_glove:
                checkpoint_name = f"glove_{checkpoint_name}"
            if use_sam:
                checkpoint_name = f"sam_{checkpoint_name}"
            save_checkpoint(checkpoint_name, epoch, epochs_since_improvement,
                            encoder, decoder, encoder_optimizer,
                            decoder_optimizer, recent_bleu4, is_best,
                            checkpoint_path)
    finally:
        # Flush pending events and release the log files even when training
        # stops early or raises.
        train_writer.close()
        val_writer.close()