def train(model, num_epochs=10, lr=0.0003, print_every=100):
    """Train a model on IWSLT and return its validation perplexities.

    Args:
        model: Model to optimise; moved to the GPU when ``params['USE_CUDA']``
            is truthy.
        num_epochs: Number of passes over ``train_iter``.
        lr: Learning rate handed to Adam.
        print_every: Forwarded to ``run_epoch`` for the training pass.

    Returns:
        A list with one validation perplexity per epoch, in epoch order.
    """

    def _batches(data_iter):
        # Lazily wrap every raw batch with the padding-aware rebatch helper.
        return (rebatch(PAD_INDEX, raw) for raw in data_iter)

    if params['USE_CUDA']:
        model.cuda()

    # Label smoothing could optionally be added here; see the Annotated
    # Transformer.
    criterion = nn.NLLLoss(reduction="sum", ignore_index=PAD_INDEX)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    dev_perplexities = []
    for epoch in range(num_epochs):
        print("Epoch", epoch)

        # Training pass: the loss helper owns the optimizer.
        model.train()
        train_loss = SimpleLossCompute(model.generator, criterion, optimizer)
        run_epoch(_batches(train_iter), model, train_loss,
                  print_every=print_every)

        # Validation pass: no optimizer is supplied and autograd is disabled.
        model.eval()
        with torch.no_grad():
            print_examples(_batches(valid_iter), model, n=3,
                           src_vocab=SRC.vocab, trg_vocab=TRG.vocab)
            eval_loss = SimpleLossCompute(model.generator, criterion, None)
            dev_perplexity = run_epoch(_batches(valid_iter), model, eval_loss)

        print("Validation perplexity: %f" % dev_perplexity)
        dev_perplexities.append(dev_perplexity)

    return dev_perplexities
def train():
    """Fit the CNNtoRNN captioning model on one fixed batch.

    Builds the image transform and data loader, freezes every Inception
    parameter except the final ``fc`` layer (unless ``train_CNN`` is set),
    then repeatedly optimises on a single batch drawn once from
    ``train_loader``, printing the loss after every epoch. When
    ``save_model`` is set, a checkpoint is written at the end of each epoch.
    """
    transform = transforms.Compose([
        transforms.Resize((240, 240)),
        # Crop size the original author uses as the network input size.
        transforms.RandomCrop((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    train_loader, dataset = get_loader(root_folder="archive/Images",
                                       annotation_file="archive/captions.txt",
                                       transform=transform,
                                       batch_size=128,
                                       num_workers=0)

    # Hyperparameters and run configuration.
    torch.backends.cudnn.benchmark = True  # intended to speed up training
    device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
    load_model = False
    save_model = False
    train_CNN = False
    embed_size = 256
    hidden_size = 256
    vocab_size = len(dataset.vocab)
    num_layers = 1
    learning_rate = 3e-4
    num_epochs = 100

    # For TensorBoard.
    writer = SummaryWriter("runs/flickr")
    step = 0

    # Initialise the model.
    model = CNNtoRNN(embed_size, hidden_size, vocab_size, num_layers).to(device)

    # Only fine-tune the CNN head: the fc layer always trains, every other
    # Inception parameter follows train_CNN.
    for name, param in model.EncoderCNN.inception.named_parameters():
        if "fc.weight" in name or "fc.bias" in name:
            param.requires_grad = True
        else:
            param.requires_grad = train_CNN

    # No loss is computed for "<PAD>" tokens.
    criterion = nn.CrossEntropyLoss(
        ignore_index=dataset.vocab.stoi["<PAD>"])
    # Only parameters that still require gradients are handed to Adam.
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=learning_rate)
    # NOTE(review): the scheduler is never stepped in this function, so the
    # learning rate stays at learning_rate; confirm whether a per-epoch
    # scheduler.step() was intended.
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[60, 120, 140])

    # The checkpoint must be restored only after the optimizer exists;
    # doing it earlier referenced `optimizer` before assignment.
    if load_model:
        step = load_checkpoint(torch.load("my_checkpoint.pth.tar"), model,
                               optimizer)

    model.train()
    print('Begins')

    # A single batch is drawn once and reused for every epoch; the full
    # iteration over train_loader is disabled in this version.
    imgs, captions = next(iter(train_loader))
    imgs = imgs.to(device)
    captions = captions.to(device)

    for epoch in range(num_epochs):
        print_examples(model, device, dataset, save_path='result.txt')

        total_loss = 0

        # The final (EOS) position is not fed to the network; the model is
        # expected to learn to predict it.
        outputs = model(imgs, captions[:-1])
        # outputs: (seq_len, batch_size, vocabulary_size) per the original
        # author's note, but cross-entropy takes a 2-D input, so flatten.
        loss = criterion(outputs.reshape(-1, outputs.shape[2]),
                         captions.reshape(-1))
        step += 1

        optimizer.zero_grad()
        # Plain backward(): passing the loss as the `gradient` argument
        # would scale every gradient by the loss value.
        loss.backward()
        total_loss += loss.item()
        optimizer.step()
        print(total_loss)

        # Save after the update so the checkpoint includes this epoch.
        if save_model:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "step": step
            }
            save_checkpoint(checkpoint)
def train():
    """Train the CNNtoRNN captioning model on the Flickr8k loader.

    Builds the image transform and data loader, creates the model, the
    padding-ignoring cross-entropy loss and an Adam optimizer, then runs
    ``num_epochs`` passes over ``train_loader``. The per-batch loss is
    written to a ``SummaryWriter`` under ``logs/flickr``. When
    ``save_model`` is set, a checkpoint is written at the end of every
    epoch so the most recent training is always persisted.
    """
    transform = transforms.Compose([
        transforms.Resize((356, 356)),
        transforms.RandomCrop((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    train_loader, dataset = get_loader(root_folder='flickr8k/images/',
                                       annotation_file='flickr8k/captions.txt',
                                       transform=transform,
                                       num_workers=2)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    load_model = False
    save_model = True

    # Hyperparameters.
    embed_size = 256
    hidden_size = 256
    vocab_size = len(dataset.vocab)
    num_layers = 1
    learning_rate = 3e-4
    num_epochs = 100

    writer = SummaryWriter('logs/flickr')
    step = 0

    model = CNNtoRNN(embed_size, hidden_size, vocab_size, num_layers).to(device)
    # Positions holding the '<PAD>' index do not contribute to the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=dataset.vocab.stoi['<PAD>'])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if load_model:
        # The returned step is reused as the global_step for logging.
        step = load_checkpoint(torch.load('my_ckpt.pth.tar'), model, optimizer)

    model.train()
    for epoch in range(num_epochs):
        # NOTE(review): `epoch` is passed as the third argument here; confirm
        # this matches the signature of print_examples.
        print_examples(model, device, epoch)

        for idx, (imgs, captions) in enumerate(train_loader):
            imgs = imgs.to(device)
            captions = captions.to(device)

            # NOTE(review): the full captions are fed and the last output
            # step is dropped; presumably the model emits one more step than
            # it receives so the lengths line up with the targets - confirm.
            outputs = model(imgs, captions)[:-1]
            # Flatten the leading dimensions: the criterion takes 2-D logits
            # and 1-D targets.
            loss = criterion(outputs.reshape(-1, outputs.shape[2]),
                             captions.reshape(-1))

            writer.add_scalar('loss', loss.item(), global_step=step)
            step += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Checkpoint after the epoch's updates. Saving at the top of the
        # loop left the final epoch's training unsaved.
        if save_model:
            checkpoint = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'step': step,
            }
            save_checkpoint(checkpoint)
def train():
    """Train the CNNtoRNN captioning model on the Flickr8k loader.

    Builds the image transform and data loader, creates the model, loss and
    Adam optimizer, freezes every Inception parameter except the final
    ``fc`` layer (unless ``train_CNN`` is set), then runs ``num_epochs``
    passes over ``train_loader``. The per-batch loss is written to a
    ``SummaryWriter`` under ``runs/flickr``. When ``save_model`` is set, a
    checkpoint is written at the end of every epoch.
    """
    transform = transforms.Compose(
        [
            transforms.Resize((356, 356)),
            transforms.RandomCrop((299, 299)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
    )
    train_loader, dataset = get_loader(
        root_folder="flickr8k/images",
        annotation_file="flickr8k/captions.txt",
        transform=transform,
        num_workers=2,
    )

    torch.backends.cudnn.benchmark = True
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    load_model = False
    save_model = False
    train_CNN = False

    # Hyperparameters.
    embed_size = 256
    hidden_size = 256
    vocab_size = len(dataset.vocab)
    num_layers = 1
    learning_rate = 3e-4
    num_epochs = 100

    writer = SummaryWriter("runs/flickr")
    step = 0

    model = CNNtoRNN(embed_size, hidden_size, vocab_size, num_layers).to(device)
    # Positions holding the "<PAD>" index do not contribute to the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=dataset.vocab.stoi["<PAD>"])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # The fc layer always trains; every other Inception parameter follows
    # train_CNN.
    for name, param in model.encoderCNN.inception.named_parameters():
        if "fc.weight" in name or "fc.bias" in name:
            param.requires_grad = True
        else:
            param.requires_grad = train_CNN

    if load_model:
        # The returned step is reused as the global_step for logging.
        step = load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)

    model.train()
    for epoch in range(num_epochs):
        print_examples(model, device, dataset)

        for idx, (imgs, captions) in tqdm(
            enumerate(train_loader), total=len(train_loader), leave=False
        ):
            imgs = imgs.to(device)
            captions = captions.to(device)

            # The last caption position is withheld from the model input.
            outputs = model(imgs, captions[:-1])
            # Flatten the leading dimensions: the criterion takes 2-D logits
            # and 1-D targets.
            loss = criterion(
                outputs.reshape(-1, outputs.shape[2]), captions.reshape(-1)
            )

            writer.add_scalar("Training Loss", loss.item(), global_step=step)
            step += 1

            optimizer.zero_grad()
            # Plain backward(): passing the loss as the `gradient` argument
            # would scale every gradient by the loss value.
            loss.backward()
            optimizer.step()

        # Checkpoint after the epoch's updates so the latest training is
        # what gets saved.
        if save_model:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "step": step,
            }
            save_checkpoint(checkpoint)
# NOTE(review): this excerpt starts part-way through a script. `f` is not
# opened in the visible code; presumably this loop belongs inside a
# `with open(...)` block that begins above - confirm before moving it.
for i in valid_losses:
    f.write("%s\n" % i)
# Dump each validation metric list to its own text file, one value per line.
with open("result/valid_prec.txt", "w") as f:
    for i in valid_prec_score:
        f.write("%s\n" % i)
with open("result/valid_recall.txt", "w") as f:
    for i in valid_recall_score:
        f.write("%s\n" % i)
with open("result/valid_f1.txt", "w") as f:
    for i in valid_f1_score:
        f.write("%s\n" % i)
print("Finish training ... ")
# Print a configurable number of examples from the rebatched test set.
print_examples((rebatch(x, SRC_PAD_INDEX, TRG_PAD_INDEX, TRG_UNK_INDEX,
                        len(TRG.vocab), SRC.vocab) for x in test_iter),
               model, num=params['TEST_EXAMPLE_TO_PRINT'],
               src_vocab=SRC.vocab, trg_vocab=TRG.vocab)
########################
print("Start testing ... ")
# run_test returns the scores in the order (f1, precision, recall).
test_f1_score, test_prec_score, test_recall_score = run_test(
    (rebatch(x, SRC_PAD_INDEX, TRG_PAD_INDEX, TRG_UNK_INDEX, len(
        TRG.vocab), SRC.vocab) for x in test_iter), model,
    trg_vocab=TRG.vocab)
print("test precision score: ", test_prec_score)
print("test recall score: ", test_recall_score)
print("test f1 score: ", test_f1_score)
def train():
    """Train the CNNtoRNN captioning model on the Flickr8k loader.

    Builds the image transform and data loader, creates the model, the
    padding-ignoring cross-entropy loss and an Adam optimizer, then runs
    ``num_epochs`` passes over ``train_loader``. The per-batch loss is
    written to a ``SummaryWriter`` under ``runs/flickr``. When
    ``save_model`` is set, a checkpoint is written at the end of every
    epoch.
    """
    transform = transforms.Compose([
        transforms.Resize((356, 356)),
        transforms.RandomCrop((299, 299)),  # the CNN takes 299 x 299 input
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    train_loader, dataset = get_loader(
        root_folder='flickr8k/images',
        annotation_file='flickr8k/captions.txt',
        transform=transform,
        num_workers=2,
    )

    # Model configuration.
    torch.backends.cudnn.benchmark = True
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    load_model = False
    save_model = False

    # Hyperparameters (capacity can be increased here).
    embed_size = 256
    hidden_size = 256
    vocab_size = len(dataset.vocab)
    num_layers = 1
    learning_rate = 3e-4  # was misspelled, leaving `learning_rate` undefined
    num_epochs = 100

    # For TensorBoard.
    writer = SummaryWriter('runs/flickr')
    step = 0

    # Initialise model, loss and optimizer.
    model = CNNtoRNN(embed_size, hidden_size, vocab_size, num_layers).to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=dataset.vocab.stoi["<PAD>"])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if load_model:
        # The returned step is reused as global_step, so the logged loss
        # curve continues where the previous run ended.
        step = load_checkpoint(
            torch.load('my_checkpoint.pth.tar'), model, optimizer
        )

    model.train()
    for epoch in range(num_epochs):
        print_examples(model, device, dataset)

        for idx, (imgs, captions) in enumerate(train_loader):
            imgs = imgs.to(device)
            captions = captions.to(device)

            # The end token is never fed in: the model has to learn to
            # predict it, so the last caption position is withheld.
            outputs = model(imgs, captions[:-1])

            # Per the original author's notes, outputs is
            # (seq_len, N, vocabulary_size) and the target is (seq_len, N).
            # The criterion takes 2-D logits and 1-D targets, so the
            # sequence and batch dimensions are flattened together.
            loss = criterion(
                outputs.reshape(-1, outputs.shape[2]), captions.reshape(-1)
            )

            writer.add_scalar("Training loss", loss.item(), global_step=step)
            step += 1

            optimizer.zero_grad()
            # Plain backward(): passing the loss as the `gradient` argument
            # would scale every gradient by the loss value.
            loss.backward()
            optimizer.step()

        # Checkpoint after the epoch's updates so the latest training is
        # what gets saved.
        if save_model:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "step": step,
            }
            save_checkpoint(checkpoint)
# Build the three-output model from the configured sizes and run init_weight
# over it.
model = make_model(
    SRC.vocab,
    output_class=3,
    embed_size=params['EMBEDDING_DIM'],
    hidden_size=params['HIDDEN_SIZE'],
    num_layers=params['NUM_LAYERS'],
    dropout=params['DROPOUT_PROB'],
)
model.apply(init_weight)

#############################
# Training: train() hands back the loss and accuracy histories.
print("Start training ... ")
train_losses, valid_losses, train_ac, valid_ac = train(
    model,
    num_epochs=params['NUM_EPOCHS'],
    learning_rate=params['LEARNING_RATE'],
)

# Testing: show a few examples, then report the accuracy. Each call gets its
# own generator over test_iter.
print("Start test ... ")
print_examples(
    (rebatch(PAD_INDEX, batch) for batch in test_iter),
    model,
    n=5,
    src_vocab=SRC.vocab,
)
print()
test_accuracy = run_test(
    (rebatch(PAD_INDEX, batch) for batch in test_iter),
    model,
)
print('Test accuracy: ', test_accuracy)
def train(
    root_folder="/mnt/liguanlin/DataSets/ImageCaptionDatasets/flickr8k/images",
    annotation_file="/mnt/liguanlin/DataSets/ImageCaptionDatasets/flickr8k/captions.txt",
):
    """Train the CNNtoRNN captioning model on a Flickr8k-style dataset.

    Builds the image transform and data loader, creates the model, the
    padding-ignoring cross-entropy loss and an Adam optimizer, then runs
    ``num_epochs`` passes over ``train_loader``. The per-batch loss is
    written to a ``SummaryWriter`` under ``runs/flickr``. When
    ``save_model`` is set, a checkpoint is written at the end of every
    epoch so the most recent training is always persisted.

    Args:
        root_folder: Image directory passed to ``get_loader``. Defaults to
            the path that was previously hard-coded.
        annotation_file: Captions file passed to ``get_loader``. Defaults to
            the path that was previously hard-coded.
    """
    transform = transforms.Compose([
        transforms.Resize((356, 356)),
        transforms.RandomCrop((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    train_loader, dataset = get_loader(
        root_folder=root_folder,
        annotation_file=annotation_file,
        transform=transform,
        num_workers=2,
    )

    torch.backends.cudnn.benchmark = True
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    load_model = False
    save_model = True

    # Hyperparameters.
    embed_size = 256
    hidden_size = 256
    vocab_size = len(dataset.vocab)
    num_layers = 1
    learning_rate = 3e-4
    num_epochs = 100

    # For TensorBoard.
    writer = SummaryWriter("runs/flickr")
    step = 0

    # Initialise model, loss and optimizer.
    model = CNNtoRNN(embed_size, hidden_size, vocab_size, num_layers).to(device)
    # Positions holding the "<PAD>" index do not contribute to the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=dataset.vocab.stoi["<PAD>"])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if load_model:
        # The returned step is reused as the global_step for logging.
        step = load_checkpoint(torch.load("my_checkpoint.pth.tar"), model,
                               optimizer)

    model.train()
    for epoch in range(num_epochs):
        print_examples(model, device, dataset)

        for idx, (imgs, captions) in enumerate(train_loader):
            imgs = imgs.to(device)
            captions = captions.to(device)

            # The last caption position is withheld from the model input.
            outputs = model(imgs, captions[:-1])
            # Flatten the leading dimensions: the criterion takes 2-D logits
            # and 1-D targets.
            loss = criterion(outputs.reshape(-1, outputs.shape[2]),
                             captions.reshape(-1))

            # Record the loss.
            writer.add_scalar("Training loss", loss.item(), global_step=step)
            step += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Checkpoint after the epoch's updates. Saving at the top of the
        # loop left the final epoch's training unsaved.
        if save_model:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "step": step,
            }
            save_checkpoint(checkpoint)