Example #1
def train(model, num_epochs=10, lr=0.0003, print_every=100):
    """Train a model on IWSLT"""

    if params['USE_CUDA']:
        model.cuda()

    # optionally add label smoothing; see the Annotated Transformer
    criterion = nn.NLLLoss(reduction="sum", ignore_index=PAD_INDEX)
    optim = torch.optim.Adam(model.parameters(), lr=lr)

    dev_perplexities = []

    for epoch in range(num_epochs):

        print("Epoch", epoch)
        model.train()
        train_perplexity = run_epoch(
            (rebatch(PAD_INDEX, b) for b in train_iter),
            model,
            SimpleLossCompute(model.generator, criterion, optim),
            print_every=print_every)

        model.eval()
        with torch.no_grad():
            print_examples((rebatch(PAD_INDEX, x) for x in valid_iter),
                           model,
                           n=3,
                           src_vocab=SRC.vocab,
                           trg_vocab=TRG.vocab)

            dev_perplexity = run_epoch(
                (rebatch(PAD_INDEX, b) for b in valid_iter), model,
                SimpleLossCompute(model.generator, criterion, None))
            print("Validation perplexity: %f" % dev_perplexity)
            dev_perplexities.append(dev_perplexity)

    return dev_perplexities
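
The comment above defers label smoothing to the Annotated Transformer. Below is a minimal sketch of that criterion, assuming model.generator emits log-probabilities (as the NLLLoss here implies); it could replace criterion in SimpleLossCompute, e.g. LabelSmoothing(size=len(TRG.vocab), padding_idx=PAD_INDEX, smoothing=0.1).

import torch
import torch.nn as nn

class LabelSmoothing(nn.Module):
    """KL-divergence loss against a smoothed target distribution."""

    def __init__(self, size, padding_idx, smoothing=0.1):
        super().__init__()
        self.criterion = nn.KLDivLoss(reduction="sum")
        self.padding_idx = padding_idx
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.size = size

    def forward(self, x, target):
        # x: (batch, vocab_size) log-probabilities; target: (batch,) token indices
        true_dist = x.clone().detach()
        # Spread the smoothing mass over all non-target, non-pad entries.
        true_dist.fill_(self.smoothing / (self.size - 2))
        true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
        true_dist[:, self.padding_idx] = 0
        mask = torch.nonzero(target == self.padding_idx)
        if mask.numel() > 0:
            true_dist.index_fill_(0, mask.squeeze(1), 0.0)
        return self.criterion(x, true_dist)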
Example #2
def train():
    transform = transforms.Compose([
        transforms.Resize((240, 240)),
        transforms.RandomCrop(
            (224, 224)),  # note: torchvision's Inception v3 normally expects 299x299 crops
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    train_loader, dataset = get_loader(root_folder="archive/Images",
                                       annotation_file="archive/captions.txt",
                                       transform=transform,
                                       batch_size=128,
                                       num_workers=0)
    # Set some hyperparameters
    torch.backends.cudnn.benchmark = True  # speed up training when input sizes are fixed
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    load_model = False
    save_model = False
    train_CNN = False
    embed_size = 256
    hidden_size = 256
    vocab_size = len(dataset.vocab)
    num_layers = 1
    learning_rate = 3e-4
    num_epochs = 100
    # for tensorboard
    writer = SummaryWriter("runs/flickr")
    step = 0
    # initialize model, loss, etc.
    model = CNNtoRNN(embed_size, hidden_size, vocab_size,
                     num_layers).to(device)

    # Freeze the CNN backbone; only its final fc layer is trained
    # unless train_CNN is set.
    for name, param in model.EncoderCNN.inception.named_parameters():
        if "fc.weight" in name or "fc.bias" in name:
            param.requires_grad = True
        else:
            param.requires_grad = train_CNN

    criterion = nn.CrossEntropyLoss(
        ignore_index=dataset.vocab.stoi["<PAD>"])  # no loss is computed for "<PAD>" tokens
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=learning_rate)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[60, 120, 140])

    if load_model:
        step = load_checkpoint(torch.load("my_checkpoint.pth.tar"), model,
                               optimizer)
    model.train()
    print("Training begins")
    for epoch in range(num_epochs):
        print_examples(model, device, dataset, save_path='result.txt')
        if save_model:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "step": step
            }
            save_checkpoint(checkpoint)
        total_loss = 0
        loop = tqdm(enumerate(train_loader), total=len(train_loader), leave=False)
        for idx, (imgs, captions) in loop:
            imgs = imgs.to(device)
            captions = captions.to(device)

            # The <EOS> token is not fed to the network; the model should learn to predict it.
            outputs = model(imgs, captions[:-1])
            # outputs: (seq_len, batch_size, vocabulary_size), but cross-entropy expects a 2-D tensor
            loss = criterion(outputs.reshape(-1, outputs.shape[2]),
                             captions.reshape(-1))
            step += 1
            optimizer.zero_grad()
            loss.backward()
            total_loss += loss.item()
            optimizer.step()
        scheduler.step()
        print(total_loss)
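
As the comment above notes, the decoder output is (seq_len, batch_size, vocabulary_size) while nn.CrossEntropyLoss wants 2-D logits and 1-D targets. A self-contained sketch of the reshape; all sizes here are illustrative, and <PAD> is assumed to map to index 0.

import torch
import torch.nn as nn

seq_len, batch_size, vocab_size = 20, 128, 5000
outputs = torch.randn(seq_len, batch_size, vocab_size)          # raw logits
captions = torch.randint(0, vocab_size, (seq_len, batch_size))  # token indices

criterion = nn.CrossEntropyLoss(ignore_index=0)  # assumed <PAD> index
# Flatten time and batch: (seq_len*batch_size, vocab_size) vs (seq_len*batch_size,)
loss = criterion(outputs.reshape(-1, vocab_size), captions.reshape(-1))
print(loss.item())  # roughly log(vocab_size) for random logits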
Example #3
def train():
    transform = transforms.Compose([
        transforms.Resize((356, 356)),
        transforms.RandomCrop((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train_loader, dataset = get_loader(root_folder='flickr8k/images/',
                                       annotation_file='flickr8k/captions.txt',
                                       transform=transform,
                                       num_workers=2)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    load_model = False
    save_model = True

    embed_size = 256
    hidden_size = 256
    vocab_size = len(dataset.vocab)
    num_layers = 1
    learning_rate = 3e-4
    num_epochs = 100

    writer = SummaryWriter('logs/flickr')
    step = 0

    model = CNNtoRNN(embed_size, hidden_size, vocab_size,
                     num_layers).to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=dataset.vocab.stoi['<PAD>'])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if load_model:
        step = load_checkpoint(torch.load('my_ckpt.pth.tar'), model, optimizer)

    model.train()

    for epoch in range(num_epochs):
        print_examples(model, device, epoch)
        if save_model:
            checkpoint = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'step': step,
            }
            save_checkpoint(checkpoint)

        for idx, (imgs, captions) in enumerate(train_loader):
            imgs = imgs.to(device)
            captions = captions.to(device)

            outputs = model(imgs, captions[:-1])

            loss = criterion(outputs.reshape(-1, outputs.shape[2]),
                             captions.reshape(-1))

            writer.add_scalar('loss', loss.item(), global_step=step)
            step += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
Example #4
def train():
    transform = transforms.Compose(
        [
            transforms.Resize((356, 356)),
            transforms.RandomCrop((299, 299)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
    )

    train_loader, dataset = get_loader(
        root_folder="flickr8k/images",
        annotation_file="flickr8k/captions.txt",
        transform=transform,
        num_workers=2,
    )

    torch.backends.cudnn.benchmark = True

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    load_model = False
    save_model = False
    train_CNN = False

    embed_size = 256
    hidden_size = 256
    vocab_size = len(dataset.vocab)
    num_layers = 1
    learning_rate = 3e-4
    num_epochs = 100

    writer = SummaryWriter("runs/flickr")
    step = 0

    model = CNNtoRNN(embed_size, hidden_size, vocab_size, num_layers).to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=dataset.vocab.stoi["<PAD>"])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for name, param in model.encoderCNN.inception.named_parameters():
        if "fc.weight" in name or "fc.bias" in name:
            param.requires_grad = True
        else:
            param.requires_grad = train_CNN
    if load_model:
        step = load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)

    model.train()

    for epoch in range(num_epochs):
        print_examples(model, device, dataset)
        if save_model:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "step": step,
            }
            save_checkpoint(checkpoint)

        for idx, (imgs, captions) in tqdm(
            enumerate(train_loader), total=len(train_loader), leave=False
        ):
            imgs = imgs.to(device)
            captions = captions.to(device)

            outputs = model(imgs, captions[:-1])
            loss = criterion(outputs.reshape(-1, outputs.shape[2]),
                             captions.reshape(-1))

            writer.add_scalar("Training Loss", loss.item(), global_step=step)
            step += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
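
Examples #2 and #4 freeze the Inception backbone and leave only its final fc layer trainable. Below is a standalone sketch of that pattern on a bare torchvision Inception v3 (assumes torchvision >= 0.13; the CNNtoRNN wrapper itself is not shown in this listing).

import torchvision.models as models

inception = models.inception_v3(weights="DEFAULT")  # pretrained backbone
for name, param in inception.named_parameters():
    # Substring match, as in the examples above; note it also unfreezes
    # the auxiliary classifier's head ("AuxLogits.fc.*").
    param.requires_grad = "fc.weight" in name or "fc.bias" in name

# Handing the optimizer only trainable tensors, as Example #2 does with
# filter(...), avoids tracking state for frozen weights.
trainable = [p for p in inception.parameters() if p.requires_grad]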
Example #5
        for i in valid_losses:
            f.write("%s\n" % i)
    with open("result/valid_prec.txt", "w") as f:
        for i in valid_prec_score:
            f.write("%s\n" % i)
    with open("result/valid_recall.txt", "w") as f:
        for i in valid_recall_score:
            f.write("%s\n" % i)
    with open("result/valid_f1.txt", "w") as f:
        for i in valid_f1_score:
            f.write("%s\n" % i)

    print("Finish training ... ")
    print_examples((rebatch(x, SRC_PAD_INDEX, TRG_PAD_INDEX, TRG_UNK_INDEX,
                            len(TRG.vocab), SRC.vocab) for x in test_iter),
                   model,
                   num=params['TEST_EXAMPLE_TO_PRINT'],
                   src_vocab=SRC.vocab,
                   trg_vocab=TRG.vocab)

    ########################
    print("Start testing ... ")
    test_f1_score, test_prec_score, test_recall_score = run_test(
        (rebatch(x, SRC_PAD_INDEX, TRG_PAD_INDEX, TRG_UNK_INDEX, len(
            TRG.vocab), SRC.vocab) for x in test_iter),
        model,
        trg_vocab=TRG.vocab)

    print("test precision score: ", test_prec_score)
    print("test recall score: ", test_recall_score)
    print("test f1 score: ", test_f1_score)
Example #6
def train():
    transform = transforms.Compose([
        transforms.Resize((356, 356)),
        transforms.RandomCrop((299, 299)),  # CNN takes input 299 x 299
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    train_loader, dataset = get_loader(
        root_folder='flickr8k/images',
        annotation_file='flickr8k/captions.txt',
        transform=transform,
        num_workers=2,
    )

    # model configuration
    torch.backends.cudnn.benchmark = True
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    load_model = False
    save_model = False
    train_CNN = False

    # Hyperparameters; embed/hidden sizes could be increased for more capacity
    embed_size = 256
    hidden_size = 256
    vocab_size = len(dataset.vocab)
    num_layers = 1
    learning_rate = 3e-4
    num_epochs = 100

    # for tensorboard
    writer = SummaryWriter('runs/flickr')
    step = 0

    # initialize model, loss etc
    model = CNNtoRNN(embed_size, hidden_size, vocab_size,
                     num_layers).to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=dataset.vocab.stoi["<PAD>"])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if load_model:
        step = load_checkpoint(
            torch.load('my_checkpoint.pth.tar'), model, optimizer
        )  # step is returned so the logged loss curve resumes where it left off

    model.train()

    for epoch in range(num_epochs):
        print_examples(model, device, dataset)
        if save_model:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "step": step,
            }
            save_checkpoint(checkpoint)

        for idx, (imgs, captions) in enumerate(train_loader):
            imgs = imgs.to(device)
            captions = captions.to(device)

            outputs = model(
                imgs, captions[:-1]
            )  # the end token is not fed in; the model must learn to predict it
            loss = criterion(
                outputs.reshape(-1, outputs.shape[2]), captions.reshape(-1)
            )
            # outputs -> (seq_len, N, vocabulary_size), target -> (seq_len, N);
            # both are flattened because the criterion expects 2-D logits and 1-D targets

            writer.add_scalar("Training loss", loss.item(), global_step=step)
            step += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
Example #7
	model = make_model(SRC.vocab, output_class=3, embed_size=params['EMBEDDING_DIM'], 
		hidden_size=params['HIDDEN_SIZE'], num_layers=params['NUM_LAYERS'], 
		dropout=params['DROPOUT_PROB'])

	model.apply(init_weight)
	#############################

	print("Start training ... ")
	# print(SRC.vocab.itos[1], SRC.vocab.itos[2], SRC.vocab.itos[3], SRC.vocab.itos[4])

	train_losses, valid_losses, train_ac, valid_ac = train(model, num_epochs=params['NUM_EPOCHS'], learning_rate=params['LEARNING_RATE'])

	print("Start test ... ")

	print_examples((rebatch(PAD_INDEX, b) for b in test_iter), model, n=5, src_vocab=SRC.vocab)
	# sos_idx=SOS_INDEX, eos_idx=EOS_INDEX, 
	print()

	test_accuracy = run_test((rebatch(PAD_INDEX, b) for b in test_iter), model)

	print('Test accuracy: ', test_accuracy)
Example #8
def train():
    transform = transforms.Compose([
        transforms.Resize((356, 356)),
        transforms.RandomCrop((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    train_loader, dataset = get_loader(
        root_folder="/mnt/liguanlin/DataSets/ImageCaptionDatasets/flickr8k/images",
        annotation_file="/mnt/liguanlin/DataSets/ImageCaptionDatasets/flickr8k/captions.txt",
        transform=transform,
        num_workers=2,
    )

    torch.backends.cudnn.benchmark = True
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    load_model = False
    save_model = True

    # Hyperparameters
    embed_size = 256
    hidden_size = 256
    vocab_size = len(dataset.vocab)
    num_layers = 1
    learning_rate = 3e-4
    num_epochs = 100

    # for tensorboard
    writer = SummaryWriter("runs/flickr")
    step = 0

    # initialize model, loss, etc.
    model = CNNtoRNN(embed_size, hidden_size, vocab_size,
                     num_layers).to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=dataset.vocab.stoi["<PAD>"])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if load_model:
        step = load_checkpoint(torch.load("my_checkpoint.pth.tar"), model,
                               optimizer)

    model.train()

    for epoch in range(num_epochs):
        print_examples(model, device, dataset)
        if save_model:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "step": step,
            }
            save_checkpoint(checkpoint)

        for idx, (imgs, captions) in enumerate(train_loader):
            imgs = imgs.to(device)
            captions = captions.to(device)

            outputs = model(imgs, captions[:-1])
            loss = criterion(outputs.reshape(-1, outputs.shape[2]),
                             captions.reshape(-1))

            # record loss
            writer.add_scalar("Training loss", loss.item(), global_step=step)
            step += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
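
Every example above calls save_checkpoint and load_checkpoint without showing them. Below is a minimal sketch consistent with how they are called; the real helpers in these repositories may differ.

import torch

def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    # state is the dict built in the training loops above
    torch.save(state, filename)

def load_checkpoint(checkpoint, model, optimizer):
    # checkpoint is the dict returned by torch.load(...)
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
    return checkpoint["step"]  # resume the tensorboard step counter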