def train():
    """Train the CNN-to-RNN captioning model on the Flickr8k data loader.

    Builds the image transform and data loader, creates the model, loss and
    optimizer, optionally restores a checkpoint, and then runs
    ``num_epochs`` passes over the training loader. Every ``wanna_print``
    batches a caption is generated for one training image and displayed via
    ``show_image`` as a qualitative progress check.

    All configuration (paths, hyperparameters, flags) is hard-coded below.
    The function returns nothing.
    """
    # Per-channel normalization constants applied after ToTensor().
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    data_location = './flickr8k'
    train_loader, dataset = get_loader(
        root_folder=data_location + "/Images",
        annotation_file=data_location + "/captions.txt",
        transform=transform,
        num_workers=4,
    )

    torch.backends.cudnn.benchmark = True
    # Training is pinned to the CPU here; use
    # torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # instead to run on a GPU when one is available.
    device = 'cpu'
    load_model = False
    save_model = False

    # Hyperparameters
    embed_size = 256
    hidden_size = 256
    vocab_size = len(dataset.vocab)
    num_layers = 2
    learning_rate = 3e-4
    num_epochs = 20

    step = 0

    # Model, loss and optimizer. Padding positions are excluded from the loss.
    model = CNNtoRNN(embed_size, hidden_size, vocab_size,
                     num_layers).to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=dataset.vocab.stoi["<PAD>"])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if load_model:
        step = load_checkpoint(
            torch.load("../input/checkpoint2-epoch20/my_checkpoint2.pth.tar",
                       map_location='cpu'), model, optimizer)

    model.train()
    wanna_print = 100  # generate a sample caption every this many batches

    for epoch in range(num_epochs):

        # The checkpoint is written at the start of each epoch.
        if save_model:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "step": step,
            }
            save_checkpoint(checkpoint)

        for idx, (imgs, captions) in enumerate(train_loader):
            imgs = imgs.to(device)
            captions = captions.to(device)

            # The last caption position is not fed to the model
            # (original note: "Don't pass the <EOS>").
            outputs = model(imgs, captions[:-1])

            print("Outputs shape ", outputs.shape)

            # CrossEntropyLoss wants (num_examples, num_classes) scores and
            # (num_examples,) targets, so the leading two dimensions of the
            # output are flattened and every position is treated as its own
            # example.
            loss = criterion(outputs.reshape(-1, outputs.shape[2]),
                             captions.reshape(-1))

            print("Step", idx, loss.item())

            step += 1

            optimizer.zero_grad()
            # No argument: passing the loss itself as the upstream gradient
            # would scale every gradient by the current loss value.
            loss.backward()
            optimizer.step()

            if (idx + 1) % wanna_print == 0:
                print("Epoch: {} loss: {:.5f}".format(epoch, loss.item()))

                # Caption the first image of a freshly drawn batch.
                model.eval()
                with torch.no_grad():
                    img, _ = next(iter(train_loader))
                    print(img[0].shape)
                    caps = model.caption_image(img[0:1].to(device),
                                               vocabulary=dataset.vocab)
                    caption = ' '.join(caps)
                    show_image(img[0], title=caption)
                model.train()
# Inference script: rebuild the captioning model, optionally restore a
# checkpoint, and caption one test image. It relies on `vocab`, `embed_size`,
# `hidden_size`, `device`, `load_model` and `transform` being defined earlier.
vocab_size = len(vocab)
num_layers = 2
learning_rate = 3e-4
print(len(vocab))

model_path = './weights/my_checkpoint2.pth.tar'

model = CNNtoRNN(embed_size, hidden_size, vocab_size, num_layers).to(device)

# The loss and optimizer are only needed so load_checkpoint can restore the
# optimizer state alongside the model weights.
criterion = nn.CrossEntropyLoss(ignore_index=vocab.stoi["<PAD>"])
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
if load_model:
    step = load_checkpoint(torch.load(model_path, map_location='cpu'), model,
                           optimizer)

model.eval()

# Alternative sample: 'flickr8k/Images/54501196_a9ac9d66f2.jpg'
image_path = './test_examples/boat.jpg'

img = PIL.Image.open(image_path).convert("RGB")

img_t = transform(img)

# The batch must live on the same device as the model; gradients are not
# needed for inference.
with torch.no_grad():
    caps = model.caption_image(img_t.unsqueeze(0).to(device), vocab)

# Drop the first and last generated tokens
# (presumably the start/end markers - TODO confirm against caption_image).
caps = caps[1:-1]

caption = ' '.join(caps)

show_image2(img_t, 0, caption)
# Example #3
# 0
# Service start-up: make sure the checkpoint is on disk, load it into the
# captioning model on the CPU, and configure the Flask application.
MODEL_PATH = os.getenv('MODEL_PATH')  # e.g. "my_checkpoint.pth.tar"
# e.g. "https://vonage-models.s3.amazonaws.com/my_checkpoint.pth.tar"
MODEL_URL = os.getenv('MODEL_URL')

if not path.exists(MODEL_PATH):
    print("downloading model....")
    r = requests.get(MODEL_URL)
    # Fail loudly on an HTTP error; otherwise the error page would be saved as
    # the checkpoint and, because the file then exists, never re-downloaded.
    r.raise_for_status()
    with open(MODEL_PATH, 'wb') as model_file:
        model_file.write(r.content)

print('done!\nloading up the saved model weights...')

myModel = CNNtoRNN(embed_size, hidden_size, vocab_size, num_layers).to("cpu")
myModel.load_state_dict(
    torch.load(MODEL_PATH, map_location=torch.device('cpu'))['state_dict'])
myModel.eval()

app = Flask(__name__)

# Uploads are stored in an "uploads" directory next to this file.
UPLOAD_FOLDER = os.path.dirname(os.path.abspath(__file__)) + '/uploads/'

ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

def allowed_file(filename):
    """Return True if *filename* has an extension in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot. It is compared
    case-insensitively, so ``photo.JPG`` is accepted just like ``photo.jpg``.
    Names without a dot, or with nothing after the last dot, are rejected.
    """
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS

# Example #4
# 0
def _snapshot_checkpoint(model, optimizer, step):
    """Bundle the current model/optimizer state and step count into a dict."""
    return {
        "state_dict": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "step": step,
    }


def train_with_epoch(start_epoch):
    """Resume training from the checkpoint of *start_epoch*.

    Builds the train and validation data loaders, creates the model, loss and
    optimizer, restores their state with ``load_checkpoint_epoch`` and
    validates once. If *start_epoch* has already reached
    ``Hyper.total_epochs`` the function returns after that validation.
    Otherwise it trains the remaining epochs; after each one it saves a
    per-epoch checkpoint, validates, and tracks how many epochs have passed
    since the validation score last improved.

    Args:
        start_epoch: Epoch whose checkpoint is loaded; training continues
            with epoch ``start_epoch + 1``.
    """
    file_path_cap = os.path.join(Constants.data_folder_ann,
                                 Constants.captions_train_file)
    file_path_inst = os.path.join(Constants.data_folder_ann,
                                  Constants.instances_train_file)
    coco_dataloader_train, coco_data_train = get_dataloader(
        file_path_cap, file_path_inst, "train")

    file_path_cap = os.path.join(Constants.data_folder_ann,
                                 Constants.captions_val_file)
    file_path_inst = os.path.join(Constants.data_folder_ann,
                                  Constants.instances_val_file)
    coco_dataloader_val, coco_data_val = get_dataloader(
        file_path_cap, file_path_inst, "val")

    best_bleu4 = 0
    epochs_since_improvement = 0

    # Model, loss and optimizer. Padding positions are excluded from the loss.
    model = CNNtoRNN(coco_data_train.vocab)
    model = model.to(Constants.device)
    criterion = nn.CrossEntropyLoss(
        ignore_index=coco_data_train.vocab.stoi[Constants.PAD])
    optimizer = optim.Adam(model.parameters(), lr=Hyper.learning_rate)

    # Restore the model/optimizer state saved for start_epoch.
    step = load_checkpoint_epoch(model, optimizer, start_epoch)

    model.eval()  # Set model to validation mode
    recent_bleu4 = validate(val_loader=coco_dataloader_val,
                            model=model,
                            criterion=criterion)

    if start_epoch >= Hyper.total_epochs:
        return  # Validated the last epoch

    for i in range(start_epoch, Hyper.total_epochs):
        model.train()  # Set model to training mode
        model.decoderRNN.train()
        model.encoderCNN.train()
        epoch = i + 1
        print(f"Epoch: {epoch}")
        if Constants.save_model:
            save_checkpoint(_snapshot_checkpoint(model, optimizer, step))

        for imgs, captions in tqdm(coco_dataloader_train,
                                   total=len(coco_dataloader_train),
                                   leave=False):
            imgs = imgs.to(Constants.device)
            captions = captions.to(Constants.device)
            # The last caption position is not fed to the model.
            outputs = model(imgs, captions[:-1])
            # Flatten so every position is scored as its own example.
            vocab_size = outputs.shape[2]
            loss = criterion(outputs.reshape(-1, vocab_size),
                             captions.reshape(-1))
            optimizer.zero_grad()
            # No argument: passing the loss itself as the upstream gradient
            # would scale every gradient by the current loss value.
            loss.backward()
            optimizer.step()
            step += 1

        # Snapshot after the epoch's updates. Building it here also avoids
        # depending on a dict that only exists when Constants.save_model is
        # enabled.
        save_checkpoint_epoch(_snapshot_checkpoint(model, optimizer, step),
                              epoch)
        # One epoch's validation, run in evaluation mode like the initial one;
        # the next iteration switches back to training mode.
        model.eval()
        recent_bleu4 = validate(val_loader=coco_dataloader_val,
                                model=model,
                                criterion=criterion)
        # Check if there was an improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0