def train():
    """Train the CNN-to-RNN captioning model on the data under ``./flickr8k``.

    Builds the image transform and the data loader, creates the model, the
    loss and an Adam optimizer, optionally restores a checkpoint, and then
    runs the epoch loop. Every ``wanna_print`` batches the current loss is
    printed and a caption is generated and shown for one image drawn from
    the training loader.

    Takes no arguments and returns nothing; all configuration is hard-coded
    in the body.
    """
    # Per-channel normalisation constants (presumably the ImageNet
    # statistics -- confirm against the encoder that CNNtoRNN uses).
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    data_location = './flickr8k'
    train_loader, dataset = get_loader(
        root_folder=data_location + "/Images",
        annotation_file=data_location + "/captions.txt",
        transform=transform,
        num_workers=4,
    )

    torch.backends.cudnn.benchmark = True  # Get some boost probably

    # Training is pinned to the CPU. To pick a GPU when one is available use
    # torch.device("cuda" if torch.cuda.is_available() else "cpu") instead.
    device = 'cpu'
    load_model = False
    save_model = False
    train_CNN = False  # NOTE(review): not read anywhere in this function.

    # Hyperparameters
    embed_size = 256
    hidden_size = 256
    vocab_size = len(dataset.vocab)
    num_layers = 2
    learning_rate = 3e-4
    num_epochs = 20

    step = 0

    # init model, loss
    model = CNNtoRNN(embed_size, hidden_size, vocab_size, num_layers).to(device)
    # Padding positions are excluded from the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=dataset.vocab.stoi["<PAD>"])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if load_model:
        step = load_checkpoint(
            torch.load("../input/checkpoint2-epoch20/my_checkpoint2.pth.tar",
                       map_location='cpu'),
            model,
            optimizer,
        )

    model.train()
    wanna_print = 100  # run the loss report / caption preview every N batches

    for epoch in range(num_epochs):
        if save_model:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "step": step
            }
            save_checkpoint(checkpoint)

        for idx, (imgs, captions) in enumerate(train_loader):
            imgs = imgs.to(device)
            captions = captions.to(device)

            # Feed every time step but the last (original note: don't pass
            # the <EOS>).
            outputs = model(imgs, captions[:-1])

            # The loss accepts only 2 dimensions, so the logits are flattened
            # to (-1, vocabulary_size) and the targets to (-1): each time
            # step of each caption is treated as its own example.
            print("Outputs shape ", outputs.shape)
            loss = criterion(outputs.reshape(-1, outputs.shape[2]),
                             captions.reshape(-1))
            print("Step", idx, loss.item())
            step += 1

            optimizer.zero_grad()
            # backward() takes the upstream gradient as its argument; passing
            # the loss itself would scale every gradient by the loss value.
            loss.backward()
            optimizer.step()

            if (idx + 1) % wanna_print == 0:
                print("Epoch: {} loss: {:.5f}".format(epoch, loss.item()))

                # Generate a caption for one image as a qualitative check.
                model.eval()
                with torch.no_grad():
                    dataiter = iter(train_loader)
                    img, _ = next(dataiter)
                    print(img[0].shape)
                    caps = model.caption_image(img[0:1].to(device),
                                               vocabulary=dataset.vocab)
                    caption = ' '.join(caps)
                    show_image(img[0], title=caption)
                # Back to training mode for the next batch.
                model.train()
# Caption a single image with a (optionally restored) CNNtoRNN model.
# Relies on names defined earlier in the script: vocab, embed_size,
# hidden_size, device, load_model and transform.
vocab_size = len(vocab)
num_layers = 2
learning_rate = 3e-4
print(len(vocab))

model_path = './weights/my_checkpoint2.pth.tar'

model = CNNtoRNN(embed_size, hidden_size, vocab_size, num_layers).to(device)
# NOTE(review): criterion is not used in this section; the optimizer is only
# passed to load_checkpoint below.
criterion = nn.CrossEntropyLoss(ignore_index=vocab.stoi["<PAD>"])
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

if load_model:
    step = load_checkpoint(torch.load(model_path, map_location='cpu'), model, optimizer)

model.eval()

# Alternative sample: 'flickr8k/Images/54501196_a9ac9d66f2.jpg'
image_path = './test_examples/boat.jpg'
img = PIL.Image.open(image_path).convert("RGB")
img_t = transform(img)

# Add a leading batch dimension and move the input to the same device as the
# model; without the move, inference fails whenever device is not the CPU.
caps = model.caption_image(img_t.unsqueeze(0).to(device), vocab)

# Drop the first and last tokens (presumably the <SOS>/<EOS> markers --
# confirm against caption_image).
caps = caps[1:-1]
caption = ' '.join(caps)
show_image2(img_t, 0, caption)
# Checkpoint location and download source come from the environment.
MODEL_PATH = os.getenv('MODEL_PATH')  # e.g. "my_checkpoint.pth.tar"
MODEL_URL = os.getenv(
    'MODEL_URL'
)  # e.g. "https://vonage-models.s3.amazonaws.com/my_checkpoint.pth.tar"

# Fetch the checkpoint once; later starts reuse the file on disk.
if not path.exists(MODEL_PATH):
    print("downloading model....")
    r = requests.get(MODEL_URL)
    # Stop on HTTP errors so an error page is never stored as the checkpoint
    # (a stored bad file would also suppress every later download attempt).
    r.raise_for_status()
    with open(MODEL_PATH, 'wb') as model_file:
        model_file.write(r.content)

print('done!\nloading up the saved model weights...')

# NOTE(review): torch.load unpickles the file, so MODEL_URL must point at a
# trusted source.
myModel = CNNtoRNN(embed_size, hidden_size, vocab_size, num_layers).to("cpu")
myModel.load_state_dict(
    torch.load(MODEL_PATH, map_location=torch.device('cpu'))['state_dict'])
myModel.eval()

app = Flask(__name__)

# Uploads are stored in an "uploads" directory next to this file.
UPLOAD_FOLDER = os.path.dirname(os.path.abspath(__file__)) + '/uploads/'
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER


def allowed_file(filename):
    """Return True when *filename* ends in one of ALLOWED_EXTENSIONS.

    The extension is the text after the last dot and is compared
    case-insensitively, so ``photo.JPG`` is accepted like ``photo.jpg``.
    Names without a dot are rejected.
    """
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
def train_with_epoch(start_epoch):
    """Resume training from the checkpoint of ``start_epoch``.

    Builds the train and validation data loaders, creates the model, loss
    and Adam optimizer, restores their state with ``load_checkpoint_epoch``
    and validates once. If ``start_epoch`` has already reached
    ``Hyper.total_epochs`` the function stops there; otherwise it trains the
    remaining epochs, saving a per-epoch checkpoint and validating after
    each one while tracking how many epochs passed without the validation
    score improving.

    Args:
        start_epoch: Epoch whose checkpoint is loaded; training continues
            with epoch ``start_epoch + 1``.

    Returns:
        None.
    """
    file_path_cap = os.path.join(Constants.data_folder_ann,
                                 Constants.captions_train_file)
    file_path_inst = os.path.join(Constants.data_folder_ann,
                                  Constants.instances_train_file)
    coco_dataloader_train, coco_data_train = get_dataloader(
        file_path_cap, file_path_inst, "train")

    file_path_cap = os.path.join(Constants.data_folder_ann,
                                 Constants.captions_val_file)
    file_path_inst = os.path.join(Constants.data_folder_ann,
                                  Constants.instances_val_file)
    # Only the loader is needed for validation; the dataset object is unused.
    coco_dataloader_val, _ = get_dataloader(
        file_path_cap, file_path_inst, "val")

    step = 0
    best_bleu4 = 0
    epochs_since_improvement = 0

    # Initialize model, loss, etc.
    model = CNNtoRNN(coco_data_train.vocab)
    model = model.to(Constants.device)
    # Padding positions are excluded from the loss.
    criterion = nn.CrossEntropyLoss(
        ignore_index=coco_data_train.vocab.stoi[Constants.PAD])
    optimizer = optim.Adam(model.parameters(), lr=Hyper.learning_rate)

    # Restore model/optimizer state saved for start_epoch.
    step = load_checkpoint_epoch(model, optimizer, start_epoch)

    model.eval()  # Set model to validation mode
    recent_bleu4 = validate(val_loader=coco_dataloader_val,
                            model=model,
                            criterion=criterion)
    if start_epoch >= Hyper.total_epochs:
        return  # Validated the last epoch

    for i in range(start_epoch, Hyper.total_epochs):
        model.train()  # Set model to training mode
        model.decoderRNN.train()
        model.encoderCNN.train()
        epoch = i + 1
        print(f"Epoch: {epoch}")

        if Constants.save_model:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "step": step,
            }
            save_checkpoint(checkpoint)

        for imgs, captions in tqdm(coco_dataloader_train,
                                   total=len(coco_dataloader_train),
                                   leave=False):
            imgs = imgs.to(Constants.device)
            captions = captions.to(Constants.device)

            # The model is fed every time step but the last one.
            outputs = model(imgs, captions[:-1])

            # Flatten logits to (-1, vocab_size) and targets to (-1) for the
            # cross-entropy loss.
            vocab_size = outputs.shape[2]
            outputs1 = outputs.reshape(-1, vocab_size)
            captions1 = captions.reshape(-1)
            loss = criterion(outputs1, captions1)
            step += 1

            optimizer.zero_grad()
            # backward() takes the upstream gradient as its argument; passing
            # the loss itself would scale every gradient by the loss value.
            loss.backward()
            optimizer.step()

        # Snapshot the state after this epoch's updates. Building it here
        # (rather than reusing the pre-epoch dict) keeps the optimizer state
        # and step current, and works when Constants.save_model is off.
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "step": step,
        }
        save_checkpoint_epoch(checkpoint, epoch)

        # One epoch's validation
        recent_bleu4 = validate(val_loader=coco_dataloader_val,
                                model=model,
                                criterion=criterion)

        # Check if there was an improvement
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0