def __init__(self, data, max_train_data=2000, pca=True, hidden_dim=512,
             wordvec_dim=256, num_epochs=50, batch_size=50, lr=5e-3,
             lr_decay=0.997):
    """Set up a small LSTM captioning model together with its solver.

    Seeds NumPy's global RNG with 231, loads a COCO subset through
    ``load_coco_data`` and stores the subset, the model and the solver on
    the instance as ``small_data``, ``small_lstm_model`` and
    ``small_lstm_solver``. Training is not started here.

    Args:
        data: Mapping read for ``'word_to_idx'`` and for the second
            dimension of ``'train_features'`` (used as ``input_dim``).
        max_train_data: Passed to ``load_coco_data`` as ``max_train``.
        pca: Passed to ``load_coco_data`` as ``pca_features``.
        hidden_dim: Passed to ``CaptioningRNN`` as ``hidden_dim``.
        wordvec_dim: Passed to ``CaptioningRNN`` as ``wordvec_dim``.
        num_epochs: Passed to ``CaptioningSolver`` as ``num_epochs``.
        batch_size: Passed to ``CaptioningSolver`` as ``batch_size``.
        lr: Stored as ``optim_config['learning_rate']`` for the solver.
        lr_decay: Passed to ``CaptioningSolver`` as ``lr_decay``.
    """
    np.random.seed(231)

    self.small_data = load_coco_data(max_train=max_train_data, pca_features=pca)

    # NOTE(review): the vocabulary and input_dim are taken from `data`, while
    # the solver trains on `self.small_data`. Presumably both come from the
    # same loader with the same `pca` setting -- confirm against callers.
    self.small_lstm_model = CaptioningRNN(
        cell_type='lstm',
        word_to_idx=data['word_to_idx'],
        input_dim=data['train_features'].shape[1],
        hidden_dim=hidden_dim,
        wordvec_dim=wordvec_dim,
        dtype=np.float32,
    )

    # Forward the caller's num_epochs / batch_size. Both were previously
    # hard-coded to 50 here, so the constructor arguments had no effect.
    self.small_lstm_solver = CaptioningSolver(
        self.small_lstm_model,
        self.small_data,
        update_rule='adam',
        num_epochs=num_epochs,
        batch_size=batch_size,
        optim_config={
            'learning_rate': lr,
        },
        lr_decay=lr_decay,
        verbose=True,
        print_every=10,
    )
def overfit_small_data():
    """Overfit a vanilla-RNN captioning model on a tiny COCO subset.

    Loads a subset with ``load_coco_data(max_train=50)``, builds a
    ``CaptioningRNN`` with ``cell_type='rnn'`` and trains it with a
    ``CaptioningSolver`` (see cs231n/captioning_solver.py), then plots the
    solver's loss history. Afterwards, for the 'train' and 'val' splits, it
    draws a minibatch of two examples, samples captions from the model and
    shows each image titled with the sampled and ground-truth captions.

    The accompanying assignment text expects a final loss of less than 0.1.

    Reads ``data`` (for ``'word_to_idx'``, ``'idx_to_word'`` and the feature
    width of ``'train_features'``) from the enclosing scope; it is not a
    parameter of this function.
    """
    np.random.seed(231)

    # Cap the training set at 50 examples so the model can memorise it.
    small_data = load_coco_data(max_train=50)

    small_rnn_model = CaptioningRNN(
        cell_type='rnn',
        word_to_idx=data['word_to_idx'],
        input_dim=data['train_features'].shape[1],
        hidden_dim=512,
        wordvec_dim=256,
    )

    small_rnn_solver = CaptioningSolver(
        small_rnn_model,
        small_data,
        update_rule='adam',
        num_epochs=50,
        batch_size=25,
        optim_config={
            'learning_rate': 5e-3,
        },
        lr_decay=0.95,
        verbose=True,
        print_every=10,
    )
    small_rnn_solver.train()

    # Plot the training losses
    plt.plot(small_rnn_solver.loss_history)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    plt.show()

    # Show sampled vs. ground-truth captions for two examples per split.
    for split in ['train', 'val']:
        gt_captions, features, urls = sample_coco_minibatch(
            small_data, split=split, batch_size=2)
        gt_captions = decode_captions(gt_captions, data['idx_to_word'])

        sample_captions = small_rnn_model.sample(features)
        sample_captions = decode_captions(sample_captions, data['idx_to_word'])

        for gt_caption, sample_caption, url in zip(gt_captions, sample_captions, urls):
            plt.imshow(image_from_url(url))
            plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
            plt.axis('off')
            plt.show()
def overfit_lstm_captioning_model():
    """Overfit an LSTM captioning model on a tiny COCO subset and show samples.

    Loads a subset with ``load_coco_data(max_train=50)``, trains a
    ``CaptioningRNN`` with ``cell_type='lstm'`` through ``CaptioningSolver``,
    plots the loss history, and then displays two sampled captions next to
    their ground truth for each of the 'train' and 'val' splits.

    You should see a final loss less than 0.5.

    Reads ``data`` from the enclosing scope; it is not a parameter.
    """
    np.random.seed(231)

    tiny_data = load_coco_data(max_train=50)

    lstm_model = CaptioningRNN(
        cell_type='lstm',
        word_to_idx=data['word_to_idx'],
        input_dim=data['train_features'].shape[1],
        hidden_dim=512,
        wordvec_dim=256,
        dtype=np.float32,
    )

    # Solver settings are collected first and splatted into the constructor.
    solver_options = {
        'update_rule': 'adam',
        'num_epochs': 50,
        'batch_size': 25,
        'optim_config': {
            'learning_rate': 5e-3,
        },
        'lr_decay': 0.995,
        'verbose': True,
        'print_every': 10,
    }
    lstm_solver = CaptioningSolver(lstm_model, tiny_data, **solver_options)
    lstm_solver.train()

    # Loss curve over training iterations.
    plt.plot(lstm_solver.loss_history)
    plt.title('Training loss history')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.show()

    for split in ('train', 'val'):
        gt_encoded, features, urls = sample_coco_minibatch(
            tiny_data, split=split, batch_size=2)
        gt_text = decode_captions(gt_encoded, data['idx_to_word'])
        sampled_text = decode_captions(
            lstm_model.sample(features), data['idx_to_word'])

        # One figure per example: image plus sampled and ground-truth captions.
        for truth, guess, url in zip(gt_text, sampled_text, urls):
            plt.imshow(image_from_url(url))
            plt.title('%s\n%s\nGT:%s' % (split, guess, truth))
            plt.axis('off')
            plt.show()
small_data = load_coco_data(max_train=50) small_lstm_model = CaptioningRNN( cell_type='lstm', word_to_idx=data['word_to_idx'], input_dim=data['train_features'].shape[1], hidden_dim=512, wordvec_dim=256, dtype=np.float32, ) small_lstm_solver = CaptioningSolver(small_lstm_model, small_data, update_rule='adam', num_epochs=50, batch_size=25, optim_config={ 'learning_rate': 5e-3, }, lr_decay=0.995, verbose=True, print_every=10, ) small_lstm_solver.train() # Plot the training losses plt.plot(small_lstm_solver.loss_history) plt.xlabel('Iteration') plt.ylabel('Loss') plt.title('Training loss history') plt.show() for split in ['train', 'val']:
# Single-epoch training run of a vanilla-RNN captioning model on a COCO
# subset capped at max_train=50. `data` is defined outside this excerpt.
small_data = load_coco_data(max_train=50)

small_rnn_model = CaptioningRNN(
    cell_type='rnn',
    word_to_idx=data['word_to_idx'],
    input_dim=data['train_features'].shape[1],
    wordvec_dim=256,
    hidden_dim=512,
)

small_rnn_solver = CaptioningSolver(
    small_rnn_model,
    small_data,
    update_rule='adam',
    optim_config={
        'learning_rate': 5e-3,
    },
    lr_decay=0.95,
    num_epochs=1,
    batch_size=25,
    print_every=10,
    verbose=True,
)
small_rnn_solver.train()

# Plot the training losses
plt.plot(small_rnn_solver.loss_history)
plt.title('Training loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.show()
# Overfit an LSTM captioning model on a COCO subset capped at max_train=50.
# `data` is defined outside this excerpt.
small_data = load_coco_data(max_train=50)

small_lstm_model = CaptioningRNN(
    cell_type='lstm',
    word_to_idx=data['word_to_idx'],
    input_dim=data['train_features'].shape[1],
    dtype=np.float32,
    wordvec_dim=256,
    hidden_dim=512,
)

small_lstm_solver = CaptioningSolver(
    small_lstm_model,
    small_data,
    update_rule='adam',
    optim_config={
        'learning_rate': 5e-3,
    },
    lr_decay=0.995,
    num_epochs=50,
    batch_size=25,
    print_every=10,
    verbose=True,
)
small_lstm_solver.train()

# Plot the training losses
plt.plot(small_lstm_solver.loss_history)
plt.title('Training loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.show()

# generate caption