Code Example #1
class MyCaptioningModel(object):
    
    def __init__(self, data, max_train_data=2000, pca=True, hidden_dim=512,
                 wordvec_dim=256, num_epochs=50, batch_size=50, lr=5e-3, lr_decay=0.997):
        
        np.random.seed(231)

        self.small_data = load_coco_data(max_train=max_train_data, pca_features=pca)

        self.small_lstm_model = CaptioningRNN(
                  cell_type='lstm',
                  word_to_idx=data['word_to_idx'],
                  input_dim=data['train_features'].shape[1],
                  hidden_dim=hidden_dim,
                  wordvec_dim=wordvec_dim,
                  dtype=np.float32,
                )
        
        self.small_lstm_solver = CaptioningSolver(self.small_lstm_model, self.small_data,
                   update_rule='adam',
                   num_epochs=num_epochs,
                   batch_size=batch_size,
                   optim_config={
                     'learning_rate': lr,
                   },
                   lr_decay=lr_decay,
                   verbose=True, print_every=10,
                 )
        
    def train(self):
        self.small_lstm_solver.train()
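
For orientation, a hypothetical usage sketch of this wrapper class (it assumes load_coco_data and the cs231n imports are in scope, as in the other examples; the argument values are illustrative, not prescribed):

# Hypothetical usage; `data` is the full COCO feature dict containing
# 'word_to_idx' and 'train_features', as used throughout these examples.
data = load_coco_data(pca_features=True)
model = MyCaptioningModel(data, num_epochs=50, batch_size=50, lr=5e-3)
model.train()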
Code Example #2
def overfit_small_data():
    """
    Similar to the Solver class that we used to train image classification models in the
    previous assignment, in this assignment we use a CaptioningSolver class to train
    image captioning models. Open the file cs231n/captioning_solver.py and read through
    the CaptioningSolver class; it should look very familiar.

    Once you have familiarized yourself with the API, run the following to make sure your
    model overfits a small sample of 50 training examples. You should see a final loss
    of less than 0.1.
    """
    np.random.seed(231)

    small_data = load_coco_data(max_train=50)

    small_rnn_model = CaptioningRNN(
        cell_type='rnn',
        word_to_idx=data['word_to_idx'],
        input_dim=data['train_features'].shape[1],
        hidden_dim=512,
        wordvec_dim=256,
    )
    small_rnn_solver = CaptioningSolver(
        small_rnn_model,
        small_data,
        update_rule='adam',
        num_epochs=50,
        batch_size=25,
        optim_config={
            'learning_rate': 5e-3,
        },
        lr_decay=0.95,
        verbose=True,
        print_every=10,
    )
    small_rnn_solver.train()

    # Plot the training losses
    plt.plot(small_rnn_solver.loss_history)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    plt.show()

    for split in ['train', 'val']:
        gt_captions, features, urls = sample_coco_minibatch(small_data,
                                                            split=split,
                                                            batch_size=2)
        gt_captions = decode_captions(gt_captions, data['idx_to_word'])

        sample_captions = small_rnn_model.sample(features)
        sample_captions = decode_captions(sample_captions, data['idx_to_word'])

        for gt_caption, sample_caption, url in zip(gt_captions,
                                                   sample_captions, urls):
            plt.imshow(image_from_url(url))
            plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
            plt.axis('off')
            plt.show()
Code Example #3
def overfit_lstm_captioning_model():
    """You should see a final loss less than 0.5."""
    np.random.seed(231)

    small_data = load_coco_data(max_train=50)

    small_lstm_model = CaptioningRNN(
        cell_type='lstm',
        word_to_idx=data['word_to_idx'],
        input_dim=data['train_features'].shape[1],
        hidden_dim=512,
        wordvec_dim=256,
        dtype=np.float32,
    )

    small_lstm_solver = CaptioningSolver(
        small_lstm_model,
        small_data,
        update_rule='adam',
        num_epochs=50,
        batch_size=25,
        optim_config={
            'learning_rate': 5e-3,
        },
        lr_decay=0.995,
        verbose=True,
        print_every=10,
    )

    small_lstm_solver.train()

    # Plot the training losses
    plt.plot(small_lstm_solver.loss_history)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    plt.show()

    for split in ['train', 'val']:
        minibatch = sample_coco_minibatch(small_data,
                                          split=split,
                                          batch_size=2)
        gt_captions, features, urls = minibatch
        gt_captions = decode_captions(gt_captions, data['idx_to_word'])

        sample_captions = small_lstm_model.sample(features)
        sample_captions = decode_captions(sample_captions, data['idx_to_word'])

        for gt_caption, sample_caption, url in zip(gt_captions,
                                                   sample_captions, urls):
            plt.imshow(image_from_url(url))
            plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
            plt.axis('off')
            plt.show()
Code Example #4
small_lstm_model = CaptioningRNN(
          cell_type='lstm',
          word_to_idx=data['word_to_idx'],
          input_dim=data['train_features'].shape[1],
          hidden_dim=512,
          wordvec_dim=256,
          dtype=np.float32,
        )

small_lstm_solver = CaptioningSolver(small_lstm_model, small_data,
           update_rule='adam',
           num_epochs=50,
           batch_size=25,
           optim_config={
             'learning_rate': 5e-3,
           },
           lr_decay=0.995,
           verbose=True, print_every=10,
         )

small_lstm_solver.train()

# Plot the training losses
plt.plot(small_lstm_solver.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training loss history')
plt.show()

for split in ['train', 'val']:
  minibatch = sample_coco_minibatch(small_data, split=split, batch_size=2)
  gt_captions, features, urls = minibatch
  gt_captions = decode_captions(gt_captions, data['idx_to_word'])

  sample_captions = small_lstm_model.sample(features)
  sample_captions = decode_captions(sample_captions, data['idx_to_word'])
Code Example #5
small_rnn_model = CaptioningRNN(
          cell_type='rnn',
          word_to_idx=data['word_to_idx'],
          input_dim=data['train_features'].shape[1],
          hidden_dim=512,
          wordvec_dim=256,
        )

small_rnn_solver = CaptioningSolver(small_rnn_model, small_data,
           update_rule='adam',
           num_epochs=50,
           batch_size=25,
           optim_config={
             'learning_rate': 5e-3,
           },
           lr_decay=0.95,
           verbose=True, print_every=10,
         )

small_rnn_solver.train()

# Plot the training losses
plt.plot(small_rnn_solver.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training loss history')
plt.show()


# # Test-time sampling
# Unlike classification models, image captioning models behave very differently at training time and at test time. At training time, we have access to the ground-truth caption, so we feed ground-truth words as input to the RNN at each timestep. At test time, we sample from the distribution over the vocabulary at each timestep, and feed the sample as input to the RNN at the next timestep.
# 
# In the file `cs231n/classifiers/rnn.py`, implement the `sample` method for test-time sampling. After doing so, run the following to sample from your overfitted model on both training and validation data. The samples on training data should be very good; the samples on validation data probably won't make sense.

# In[ ]:
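
For reference, here is a minimal sketch of what a greedy test-time sample implementation can look like for the vanilla RNN case. It assumes the helper rnn_step_forward from cs231n/rnn_layers.py and the attribute names of the assignment's CaptioningRNN scaffold (self._start, self._null, self.params); it takes the highest-scoring word at each timestep rather than drawing a random sample, so treat it as an illustration under those assumptions, not the reference solution.

def sample(self, features, max_length=30):
    # Greedy decoding sketch for cell_type == 'rnn'.
    N = features.shape[0]
    captions = self._null * np.ones((N, max_length), dtype=np.int32)

    W_proj, b_proj = self.params['W_proj'], self.params['b_proj']
    W_embed = self.params['W_embed']
    Wx, Wh, b = self.params['Wx'], self.params['Wh'], self.params['b']
    W_vocab, b_vocab = self.params['W_vocab'], self.params['b_vocab']

    # Initial hidden state comes from the projected image features.
    h = features.dot(W_proj) + b_proj
    # The first input word is the <START> token for every example.
    words = self._start * np.ones(N, dtype=np.int32)

    for t in range(max_length):
        x = W_embed[words]                         # embed previous words, shape (N, W)
        h, _ = rnn_step_forward(x, h, Wx, Wh, b)   # one recurrent step
        scores = h.dot(W_vocab) + b_vocab          # vocabulary scores, shape (N, V)
        words = scores.argmax(axis=1)              # greedy: most likely next word
        captions[:, t] = words

    return captions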
Code Example #6
small_rnn_model = CaptioningRNN(
          cell_type='rnn',
          word_to_idx=data['word_to_idx'],
          input_dim=data['train_features'].shape[1],
          hidden_dim=512,
          wordvec_dim=256,
        )

small_rnn_solver = CaptioningSolver(small_rnn_model, small_data,
           update_rule='adam',
           num_epochs=1,
           batch_size=25,
           optim_config={
             'learning_rate': 5e-3,
           },
           lr_decay=0.95,
           verbose=True, print_every=10,
         )

small_rnn_solver.train()

# Plot the training losses
plt.plot(small_rnn_solver.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training loss history')
plt.show()


# # Test-time sampling
# Unlike classification models, image captioning models behave very differently at training
# time and at test time. At training time, we have access to the ground-truth caption, so we
# feed ground-truth words as input to the RNN at each timestep. At test time, we sample from
# the distribution over the vocabulary at each timestep, and feed the sample as input to the
# RNN at the next timestep.
Code Example #7
    wordvec_dim=256,
    dtype=np.float32,
)

good_solver = CaptioningSolver(good_model, small_data2,
                               update_rule='adam',
                               num_epochs=25,
                               batch_size=25,
                               optim_config={
                                   'learning_rate': 5e-3,
                               },
                               lr_decay=0.995,
                               verbose=True, print_every=50,
                               )

good_solver.train()

# Plot the training losses
plt.plot(good_solver.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training loss history')
plt.show()

# Notify when finished
from IPython.display import Audio

sound_file = './sound/finish.mp3'
Audio(url=sound_file, autoplay=True)

for split in ['train', 'val']:
Code Example #8
File: exp_lstm.py Project: HrWangChengdu/CS231n
small_lstm_model = CaptioningRNN(
          cell_type='lstm',
          word_to_idx=data['word_to_idx'],
          input_dim=data['train_features'].shape[1],
          hidden_dim=512,
          wordvec_dim=256,
          dtype=np.float32,
        )

small_lstm_solver = CaptioningSolver(small_lstm_model, small_data,
           update_rule='adam',
           num_epochs=50,
           batch_size=25,
           optim_config={
             'learning_rate': 5e-3,
           },
           lr_decay=0.995,
           verbose=True, print_every=10,
         )

small_lstm_solver.train()

# Plot the training losses
plt.plot(small_lstm_solver.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training loss history')
plt.show()

# generate caption
for split in ['train', 'val']:
  minibatch = sample_coco_minibatch(small_data, split=split, batch_size=2)
  gt_captions, features, urls = minibatch
  gt_captions = decode_captions(gt_captions, data['idx_to_word'])
  sample_captions = small_lstm_model.sample(features)
  sample_captions = decode_captions(sample_captions, data['idx_to_word'])
Code Example #9
lstm_solver = CaptioningSolver(
    lstm_model,
    sdata,
    update_rule='adam',
    num_epochs=10,
    batch_size=25,
    optim_config={
        'learning_rate': 5e-3,
    },
    lr_decay=0.995,
    verbose=True,
    print_every=10,
)

lstm_solver.train()

# Plot the training losses
plt.plot(lstm_solver.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training loss history')
plt.show()

for split in ['train', 'val']:
    minibatch = sample_coco_minibatch(sdata, split=split, batch_size=2)
    gt_captions, features, urls = minibatch
    gt_captions = decode_captions(gt_captions, data['idx_to_word'])

    sample_captions = lstm_model.sample(features)
    sample_captions = decode_captions(sample_captions, data['idx_to_word'])
Code Example #10
File: LSTM_Captioning.py Project: Paz87/CS231n-2017
small_lstm_model = CaptioningRNN(
          cell_type='lstm',
          word_to_idx=data['word_to_idx'],
          input_dim=data['train_features'].shape[1],
          hidden_dim=512,
          wordvec_dim=256,
          dtype=np.float32,
        )

small_lstm_solver = CaptioningSolver(small_lstm_model, small_data,
           update_rule='adam',
           num_epochs=50,
           batch_size=25,
           optim_config={
             'learning_rate': 5e-3,
           },
           lr_decay=0.995,
           verbose=True, print_every=10,
         )

small_lstm_solver.train()

# Plot the training losses
plt.plot(small_lstm_solver.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training loss history')
plt.show()


# # LSTM test-time sampling
# Modify the `sample` method of the `CaptioningRNN` class to handle the case where `self.cell_type` is `lstm`. This should take fewer than 10 lines of code.
# 
# When you are done run the following to sample from your overfit LSTM model on some training and validation set samples.

# In[ ]:
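
For orientation, a hedged sketch of the LSTM change inside sample: keep a cell state alongside the hidden state and branch on self.cell_type. This assumes lstm_step_forward from cs231n/rnn_layers.py and a zero initial cell state (an assumption matching the usual scaffold); it is an illustration, not the official solution.

# Before the sampling loop: the LSTM additionally tracks a cell state,
# assumed here to start at zero.
c = np.zeros_like(h)

# Inside the loop, replace the single RNN step with a branch:
if self.cell_type == 'rnn':
    h, _ = rnn_step_forward(x, h, Wx, Wh, b)
else:  # 'lstm'; Wx, Wh, b carry 4H output columns for the four gates
    h, c, _ = lstm_step_forward(x, h, c, Wx, Wh, b)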
Code Example #11
File: LSTM_Captioning.py Project: chongyangma/cs231n
big_lstm_model = CaptioningRNN(
    cell_type='lstm',
    word_to_idx=data['word_to_idx'],
    input_dim=data['train_features'].shape[1],
    hidden_dim=1024,
    wordvec_dim=512,
    dtype=np.float32,
)

big_lstm_solver = CaptioningSolver(big_lstm_model, big_coco_data,
                                   update_rule='adam',
                                   num_epochs=30,
                                   batch_size=25,
                                   optim_config={'learning_rate': 5e-3,},
                                   lr_decay=0.995,
                                   verbose=True, print_every=100,
                                  )

big_lstm_solver.train()

import nltk

def BLEU_score(gt_caption, sample_caption):
    """
    gt_caption: string, ground-truth caption
    sample_caption: string, your model's predicted caption
    Returns unigram BLEU score.
    """
    reference = [x for x in gt_caption.split(' ')
                 if ('<END>' not in x and '<START>' not in x and '<UNK>' not in x)]
    hypothesis = [x for x in sample_caption.split(' ')
                  if ('<END>' not in x and '<START>' not in x and '<UNK>' not in x)]
    BLEUscore = nltk.translate.bleu_score.sentence_bleu([reference], hypothesis, weights=[1])
    return BLEUscore
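
As a quick, hypothetical sanity check of BLEU_score (the caption strings below are made up, not taken from the dataset):

ref = '<START> a cat sitting on a mat <END>'
hyp = 'a cat sitting on a mat'
# Special tokens are stripped from the reference, so the hypothesis
# matches it exactly and unigram BLEU should come out as 1.0.
print(BLEU_score(ref, hyp))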

def evaluate_model(model, med_data):