import os

import numpy as np
import torch
import torch.nn as nn
from torch.utils import data

# Dataset, CharRNN, PADDING_TOKEN, print_colored_text, print_tokens and
# get_sentiments are assumed to come from this project's own modules.


def main():
    ds = Dataset('imdb')
    params = {
        'batch_size': 67,
        'shuffle': True,
        'num_workers': 8,
        'collate_fn': collate_fn
    }
    epochs = 4
    lr = 0.01
    tbptt_steps = 256  # truncated-BPTT window length
    training_generator = data.DataLoader(ds, **params)
    model = CharRNN(input_size=ds.encoder.get_vocab_size(),
                    embedding_size=8,
                    hidden_size=128,
                    output_size=ds.encoder.get_vocab_size(),
                    no_sentiments=3,
                    dense_size=32,
                    padding_idx=ds.encoder.get_id(PADDING_TOKEN),
                    n_layers=1)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    step_no = 0
    for epoch in range(epochs):
        print('Epoch: ', epoch)
        for x_i, y_i, l_i in training_generator:
            model.reset_intermediate_vars()
            step_no += 1
            batch_loss = 0
            # Batches are sorted longest-first, so l_i[0] is the longest
            # sequence; ceiling division gives the number of TBPTT windows.
            longest = l_i[0].item()
            n_windows = longest // tbptt_steps + (longest % tbptt_steps != 0)
            for step in range(n_windows):
                von = tbptt_steps * step                      # window start
                bis = min(tbptt_steps * (step + 1), longest)  # window end
                out = model(x_i[:, von:bis])
                if step % 25 == 0:
                    print(model.attn[0].detach().numpy(),
                          model.attn[-1].detach().numpy())
                loss = model.loss(out, y_i, l_i, von, bis)
                # Accumulate a plain float, not the graph-attached tensor,
                # so the autograd graph of earlier windows can be freed.
                batch_loss += loss.item()
                optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), 1.5)
                # The update is done by Adam alone. The original code also
                # applied a manual SGD step (p.data.add_(-lr, p.grad.data)),
                # which would have updated every parameter twice per window.
                optimizer.step()
                # Cut the graph between TBPTT windows.
                model.detach_intermediate_vars()
            print('Total loss for this batch: ', batch_loss)
            if step_no % 30 == 1:
                gen_sample, sentis = model.generate_text(ds.encoder, 'T',
                                                         200, 0.7)
                print_colored_text(gen_sample, sentis, ds.encoder)
                # Print a training example with its predicted sentiments
                print_colored_text(x_i[-1].data.numpy(),
                                   get_sentiments(model, x_i[-1], 0.7),
                                   ds.encoder)
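
# main() relies on a collate_fn that is not defined in this section. Below is
# a minimal sketch, assuming each dataset sample is an (input_ids, target_ids)
# pair of 1-D LongTensors; the sort order matches the l_i[0]-is-longest
# assumption in main(), but the sample layout and padding value are
# assumptions, not the project's confirmed format.
from torch.nn.utils.rnn import pad_sequence


def collate_fn(batch):
    # Sort longest-first so l_i[0] is the longest sequence in the batch.
    batch.sort(key=lambda sample: len(sample[0]), reverse=True)
    lengths = torch.tensor([len(sample[0]) for sample in batch])
    # Pad inputs and targets to the longest length. padding_value should be
    # the id of PADDING_TOKEN; 0 here is a placeholder assumption.
    x_padded = pad_sequence([s[0] for s in batch], batch_first=True,
                            padding_value=0)
    y_padded = pad_sequence([s[1] for s in batch], batch_first=True,
                            padding_value=0)
    return x_padded, y_padded, lengths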
def run_training(model: CharRNN, dataset, config: dict, validation: bool,
                 valid_dataset):
    optimizer = torch.optim.Adam(model.parameters(), lr=config['initial_lr'])
    # Resume from a checkpoint if one exists; otherwise start at epoch 1.
    epoch = load_checkpoint(optimizer, model, config['filename'])
    if not epoch:
        epoch = 0
    epoch += 1
    params = {
        'batch_size': config['batch_size'],
        'shuffle': False,
        # DataLoader worker processes are problematic on Windows, so fall
        # back to single-process loading there.
        'num_workers': 0 if os.name == 'nt' else 8
    }
    data_generator = data.DataLoader(dataset, **params)

    while epoch < config['epochs'] + 1:
        model.reset_intermediate_vars()
        for step, (x_i, y_i, l_i) in enumerate(data_generator):
            loss = run_forward_pass_and_get_loss(model, x_i, y_i, l_i)
            # Gradient descent step
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.5)
            optimizer.step()
            # Cut the graph so gradients do not flow across batches.
            model.detach_intermediate_vars()
            if step % 100 == 0:
                print('Epoch: {} Loss for step {}: {}'.format(
                    epoch, step, round(loss.item(), 4)))
            if step % 1000 == 1:
                gen_sample = model.generate_text(dataset.encoder, 't', 200)
                print_tokens(dataset.encoder.map_ids_to_tokens(gen_sample),
                             config['is_bytes'])
        save_checkpoint(optimizer, model, epoch, config['filename'])
        # Validate every other epoch.
        if validation and epoch % 2:
            bpc = validate(valid_dataset, model)
            print('BPC on validation set: ', bpc)
        if epoch in config['lr_schedule']:
            # Lower the learning rate in place rather than re-creating the
            # optimizer, which would discard Adam's moment estimates.
            for group in optimizer.param_groups:
                group['lr'] = config['lr_schedule'][epoch]
        epoch += 1
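
# run_forward_pass_and_get_loss() is called by both run_training() and
# validate() but is not defined in this section. A minimal sketch, assuming
# model.loss() has the windowed signature used in main() and that passing the
# full sequence as one window is valid; the exact signature is an assumption.


def run_forward_pass_and_get_loss(model, x_i, y_i, l_i):
    out = model(x_i)  # logits for the full (padded) sequence
    # Reuse the windowed loss from main() with a window spanning everything.
    return model.loss(out, y_i, l_i, 0, x_i.size(1))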
def validate(dataset, model: CharRNN):
    # Save the model's training-time state so it can be restored afterwards.
    tmp_hidden = model.hidden
    tmp_loss_func = model.loss_func
    model.reset_intermediate_vars()
    # Sum (rather than average) the per-token losses so the total can be
    # normalised by the corpus length below.
    model.loss_func = nn.CrossEntropyLoss(reduction='sum')
    params = {
        'batch_size': 1,
        'shuffle': False,
        'num_workers': 0 if os.name == 'nt' else 8
    }
    data_generator = data.DataLoader(dataset, **params)
    cross_entropy = 0
    total_length = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for x_i, y_i, l_i in data_generator:
            total_length += l_i.item()
            cross_entropy += run_forward_pass_and_get_loss(
                model, x_i, y_i, l_i).item()
            model.detach_intermediate_vars()
    # Perplexity = exp(mean cross-entropy in nats); BPC = log2(perplexity),
    # i.e. cross_entropy / (total_length * ln 2).
    perplexity = np.exp(cross_entropy / total_length)
    bpc = np.log2(perplexity)
    model.hidden = tmp_hidden
    model.loss_func = tmp_loss_func
    return bpc
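
# The checkpoint helpers called by run_training() are not shown in this file.
# Below is a minimal sketch assuming the standard torch.save / torch.load
# state_dict pattern; the key names ('epoch', 'model_state',
# 'optimizer_state') and the on-disk layout are assumptions, not the
# project's confirmed format.


def save_checkpoint(optimizer, model, epoch, filename):
    torch.save({'epoch': epoch,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict()}, filename)


def load_checkpoint(optimizer, model, filename):
    if not os.path.isfile(filename):
        return None  # no checkpoint yet: run_training() then starts at epoch 1
    checkpoint = torch.load(filename, map_location='cpu')
    model.load_state_dict(checkpoint['model_state'])
    optimizer.load_state_dict(checkpoint['optimizer_state'])
    return checkpoint['epoch']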