# --- Checkpoint the model whenever the smoothed ("trail") loss improves,
# --- then run a short training loop.
# NOTE(review): this chunk was whitespace-collapsed in the source; reformatted
# to conventional layout. Logic is unchanged except where commented.
try:
    # Smoothed loss: mean of the recent losses accumulated in `loss_trail`.
    trail_loss = sum(loss_trail) / len(loss_trail)
    if last_best_loss is None or last_best_loss > trail_loss:
        print("Loss improved from {} to {}".format(last_best_loss, trail_loss))
        # BUGFIX: the original called .format(lr, VOCAB_SIZE) on a string with
        # no placeholders, which silently ignored both arguments — the call is
        # dropped; the resulting path is byte-identical to before.
        save_loc = "./saved_models/skip-best"
        print("saving model at {}".format(save_loc))
        torch.save(mod.state_dict(), save_loc)
        last_best_loss = trail_loss
except Exception as e:
    # Best-effort checkpointing: a failed save must not kill training.
    print("Couldn't save model because {}".format(e))

print("Starting training...")
# iterations
for i in range(0, 100):
    # 32 * 8 = 256 sentences per batch.
    sentences, lengths = d.fetch_batch(32 * 8)
    loss, prev, nex, prev_pred, next_pred = mod(sentences, lengths)
    if i % 10 == 0:
        debug(i, loss, prev, nex, prev_pred, next_pred)
    # Standard PyTorch step: clear grads, backprop, apply update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
# NOTE(review): this chunk was whitespace-collapsed AND truncated — it begins
# mid-way through a "save best model" try-block; the try header and the
# statements before `last_best_loss = trail_loss` are missing from this view.
# The minimal `try:` framing is restored below so the fragment parses; confirm
# the missing head against the full file.
try:
    # ... truncated: best-loss check / torch.save logic belongs here ...
    last_best_loss = trail_loss
except Exception as e:
    # Best-effort checkpointing: a failed save must not kill training.
    sys.stderr.write("Couldn't save model because {}\n".format(e))

# train!!!
lr = 3.16e-4
optimizer = torch.optim.Adam(params=mod.parameters(), lr=lr)
# Number of minibatches needed to cover the corpus once per epoch.
iter_count_per_epoch = int(math.ceil(sentences_count / batch_size))
sys.stderr.write('iter_count_per_epoch : {}\n'.format(iter_count_per_epoch))
sys.stderr.write("training begin...\n")
for epoch in range(0, total_epoch):
    for i in range(0, iter_count_per_epoch):
        sentences = d.fetch_batch(batch_size)
        loss, prev, nex, prev_pred, next_pred = mod(sentences)
        if i % 10 == 0:
            debug(epoch, i, loss, prev, nex, prev_pred, next_pred)
        # Standard PyTorch step: clear grads, backprop, apply update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # save after every epoch
    if save_model:
        save_loc_epoch = "./saved_models/skip-{}-epoch".format(epoch)
        sys.stderr.write("saving model at {}\n".format(save_loc_epoch))
        torch.save(mod.state_dict(), save_loc_epoch)
# NOTE(review): this chunk was whitespace-collapsed AND truncated — it begins
# mid-way through a "save best model" try-block; the try header and the
# loss-improvement check are missing from this view. The minimal `try:`
# framing is restored below so the fragment parses; confirm against the full
# file.
try:
    print("Loss improved from {} to {}".format(last_best_loss, trail_loss))
    # BUGFIX: the original called .format(lr, VOCAB_SIZE) on a string with no
    # placeholders, which silently ignored both arguments — the call is
    # dropped; the resulting path is byte-identical to before.
    save_loc = "./saved_models/skip-best"
    print("saving model at {}".format(save_loc))
    torch.save(mod.state_dict(), save_loc)
    last_best_loss = trail_loss
except Exception as e:
    # Best-effort checkpointing: a failed save must not kill training.
    print("Couldn't save model because {}".format(e))


# In[ ]:


print("Starting training...")
# a million iterations
for i in range(0, 1000000):
    sentences, lengths = d.fetch_batch(96)
    loss, prev, nex, prev_pred, next_pred = mod(sentences, lengths)
    if i % 10 == 0:
        debug(i, loss, prev, nex, prev_pred, next_pred)
    # Standard PyTorch step: clear grads, backprop, apply update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


# In[ ]: