def train_(self):
    """Train an ``LM`` model with SGD and checkpoint the best validation result.

    For every epoch the method walks over ``self.train`` in windows of
    ``self.seq_len`` along dimension 1, using ``self.train_idx`` shifted by
    one position as the targets.  Gradients are clipped to a norm of 5 before
    each optimizer step.  After the epoch the model is scored with
    ``self._validate``; the exponential of the returned loss is treated as
    the validation perplexity.

    Learning-rate schedule: when the perplexity fails to beat the best value
    seen so far by at least 1, the learning rate is halved, provided it is
    still above 0.03.

    Side effects:
        Prints progress every 100 steps, prints learning-rate adjustments,
        and writes ``model.state_dict()`` to ``'model.pkl'`` whenever the
        validation perplexity improves.

    Returns:
        None.
    """
    cur_best = 10000  # sentinel "worst" perplexity so the first epoch always improves on it

    # Resolve the device once.  The model was already moved to the GPU only
    # when CUDA is available, so every tensor must follow the same rule;
    # unconditional ``.cuda()`` calls crash on CPU-only machines.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = LM(
        self.unique_words,
        self.char_vocab,
        self.max_len,
        self.embed_dim,
        self.channels,
        self.kernels,
        self.hidden_size,
    )
    model.to(device)

    learning_rate = self.learning_rate
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    for epoch in range(self.epochs):
        model.train(True)

        # Fresh zero state at the start of every epoch.  Two independent
        # tensors are built instead of ``[t] * 2`` so the slots never alias.
        # NOTE(review): presumably the (h, c) pair of a 2-layer LSTM inside
        # LM -- confirm against the model definition.
        hidden_state = [
            torch.zeros(2, self.batch_size, self.hidden_size, device=device)
            for _ in range(2)
        ]

        for i in range(0, self.train.size(1) - self.seq_len, self.seq_len):
            model.zero_grad()

            # Original author's shape notes: inputs 20 * 35 * 21, targets 20 * 35
            # (assumed batch x seq_len [x chars per word] -- TODO confirm).
            inputs = self.train[:, i : i + self.seq_len, :].to(device)
            targets = self.train_idx[:, (i + 1) : (i + 1) + self.seq_len].to(device)

            # Detach so back-propagation stops at the window boundary instead
            # of reaching back through every previous window.
            hidden_state = [state.detach() for state in hidden_state]

            output, hidden_state = model(inputs, hidden_state)
            loss = criterion(output, targets.view(-1))
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 5)  # gradient clipping
            optimizer.step()

            step = (i + 1) // self.seq_len
            if step % 100 == 0:
                print('Epoch %d/%d, Batch x Seq_Len %d/%d, Loss: %.3f, Perplexity: %5.2f' % (epoch, self.epochs, step, self.num_batches//self.seq_len, loss.item(), np.exp(loss.item())))

        model.eval()
        # The final training hidden state is handed to validation, as in the
        # original code.
        val_loss = self._validate(
            self.seq_len, self.valid, self.valid_idx, model, hidden_state, criterion
        )
        val_perplex = np.exp(val_loss)

        # Improvement over the best perplexity was smaller than 1: halve the
        # learning rate unless it has already reached the 0.03 floor.
        if cur_best - val_perplex < 1:
            if learning_rate > 0.03:
                learning_rate = learning_rate * 0.5
                print("Adjusted learning_rate : %.5f"%learning_rate)
                # Re-creating the optimizer is how the new rate is applied;
                # SGD is constructed without momentum, so no state is lost.
                optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

        if val_perplex < cur_best:
            print("The current best val loss: ", val_loss)
            cur_best = val_perplex
            torch.save(model.state_dict(), 'model.pkl')
loss.data[0], np.exp(loss.data[0]))) model.eval() #validate val_loss = validate(seq_len, val_data, val_label, model, hidden_state) val_loss = np.exp(val_loss) if pivot - val_loss < 0.8: if learning_rate > 0.03: learning_rate = learning_rate * 0.5 print(learning_rate) optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) pivot = val_loss if val_loss < best_ppl: print(val_loss) best_ppl = val_loss # Save the Model torch.save(model.state_dict(), 'model.pkl') if best_ppl < final_ppl: print('best ppl') print(best_ppl) #Save the final_model torch.save(model.state_dict(), 'model_best.pkl') final_ppl = best_ppl