Example #1
# module-level imports required by this method
import numpy as np
import torch
import torch.nn as nn

    def train_(self):

        # best validation perplexity seen so far
        cur_best = 10000

        model = LM(self.unique_words, self.char_vocab, self.max_len, self.embed_dim,
                   self.channels, self.kernels, self.hidden_size)

        # choose the device once; batches below are moved with .to(device)
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model.to(device)

        learning_rate = self.learning_rate
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

        for epoch in range(self.epochs):

            model.train(True)

            # fresh (h0, c0) LSTM state each epoch; build two separate tensors
            # rather than aliasing one tensor twice with [tensor] * 2
            hidden_state = [torch.zeros(2, self.batch_size, self.hidden_size, device=device)
                            for _ in range(2)]

            # walk the corpus in non-overlapping windows of seq_len
            for i in range(0, self.train.size(1) - self.seq_len, self.seq_len):

                model.zero_grad()

                inputs = self.train[:, i : i + self.seq_len, :].to(device)                # e.g. 20 x 35 x 21
                targets = self.train_idx[:, (i + 1) : (i + 1) + self.seq_len].to(device)  # e.g. 20 x 35

                # truncated BPTT: detach the state so gradients do not flow
                # back into earlier windows
                hidden_state = [state.detach() for state in hidden_state]

                output, hidden_state = model(inputs, hidden_state)
                
                loss = criterion(output, targets.view(-1))

                loss.backward()

                # clip gradients to keep the recurrent updates stable
                nn.utils.clip_grad_norm_(model.parameters(), 5)

                optimizer.step()

                step = (i + 1) // self.seq_len

                if step % 100 == 0:
                    print('Epoch %d/%d, Step %d/%d, Loss: %.3f, Perplexity: %5.2f'
                          % (epoch, self.epochs, step, self.num_batches // self.seq_len,
                             loss.item(), np.exp(loss.item())))
            
            model.eval()
            val_loss = self._validate(self.seq_len, self.valid, self.valid_idx, model, hidden_state, criterion)
            val_perplex = np.exp(val_loss)

            # halve the learning rate when validation perplexity improves by
            # less than one point, down to a floor of 0.03
            if cur_best - val_perplex < 1:
                if learning_rate > 0.03:
                    learning_rate = learning_rate * 0.5
                    print("Adjusted learning_rate : %.5f" % learning_rate)
                    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

            if val_perplex < cur_best:
                print("New best val loss: %.3f (perplexity: %.2f)" % (val_loss, val_perplex))
                cur_best = val_perplex
                torch.save(model.state_dict(), 'model.pkl')
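
The _validate helper called above is not part of this snippet. A minimal sketch, assuming it mirrors the training loop but runs under torch.no_grad() and returns the mean cross-entropy over the validation windows (the signature matches the call above; the body is an assumption):

    def _validate(self, seq_len, data, data_idx, model, hidden_state, criterion):
        # hypothetical body: mean loss over non-overlapping validation windows
        device = next(model.parameters()).device
        losses = []
        with torch.no_grad():
            for i in range(0, data.size(1) - seq_len, seq_len):
                inputs = data[:, i : i + seq_len, :].to(device)
                targets = data_idx[:, (i + 1) : (i + 1) + seq_len].to(device)
                output, hidden_state = model(inputs, hidden_state)
                losses.append(criterion(output, targets.view(-1)).item())
        return sum(losses) / max(len(losses), 1)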
Example #2
    # the source begins mid-statement; the keyword arguments match
    # torch.optim.lr_scheduler.ReduceLROnPlateau, so the full line was likely:
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           factor=0.5,
                                                           patience=1,
                                                           verbose=True)
    # scheduler.step(val_loss) would then be called once per epoch after validation

    for epoch in range(num_epochs):

        # fresh (h0, c0) LSTM state at the start of each epoch
        hidden_state = (to_var(torch.zeros(2, batch_size, hidden_size)),
                        to_var(torch.zeros(2, batch_size, hidden_size)))

        model.train(True)
        for i in range(0, data.size(1) - seq_len, seq_len):

            inputs = to_var(data[:, i:i + seq_len, :])
            targets = to_var(label[:, (i + 1):(i + 1) + seq_len]).contiguous()

            model.zero_grad()

            # truncated BPTT: detach so gradients stop at the window boundary
            hidden_state = [state.detach() for state in hidden_state]

            output, hidden_state = model(inputs, hidden_state)

            loss = criterion(output, targets.view(-1))

            loss.backward()

            # clip_grad_norm is deprecated; use the in-place clip_grad_norm_
            nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()

            step = (i + 1) // seq_len
            if step % 100 == 0:
                # the source is truncated here; a typical logging line, as in
                # Example #1, would be:
                print('Epoch %d, Step %d, Loss: %.3f, Perplexity: %5.2f'
                      % (epoch, step, loss.item(), np.exp(loss.item())))
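
Example #2 depends on a to_var helper that is not shown. A minimal sketch of the usual Variable-era definition (an assumption, not this snippet's actual code):

import torch
from torch.autograd import Variable

def to_var(x):
    # assumed helper: move the tensor to GPU when available, wrap in Variable
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)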