Example 1
    def train(self):
        """Run the full training loop over ``config.num_epochs`` epochs.

        Each epoch: put encoder/decoder into train mode, iterate the train
        loader back-propagating with gradient clipping, then compute the
        validation loss and checkpoint whenever it improves.
        """
        batch_num = len(self.train_loader)
        best_loss = 1e10
        # Initialize so the post-loop evaluate/print are safe even when the
        # loader yields no batches (previously raised UnboundLocalError).
        batch_loss = 0.0
        msg = ""
        for epoch in range(1, config.num_epochs + 1):
            self.model.encoder.train()
            self.model.decoder.train()
            print("epoch {}/{} :".format(epoch, config.num_epochs), end="\r")
            start = time.time()

            for batch_idx, train_data in enumerate(self.train_loader, start=1):
                batch_loss = self.step(train_data)

                self.optim.zero_grad()
                batch_loss.backward()
                # gradient clipping to stabilize training
                nn.utils.clip_grad_norm_(self.model.parameters(), config.max_grad_norm)
                self.optim.step()
                batch_loss = batch_loss.detach().item()
                msg = "{}/{} {} - ETA : {} - loss : {:.4f}" \
                    .format(batch_idx, batch_num, progress_bar(batch_idx, batch_num),
                            eta(start, batch_idx, batch_num), batch_loss)
                print(msg, end="\r")

            # compute validation loss for every epoch
            val_loss = self.evaluate(msg)
            if val_loss <= best_loss:
                best_loss = val_loss
                self.save_model(val_loss, epoch)

            print("Epoch {} took {} - final loss : {:.4f} - val loss :{:.4f}"
                  .format(epoch, user_friendly_time(time_since(start)), batch_loss, val_loss))
Example 2
    def train(self):
        """Jointly train the QA and question-generation (ca2q) models with
        gradient accumulation.

        Both losses are back-propagated on every batch, but the two
        optimizers only step (and zero) every
        ``config.gradient_accumulation_steps`` batches. Checkpoints are
        saved whenever the validation QG loss improves.
        """
        global_step = 1
        batch_num = len(self.train_loader)
        best_loss = 1e10
        qa_loss_lst = []
        qg_loss_lst = []
        for epoch in range(1, config.num_epochs + 1):
            start = time.time()
            for step, batch in enumerate(self.train_loader, start=1):
                qa_loss, ca2q_loss = self.model(batch)

                # mean() to average across multiple gpu and back-propagation;
                # dividing pre-scales each batch for gradient accumulation
                qa_loss = qa_loss.mean() / config.gradient_accumulation_steps
                ca2q_loss = ca2q_loss.mean() / config.gradient_accumulation_steps

                # retain_graph=True keeps buffers alive for the second
                # backward -- presumably the two losses share part of the
                # graph; TODO confirm against the model definition.
                qa_loss.backward(retain_graph=True)
                ca2q_loss.backward()

                qa_loss_lst.append(qa_loss.detach().item())
                qg_loss_lst.append(ca2q_loss.detach().item())
                # clip gradient
                # NOTE(review): only the ca2q sub-model is clipped; the QA
                # model's gradients are left unclipped -- verify intended.
                nn.utils.clip_grad_norm_(self.model.module.ca2q_model.parameters(), config.max_grad_norm)

                # update params
                if step % config.gradient_accumulation_steps == 0:
                    self.qa_opt.step()
                    self.qg_opt.step()
                    # zero grad
                    self.qa_opt.zero_grad()
                    self.qg_opt.zero_grad()
                    global_step += 1
                    # sum of the (already scaled) per-batch losses collected
                    # since the last optimizer step
                    avg_qa_loss = sum(qa_loss_lst)
                    avg_qg_loss = sum(qg_loss_lst)
                    # empty list
                    qa_loss_lst = []
                    qg_loss_lst = []
                    msg = "{}/{} {} - ETA : {} - qa_loss: {:.2f}, ca2q_loss :{:.2f}" \
                        .format(step, batch_num, progress_bar(step, batch_num),
                                eta(start, step, batch_num),
                                avg_qa_loss, avg_qg_loss)
                    print(msg, end="\r")

            # NOTE(review): msg is unbound here if no optimizer step happened
            # this epoch (loader shorter than gradient_accumulation_steps).
            val_qa_loss, val_qg_loss = self.evaluate(msg)
            # model selection keys on the QG validation loss only
            if val_qg_loss <= best_loss:
                best_loss = val_qg_loss
                self.save_model(val_qg_loss, epoch)

            print("Epoch {} took {} - final loss : {:.4f} -  qa_loss :{:.4f}, qg_loss :{:.4f}"
                  .format(epoch, user_friendly_time(time_since(start)), ca2q_loss, val_qa_loss, val_qg_loss))
Example 3
    def train(self):
        """Fine-tune the QA model with gradient accumulation.

        Each batch is truncated to its longest non-padding sequence, moved
        to ``config.device``, and back-propagated; the optimizer steps every
        ``config.gradient_accumulation_steps`` batches. A checkpoint is
        saved whenever the validation loss improves.
        """
        global_step = 1
        batch_num = len(self.train_loader)
        best_loss = 1e10
        qa_loss_lst = []
        # Initialize so evaluate(msg) is safe even if no optimizer step ran.
        msg = ""
        self.model.train()
        # NOTE(review): epoch count is hard-coded to 3 here, unlike sibling
        # loops that use config.num_epochs -- confirm this is intended.
        for epoch in range(1, 4):
            start = time.time()
            for step, batch in enumerate(self.train_loader, start=1):

                input_ids, input_mask, segment_ids, start_positions, end_positions = batch
                # trim padding: keep columns up to the longest non-zero sequence
                seq_len = torch.sum(torch.sign(input_ids), 1)
                max_len = torch.max(seq_len)
                input_ids = input_ids[:, :max_len].to(config.device)
                input_mask = input_mask[:, :max_len].to(config.device)
                segment_ids = segment_ids[:, :max_len].to(config.device)
                start_positions = start_positions.to(config.device)
                end_positions = end_positions.to(config.device)
                loss = self.model(input_ids, segment_ids, input_mask, start_positions, end_positions)

                # scale so accumulated gradients average over the window
                loss = loss / config.gradient_accumulation_steps
                loss.backward()
                # BUG FIX: store a detached float, not the live tensor --
                # appending `loss` itself kept the whole autograd graph alive
                # between optimizer steps (memory leak) and made the summed
                # value below a tensor instead of a number.
                qa_loss_lst.append(loss.detach().item())
                # update params
                if step % config.gradient_accumulation_steps == 0:
                    self.qa_opt.step()
                    # zero grad
                    self.qa_opt.zero_grad()
                    global_step += 1
                    # sum of the (already scaled) per-batch losses in the window
                    avg_qa_loss = sum(qa_loss_lst)
                    # empty list
                    qa_loss_lst = []
                    msg = "{}/{} {} - ETA : {} - qa_loss: {:.2f}" \
                        .format(step, batch_num, progress_bar(step, batch_num),
                                eta(start, step, batch_num),
                                avg_qa_loss)
                    print(msg, end="\r")

            val_loss = self.evaluate(msg)
            if val_loss <= best_loss:
                best_loss = val_loss
                self.save_model(val_loss, epoch)

            print("Epoch {} took {} - final loss : {:.4f} -  val_loss :{:.4f}"
                  .format(epoch, user_friendly_time(time_since(start)), loss, val_loss))
    def train(self):
        """Train the model for ``config.num_epochs`` epochs.

        From epoch 8 onward the learning rate is halved on every even epoch.
        After each epoch the validation loss is computed and a checkpoint is
        written whenever it improves on the best seen so far.
        """
        total_batches = len(self.train_loader)
        best_loss = 1e10
        for epoch in range(1, config.num_epochs + 1):
            self.model.train()
            print("epoch {}/{} :".format(epoch, config.num_epochs), end="\r")
            start = time.time()

            # Learning-rate schedule: halve on even epochs once epoch >= 8,
            # pushed into the optimizer via a state-dict round-trip.
            if epoch >= 8 and epoch % 2 == 0:
                self.lr = self.lr * 0.5
                optim_state = self.optim.state_dict()
                for group in optim_state["param_groups"]:
                    group["lr"] = self.lr
                self.optim.load_state_dict(optim_state)

            batch_idx = 0
            for train_data in self.train_loader:
                batch_idx += 1
                batch_loss = self.step(train_data)

                self.model.zero_grad()
                batch_loss.backward()
                # keep gradient norms bounded before stepping
                nn.utils.clip_grad_norm_(self.model.parameters(),
                                         config.max_grad_norm)
                self.optim.step()

                batch_loss = batch_loss.detach().item()
                msg = "{}/{} {} - ETA : {} - loss : {:.4f}" \
                    .format(batch_idx, total_batches, progress_bar(batch_idx, total_batches),
                            eta(start, batch_idx, total_batches), batch_loss)
                print(msg, end="\r")

            val_loss = self.evaluate(msg)
            # checkpoint only when validation improves
            if val_loss <= best_loss:
                best_loss = val_loss
                self.save_model(val_loss, epoch)

            print("Epoch {} took {} - final loss : {:.4f} - val loss :{:.4f}".
                  format(epoch, user_friendly_time(time_since(start)),
                         batch_loss, val_loss))