Example #1
 def save_parameters(self, num):
     """
     Save the trained parameters
     :param num: current step number, appended to the saved weight file name
     :return: None
     """
     logger.info('saving parameters for doc-rep module on step=%d' % num)
     save_model(self,
                num,
                model_weight_path=self.out_pt_weight_path + '-' + str(num),
                checkpoint_path=self.out_pt_checkpoint_path)
Example #2
 def save_parameters(self, num):
     """
     Save the trained parameters
     :param num: current step number, appended to the saved weight file name
     :return: None
     """
     logger.info('saving parameters for {} module on steps={}'.format(
         self.model.__class__.__name__, num))
     save_model(self,
                num,
                model_weight_path=self.out_weight_path + '-' + str(num),
                checkpoint_path=self.out_checkpoint_path)
Example #3
 def save_parameters(self, num):
     """
     Save the trained parameters
     :param num: current step number, appended to the saved weight file name
     :return: None
     """
     logger.info('Saving agent parameters for state module on step=%d' %
                 num)
     save_model(self,
                num,
                model_weight_path=self.out_state_weight_path + '-' +
                str(num),
                checkpoint_path=self.out_state_checkpoint_path)
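All three save_parameters variants above delegate to a project-specific
save_model helper whose definition is not shown. A minimal sketch of what a
helper with this signature might look like, assuming torch.save-based
persistence (the keyword names model_weight_path and checkpoint_path come
from the call sites; the body is hypothetical):

import torch

def save_model(module, num, model_weight_path, checkpoint_path):
    # hypothetical helper: `module` is assumed to be (or wrap) an nn.Module
    torch.save(module.state_dict(), model_weight_path)
    # record the latest weight file for step `num` in a small index file
    with open(checkpoint_path, 'w') as f:
        f.write('step=%d\nweights=%s\n' % (num, model_weight_path))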
Example #4
    def train(self,
              model,
              vocab,
              train_loader,
              valid_loader_list,
              loss_type,
              start_epoch,
              num_epochs,
              args,
              evaluate_every=1000,
              last_metrics=None,
              early_stop="loss,10",
              opt_name="adam"):
        """
        Training
        args:
            model: Model object
            train_loader: DataLoader object of the training set
            valid_loader_list: a list of Validation DataLoader objects
            start_epoch: start epoch (> 0 if you resume the process)
            num_epochs: last epoch
        """
        history = []
        best_valid_val = float("inf")
        smoothing = args.label_smoothing
        early_stop_criteria, early_stop_val = early_stop.split(",")
        early_stop_val = int(early_stop_val)
        count_stop = 0

        logging.info("name " + args.name)

        if opt_name == "adam":
            opt = torch.optim.Adam(model.parameters(), lr=args.lr)
        elif opt_name == "sgd":
            opt = torch.optim.SGD(model.parameters(), lr=args.lr)
        else:
            raise ValueError("unknown optimizer: {}".format(opt_name))

        for epoch in range(start_epoch, num_epochs):
            total_loss, total_cer, total_wer, total_char, total_word = 0, 0, 0, 0, 0
            total_time = 0

            start_iter = 0
            final_train_losses = []
            final_train_cers = []

            logging.info("TRAIN")
            print("TRAIN")
            model.train()
            pbar = tqdm(iter(train_loader),
                        leave=True,
                        total=len(train_loader))
            max_len = 0
            for i, data in enumerate(pbar, start=start_iter):
                torch.cuda.empty_cache()
                src, trg, src_percentages, src_lengths, trg_lengths = data
                max_len = max(max_len, src.size(-1))

                opt.zero_grad()

                try:
                    if args.cuda:
                        src = src.cuda()
                        trg = trg.cuda()

                    start_time = time.time()
                    loss, cer, num_char = self.train_one_batch(
                        model, vocab, src, trg, src_percentages, src_lengths,
                        trg_lengths, smoothing, loss_type)
                    total_cer += cer
                    total_char += num_char
                    loss.backward()

                    if args.clip:
                        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       args.max_norm)

                    opt.step()
                    total_loss += loss.item()

                    end_time = time.time()
                    diff_time = end_time - start_time
                    total_time += diff_time

                    pbar.set_description(
                        "(Epoch {}) TRAIN LOSS:{:.4f} CER:{:.2f}% LR:{:.7f} TOTAL TIME:{:.7f}"
                        .format((epoch + 1), total_loss / (i + 1),
                                total_cer * 100 / total_char, self.get_lr(opt),
                                total_time))
                except Exception as e:
                    print(e)
                    # del loss
                    try:
                        torch.cuda.empty_cache()
                        src = src.cpu()
                        trg = trg.cpu()
                        src_splits = src.split(2, dim=0)
                        trg_splits = trg.split(2, dim=0)
                        src_lengths_splits = src_lengths.split(2, dim=0)
                        trg_lengths_splits = trg_lengths.split(2, dim=0)
                        src_percentages_splits = src_percentages.split(2, dim=0)
                        j = 0

                        start_time = time.time()
                        for src, trg, src_lengths, trg_lengths, src_percentages in zip(
                                src_splits, trg_splits, src_lengths_splits,
                                trg_lengths_splits, src_percentages_splits):
                            opt.zero_grad()
                            torch.cuda.empty_cache()
                            if args.cuda:
                                src = src.cuda()
                                trg = trg.cuda()

                            start_time = time.time()
                            loss, cer, num_char = self.train_one_batch(
                                model, vocab, src, trg, src_percentages,
                                src_lengths, trg_lengths, smoothing, loss_type)
                            total_cer += cer
                            total_char += num_char
                            loss.backward()

                            if args.clip:
                                torch.nn.utils.clip_grad_norm_(
                                    model.parameters(), args.max_norm)

                            opt.step()
                            total_loss += loss.item()
                            j += 1

                        end_time = time.time()
                        diff_time = end_time - start_time
                        total_time += diff_time
                        logging.info(
                            "probably OOM, autosplit batch. succeeded")
                        print("probably OOM, autosplit batch. succeeded")
                    except Exception:
                        logging.info(
                            "probably OOM, autosplit batch. skip batch")
                        print("probably OOM, autosplit batch. skip batch")
                        continue

            pbar.set_description(
                "(Epoch {}) TRAIN LOSS:{:.4f} CER:{:.2f}% LR:{:.7f} TOTAL TIME:{:.7f}"
                .format((epoch + 1),
                        total_loss / (i + 1), total_cer * 100 / total_char,
                        self.get_lr(opt), total_time))

            final_train_loss = total_loss / (len(train_loader))
            final_train_cer = total_cer * 100 / total_char

            final_train_losses.append(final_train_loss)
            final_train_cers.append(final_train_cer)

            logging.info(
                "(Epoch {}) TRAIN LOSS:{:.4f} CER:{:.2f}% LR:{:.7f}".format(
                    (epoch + 1), final_train_loss, final_train_cer,
                    self.get_lr(opt)))

            # evaluate
            if (epoch + 1) % evaluate_every == 0:
                print("")
                logging.info("VALID")
                model.eval()

                final_valid_losses = []
                final_valid_cers = []
                for ind in range(len(valid_loader_list)):
                    valid_loader = valid_loader_list[ind]

                    total_valid_loss, total_valid_cer, total_valid_wer, total_valid_char, total_valid_word = 0, 0, 0, 0, 0
                    valid_pbar = tqdm(iter(valid_loader),
                                      leave=True,
                                      total=len(valid_loader))
                    for i, data in enumerate(valid_pbar):
                        torch.cuda.empty_cache()

                        src, trg, src_percentages, src_lengths, trg_lengths = data
                        try:
                            if args.cuda:
                                src = src.cuda()
                                trg = trg.cuda()
                            loss, cer, num_char = self.train_one_batch(
                                model, vocab, src, trg, src_percentages,
                                src_lengths, trg_lengths, smoothing, loss_type)
                            total_valid_cer += cer
                            total_valid_char += num_char

                            total_valid_loss += loss.item()
                            valid_pbar.set_description(
                                "VALID SET {} LOSS:{:.4f} CER:{:.2f}%".format(
                                    ind, total_valid_loss / (i + 1),
                                    total_valid_cer * 100 / total_valid_char))
                            # valid_pbar.set_description("(Epoch {}) VALID LOSS:{:.4f} CER:{:.2f}% WER:{:.2f}%".format(
                            # (epoch+1), total_valid_loss/(i+1), total_valid_cer*100/total_valid_char, total_valid_wer*100/total_valid_word))
                        except Exception:
                            try:
                                torch.cuda.empty_cache()
                                src = src.cpu()
                                trg = trg.cpu()
                                src_splits = src.split(2, dim=0)
                                trg_splits = trg.split(2, dim=0)
                                src_lengths_splits = src_lengths.split(2, dim=0)
                                trg_lengths_splits = trg_lengths.split(2, dim=0)
                                src_percentages_splits = src_percentages.split(2, dim=0)
                                j = 0
                                for src, trg, src_lengths, trg_lengths, src_percentages in zip(
                                        src_splits, trg_splits,
                                        src_lengths_splits, trg_lengths_splits,
                                        src_percentages_splits):
                                    opt.zero_grad()
                                    torch.cuda.empty_cache()
                                    if args.cuda:
                                        src = src.cuda()
                                        trg = trg.cuda()

                                    loss, cer, num_char = self.train_one_batch(
                                        model, vocab, src, trg,
                                        src_percentages, src_lengths,
                                        trg_lengths, smoothing, loss_type)
                                    total_valid_cer += cer
                                    total_valid_char += num_char

                                    if args.clip:
                                        torch.nn.utils.clip_grad_norm_(
                                            model.parameters(), args.max_norm)

                                    total_valid_loss += loss.item()
                                    j += 1
                                valid_pbar.set_description(
                                    "VALID SET {} LOSS:{:.4f} CER:{:.2f}%".format(
                                        ind, total_valid_loss / (i + 1),
                                        total_valid_cer * 100 / total_valid_char))

                                logging.info(
                                    "probably OOM, autosplit batch. succeeded")
                                print(
                                    "probably OOM, autosplit batch. succeeded")
                            except Exception:
                                logging.info(
                                    "probably OOM, autosplit batch. skip batch")
                                print("probably OOM, autosplit batch. skip batch")
                                continue

                    final_valid_loss = total_valid_loss / (len(valid_loader))
                    final_valid_cer = total_valid_cer * 100 / total_valid_char

                    final_valid_losses.append(final_valid_loss)
                    final_valid_cers.append(final_valid_cer)
                    print("VALID SET {} LOSS:{:.4f} CER:{:.2f}%".format(
                        ind, final_valid_loss, final_valid_cer))
                    logging.info("VALID SET {} LOSS:{:.4f} CER:{:.2f}%".format(
                        ind, final_valid_loss, final_valid_cer))

                metrics = {}
                avg_valid_loss = sum(final_valid_losses) / len(
                    final_valid_losses)
                avg_valid_cer = sum(final_valid_cers) / len(final_valid_cers)
                metrics["avg_train_loss"] = sum(final_train_losses) / len(
                    final_train_losses)
                metrics["avg_valid_loss"] = avg_valid_loss
                metrics["avg_train_cer"] = sum(final_train_cers) / len(
                    final_train_cers)
                metrics["avg_valid_cer"] = avg_valid_cer
                metrics["train_loss"] = final_train_losses
                metrics["valid_loss"] = final_valid_losses
                metrics["train_cer"] = final_train_cers
                metrics["valid_cer"] = final_valid_cers
                metrics["history"] = history
                history.append(metrics)

                print("AVG VALID LOSS:{:.4f} AVG CER:{:.2f}%".format(
                    sum(final_valid_losses) / len(final_valid_losses),
                    sum(final_valid_cers) / len(final_valid_cers)))
                logging.info("AVG VALID LOSS:{:.4f} AVG CER:{:.2f}%".format(
                    sum(final_valid_losses) / len(final_valid_losses),
                    sum(final_valid_cers) / len(final_valid_cers)))

                if epoch % args.save_every == 0:
                    save_model(model,
                               vocab, (epoch + 1),
                               opt,
                               metrics,
                               args,
                               best_model=False)

                # save the best model
                if early_stop_criteria == "cer":
                    print("CRITERIA: CER")
                    if best_valid_val > avg_valid_cer:
                        count_stop = 0
                        best_valid_val = avg_valid_cer
                        save_model(model,
                                   vocab, (epoch + 1),
                                   opt,
                                   metrics,
                                   args,
                                   best_model=True)
                    else:
                        print("count_stop:", count_stop)
                        count_stop += 1
                else:
                    print("CRITERIA: LOSS")
                    if best_valid_val > avg_valid_loss:
                        count_stop = 0
                        best_valid_val = avg_valid_loss
                        save_model(model,
                                   vocab, (epoch + 1),
                                   opt,
                                   metrics,
                                   args,
                                   best_model=True)
                    else:
                        count_stop += 1
                        print("count_stop:", count_stop)

                if count_stop >= early_stop_val:
                    logging.info("EARLY STOP")
                    print("EARLY STOP\n")
                    break
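The early-stopping bookkeeping in the example above (criteria string, best
value, patience counter) can be factored into a small helper. A minimal
sketch assuming the same "<criteria>,<patience>" convention (the class and
its names are hypothetical, not part of the original code):

class EarlyStopper:
    def __init__(self, spec):
        # spec is e.g. "cer,10" or "loss,10"
        criteria, patience = spec.split(",")
        self.criteria = criteria
        self.patience = int(patience)
        self.best = float("inf")
        self.count = 0

    def step(self, avg_valid_loss, avg_valid_cer):
        # returns True once the chosen metric has not improved
        # for `patience` consecutive evaluations
        value = avg_valid_cer if self.criteria == "cer" else avg_valid_loss
        if value < self.best:
            self.best = value
            self.count = 0
        else:
            self.count += 1
        return self.count >= self.patience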
Example #5
    def train(self,
              model,
              train_loader,
              train_sampler,
              valid_loader_list,
              opt,
              loss_type,
              start_epoch,
              num_epochs,
              label2id,
              id2label,
              last_metrics=None):
        """
        Training
        args:
            model: Model object
            train_loader: DataLoader object of the training set
            valid_loader_list: a list of Validation DataLoader objects
            opt: Optimizer object
            start_epoch: start epoch (> 0 if you resume the process)
            num_epochs: last epoch
            last_metrics: (if resume)
        """
        history = []
        start_time = time.time()
        best_valid_loss = (float("inf") if last_metrics is None
                           else last_metrics['valid_loss'])
        smoothing = constant.args.label_smoothing

        logging.info("name " + constant.args.name)

        for epoch in range(start_epoch, num_epochs):
            sys.stdout.flush()
            total_loss, total_cer, total_wer, total_char, total_word = 0, 0, 0, 0, 0

            start_iter = 0

            logging.info("TRAIN")
            model.train()
            pbar = tqdm(iter(train_loader),
                        leave=True,
                        total=len(train_loader))
            for i, data in enumerate(pbar, start=start_iter):
                src, tgt, src_percentages, src_lengths, tgt_lengths = data

                if constant.USE_CUDA:
                    src = src.cuda()
                    tgt = tgt.cuda()

                opt.zero_grad()

                pred, gold, hyp_seq, gold_seq = model(src,
                                                      src_lengths,
                                                      tgt,
                                                      verbose=False)

                try:  # handle case for CTC
                    strs_gold, strs_hyps = [], []
                    for ut_gold in gold_seq:
                        str_gold = ""
                        for x in ut_gold:
                            if int(x) == constant.PAD_TOKEN:
                                break
                            str_gold = str_gold + id2label[int(x)]
                        strs_gold.append(str_gold)
                    for ut_hyp in hyp_seq:
                        str_hyp = ""
                        for x in ut_hyp:
                            if int(x) == constant.PAD_TOKEN:
                                break
                            str_hyp = str_hyp + id2label[int(x)]
                        strs_hyps.append(str_hyp)
                except Exception as e:
                    print(e)
                    logging.info("NaN predictions")
                    continue

                seq_length = pred.size(1)
                sizes = src_percentages.mul_(int(seq_length)).int()

                loss, num_correct = calculate_metrics(
                    pred,
                    gold,
                    input_lengths=sizes,
                    target_lengths=tgt_lengths,
                    smoothing=smoothing,
                    loss_type=loss_type)

                if torch.isnan(loss) or torch.isinf(loss):
                    logging.info("Found NaN or infinite loss, skipping batch")
                    continue

                # if constant.args.verbose:
                #     logging.info("GOLD", strs_gold)
                #     logging.info("HYP", strs_hyps)

                for j in range(len(strs_hyps)):
                    strs_hyps[j] = strs_hyps[j].replace(
                        constant.SOS_CHAR, '').replace(constant.EOS_CHAR, '')
                    strs_gold[j] = strs_gold[j].replace(
                        constant.SOS_CHAR, '').replace(constant.EOS_CHAR, '')
                    cer = calculate_cer(strs_hyps[j].replace(' ', ''),
                                        strs_gold[j].replace(' ', ''))
                    wer = calculate_wer(strs_hyps[j], strs_gold[j])
                    total_cer += cer
                    total_wer += wer
                    total_char += len(strs_gold[j].replace(' ', ''))
                    total_word += len(strs_gold[j].split(" "))

                loss.backward()

                if constant.args.clip:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   constant.args.max_norm)

                opt.step()

                total_loss += loss.item()
                non_pad_mask = gold.ne(constant.PAD_TOKEN)
                num_word = non_pad_mask.sum().item()

                pbar.set_description(
                    "(Epoch {}) TRAIN LOSS:{:.4f} CER:{:.2f}% LR:{:.7f}".
                    format((epoch + 1), total_loss / (i + 1),
                           total_cer * 100 / total_char, opt._rate))
            logging.info(
                "(Epoch {}) TRAIN LOSS:{:.4f} CER:{:.2f}% LR:{:.7f}".format(
                    (epoch + 1), total_loss / (len(train_loader)),
                    total_cer * 100 / total_char, opt._rate))

            # evaluate
            print("")
            logging.info("VALID")
            model.eval()

            for ind in range(len(valid_loader_list)):
                valid_loader = valid_loader_list[ind]

                total_valid_loss, total_valid_cer, total_valid_wer, total_valid_char, total_valid_word = 0, 0, 0, 0, 0
                valid_pbar = tqdm(iter(valid_loader),
                                  leave=True,
                                  total=len(valid_loader))
                for i, data in enumerate(valid_pbar):
                    src, tgt, src_percentages, src_lengths, tgt_lengths = data

                    if constant.USE_CUDA:
                        src = src.cuda()
                        tgt = tgt.cuda()

                    pred, gold, hyp_seq, gold_seq = model(src,
                                                          src_lengths,
                                                          tgt,
                                                          verbose=False)

                    seq_length = pred.size(1)
                    sizes = src_percentages.mul_(int(seq_length)).int()

                    loss, num_correct = calculate_metrics(
                        pred,
                        gold,
                        input_lengths=sizes,
                        target_lengths=tgt_lengths,
                        smoothing=smoothing,
                        loss_type=loss_type)

                    if torch.isnan(loss) or torch.isinf(loss):
                        logging.info("Found NaN or infinite loss, skipping batch")
                        continue

                    try:  # handle case for CTC
                        strs_gold, strs_hyps = [], []
                        for ut_gold in gold_seq:
                            str_gold = ""
                            for x in ut_gold:
                                if int(x) == constant.PAD_TOKEN:
                                    break
                                str_gold = str_gold + id2label[int(x)]
                            strs_gold.append(str_gold)
                        for ut_hyp in hyp_seq:
                            str_hyp = ""
                            for x in ut_hyp:
                                if int(x) == constant.PAD_TOKEN:
                                    break
                                str_hyp = str_hyp + id2label[int(x)]
                            strs_hyps.append(str_hyp)
                    except Exception as e:
                        print(e)
                        logging.info("NaN predictions")
                        continue

                    for j in range(len(strs_hyps)):
                        strs_hyps[j] = strs_hyps[j].replace(
                            constant.SOS_CHAR,
                            '').replace(constant.EOS_CHAR, '')
                        strs_gold[j] = strs_gold[j].replace(
                            constant.SOS_CHAR,
                            '').replace(constant.EOS_CHAR, '')
                        cer = calculate_cer(strs_hyps[j].replace(' ', ''),
                                            strs_gold[j].replace(' ', ''))
                        wer = calculate_wer(strs_hyps[j], strs_gold[j])
                        total_valid_cer += cer
                        total_valid_wer += wer
                        total_valid_char += len(strs_gold[j].replace(' ', ''))
                        total_valid_word += len(strs_gold[j].split(" "))

                    total_valid_loss += loss.item()
                    valid_pbar.set_description(
                        "VALID SET {} LOSS:{:.4f} CER:{:.2f}%".format(
                            ind, total_valid_loss / (i + 1),
                            total_valid_cer * 100 / total_valid_char))
                logging.info("VALID SET {} LOSS:{:.4f} CER:{:.2f}%".format(
                    ind, total_valid_loss / (len(valid_loader)),
                    total_valid_cer * 100 / total_valid_char))

            metrics = {}
            metrics["train_loss"] = total_loss / len(train_loader)
            # note: the validation metrics below come from the last loader only
            metrics["valid_loss"] = total_valid_loss / len(valid_loader)
            metrics["train_cer"] = total_cer
            metrics["train_wer"] = total_wer
            metrics["valid_cer"] = total_valid_cer
            metrics["valid_wer"] = total_valid_wer
            metrics["history"] = history
            history.append(metrics)

            if epoch % constant.args.save_every == 0:
                save_model(model, (epoch + 1),
                           opt,
                           metrics,
                           label2id,
                           id2label,
                           best_model=False)

            # save the best model (selected on the last validation set's loss)
            if best_valid_loss > total_valid_loss / len(valid_loader):
                best_valid_loss = total_valid_loss / len(valid_loader)
                save_model(model, (epoch + 1),
                           opt,
                           metrics,
                           label2id,
                           id2label,
                           best_model=True)

            if constant.args.shuffle:
                logging.info("SHUFFLE")
                print("SHUFFLE")
                train_sampler.shuffle(epoch)
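Examples #4 and #5 rely on calculate_cer and calculate_wer, which are not
shown. Given how the totals are normalized (total_cer * 100 / total_char),
they must return raw edit distances rather than ratios. A minimal sketch
using the standard Levenshtein dynamic program (the function names match
the call sites; the implementation itself is an assumption):

def _levenshtein(a, b):
    # classic O(len(a) * len(b)) edit distance over two sequences
    prev = list(range(len(b) + 1))
    for i, x in enumerate(a, 1):
        curr = [i]
        for j, y in enumerate(b, 1):
            curr.append(min(prev[j] + 1,                # deletion
                            curr[j - 1] + 1,            # insertion
                            prev[j - 1] + (x != y)))    # substitution
        prev = curr
    return prev[-1]

def calculate_cer(hyp, gold):
    # character-level edit distance between hypothesis and reference
    return _levenshtein(hyp, gold)

def calculate_wer(hyp, gold):
    # word-level edit distance between hypothesis and reference
    return _levenshtein(hyp.split(), gold.split())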
Example #6
    def train(self,
              model,
              train_loader,
              train_sampler,
              valid_loaders,
              opt,
              loss_type,
              start_epoch,
              num_epochs,
              label2id,
              id2label,
              last_metrics=None,
              logger=None):
        """
        Training
        args:
            model: Model object
            train_loader: DataLoader object of the training set
            valid_loaders: list of DataLoader object of the validation set
            opt: Optimizer object
            start_epoch: start epoch (> 0 if you resume the process)
            num_epochs: last epoch
            last_metrics: (if resume)
        """
        if logger is not None:
            sys.stdout = logger

        start_time = time.time()
        best_valid_loss = (float("inf") if last_metrics is None
                           else last_metrics['valid_loss'])
        smoothing = constant.args.label_smoothing

        history = []

        for epoch in range(start_epoch, num_epochs):
            sys.stdout.flush()
            total_loss, total_cer, total_wer, total_char, total_word = 0, 0, 0, 0, 0
            start_iter = 0

            print("TRAIN")
            model.train()
            pbar = tqdm(iter(train_loader),
                        leave=True,
                        total=len(train_loader))
            for i, data in enumerate(pbar, start=start_iter):
                src, tgt, src_percentages, src_lengths, tgt_lengths = data

                if constant.USE_CUDA:
                    src = src.cuda()
                    tgt = tgt.cuda()

                opt.optimizer.zero_grad()

                pred, gold, hyp_seq, gold_seq = model(
                    src,
                    input_lengths=src_lengths,
                    padded_target=tgt,
                    verbose=constant.args.verbose)

                strs_gold = [
                    "".join([id2label[int(x)] for x in gold])
                    for gold in gold_seq
                ]
                strs_hyps = [
                    "".join([id2label[int(x)] for x in hyp]) for hyp in hyp_seq
                ]

                loss, num_correct = calculate_metrics(
                    pred,
                    gold,
                    smoothing=smoothing,
                    loss_type=loss_type,
                    input_lengths=src_lengths,
                    target_lengths=tgt_lengths)

                if constant.args.verbose:
                    print("GOLD", strs_gold)
                    print("HYP", strs_hyps)

                for j in range(len(strs_hyps)):
                    cer = calculate_cer(strs_hyps[j], strs_gold[j])
                    wer = calculate_wer(strs_hyps[j], strs_gold[j])
                    total_cer += cer
                    total_wer += wer
                    total_char += len(strs_gold[j])
                    total_word += len(strs_gold[j].split(" "))

                loss.backward()
                opt.optimizer.step()

                total_loss += loss.detach().item()
                non_pad_mask = gold.ne(constant.PAD_TOKEN)
                num_word = non_pad_mask.sum().item()

                pbar.set_description(
                    "(Epoch {}) TRAIN LOSS:{:.4f} CER:{:.2f}% WER:{:.2f}%".
                    format((epoch + 1), total_loss / (i + 1),
                           total_cer * 100 / total_char,
                           total_wer * 100 / total_word))
            print(
                "(Epoch {}) TRAIN LOSS:{:.4f} CER:{:.2f}% WER:{:.2f}%".format(
                    (epoch + 1), total_loss / (len(train_loader)),
                    total_cer * 100 / total_char,
                    total_wer * 100 / total_word))

            print("VALID")
            all_valid_loss = []
            for valid_task_id in range(len(valid_loaders)):
                model.eval()
                sys.stdout.flush()

                valid_loader = valid_loaders[valid_task_id]

                total_valid_loss, total_valid_cer, total_valid_wer, total_valid_char, total_valid_word = 0, 0, 0, 0, 0
                valid_pbar = tqdm(iter(valid_loader),
                                  leave=True,
                                  total=len(valid_loader))
                for i, data in enumerate(valid_pbar):
                    src, tgt, src_percentages, src_lengths, tgt_lengths = data

                    if constant.USE_CUDA:
                        src = src.cuda()
                        tgt = tgt.cuda()

                    pred, gold, hyp_seq, gold_seq = model(
                        src,
                        input_lengths=src_lengths,
                        padded_target=tgt,
                        verbose=constant.args.verbose)
                    loss, num_correct = calculate_metrics(
                        pred,
                        gold,
                        smoothing=smoothing,
                        loss_type=loss_type,
                        input_lengths=src_lengths,
                        target_lengths=tgt_lengths)

                    strs_gold = [
                        "".join([id2label[int(x)] for x in gold])
                        for gold in gold_seq
                    ]
                    strs_hyps = [
                        "".join([id2label[int(x)] for x in hyp])
                        for hyp in hyp_seq
                    ]

                    for j in range(len(strs_hyps)):
                        cer = calculate_cer(strs_hyps[j], strs_gold[j])
                        wer = calculate_wer(strs_hyps[j], strs_gold[j])
                        total_valid_cer += cer
                        total_valid_wer += wer
                        total_valid_char += len(strs_gold[j])
                        total_valid_word += len(strs_gold[j].split(" "))

                    total_valid_loss += loss.detach().item()
                    valid_pbar.set_description(
                        "(Epoch {}) TASK:{} VALID LOSS:{:.4f} CER:{:.2f}% WER:{:.2f}%"
                        .format((epoch + 1), valid_task_id,
                                total_valid_loss / (i + 1),
                                total_valid_cer * 100 / total_valid_char,
                                total_valid_wer * 100 / total_valid_word))
                all_valid_loss.append(total_valid_loss / len(valid_loader))
                print(
                    "(Epoch {}) TASK:{} VALID LOSS:{:.4f} CER:{:.2f}% WER:{:.2f}%"
                    .format((epoch + 1), valid_task_id,
                            total_valid_loss / (len(valid_loader)),
                            total_valid_cer * 100 / total_valid_char,
                            total_valid_wer * 100 / total_valid_word))

            metrics = {}
            metrics["train_loss"] = total_loss / len(train_loader)
            metrics["valid_loss"] = np.mean(np.array(all_valid_loss))
            metrics["valid_losses"] = all_valid_loss
            metrics["train_cer"] = total_cer
            metrics["train_wer"] = total_wer
            metrics["valid_cer"] = total_valid_cer
            metrics["valid_wer"] = total_valid_wer
            metrics["history"] = history
            history.append(metrics)

            if epoch % constant.args.save_every == 0:
                save_model(model, (epoch + 1),
                           opt,
                           metrics,
                           label2id,
                           id2label,
                           best_model=False)

            # save the best model (selected on the last validation set's loss)
            if best_valid_loss > total_valid_loss / len(valid_loader):
                best_valid_loss = total_valid_loss / len(valid_loader)
                save_model(model, (epoch + 1),
                           opt,
                           metrics,
                           label2id,
                           id2label,
                           best_model=True)

            if constant.args.shuffle:
                print("SHUFFLE")
                train_sampler.shuffle(epoch)
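Examples #5 and #6 read opt._rate, and #6 steps the wrapped optimizer via
opt.optimizer, which points to a warm-up learning-rate wrapper in the style
of the Transformer's Noam schedule. A minimal sketch of such a wrapper,
inferred from those attribute accesses (the class itself is an assumption):

class NoamOpt:
    def __init__(self, model_size, factor, warmup, optimizer):
        self.optimizer = optimizer   # the wrapped torch optimizer
        self.model_size = model_size
        self.factor = factor
        self.warmup = warmup
        self._step = 0
        self._rate = 0.0

    def rate(self, step=None):
        # lr = factor * d_model^-0.5 * min(step^-0.5, step * warmup^-1.5)
        step = self._step if step is None else step
        return self.factor * (self.model_size ** -0.5 *
                              min(step ** -0.5, step * self.warmup ** -1.5))

    def zero_grad(self):
        self.optimizer.zero_grad()

    def step(self):
        self._step += 1
        self._rate = self.rate()
        for group in self.optimizer.param_groups:
            group["lr"] = self._rate
        self.optimizer.step()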
Example #7
            trainer.writer.add_scalars('iter_loss/loss_d',
                                       {'train': loss_d.item()},
                                       trainer.num_updates)
            trainer.writer.add_scalars('iter_loss/loss_g',
                                       {'train': loss_g.item()},
                                       trainer.num_updates)

            # free cached GPU memory
            torch.cuda.empty_cache()

            i += 1

    # log train stats
    train_loss_gen /= num_batches
    train_loss_dis /= num_batches

    trainer.writer.add_scalars('epoch_loss/loss_d', {'train': train_loss_dis},
                               trainer.num_updates)
    trainer.writer.add_scalars('epoch_loss/loss_g', {'train': train_loss_gen},
                               trainer.num_updates)

    print('Epoch {}: Loss D - {:.5f}, Loss G - {:.5f}'.format(
        current_epoch, train_loss_dis, train_loss_gen))

    # save model
    save_model(trainer.gen, trainer.g_optimizer, current_epoch,
               trainer.num_updates, chkpdir, 'gen')
    save_model(trainer.dis, trainer.d_optimizer, current_epoch,
               trainer.num_updates, chkpdir, 'dis')
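Example #7 saves the generator and discriminator through yet another
save_model signature. A minimal sketch consistent with those call sites,
assuming torch.save-based checkpointing (the file layout is hypothetical):

import os
import torch

def save_model(module, optimizer, current_epoch, num_updates, chkpdir, tag):
    # one checkpoint file per component ('gen' or 'dis'), carrying enough
    # state to resume: weights, optimizer state, and the training position
    os.makedirs(chkpdir, exist_ok=True)
    torch.save({
        "epoch": current_epoch,
        "num_updates": num_updates,
        "state_dict": module.state_dict(),
        "optimizer": optimizer.state_dict(),
    }, os.path.join(chkpdir, "{}-epoch{}.pt".format(tag, current_epoch)))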