Ejemplo n.º 1
0
    def __init__(self, src_DAMSM_CNN, src_DAMSM_RNN, tgt_DAMSM_RNN, netG, netsD,
                 netG_optimizer, netD_optimizers, train_loader, scaler, opt):
        """Set up T2I training state: networks, optimizers, label/noise tensors, logging.

        All arguments are stored as-is; helper tensors are created on the
        device hosting ``netG``.
        """
        # --- networks ---
        self.src_DAMSM_CNN = src_DAMSM_CNN
        self.src_DAMSM_RNN = src_DAMSM_RNN
        self.tgt_DAMSM_RNN = tgt_DAMSM_RNN
        self.netG = netG
        self.netsD = netsD

        # --- optimization / data ---
        self.netG_optimizer = netG_optimizer
        self.netD_optimizers = netD_optimizers
        self.train_loader = train_loader
        self.scaler = scaler
        self.opt = opt

        # frequently used options, unpacked for convenience
        self.save_model_dir = opt.save_model_dir
        self.save_image_dir = opt.save_image_dir
        self.stage_num = opt.stage_num

        device = get_device(self.netG)
        self.device = device
        # snapshot of generator params — presumably a moving average maintained
        # during training (see save_model / save_fixed_images); confirm updater.
        self.avg_param_G = copy_params(self.netG)

        batch_size = opt.batch_size
        # GAN targets: all-ones for real, all-zeros for fake; 0..B-1 for matching
        self.real_labels = torch.FloatTensor(batch_size).fill_(1).to(device)
        self.fake_labels = torch.FloatTensor(batch_size).fill_(0).to(device)
        self.match_labels = torch.arange(batch_size).to(device)
        # per-step noise buffer, plus a fixed draw for reproducible sample images
        self.noise = torch.FloatTensor(batch_size, opt.noise_dim).to(device)
        self.fixed_noise = torch.FloatTensor(batch_size, opt.noise_dim).normal_(0, 1).to(device)
        (self.src_word_embs, self.src_sent_emb, self.src_mask,
         self.tgt_word_embs, self.tgt_sent_emb, self.tgt_mask) = self.get_fixed_embs()

        self.logger = get_logger(opt.save_log_path, overwrite=opt.overwrite)
        self.logger.info(args2string(opt))
 def _get_cp_avg_bleu(self, use_beam=False, return_sentences=False):
     """Decode the data loader using checkpoint-averaged model weights.

     Temporarily swaps the model's parameters for their running average
     (``_cp_avg``), generates over the whole loader, then restores the
     original weights so training can continue unaffected.

     NOTE(review): ``return_sentences`` is accepted but never used in this
     body — confirm whether callers rely on it or it is dead.

     Returns:
         tuple: (predicted word sequences, state dict of the averaged model)
     """
     original_params = copy_params(self.model)
     load_params(self.model, self._cp_avg())
     predictions = self.generator.generate_loader(self.data_loader, use_beam)
     averaged_state = get_state_dict(self.model)
     # put the un-averaged weights back before returning
     load_params(self.model, original_params)
     return predictions, averaged_state
Ejemplo n.º 3
0
 def save_fixed_images(self, epoch_i):
     """Render images from the fixed noise/embeddings and write them to disk.

     The averaged generator weights are swapped in for the forward pass and
     the live weights restored afterwards, so training state is untouched.
     """
     current_params = copy_params(self.netG)
     load_params(self.netG, self.avg_param_G)
     generated, _, _, _ = self.netG(
         self.fixed_noise, self.src_word_embs, self.src_sent_emb, self.src_mask,
         self.tgt_word_embs, self.tgt_sent_emb, self.tgt_mask,
     )
     # only the last stage's output is saved (presumably the highest resolution)
     save_image(generated[-1], f"{self.save_image_dir}/{epoch_i}.png")
     load_params(self.netG, current_params)
    def _cp_avg(self):
        """Return the element-wise average of the last ``cp_avg_num`` checkpoints.

        Keeps a sliding window of CPU parameter snapshots in ``self.queue``
        together with their running sum in ``self.sum_params``, so each call
        adds/subtracts one snapshot instead of re-summing the whole window.
        """
        window_len = len(self.queue)  # window size BEFORE adding the new snapshot
        assert window_len <= self.cp_avg_num

        snapshot = copy_params(self.model, to_cpu=True)
        self.queue.append(snapshot)
        self.sum_params = [total + new for total, new in zip(self.sum_params, snapshot)]

        if window_len >= self.cp_avg_num:
            # window full: evict the oldest snapshot from the running sum
            oldest = self.queue.popleft()
            self.sum_params = [total - old for total, old in zip(self.sum_params, oldest)]
            return [total / self.cp_avg_num for total in self.sum_params]
        # window still filling: average over the window_len + 1 entries we have
        return [total / (window_len + 1) for total in self.sum_params]
Ejemplo n.º 5
0
 def save_model(self, cnt, model_name):
     """Serialize a full checkpoint to ``save_model_dir/model_name``.

     The generator's averaged parameters are what gets saved; its live
     parameters are restored before returning. The checkpoint also carries
     every discriminator, all optimizers, the AMP scaler and the options.
     """
     live_params = copy_params(self.netG)
     load_params(self.netG, self.avg_param_G)
     checkpoint = {
         "cnt": cnt,
         "netG": get_state_dict(self.netG),
         "optimG": self.netG_optimizer.state_dict(),
         "scaler": self.scaler.state_dict(),
         "settings": self.opt,
     }
     for stage in range(self.stage_num):
         # key suffix encodes 64 * 2**stage — presumably the stage's image size
         size = 64 * 2 ** stage
         checkpoint[f"netD_{size}"] = get_state_dict(self.netsD[stage])
         checkpoint[f"optimD_{size}"] = self.netD_optimizers[stage].state_dict()
     torch.save(checkpoint, f"{self.save_model_dir}/{model_name}")
     load_params(self.netG, live_params)
    def __init__(self, model, data_loader, references=None, bpe=None,
                 cp_avg_num=1, beam_size=4, len_penalty=1.0):
        """Set up validation/generation state for ``model`` over ``data_loader``.

        When ``cp_avg_num > 1``, prepares the checkpoint-averaging buffers
        used by ``_cp_avg`` (they are intentionally absent otherwise).
        """
        assert cp_avg_num >= 1
        self.model = model
        self.data_loader = data_loader
        self.references = references
        self.cp_avg_num = cp_avg_num
        if cp_avg_num > 1:
            # sliding window of CPU snapshots + running sum, sized like the params
            self.queue = deque()
            self.sum_params = [
                torch.zeros(p.shape)
                for p in copy_params(self.model, to_cpu=True)
            ]

        self.generator = SentenceGenerator(
            model=model,
            tgt_index2word=data_loader.dataset.tgt_index2word,
            bpe=bpe,
            beam_size=beam_size,
            len_penalty=len_penalty
        )
Ejemplo n.º 7
0
    def train(self, start_cnt):
        """Alternate MNMT and T2I training for epochs ``start_cnt``..``opt.max_epoch``.

        Per epoch: run MNMT training with the generator's averaged parameters
        loaded, validate/save, then restore the live generator parameters and
        run ``opt.T2I_per_MNMT`` T2I epochs. Breaks out early once
        ``self.stop_cnt`` reaches ``opt.early_stop``.
        """
        self.device = get_device(self.MNMT)

        start_all = time()
        for epoch_cnt in range(start_cnt, self.opt.max_epoch + 1):
            self.logger.info(f"\n[ Epoch {epoch_cnt} ]")

            # --- train MNMT ---
            start_span = time()
            # snapshot the live generator weights; MNMT training runs with the
            # averaged weights (avg_param_G) loaded instead
            backup_para = copy_params(self.netG)

            load_params(self.netG, self.avg_param_G)
            logs = self.MNMT_train_epoch()
            time_span = (time() - start_span) / 60
            self.logger.info(f"{logs}, time : {time_span:.2f} min")

            # --- valid MNMT ---
            if self.validator is not None:
                state_dict = self.validation(epoch_cnt)
            else:
                state_dict = get_state_dict(self.MNMT)
            # early stop BEFORE saving this epoch's checkpoint
            if self.stop_cnt == self.opt.early_stop:
                break
            self.save_models(epoch_cnt, state_dict, f"epoch_{epoch_cnt}.pth")

            # --- train T2I ---
            start_span = time()
            # restore the live (non-averaged) generator weights for GAN updates
            load_params(self.netG, backup_para)
            # NOTE(review): assumes opt.T2I_per_MNMT >= 1 — with 0, D_logs/G_logs
            # below would be unbound (NameError); confirm config guarantees this.
            for _ in range(self.opt.T2I_per_MNMT):
                D_logs, G_logs = self.T2I_train_epoch()
            time_span = (time() - start_span) / 60
            self.logger.info(f"{D_logs}\n{G_logs}\ntime : {time_span:.2f} min")

        time_all = (time() - start_all) / 3600
        self.logger.info(
            f"\nbest_epoch : {self.best_cnt}, best_score : {self.best_bleu_score}, time : {time_all:.2f} h"
        )
Ejemplo n.º 8
0
    def __init__(self,
                 MNMT,
                 src_DAMSM_CNN,
                 src_DAMSM_RNN,
                 tgt_DAMSM_RNN,
                 netG,
                 netsD,
                 MNMT_optimizer,
                 netG_optimizer,
                 netD_optimizers,
                 DAMSM_optimizer,
                 MNMT_loader,
                 T2I_loader,
                 scaler,
                 scheduler,
                 opt,
                 validator=None):
        """Set up joint MNMT + T2I training state.

        Stores all networks, optimizers and loaders; creates GAN label and
        noise tensors on the device hosting ``netG``; wires the connector
        that maps MNMT vocabulary ids to T2I vocabulary ids.
        """
        # --- networks ---
        self.MNMT = MNMT
        self.src_DAMSM_CNN = src_DAMSM_CNN
        self.src_DAMSM_RNN = src_DAMSM_RNN
        self.tgt_DAMSM_RNN = tgt_DAMSM_RNN
        self.netG = netG
        self.netsD = netsD

        # --- optimization / data ---
        self.MNMT_optimizer = MNMT_optimizer
        self.netG_optimizer = netG_optimizer
        self.netD_optimizers = netD_optimizers
        self.DAMSM_optimizer = DAMSM_optimizer
        self.MNMT_loader = MNMT_loader
        self.T2I_loader = T2I_loader
        self.scaler = scaler
        self.scheduler = scheduler
        self.opt = opt
        self.validator = validator

        # frequently used options, unpacked for convenience
        self.save_model_dir = opt.save_model_dir
        self.save_image_dir = opt.save_image_dir
        self.stage_num = opt.stage_num

        self.device = get_device(self.netG)
        # snapshot of generator params — presumably a moving average maintained
        # during training (loaded/restored in train()); confirm updater.
        self.avg_param_G = copy_params(self.netG)
        # GAN targets: all-ones for real, all-zeros for fake; 0..B-1 for matching.
        # torch.arange used (instead of LongTensor(range(...))) for consistency
        # with the sibling trainer; both yield an int64 tensor of 0..B-1.
        self.real_labels = torch.FloatTensor(opt.T2I_batch_size).fill_(1).to(
            self.device)
        self.fake_labels = torch.FloatTensor(opt.T2I_batch_size).fill_(0).to(
            self.device)
        self.match_labels = torch.arange(opt.T2I_batch_size).to(self.device)
        # per-phase noise buffers sized for each phase's batch size
        self.T2I_noise = torch.FloatTensor(opt.T2I_batch_size,
                                           opt.noise_dim).to(self.device)
        self.MNMT_noise = torch.FloatTensor(opt.MNMT_batch_size,
                                            opt.noise_dim).to(self.device)
        # bridges MNMT output ids to T2I input ids (vocabularies differ)
        self.model_connector = ModelConnector(
            opt.T2I_batch_size,
            opt.train_words_limit,
            MNMT_id2word=MNMT_loader.dataset.tgt_index2word,
            T2I_word2id=T2I_loader.dataset.tgt_word2index,
            bpe=opt.bpe,
        )

        self.logger = get_logger(opt.save_log_path, overwrite=opt.overwrite)
        self.logger.info(args2string(opt))

        # best-validation bookkeeping used by train()'s early stopping
        self.best_bleu_score = 0.
        self.best_cnt = 0
        self.stop_cnt = 0