Ejemplo n.º 1
0
class Instructor:
    def __init__(self, model_name: str, args):
        self.model_name = model_name
        self.args = args
        self.encoder = Encoder(self.args.add_noise).to(self.args.device)
        self.decoder = Decoder(self.args.upsample_mode).to(self.args.device)
        self.pretrainDataset = None
        self.pretrainDataloader = None
        self.pretrainOptimizer = None
        self.pretrainScheduler = None
        self.RHO_tensor = None
        self.pretrain_batch_cnt = 0
        self.writer = None
        self.svmDataset = None
        self.svmDataloader = None
        self.testDataset = None
        self.testDataloader = None
        self.svm = SVC(C=self.args.svm_c,
                       kernel=self.args.svm_ker,
                       verbose=True,
                       max_iter=self.args.svm_max_iter)
        self.resnet = Resnet(use_pretrained=True,
                             num_classes=self.args.classes,
                             resnet_depth=self.args.resnet_depth,
                             dropout=self.args.resnet_dropout).to(
                                 self.args.device)
        self.resnetOptimizer = None
        self.resnetScheduler = None
        self.resnetLossFn = None

    def _load_data_by_label(self, label: str) -> list:
        ret = []
        LABEL_PATH = os.path.join(self.args.TRAIN_PATH, label)
        for dir_path, _, file_list in os.walk(LABEL_PATH, topdown=False):
            for file_name in file_list:
                file_path = os.path.join(dir_path, file_name)
                img_np = imread(file_path)
                img = img_np.copy()
                img = img.tolist()
                ret.append(img)
        return ret

    def _load_all_data(self):
        all_data = []
        all_labels = []
        for label_id in range(0, self.args.classes):
            expression = LabelEnum(label_id)
            sub_data = self._load_data_by_label(expression.name)
            sub_labels = [label_id] * len(sub_data)
            all_data.extend(sub_data)
            all_labels.extend(sub_labels)
        return all_data, all_labels

    def _load_test_data(self):
        file_map = pd.read_csv(
            os.path.join(self.args.RAW_PATH, 'submission.csv'))
        test_data = []
        img_names = []
        for file_name in file_map['file_name']:
            file_path = os.path.join(self.args.TEST_PATH, file_name)
            img_np = imread(file_path)
            img = img_np.copy()
            img = img.tolist()
            test_data.append(img)
            img_names.append(file_name)
        return test_data, img_names

    def trainAutoEncoder(self):
        self.writer = SummaryWriter(
            os.path.join(self.args.LOG_PATH, self.model_name))
        all_data, all_labels = self._load_all_data()
        self.pretrainDataset = FERDataset(all_data,
                                          labels=all_labels,
                                          args=self.args)
        self.pretrainDataloader = DataLoader(dataset=self.pretrainDataset,
                                             batch_size=self.args.batch_size,
                                             shuffle=True,
                                             num_workers=self.args.num_workers)
        self.pretrainOptimizer = torch.optim.Adam([{
            'params':
            self.encoder.parameters(),
            'lr':
            self.args.pretrain_lr
        }, {
            'params':
            self.decoder.parameters(),
            'lr':
            self.args.pretrain_lr
        }])
        tot_steps = math.ceil(
            len(self.pretrainDataloader) /
            self.args.cumul_batch) * self.args.epochs
        self.pretrainScheduler = get_linear_schedule_with_warmup(
            self.pretrainOptimizer,
            num_warmup_steps=0,
            num_training_steps=tot_steps)
        self.RHO_tensor = torch.tensor(
            [self.args.rho for _ in range(self.args.embed_dim)],
            dtype=torch.float).unsqueeze(0).to(self.args.device)
        epochs = self.args.epochs
        for epoch in range(1, epochs + 1):
            print()
            print(
                "================ AutoEncoder Training Epoch {:}/{:} ================"
                .format(epoch, epochs))
            print(" ---- Start training ------>")
            self.epochTrainAutoEncoder(epoch)
            print()
        self.writer.close()

    def epochTrainAutoEncoder(self, epoch):
        self.encoder.train()
        self.decoder.train()

        cumul_loss = 0
        cumul_steps = 0
        cumul_samples = 0

        self.pretrainOptimizer.zero_grad()
        cumulative_batch = 0

        for idx, (images, labels) in enumerate(tqdm(self.pretrainDataloader)):
            batch_size = images.shape[0]
            images, labels = images.to(self.args.device), labels.to(
                self.args.device)

            embeds = self.encoder(images)
            outputs = self.decoder(embeds)

            loss = torch.nn.functional.mse_loss(outputs, images)
            if self.args.use_sparse:
                rho_hat = torch.mean(embeds, dim=0, keepdim=True)
                sparse_penalty = self.args.regulizer_weight * torch.nn.functional.kl_div(
                    input=torch.nn.functional.log_softmax(rho_hat, dim=-1),
                    target=torch.nn.functional.softmax(self.RHO_tensor,
                                                       dim=-1))
                loss = loss + sparse_penalty

            loss_each = loss / self.args.cumul_batch
            loss_each.backward()

            cumulative_batch += 1
            cumul_steps += 1
            cumul_loss += loss.detach().cpu().item() * batch_size
            cumul_samples += batch_size

            if cumulative_batch >= self.args.cumul_batch:
                torch.nn.utils.clip_grad_norm_(self.encoder.parameters(),
                                               max_norm=self.args.max_norm)
                torch.nn.utils.clip_grad_norm_(self.decoder.parameters(),
                                               max_norm=self.args.max_norm)
                self.pretrainOptimizer.step()
                self.pretrainScheduler.step()
                self.pretrainOptimizer.zero_grad()
                cumulative_batch = 0

            if cumul_steps >= self.args.disp_period or idx + 1 == len(
                    self.pretrainDataloader):
                print(" -> cumul_steps={:} loss={:}".format(
                    cumul_steps, cumul_loss / cumul_samples))
                self.pretrain_batch_cnt += 1
                self.writer.add_scalar('batch-loss',
                                       cumul_loss / cumul_samples,
                                       global_step=self.pretrain_batch_cnt)
                self.writer.add_scalar('encoder_lr',
                                       self.pretrainOptimizer.state_dict()
                                       ['param_groups'][0]['lr'],
                                       global_step=self.pretrain_batch_cnt)
                self.writer.add_scalar('decoder_lr',
                                       self.pretrainOptimizer.state_dict()
                                       ['param_groups'][1]['lr'],
                                       global_step=self.pretrain_batch_cnt)
                cumul_steps = 0
                cumul_loss = 0
                cumul_samples = 0

        self.saveAutoEncoder(epoch)

    def saveAutoEncoder(self, epoch):
        encoderPath = os.path.join(
            self.args.CKPT_PATH,
            self.model_name + "--Encoder" + "--EPOCH-{:}".format(epoch))
        decoderPath = os.path.join(
            self.args.CKPT_PATH,
            self.model_name + "--Decoder" + "--EPOCH-{:}".format(epoch))
        print("-----------------------------------------------")
        print("  -> Saving AutoEncoder {:} ......".format(self.model_name))
        torch.save(self.encoder.state_dict(), encoderPath)
        torch.save(self.decoder.state_dict(), decoderPath)
        print("  -> Successfully saved AutoEncoder.")
        print("-----------------------------------------------")

    def generateAutoEncoderTestResultSamples(self, sample_cnt):
        self.encoder.eval()
        self.decoder.eval()
        print('  -> Generating samples with AutoEncoder ...')
        save_path = os.path.join(self.args.SAMPLE_PATH, self.model_name)
        if not os.path.exists(save_path):
            os.mkdir(save_path)
        with torch.no_grad():
            for dir_path, _, file_list in os.walk(self.args.TEST_PATH,
                                                  topdown=False):
                sample_file_list = random.choices(file_list, k=sample_cnt)
                for file_name in sample_file_list:
                    file_path = os.path.join(dir_path, file_name)
                    img_np = imread(file_path)
                    img = img_np.copy()
                    img = ToTensor()(img)
                    img = img.reshape(1, 1, 48, 48)
                    img = img.to(self.args.device)
                    embed = self.encoder(img)
                    out = self.decoder(embed).cpu()
                    out = out.reshape(1, 48, 48)
                    out_img = ToPILImage()(out)
                    out_img.save(os.path.join(save_path, file_name))
        print('  -> Done sampling from AutoEncoder with test pictures.')

    def loadAutoEncoder(self, epoch):
        encoderPath = os.path.join(
            self.args.CKPT_PATH,
            self.model_name + "--Encoder" + "--EPOCH-{:}".format(epoch))
        decoderPath = os.path.join(
            self.args.CKPT_PATH,
            self.model_name + "--Decoder" + "--EPOCH-{:}".format(epoch))
        print("-----------------------------------------------")
        print("  -> Loading AutoEncoder {:} ......".format(self.model_name))
        self.encoder.load_state_dict(torch.load(encoderPath))
        self.decoder.load_state_dict(torch.load(decoderPath))
        print("  -> Successfully loaded AutoEncoder.")
        print("-----------------------------------------------")

    def generateExtractedFeatures(
            self, data: torch.FloatTensor) -> torch.FloatTensor:
        """
        :param data: (batch, channel, l, w)
        :return: embed: (batch, embed_dim)
        """
        with torch.no_grad():
            data = data.to(self.args.device)
            embed = self.encoder(data)
            embed = embed.detach().cpu()
            return embed

    def trainSVM(self, load: bool):
        svm_path = os.path.join(self.args.CKPT_PATH, self.model_name + '--svm')
        self.loadAutoEncoder(self.args.epochs)
        self.encoder.eval()
        self.decoder.eval()
        if load:
            print('  -> Loaded from SVM trained model.')
            self.svm = joblib.load(svm_path)
            return
        print()
        print("================ SVM Training Starting ================")
        all_data, all_labels = self._load_all_data()
        all_length = len(all_data)
        self.svmDataset = FERDataset(all_data,
                                     labels=all_labels,
                                     use_da=False,
                                     args=self.args)
        self.svmDataloader = DataLoader(dataset=self.svmDataset,
                                        batch_size=self.args.batch_size,
                                        shuffle=False,
                                        num_workers=self.args.num_workers)
        print("  -> Converting to extracted features ...")
        cnt = 0
        all_embeds = []
        all_labels = []
        for images, labels in self.svmDataloader:
            cnt += 1
            embeds = self.generateExtractedFeatures(images)
            all_embeds.extend(embeds.tolist())
            all_labels.extend(labels.reshape(-1).tolist())
        print('  -> Start SVM fit ...')
        self.svm.fit(X=all_embeds, y=all_labels)
        # self.svm.fit(X=all_embeds[0:3], y=[0, 1, 2])
        joblib.dump(self.svm, svm_path)
        print("  -> Done training for SVM.")

    def genTestResult(self, from_svm=True):
        print()
        print('-------------------------------------------------------')
        print('  -> Generating test result for {:} ...'.format(
            'SVM' if from_svm else 'Resnet'))
        test_data, img_names = self._load_test_data()
        test_length = len(test_data)
        self.testDataset = FERDataset(test_data,
                                      filenames=img_names,
                                      use_da=False,
                                      args=self.args)
        self.testDataloader = DataLoader(dataset=self.testDataset,
                                         batch_size=self.args.batch_size,
                                         shuffle=False,
                                         num_workers=self.args.num_workers)
        str_preds = []
        for images, filenames in self.testDataloader:
            if from_svm:
                embeds = self.generateExtractedFeatures(images)
                preds = self.svm.predict(X=embeds)
            else:
                self.resnet.eval()
                outs = self.resnet(
                    images.repeat(1, 3, 1, 1).to(self.args.device))
                preds = outs.max(-1)[1].cpu().tolist()
            str_preds.extend([LabelEnum(pred).name for pred in preds])
        # generate submission
        assert len(str_preds) == len(img_names)
        submission = pd.DataFrame({'file_name': img_names, 'class': str_preds})
        submission.to_csv(os.path.join(self.args.DATA_PATH, 'submission.csv'),
                          index=False,
                          index_label=False)
        print('  -> Done generation of submission.csv with model {:}'.format(
            self.model_name))

    def epochTrainResnet(self, epoch):
        self.resnet.train()

        cumul_loss = 0
        cumul_acc = 0
        cumul_steps = 0
        cumul_samples = 0
        cumulative_batch = 0

        self.resnetOptimizer.zero_grad()

        for idx, (images, labels) in enumerate(tqdm(self.pretrainDataloader)):
            batch_size = images.shape[0]
            images, labels = images.to(self.args.device), labels.to(
                self.args.device)
            images += torch.randn(images.shape).to(
                images.device) * self.args.add_noise
            images = images.repeat(1, 3, 1, 1)

            outs = self.resnet(images)
            preds = outs.max(-1)[1].unsqueeze(dim=1)
            cur_acc = (preds == labels).type(torch.int).sum().item()

            loss = self.resnetLossFn(outs, labels.squeeze(dim=1))

            loss_each = loss / self.args.cumul_batch
            loss_each.backward()

            cumulative_batch += 1
            cumul_steps += 1
            cumul_loss += loss.detach().cpu().item() * batch_size
            cumul_acc += cur_acc
            cumul_samples += batch_size

            if cumulative_batch >= self.args.cumul_batch:
                torch.nn.utils.clip_grad_norm_(self.resnet.parameters(),
                                               max_norm=self.args.max_norm)
                self.resnetOptimizer.step()
                self.resnetScheduler.step()
                self.resnetOptimizer.zero_grad()
                cumulative_batch = 0

            if cumul_steps >= self.args.disp_period or idx + 1 == len(
                    self.pretrainDataloader):
                print(" -> cumul_steps={:} loss={:} acc={:}".format(
                    cumul_steps, cumul_loss / cumul_samples,
                    cumul_acc / cumul_samples))
                self.pretrain_batch_cnt += 1
                self.writer.add_scalar('batch-loss',
                                       cumul_loss / cumul_samples,
                                       global_step=self.pretrain_batch_cnt)
                self.writer.add_scalar('batch-acc',
                                       cumul_acc / cumul_samples,
                                       global_step=self.pretrain_batch_cnt)
                self.writer.add_scalar(
                    'resnet_lr',
                    self.resnetOptimizer.state_dict()['param_groups'][0]['lr'],
                    global_step=self.pretrain_batch_cnt)
                cumul_steps = 0
                cumul_loss = 0
                cumul_acc = 0
                cumul_samples = 0

        if epoch % 10 == 0:
            self.saveResnet(epoch)

    def saveResnet(self, epoch):
        resnetPath = os.path.join(
            self.args.CKPT_PATH,
            self.model_name + "--Resnet" + "--EPOCH-{:}".format(epoch))
        print("-----------------------------------------------")
        print("  -> Saving Resnet {:} ......".format(self.model_name))
        torch.save(self.resnet.state_dict(), resnetPath)
        print("  -> Successfully saved Resnet.")
        print("-----------------------------------------------")

    def loadResnet(self, epoch):
        resnetPath = os.path.join(
            self.args.CKPT_PATH,
            self.model_name + "--Resnet" + "--EPOCH-{:}".format(epoch))
        print("-----------------------------------------------")
        print("  -> Loading Resnet {:} ......".format(self.model_name))
        self.resnet.load_state_dict(torch.load(resnetPath))
        print("  -> Successfully loaded Resnet.")
        print("-----------------------------------------------")

    def trainResnet(self):
        self.writer = SummaryWriter(
            os.path.join(self.args.LOG_PATH, self.model_name))
        all_data, all_labels = self._load_all_data()
        self.pretrainDataset = FERDataset(all_data,
                                          labels=all_labels,
                                          args=self.args)
        self.pretrainDataloader = DataLoader(dataset=self.pretrainDataset,
                                             batch_size=self.args.batch_size,
                                             shuffle=True,
                                             num_workers=self.args.num_workers)
        self.resnetOptimizer = self.getResnetOptimizer()
        tot_steps = math.ceil(
            len(self.pretrainDataloader) /
            self.args.cumul_batch) * self.args.epochs
        self.resnetScheduler = get_linear_schedule_with_warmup(
            self.resnetOptimizer,
            num_warmup_steps=tot_steps * self.args.warmup_rate,
            num_training_steps=tot_steps)
        self.resnetLossFn = torch.nn.CrossEntropyLoss(
            weight=torch.tensor([
                9.40661861,
                1.00104606,
                0.56843877,
                0.84912748,
                1.02660468,
                1.29337298,
                0.82603942,
            ],
                                dtype=torch.float,
                                device=self.args.device))
        epochs = self.args.epochs
        for epoch in range(1, epochs + 1):
            print()
            print(
                "================ Resnet Training Epoch {:}/{:} ================"
                .format(epoch, epochs))
            print(" ---- Start training ------>")
            self.epochTrainResnet(epoch)
            print()
        self.writer.close()

    def getResnetOptimizer(self):
        if self.args.resnet_optim == 'SGD':
            return torch.optim.SGD([{
                'params': self.resnet.baseParameters(),
                'lr': self.args.resnet_base_lr,
                'weight_decay': self.args.weight_decay,
                'momentum': self.args.resnet_momentum
            }, {
                'params': self.resnet.finetuneParameters(),
                'lr': self.args.resnet_ft_lr,
                'weight_decay': self.args.weight_decay,
                'momentum': self.args.resnet_momentum
            }],
                                   lr=self.args.resnet_base_lr)
        elif self.args.resnet_optim == 'Adam':
            return torch.optim.Adam([{
                'params': self.resnet.baseParameters(),
                'lr': self.args.resnet_base_lr
            }, {
                'params': self.resnet.finetuneParameters(),
                'lr': self.args.resnet_ft_lr,
                'weight_decay': self.args.weight_decay
            }])
Ejemplo n.º 2
0
# Mode
print('==> Building model..')
encoder = Encoder(mask=mask)
decoder = Decoder(mask=mask)
classifier = Classifier()
encoder = encoder.to(device)
decoder = decoder.to(device)
classifier = classifier.to(device)
if device == 'cuda':
    cudnn.benchmark = True
if args.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
    checkpoint = torch.load('./checkpoint/ckpt_' + codir + '.t7')
    encoder.load_state_dict(checkpoint['encoder'])
    decoder.load_state_dict(checkpoint['decoder'])
    classifier.load_state_dict(checkpoint['classifier'])
    best_loss = checkpoint['loss']
    start_epoch = checkpoint['epoch']

# # in the cloud
# for param in encoder.parameters():
#     param.requires_grad = False

criterion1 = nn.MSELoss()
criterion2 = nn.CrossEntropyLoss()
optimizer1 = optim.SGD(encoder.parameters(),
                       lr=args.lr,
                       momentum=0.9,
                       weight_decay=9e-4)
Ejemplo n.º 3
0
class Model(nn.Module):
    def __init__(self, config):
        super(Model, self).__init__()
        self.config = config

        # 定义嵌入层
        self.embedding = Embedding(config.num_vocab,  # 词汇表大小
                                   config.embedding_size,  # 嵌入层维度
                                   config.pad_id,  # pad_id
                                   config.dropout)

        # post编码器
        self.post_encoder = Encoder(config.post_encoder_cell_type,  # rnn类型
                                    config.embedding_size,  # 输入维度
                                    config.post_encoder_output_size,  # 输出维度
                                    config.post_encoder_num_layers,  # rnn层数
                                    config.post_encoder_bidirectional,  # 是否双向
                                    config.dropout)  # dropout概率

        # response编码器
        self.response_encoder = Encoder(config.response_encoder_cell_type,
                                        config.embedding_size,  # 输入维度
                                        config.response_encoder_output_size,  # 输出维度
                                        config.response_encoder_num_layers,  # rnn层数
                                        config.response_encoder_bidirectional,  # 是否双向
                                        config.dropout)  # dropout概率

        # 先验网络
        self.prior_net = PriorNet(config.post_encoder_output_size,  # post输入维度
                                  config.latent_size,  # 潜变量维度
                                  config.dims_prior)  # 隐藏层维度

        # 识别网络
        self.recognize_net = RecognizeNet(config.post_encoder_output_size,  # post输入维度
                                          config.response_encoder_output_size,  # response输入维度
                                          config.latent_size,  # 潜变量维度
                                          config.dims_recognize)  # 隐藏层维度

        # 初始化解码器状态
        self.prepare_state = PrepareState(config.post_encoder_output_size+config.latent_size,
                                          config.decoder_cell_type,
                                          config.decoder_output_size,
                                          config.decoder_num_layers)

        # 解码器
        self.decoder = Decoder(config.decoder_cell_type,  # rnn类型
                               config.embedding_size,  # 输入维度
                               config.decoder_output_size,  # 输出维度
                               config.decoder_num_layers,  # rnn层数
                               config.dropout)  # dropout概率

        # 输出层
        self.projector = nn.Sequential(
            nn.Linear(config.decoder_output_size, config.num_vocab),
            nn.Softmax(-1)
        )

    def forward(self, inputs, inference=False, max_len=60, gpu=True):
        if not inference:  # 训练
            id_posts = inputs['posts']  # [batch, seq]
            len_posts = inputs['len_posts']  # [batch]
            id_responses = inputs['responses']  # [batch, seq]
            len_responses = inputs['len_responses']  # [batch, seq]
            sampled_latents = inputs['sampled_latents']  # [batch, latent_size]
            len_decoder = id_responses.size(1) - 1

            embed_posts = self.embedding(id_posts)  # [batch, seq, embed_size]
            embed_responses = self.embedding(id_responses)  # [batch, seq, embed_size]
            # state: [layers, batch, dim]
            _, state_posts = self.post_encoder(embed_posts.transpose(0, 1), len_posts)
            _, state_responses = self.response_encoder(embed_responses.transpose(0, 1), len_responses)
            if isinstance(state_posts, tuple):
                state_posts = state_posts[0]
            if isinstance(state_responses, tuple):
                state_responses = state_responses[0]
            x = state_posts[-1, :, :]  # [batch, dim]
            y = state_responses[-1, :, :]  # [batch, dim]

            # p(z|x)
            _mu, _logvar = self.prior_net(x)  # [batch, latent]
            # p(z|x,y)
            mu, logvar = self.recognize_net(x, y)  # [batch, latent]
            # 重参数化
            z = mu + (0.5 * logvar).exp() * sampled_latents  # [batch, latent]

            # 解码器的输入为回复去掉end_id
            decoder_inputs = embed_responses[:, :-1, :].transpose(0, 1)  # [seq-1, batch, embed_size]
            decoder_inputs = decoder_inputs.split([1] * len_decoder, 0)  # 解码器每一步的输入 seq-1个[1, batch, embed_size]
            first_state = self.prepare_state(torch.cat([z, x], 1))  # [num_layer, batch, dim_out]

            outputs = []
            for idx in range(len_decoder):
                if idx == 0:
                    state = first_state  # 解码器初始状态
                decoder_input = decoder_inputs[idx]  # 当前时间步输入 [1, batch, embed_size]
                # output: [1, batch, dim_out]
                # state: [num_layer, batch, dim_out]
                output, state = self.decoder(decoder_input, state)
                assert output.squeeze().equal(state[0][-1])
                outputs.append(output)

            outputs = torch.cat(outputs, 0).transpose(0, 1)  # [batch, seq-1, dim_out]
            output_vocab = self.projector(outputs)  # [batch, seq-1, num_vocab]

            return output_vocab, _mu, _logvar, mu, logvar
        else:  # 测试
            id_posts = inputs['posts']  # [batch, seq]
            len_posts = inputs['len_posts']  # [batch]
            sampled_latents = inputs['sampled_latents']  # [batch, latent_size]
            batch_size = id_posts.size(0)

            embed_posts = self.embedding(id_posts)  # [batch, seq, embed_size]
            # state = [layers, batch, dim]
            _, state_posts = self.post_encoder(embed_posts.transpose(0, 1), len_posts)
            if isinstance(state_posts, tuple):  # 如果是lstm则取h
                state_posts = state_posts[0]  # [layers, batch, dim]
            x = state_posts[-1, :, :]  # 取最后一层 [batch, dim]

            # p(z|x)
            _mu, _logvar = self.prior_net(x)  # [batch, latent]
            # 重参数化
            z = _mu + (0.5 * _logvar).exp() * sampled_latents  # [batch, latent]

            first_state = self.prepare_state(torch.cat([z, x], 1))  # [num_layer, batch, dim_out]
            done = torch.tensor([0] * batch_size).bool()
            first_input_id = (torch.ones((1, batch_size)) * self.config.start_id).long()
            if gpu:
                done = done.cuda()
                first_input_id = first_input_id.cuda()

            outputs = []
            for idx in range(max_len):
                if idx == 0:  # 第一个时间步
                    state = first_state  # 解码器初始状态
                    decoder_input = self.embedding(first_input_id)  # 解码器初始输入 [1, batch, embed_size]
                else:
                    decoder_input = self.embedding(next_input_id)  # [1, batch, embed_size]
                # output: [1, batch, dim_out]
                # state: [num_layers, batch, dim_out]
                output, state = self.decoder(decoder_input, state)
                outputs.append(output)

                vocab_prob = self.projector(output)  # [1, batch, num_vocab]
                next_input_id = torch.argmax(vocab_prob, 2)  # 选择概率最大的词作为下个时间步的输入 [1, batch]

                _done = next_input_id.squeeze(0) == self.config.end_id  # 当前时间步完成解码的 [batch]
                done = done | _done  # 所有完成解码的
                if done.sum() == batch_size:  # 如果全部解码完成则提前停止
                    break

            outputs = torch.cat(outputs, 0).transpose(0, 1)  # [batch, seq, dim_out]
            output_vocab = self.projector(outputs)  # [batch, seq, num_vocab]

            return output_vocab, _mu, _logvar, None, None

    def print_parameters(self):
        r""" 统计参数 """
        total_num = 0  # 参数总数
        for param in self.parameters():
            num = 1
            if param.requires_grad:
                size = param.size()
                for dim in size:
                    num *= dim
            total_num += num
        print(f"参数总数: {total_num}")

    def save_model(self, epoch, global_step, path):
        r""" 保存模型 """
        torch.save({'embedding': self.embedding.state_dict(),
                    'post_encoder': self.post_encoder.state_dict(),
                    'response_encoder': self.response_encoder.state_dict(),
                    'prior_net': self.prior_net.state_dict(),
                    'recognize_net': self.recognize_net.state_dict(),
                    'prepare_state': self.prepare_state.state_dict(),
                    'decoder': self.decoder.state_dict(),
                    'projector': self.projector.state_dict(),
                    'epoch': epoch,
                    'global_step': global_step}, path)

    def load_model(self, path):
        r""" 载入模型 """
        checkpoint = torch.load(path, map_location=torch.device('cpu'))
        self.embedding.load_state_dict(checkpoint['embedding'])
        self.post_encoder.load_state_dict(checkpoint['post_encoder'])
        self.response_encoder.load_state_dict(checkpoint['response_encoder'])
        self.prior_net.load_state_dict(checkpoint['prior_net'])
        self.recognize_net.load_state_dict(checkpoint['recognize_net'])
        self.prepare_state.load_state_dict(checkpoint['prepare_state'])
        self.decoder.load_state_dict(checkpoint['decoder'])
        self.projector.load_state_dict(checkpoint['projector'])
        epoch = checkpoint['epoch']
        global_step = checkpoint['global_step']
        return epoch, global_step