Example #1
def valid_process(global_t, model, valid_loader, valid_config, unlabeled_epoch,
                  tb_writer, logger, cur_best_score):
    valid_loader.epoch_init(valid_config.batch_size, shuffle=False)
    model.eval()
    loss_records = {}
    while True:
        batch = valid_loader.next_sentiment_batch()
        if batch is None:  # end of epoch
            break
        title, context, target, target_lens, sentiment_label = batch
        title, context, target, target_lens, sentiment_label = (
            to_tensor(title), to_tensor(context), to_tensor(target),
            to_tensor(target_lens), to_tensor(sentiment_label))
        valid_loss = model.valid_AE(global_t, title, context, target,
                                    target_lens, sentiment_label)
        for loss_name, loss_value in valid_loss:
            v = loss_records.get(loss_name, [])
            if loss_name == 'min_valid_loss' and loss_value < cur_best_score[
                    'min_valid_loss']:
                cur_best_score['min_valid_loss'] = loss_value
                cur_best_score['min_epoch'] = unlabeled_epoch
                cur_best_score['min_step'] = global_t

            v.append(loss_value)
            loss_records[loss_name] = v

    log = ""
    for loss_name, loss_values in loss_records.items():
        log = log + loss_name + ':%.4f  ' % (np.mean(loss_values))
        if args.visual:
            tb_writer.add_scalar(loss_name, np.mean(loss_values), global_t)

    logger.info(log)
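
The per-batch loss bookkeeping above (a dict of lists, then np.mean per key) can be written a bit more compactly with collections.defaultdict. A minimal sketch of the same aggregation pattern, assuming the losses arrive as (name, value) pairs like valid_loss above; the helper name is illustrative:

from collections import defaultdict
import numpy as np

def aggregate_losses(pairs):
    """pairs: iterable of (loss_name, loss_value) tuples collected over many batches."""
    records = defaultdict(list)
    for name, value in pairs:
        records[name].append(value)
    # one mean per loss name, mirroring the np.mean(loss_values) reporting above
    return {name: float(np.mean(values)) for name, values in records.items()}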
Example #2
    def align(self, valid_loader):
        self.seq_encoder.eval()
        self.decoder.eval()
        choice_statistic = [0.0 for _ in range(self.n_components)]
        while True:
            # batch is a dict keyed by sentiment
            batch = valid_loader.next_sentiment_batch()
            if batch is None:
                break

            title, context, target, target_lens, sentiments = batch
            title, context, target, target_lens = \
                to_tensor(title), to_tensor(context), to_tensor(target), to_tensor(target_lens)

            title_last_hidden, _ = self.seq_encoder(title)
            context_last_hidden, _ = self.seq_encoder(context)
            c = torch.cat((title_last_hidden, context_last_hidden),
                          1)  # (batch, 2 * hidden_size * 2)
            current_statistic = self.sample_code_prior_sentiment(c, True)
            choice_statistic = [
                choice_statistic[i] + current_statistic[i]
                for i in range(self.n_components)
            ]

        print("%s distribution: %s" %
              (valid_loader.name, str(choice_statistic)[1:-1]))
Example #3
def train_process(global_t, model, train_loader):
    model.train()
    loss_records = []

    finish_train = False
    sent_names = ['pos', 'neu', 'neg']
    for sent_name in sent_names:
        batch = train_loader[sent_name].next_sentiment_batch()
        if batch is None:  # end of epoch
            finish_train = True
            return model, finish_train, None, global_t

        title, context, target, target_lens = batch
        title, context, target, target_lens = \
            to_tensor(title), to_tensor(context), to_tensor(target), to_tensor(target_lens)

        loss_AE, global_t = model.train_AE(global_t,
                                           title,
                                           context,
                                           target,
                                           target_lens,
                                           sent_name=sent_name)
        loss_records.extend(loss_AE)

    return model, finish_train, loss_records, global_t
Example #4
def pre_valid_process(global_t, model, valid_loader, valid_config, tb_writer,
                      logger):
    sent_names = ['pos', 'neu', 'neg']
    for sent_name in sent_names:
        valid_loader[sent_name].epoch_init(valid_config.batch_size,
                                           shuffle=False)
        model.eval()
        loss_records = {}

        while True:
            batch = valid_loader[sent_name].next_batch()
            if batch is None:  # end of epoch
                break

            target, target_lens = batch
            target, target_lens = to_tensor(target), to_tensor(target_lens)
            valid_loss, global_t = model.valid_VAE(global_t,
                                                   target,
                                                   target_lens,
                                                   sent_name=sent_name)
            for loss_name, loss_value in valid_loss:
                v = loss_records.get(loss_name, [])
                v.append(loss_value)
                loss_records[loss_name] = v

        log = 'Valid: iter {} sentiment {} Validation\n'.format(
            global_t, sent_name)
        for loss_name, loss_values in loss_records.items():
            log = log + loss_name + ':%.4f  ' % (np.mean(loss_values))
            if tb_writer:
                tb_writer.add_scalar(loss_name, np.mean(loss_values), global_t)

        logger.info(log)
Example #5
def test_process(model, test_loader, test_config, logger):
    # After each training epoch, generate poems from the test-set titles

    test_loader.epoch_init(test_config.batch_size, shuffle=True)

    poem_count = 0
    predict_results = {0: [], 1: [], 2: []}
    while True:
        model.eval()
        batch = test_loader.next_batch_test()  # test data uses its own batch format
        if batch is None:
            break
        batch_size = batch.shape[0]
        poem_count += 1
        if poem_count % 10 == 0:
            print("Predicted {} poems".format(poem_count))
        title_list = batch  # batch size is 1; each batch produces one poem
        title_tensor = to_tensor(title_list)
        # model.test decodes the poem for the current batch; each decoding step's input context is the previous step's result

        for i in range(3):
            # import pdb
            # pdb.set_trace()
            sentiment_label = np.full(batch_size, i, dtype=np.int64)
            sentiment_label = to_tensor(sentiment_label)
            output_poem, output_tokens = model.test(
                title_tensor, title_list, sentiment_label=sentiment_label)

            if poem_count % 80 == 0:
                logger.info("Sentiment {} Poem {}\n".format(i, output_poem))

            predict_results[i] += (np.array(output_tokens)[:, :7].tolist())

    # Predict sentiment use the sort net
    from collections import defaultdict
    neg = defaultdict(int)
    neu = defaultdict(int)
    pos = defaultdict(int)
    total = defaultdict(int)
    for i in range(3):
        _, neg[i], neu[i], pos[i] = test_sentiment(predict_results[i])
        total[i] = neg[i] + neu[i] + pos[i]

    for i in range(3):
        logger.info("%d%%\t%d%%\t%d%%" %
                    (neg[i] * 100 / total[i], neu[i] * 100 / total[i],
                     pos[i] * 100 / total[i]))
    print("Done testing")
Example #6
def train_process(global_t, model, train_loader, config, sentiment_data=False):
    model.train()
    loss_records = []
    sentiment_label = None
    if sentiment_data:
        batch = train_loader.next_sentiment_batch()
        finish_train = False
        if batch is None:  # end of epoch
            finish_train = True
            return model, finish_train, None, global_t
        title, context, target, target_lens, sentiment_label = batch
        title, context, target, target_lens, sentiment_label = \
            to_tensor(title), to_tensor(context), to_tensor(target), to_tensor(target_lens), to_tensor(sentiment_label)
    else:
        batch = train_loader.next_batch()
        finish_train = False
        if batch is None:  # end of epoch
            finish_train = True
            return model, finish_train, None, global_t
        title, context, target, target_lens = batch
        title, context, target, target_lens = \
            to_tensor(title), to_tensor(context), to_tensor(target), to_tensor(target_lens)

    # global_t, title, context, target, target_lens,
    loss_AE, global_t = model.train_AE(
        global_t, title, context, target, target_lens,
        sentiment_label)  # inputs: topic, previous line, current line, current line length
    loss_records.extend(loss_AE)

    return model, finish_train, loss_records, global_t
Example #7
    def train_D(self, title, context, target, target_lens):
        self.seq_encoder.eval()
        self.discriminator.train()
        self.optimizer_D.zero_grad()

        batch_size = context.size(0)

        title_last_hidden, _ = self.seq_encoder(title)
        context_last_hidden, _ = self.seq_encoder(context)
        c = torch.cat((title_last_hidden, context_last_hidden),
                      1)  # (batch, 2 * hidden_size * 2)
        x, _ = self.seq_encoder(target[:, 1:], target_lens - 1)
        post_z = self.sample_code_post(x, c)
        errD_post = torch.mean(
            self.discriminator(torch.cat(
                (post_z.detach(), c.detach()), 1))) * self.n_d_loss
        errD_post.backward(one)

        prior_z = self.sample_code_prior(c)
        errD_prior = torch.mean(
            self.discriminator(torch.cat(
                (prior_z.detach(), c.detach()), 1))) * self.n_d_loss
        errD_prior.backward(minus_one)
        # import pdb
        # pdb.set_trace()

        alpha = to_tensor(torch.rand(batch_size, 1))
        alpha = alpha.expand(prior_z.size())
        interpolates = alpha * prior_z.data + ((1 - alpha) * post_z.data)
        interpolates = Variable(interpolates, requires_grad=True)

        d_input = torch.cat((interpolates, c.detach()), 1)
        disc_interpolates = torch.mean(self.discriminator(d_input))
        gradients = torch.autograd.grad(
            outputs=disc_interpolates,
            inputs=interpolates,
            grad_outputs=to_tensor(torch.ones(disc_interpolates.size())),
            create_graph=True,
            retain_graph=True,
            only_inputs=True)[0]
        gradient_penalty = (
            (gradients.contiguous().view(gradients.size(0), -1).norm(2, dim=1)
             - 1)**2).mean() * self.lambda_gp
        gradient_penalty.backward()

        self.optimizer_D.step()
        costD = -(errD_prior - errD_post) + gradient_penalty
        return [('train_loss_D', costD.item())]
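
train_D follows the WGAN-GP recipe: the critic is pushed in opposite directions on posterior and prior codes, and a gradient penalty on interpolated codes keeps it roughly 1-Lipschitz. A standalone sketch of that penalty term, assuming a critic that scores the concatenated (code, condition) pair; the names here are illustrative, not the class's actual members:

import torch

def wgan_gradient_penalty(critic, post_z, prior_z, cond, lambda_gp=10.0):
    # per-sample interpolation between the two code distributions
    alpha = torch.rand(post_z.size(0), 1, device=post_z.device).expand_as(post_z)
    interpolates = (alpha * post_z.detach() + (1 - alpha) * prior_z.detach()).requires_grad_(True)
    score = critic(torch.cat((interpolates, cond), dim=1)).mean()
    grads = torch.autograd.grad(outputs=score, inputs=interpolates,
                                grad_outputs=torch.ones_like(score),
                                create_graph=True, retain_graph=True)[0]
    # penalize deviation of the per-sample gradient norm from 1
    return lambda_gp * ((grads.view(grads.size(0), -1).norm(2, dim=1) - 1) ** 2).mean()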
Example #8
    def forward(self, inputs, input_lens=None, noise=False):

        # if self.embedding is not None:
        inputs = self.embedding(inputs)  # run inputs through the embedding layer

        batch_size, seq_len, emb_size = inputs.size()  # (batch, len, emb_size); len is 12, the maximum title length

        # inputs = F.dropout(inputs, 0.5, self.training)  # no dropout on the embeddings for now

        if input_lens is not None:
            input_lens_sorted, indices = input_lens.sort(descending=True)
            inputs_sorted = inputs.index_select(0, indices)
            inputs = pack_padded_sequence(inputs_sorted,
                                          input_lens_sorted.data.tolist(),
                                          batch_first=True)

        # inputs: (batch, len, emb_dim)
        # init_hidden: (2, batch, n_hidden)
        init_hidden = to_tensor(
            torch.zeros(self.n_layers * (1 + self.bidirectional), batch_size,
                        self.hidden_size))
        # hids: (batch, len, 2 * n_hidden)
        # h_n: (2, batch, n_hidden)
        # self.rnn.flatten_parameters()
        hids, h_n = self.rnn(inputs, init_hidden)

        if input_lens is not None:
            _, inv_indices = indices.sort()
            hids, lens = pad_packed_sequence(hids, batch_first=True)
            hids = hids.index_select(0, inv_indices)
            h_n = h_n.index_select(1, inv_indices)

        # h_n: (1, 2, batch, n_hidden), arranged by layer
        h_n = h_n.view(self.n_layers, (1 + self.bidirectional), batch_size,
                       self.hidden_size)
        # take the last layer: (2, batch, n_hidden)
        h_n = h_n[-1]  # the last layer of the last hidden state is returned as the encoder's last hidden
        # flatten everything into (batch_size, 1 * 2 * hidden_size)
        enc = h_n.transpose(1, 0).contiguous().view(batch_size, -1)

        if noise and self.noise_radius > 0:
            gauss_noise = to_tensor(
                torch.normal(means=torch.zeros(enc.size()),
                             std=self.noise_radius))
            enc = enc + gauss_noise

        return enc, hids
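
The sort/pack/unsort steps in forward are what pack_padded_sequence required before enforce_sorted=False existed; recent PyTorch can take unsorted lengths directly. A minimal, self-contained sketch of running a bidirectional GRU over padded variable-length batches (dimensions are illustrative):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

rnn = nn.GRU(input_size=8, hidden_size=16, batch_first=True, bidirectional=True)
inputs = torch.randn(4, 12, 8)           # (batch, max_len, emb_dim)
lengths = torch.tensor([12, 9, 7, 3])    # true length of each sequence

packed = pack_padded_sequence(inputs, lengths, batch_first=True, enforce_sorted=False)
packed_out, h_n = rnn(packed)            # h_n: (num_directions, batch, hidden)
hids, _ = pad_packed_sequence(packed_out, batch_first=True)   # (batch, max_len, 2 * hidden)
enc = h_n.transpose(0, 1).contiguous().view(4, -1)            # (batch, 2 * hidden), as in forward above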
Example #9
    def testing(self, init_hidden, maxlen, go_id, mode="greedy"):
        batch_size = init_hidden.size(0)
        assert batch_size == 1

        decoder_input = to_tensor(torch.LongTensor([[go_id]]).view(1, 1))  # (batch, 1)
        # input: (batch=1, len=1, emb_size)
        decoder_input = self.embedding(decoder_input)  # (batch, 1, emb_dim)
        # hidden: (batch=1, 2, hidden_size * 2)
        decoder_hidden = init_hidden.unsqueeze(0)  # (1, batch, 4*hidden+z_size)
        pred_outs = np.zeros((batch_size, maxlen), dtype=np.int64)

        for di in range(maxlen - 1):  # decoding starts one position after <s>, so the total length is maxlen - 1
            # feed into the decoder
            decoder_output, decoder_hidden = self.rnn(decoder_input, decoder_hidden)  # (1, 1, hidden)
            decoder_output = self.out(decoder_output.contiguous().view(-1, self.hidden_size))  # (1, vocab_size)
            # import pdb
            # pdb.set_trace()
            if mode == "greedy":
                topi = decoder_output.max(1, keepdim=True)[1]
            else:
                topi = torch.multinomial(F.softmax(decoder_output, dim=1), 1)
            # record the prediction in pred_outs for return

            ni = topi.squeeze().cpu().numpy()
            pred_outs[:, di] = ni
            # prepare the input token for the next decoding step
            decoder_input = self.embedding(topi)

        # end of loop: token prediction for one poem line is complete
        return pred_outs
Example #10
    def testing(self, init_hidden, encoder_output, maxlen, go_id, mode="greedy"):
        batch_size = init_hidden.size(0)
        assert batch_size == 1
        decoder_input = to_tensor(torch.LongTensor([go_id]))  # (batch)
        decoder_hidden = init_hidden
        pred_outs = np.zeros((batch_size, maxlen), dtype=np.int64)

        for di in range(maxlen - 1):  # decode from the first token to </s>, maxlen - 1 steps in total

            embedded = self.embedder(decoder_input).unsqueeze(1)  # (batch, 1, emb_dim)
            # embedded = self.drop_out(embedded)
            mutual_info = torch.cat((decoder_hidden, embedded),
                                    dim=2)  # (batch, 1, emb_dim + n_hidden)
            attn_weight = self.attn(mutual_info)  # (batch, 1, 10)
            attn_weight = F.softmax(attn_weight, dim=2)  # (batch, 1, 10)
            attn_applied = torch.bmm(attn_weight, encoder_output)  # (batch, 1, n_hidden)

            rnn_input = torch.cat((attn_applied, embedded), dim=2)  # (batch, 1, n_hidden+emb_dim)
            rnn_input = self.attn_combine(rnn_input)  # (batch, 1, n_hidden)
            rnn_input = F.relu(rnn_input)  # (batch, 1, n_hidden)
            decoder_output, decoder_hidden = self.rnn(rnn_input, decoder_hidden.contiguous())
            decoder_output = self.soft(self.out(decoder_output.contiguous().squeeze(1)))
            if mode == "greedy":
                topi = decoder_output.max(1, keepdim=True)[1]  # [0] is the probability, [1] is the index
            else:
                topi = torch.multinomial(F.softmax(decoder_output, dim=1), 1)

            ni = topi.squeeze().cpu().numpy()
            pred_outs[:, di] = ni

            decoder_input = topi[0]

        return pred_outs
Example #11
    def sampling(self, init_hidden, maxlen, go_id, eos_id, mode='greedy'):
        batch_size = init_hidden.size(0)  # batch_size equals the repeat count used by the caller
        sample_lens = np.zeros(batch_size, dtype=np.int64)  # generated-sentence length for each sample in the batch

        decoder_input = to_tensor(torch.LongTensor([[go_id] * batch_size]).view(batch_size,1))
        decoder_input = self.embedding(decoder_input)
        decoder_hidden = init_hidden.unsqueeze(0)
        pred_outs = np.zeros((batch_size, maxlen), dtype=np.int64)
        # import pdb
        # pdb.set_trace()
        for di in range(maxlen - 1):

            # why does the same decoder_input (repeated 10 times) give different results from the same decoder??
            # self.rnn.flatten_parameters()
            decoder_output, decoder_hidden = self.rnn(decoder_input, decoder_hidden)
            decoder_output = self.out(decoder_output.contiguous().view(-1, self.hidden_size))  # (batch, vocab_size)
            if mode == 'greedy':
                topi = decoder_output.max(1, keepdim=True)[1]
            elif mode == 'sample':
                topi = torch.multinomial(F.softmax(decoder_output, dim=1), 1)

            ni = topi.squeeze().cpu().numpy()
            pred_outs[:, di] = ni
            decoder_input = self.embedding(topi)

        # end of loop: one sentence has been generated
        for i in range(batch_size):
            for word in pred_outs[i]:
                if word == eos_id:
                    break
                sample_lens[i] = sample_lens[i] + 1
        return pred_outs, sample_lens
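
Every decoding loop in these snippets chooses the next token either greedily or by sampling from the softmax over the vocabulary. The core choice can be isolated as below; logits stands in for the decoder's per-step output of shape (batch, vocab_size), and the function name is illustrative:

import torch
import torch.nn.functional as F

def pick_next_token(logits, mode='greedy'):
    if mode == 'greedy':
        return logits.argmax(dim=1, keepdim=True)      # (batch, 1), most likely token
    probs = F.softmax(logits, dim=1)
    return torch.multinomial(probs, num_samples=1)     # (batch, 1), sampled token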
Example #12
def test_process(model, test_loader, test_config, logger):
    # After each training epoch, generate poems from the test-set titles

    test_loader.epoch_init(test_config.batch_size, shuffle=False)

    poem_count = 0
    import random
    sent_labels = []
    for _ in range(4):
        sent_labels.append(random.sample(['0', '1', '2'], k=1)[0])
    logger.info("Random choose sentiment {}".format(" ".join(sent_labels)))
    while True:
        model.eval()
        batch = test_loader.next_batch_test()  # test data uses its own batch format
        if batch is None:
            break
        poem_count += 1
        if poem_count % 10 == 0:
            print("Predicted {} poems".format(poem_count))
        title_list = batch  # batch size is 1; each batch produces one poem
        title_tensor = to_tensor(title_list)
        # model.test decodes the poem for the current batch; each decoding step's input context is the previous step's result

        output_poem, output_tokens = model.test(title_tensor,
                                                title_list,
                                                sent_labels=sent_labels)
        logger.info(output_poem)

    print("Done testing")
Example #13
    def test(self, title_tensor, title_words, sent_labels):
        self.eval()
        name_dict = {'0': 'neg', '1': 'neu', '2': 'pos'}
        batch_size = title_tensor.size(0)
        assert batch_size == 1
        tem = [[2, 3] + [0] * (self.maxlen - 2)]
        pred_poems = []
        # filter <s>, </s>, and 0 out of the title; only used for printing
        title_tokens = [self.vocab[e] for e in title_words[0].tolist() if e not in [0, self.eos_id, self.go_id]]
        pred_poems.append(title_tokens)

        gen_words = ""
        gen_tokens = []

        for i in range(4):
            tem = to_tensor(np.array(tem))
            context = tem
            if i == 0:
                context_last_hidden, _ = self.layers["seq_encoder"](title_tensor)
            else:
                context_last_hidden, _ = self.layers["seq_encoder"](context)
            title_last_hidden, _ = self.layers["seq_encoder"](title_tensor)
            z = to_tensor(torch.randn([batch_size, self.z_size]))

            final_info = torch.cat([title_last_hidden, context_last_hidden, z], dim=1)
            pred_tokens = self.layers["vae_decoder_{}".format(name_dict[sent_labels[i]])].testing(init_hidden=self.layers["init_decoder"](final_info), maxlen=self.maxlen, go_id=self.go_id, mode="greedy")
            pred_tokens = pred_tokens[0].tolist()

            if len(pred_tokens) >= self.maxlen:
                tem = [pred_tokens[0: self.maxlen]]
            else:
                tem = [[0] * (self.maxlen - len(pred_tokens)) + pred_tokens]

            pred_words = [self.vocab[e] for e in pred_tokens[:-1] if e != self.eos_id and e != 0 and e != self.go_id]
            pred_poems.append(pred_words)
            gen_tokens.append(pred_tokens)

        for i in range(5):
            if i == 0:
                cur_line = " ".join(pred_poems[i])
            else:
                cur_line = " ".join(pred_poems[i]) + sent_labels[i-1]
            gen_words = gen_words + cur_line + '\n'

        return gen_words, gen_tokens
Example #14
    def forward(self, target):
        batch_size = target.size(0)
        target = self.fc(target)
        mu = self.target_to_mu(target)
        logsigma = self.target_to_logsigma(target)
        std = torch.exp(0.5 * logsigma)
        epsilon = to_tensor(torch.randn([batch_size, self.z_size]))
        z = epsilon * std + mu
        return mu, logsigma, z
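
This recognition network uses the reparameterization trick: z = mu + std * epsilon keeps the sample differentiable with respect to mu and logsigma. A sketch of the matching KL term against a standard normal prior; the loss itself is an assumption about the surrounding training code, with logsigma treated as the log-variance exactly as in the forward pass above:

import torch

def kl_to_standard_normal(mu, logsigma):
    # KL( N(mu, exp(logsigma)) || N(0, I) ), summed over z dims, averaged over the batch
    return (-0.5 * (1 + logsigma - mu.pow(2) - logsigma.exp()).sum(dim=1)).mean()

mu, logsigma = torch.zeros(4, 16), torch.zeros(4, 16)
z = mu + torch.exp(0.5 * logsigma) * torch.randn_like(mu)   # reparameterized sample
kl = kl_to_standard_normal(mu, logsigma)                    # 0 when mu = 0 and logsigma = 0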
Example #15
    def test(self, title_tensor, title_words, sentiment_label):
        self.seq_encoder.eval()
        self.decoder.eval()
        assert title_tensor.size(0) == 1
        tem = [[2, 3] + [0] * (self.maxlen - 2)]
        pred_poems = []
        # filter <s>, </s>, and 0 out of the title; only used for printing
        title_tokens = [
            self.vocab[e] for e in title_words[0].tolist()
            if e not in [0, self.eos_id, self.go_id]
        ]
        pred_poems.append(title_tokens)

        gen_words = "\n"
        gen_tokens = []
        for i in range(4):
            tem = to_tensor(np.array(tem))
            context = tem
            if i == 0:
                context_last_hidden, _ = self.seq_encoder(title_tensor)
            else:
                context_last_hidden, _ = self.seq_encoder(context)

            title_last_hidden, _ = self.seq_encoder(title_tensor)
            sentiment = self.sent_embedder(sentiment_label)

            condition_prior = torch.cat(
                (title_last_hidden, context_last_hidden), dim=1)
            # z_prior, prior_mu, prior_logvar, _, _ = self.sample_code_prior(condition_prior, mask_type=mask_type)
            z_prior, prior_mu, prior_logvar = self.sample_code_prior(
                condition_prior, sentiment_label)
            final_info = torch.cat((z_prior, condition_prior, sentiment), 1)

            pred_tokens = self.decoder.testing(
                init_hidden=self.init_decoder_hidden(final_info),
                maxlen=self.maxlen,
                go_id=self.go_id,
                mode="greedy")
            pred_tokens = pred_tokens[0].tolist()

            if len(pred_tokens) >= self.maxlen:
                tem = [pred_tokens[0:self.maxlen]]
            else:
                tem = [[0] * (self.maxlen - len(pred_tokens)) + pred_tokens]

            pred_words = [
                self.vocab[e] for e in pred_tokens[:-1]
                if e != self.eos_id and e != 0 and e != self.go_id
            ]
            pred_poems.append(pred_words)
            gen_tokens.append(pred_tokens)

        for line in pred_poems:
            cur_line = " ".join(line)
            gen_words = gen_words + cur_line + '\n'

        return gen_words, gen_tokens
Example #16
    def forward(self, context):
        batch_size, _ = context.size()  # prior: (batch, 4 * hidden)
        context = self.fc(context)
        mu = self.context_to_mu(context)
        logsigma = self.context_to_logsigma(context)
        std = torch.exp(0.5 * logsigma)

        epsilon = to_tensor(torch.randn([batch_size, self.z_size]))
        z = epsilon * std + mu
        return z, mu, logsigma
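
Here the prior itself is produced from the context, so CVAE-style training needs the KL between the recognition posterior and this learned prior rather than against N(0, I). A sketch of that closed form for diagonal Gaussians, assuming both networks return (z, mu, logvar) tuples like the forward methods above; the function name is illustrative:

import torch

def gaussian_kl(post_mu, post_logvar, prior_mu, prior_logvar):
    # KL( N(post_mu, post_var) || N(prior_mu, prior_var) ), elementwise then reduced
    kl = 0.5 * (prior_logvar - post_logvar
                + (post_logvar.exp() + (post_mu - prior_mu).pow(2)) / prior_logvar.exp()
                - 1)
    return kl.sum(dim=1).mean()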
Example #17
def apply_gaussian_blur(adv_img, kernel_size=5, sigma=0):

    # If sigma == 0, cv2.GaussianBlur calculates sigma from the kernel size.

    N, ch, H, W = adv_img.shape

    for i in range(N):
        img = helper.to_numpy(adv_img[i])
        img = cv2.GaussianBlur(img, (kernel_size, kernel_size), sigmaX=sigma)
        adv_img[i] = helper.to_tensor(img)

    return adv_img
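
apply_gaussian_blur round-trips every image through numpy so that OpenCV can smooth it, then writes the result back into the batch in place. A hedged usage sketch with plain torch conversions standing in for helper.to_numpy / helper.to_tensor (the HWC channel ordering here is an assumption about what those helpers produce):

import cv2
import torch

adv_img = torch.rand(2, 3, 32, 32)                            # (N, C, H, W) batch in [0, 1]
for i in range(adv_img.size(0)):
    img = adv_img[i].permute(1, 2, 0).contiguous().numpy()    # HWC float32 for OpenCV
    img = cv2.GaussianBlur(img, (5, 5), sigmaX=0)             # sigma derived from kernel size when 0
    adv_img[i] = torch.from_numpy(img).permute(2, 0, 1)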
Example #18
    def test_VAE(self, sent_name, batch_size=1):
        z = to_tensor(torch.randn([batch_size, self.z_size]))
        pred_tokens = self.layers["vae_decoder_{}".format(sent_name)].testing(
            init_hidden=self.layers["init_decoder_hidden"](z),
            maxlen=self.maxlen,
            go_id=self.go_id,
            mode="greedy")
        pred_words = []
        # import pdb
        # pdb.set_trace()
        for b_id in range(pred_tokens.shape[0]):
            pred_words.append([self.vocab[e] for e in pred_tokens[b_id][:-1]
                               if e != self.eos_id and e != 0 and e != self.go_id])
        return pred_words
Example #19
def valid_process_sentiment(model, valid_poem_loader, valid_config,
                            global_iter, num, tb_writer, logger,
                            cur_best_score_labeled):
    valid_poem_loader.epoch_init(valid_config.batch_size, shuffle=False)
    model.eval()
    loss_records = {}
    while True:
        batch = valid_poem_loader.next_sentiment_batch()
        if batch is None:  # end of epoch
            break

        title, context, target, target_lens, sentiment_mask = batch
        title, context, target, target_lens, sentiment_mask = \
            to_tensor(title), to_tensor(context), to_tensor(target), to_tensor(target_lens), to_tensor(sentiment_mask)
        valid_loss = model.valid(title, context, target, target_lens,
                                 sentiment_mask)
        for loss_name, loss_value in valid_loss:
            v = loss_records.get(loss_name, [])
            v.append(loss_value)
            loss_records[loss_name] = v

    log = 'Valid: Global iter {} Validation\n'.format(global_iter)
    for loss_name, loss_values in loss_records.items():
        # import pdb
        # pdb.set_trace()
        if loss_name == 'valid_loss_AE' and np.mean(
                loss_values) < cur_best_score_labeled['min_valid_loss_label']:
            log += "\nFOUND a new best valid loss in global %d, num %d\n" % (
                global_iter, num)
            cur_best_score_labeled['min_valid_loss_label'] = np.mean(
                loss_values)
            cur_best_score_labeled['min_global_itr_label'] = global_iter
            cur_best_score_labeled['min_num_label'] = num

        log = log + loss_name + ':%.4f  ' % (np.mean(loss_values))
        if args.visual:
            tb_writer.add_scalar(loss_name, np.mean(loss_values), global_iter)

    logger.info(log)
Example #20
    def sample_from_specific_latent_area(self, sentiment_label):
        batch_size = sentiment_label.size(0)
        z_origin = to_tensor(torch.randn([batch_size, self.z_size]))

        # use sentiment_label to mask out every sentence that does not carry the current sentiment; combining the three masks recovers all samples
        mask_pos = sentiment_label.gt(1).view(-1, 1).expand(batch_size, self.z_size)
        mask_neu = sentiment_label.eq(1).view(-1, 1).expand(batch_size, self.z_size)
        mask_neg = sentiment_label.lt(1).view(-1, 1).expand(batch_size, self.z_size)

        z_pos = z_origin.mul(mask_pos)
        z_neu = z_origin.mul(mask_neu)
        z_neg = z_origin.mul(mask_neg)
        return {'pos': z_pos, 'neu': z_neu, 'neg': z_neg}
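
sample_from_specific_latent_area encodes the three-way sentiment split as comparisons against the label value 1 (lt(1) = negative, eq(1) = neutral, gt(1) = positive) and zeroes out the rows that do not match. A small sketch of what the masks select, using the 0/1/2 label convention from the other snippets' name_dict:

import torch

sentiment_label = torch.tensor([2, 0, 1, 2])     # 0 = neg, 1 = neu, 2 = pos
z_origin = torch.randn(sentiment_label.size(0), 4)

mask_pos = sentiment_label.gt(1).view(-1, 1).expand_as(z_origin)
mask_neu = sentiment_label.eq(1).view(-1, 1).expand_as(z_origin)
mask_neg = sentiment_label.lt(1).view(-1, 1).expand_as(z_origin)

z_pos = z_origin * mask_pos                      # rows 0 and 3 kept, others zeroed
z_neu = z_origin * mask_neu                      # only row 2 kept
z_neg = z_origin * mask_neg                      # only row 1 kept
assert torch.equal(z_pos + z_neu + z_neg, z_origin)   # masks are disjoint and cover the batch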
Example #21
def valid_process(model, valid_loader, valid_config, global_iter,
                  unlabeled_epoch, batch_idx, tb_writer, logger,
                  cur_best_score):
    valid_loader.epoch_init(valid_config.batch_size, shuffle=False)
    model.eval()
    loss_records = {}

    while True:
        batch = valid_loader.next_batch()
        if batch is None:  # end of epoch
            break

        title, context, target, target_lens = batch
        title, context, target, target_lens = \
            to_tensor(title), to_tensor(context), to_tensor(target), to_tensor(target_lens)
        valid_loss = model.valid(title, context, target, target_lens)
        for loss_name, loss_value in valid_loss:
            v = loss_records.get(loss_name, [])
            if loss_name == 'min_valid_loss' and loss_value < cur_best_score[
                    'min_valid_loss']:
                cur_best_score['min_valid_loss'] = loss_value
                cur_best_score['min_global_itr'] = global_iter
                cur_best_score['min_epoch'] = unlabeled_epoch
                cur_best_score['min_itr'] = batch_idx

            v.append(loss_value)
            loss_records[loss_name] = v

    log = 'Global iter {} Validation:'.format(global_iter)
    for loss_name, loss_values in loss_records.items():
        # import pdb
        # pdb.set_trace()
        log = log + loss_name + ':%.4f  ' % (np.mean(loss_values))
        if args.visual:
            tb_writer.add_scalar(loss_name, np.mean(loss_values), global_iter)

    logger.info(log)
Example #22
    def forward(self, context, sent_label):
        # import pdb
        # pdb.set_trace()
        sentiment = self.sent_embedder(sent_label)

        batch_size, _ = context.size()  # prior: (batch, 4 * hidden)
        cond = torch.cat([context, sentiment], dim=1)
        context = self.fc(cond)
        mu = self.context_to_mu(context)
        logsigma = self.context_to_logsigma(context) 
        std = torch.exp(0.5 * logsigma)
        
        epsilon = to_tensor(torch.randn([batch_size, self.z_size]))
        z = epsilon * std + mu  
        return z, mu, logsigma
Example #23
    def test(self, title, title_list, batch_size):
        self.encoder.eval()
        self.decoder.eval()

        assert title.size(0) == 1
        tem = title[0][0: self.maxlen].unsqueeze(0)

        pred_poems = []
        title_tokens = [self.vocab[e] for e in title_list[0].tolist() if e not in [0, self.eos_id, self.go_id]]
        pred_poems.append(title_tokens)

        for sent_id in range(4):
            context = tem
            if type(context) is list:
                vec_context = np.zeros((batch_size, self.maxlen), dtype=np.int64)
                for b_id in range(batch_size):
                    vec_context[b_id, :] = np.array(context[b_id])
                context = to_tensor(vec_context)

            encoder_last_hidden, encoder_output = self.encoder(context)
            batch_size = encoder_last_hidden.size(0)
            hidden_size = encoder_last_hidden.size(1) // 2
            # (1, 1, n_hidden)
            last_hidden = encoder_last_hidden.view(batch_size, 2, -1)[:, -1, :].unsqueeze(0)
            # (batch, len, n_hidden)
            encoder_output = encoder_output.view(batch_size, -1, 2, hidden_size)[:, :, -1]

            # decode_words is one complete poem line
            decode_words = self.decoder.testing(init_hidden=last_hidden, encoder_output=encoder_output,
                                             maxlen=self.maxlen, go_id=self.go_id, mode="greedy")

            decode_words = decode_words[0].tolist()
            # import pdb
            # pdb.set_trace()
            if len(decode_words) > self.maxlen:
                tem = [decode_words[0: self.maxlen]]
            else:
                tem = [[0] * (self.maxlen - len(decode_words)) + decode_words]

            pred_tokens = [self.vocab[e] for e in decode_words[:-1] if e != self.eos_id and e != 0]
            pred_poems.append(pred_tokens)

        gen = ''
        for line in pred_poems:
            true_str = " ".join(line)
            gen = gen + true_str + '\n'

        return gen
Example #24
    def sampling(self, init_hidden, encoder_output, maxlen, go_id, eos_id, mode='greedy'):

        batch_size = init_hidden.size(1)
        decoder_input = to_tensor(torch.LongTensor(batch_size * [go_id]))  # (batch,)
        decoder_hidden = init_hidden  # (1, batch, hidden)
        pred_outs = np.zeros((batch_size, maxlen), dtype=np.int64)
        sample_lens = np.zeros(batch_size, dtype=np.int64)

        for di in range(maxlen - 1):  # decode from the first token to </s>, maxlen - 1 steps in total

            embedded = self.embedder(decoder_input).unsqueeze(1)  # (batch, 1, emb_dim)
            # embedded = self.drop_out(embedded)
            mutual_info = torch.cat((decoder_hidden.squeeze(0).unsqueeze(1), embedded),
                                    dim=2)  # (batch, 1, emb_dim + n_hidden)
            attn_weight = self.attn(mutual_info)  # (batch, 1, 10)
            attn_weight = F.softmax(attn_weight, dim=2)  # (batch, 1, 10)
            attn_applied = torch.bmm(attn_weight, encoder_output)  # (batch, 1, n_hidden)

            rnn_input = torch.cat((attn_applied, embedded), dim=2)  # (batch, 1, n_hidden+emb_dim)
            rnn_input = self.attn_combine(rnn_input)  # (batch, 1, n_hidden)
            rnn_input = F.relu(rnn_input)  # (batch, 1, n_hidden)
            decoder_output, decoder_hidden = self.rnn(rnn_input, decoder_hidden.contiguous())
            decoder_output = self.soft(self.out(decoder_output.contiguous().squeeze(1)))
            if mode == "greedy":
                topi = decoder_output.max(1, keepdim=True)[1]  # [0] is the probability, [1] is the index
            else:
                topi = torch.multinomial(F.softmax(decoder_output, dim=1), 1)

            ni = topi.squeeze().cpu().numpy()
            pred_outs[:, di] = ni

            decoder_input = topi.squeeze(1)

        # import pdb
        # pdb.set_trace()
        for i in range(batch_size):
            for word in pred_outs[i]:
                if word == eos_id:
                    break
                sample_lens[i] = sample_lens[i] + 1
        return pred_outs, sample_lens
Example #25
    def forward(self, context, sent_label):
        # import pdb
        # pdb.set_trace()
        sentiment = self.sent_embedder(sent_label)

        batch_size, _ = context.size()  # prior: (batch, 4 * hidden)
        cond = torch.cat([context, sentiment], dim=1)
        context = self.fc(cond)

        # From here on we no longer pick out one specific Gaussian; the latent is treated
        # as a single multivariate Gaussian, and the sentiment is specified by the initial input.
        # The sentiment has to be embedded as part of the condition to stay maximally controllable,
        # rather than being split into several different prior distributions.

        mus = self.context_to_mus(context)
        logsigmas = self.context_to_logsigmas(context)

        # then sample from the resulting Gaussian
        stds = torch.exp(0.5 * logsigmas)  # (batch, 5 * z_size)
        epsilons = to_tensor(torch.randn([batch_size, self.z_size]))
        z = epsilons * stds + mus  # (batch, 5, z_size)

        return z, mus, logsigmas
Example #26
def test_process(model, test_loader, test_config, logger):
    # After each training epoch, generate poems from the test-set titles

    # mask_types = ['negative', 'positive', 'neutral']
    model.eval()
    output_poems = ""
    test_loader.epoch_init(test_config.batch_size, shuffle=False)
    while True:
        model.eval()  # eval() mainly affects BatchNorm, dropout, etc.
        batch = test_loader.next_batch_test()  # test data uses its own batch format
        if batch is None:
            break
        title_list, headers = batch  # batch size is 1; each batch produces one poem
        title_tensor = to_tensor(title_list)

        # model.test decodes the poem for the current batch; each decoding step's input context is the previous step's result
        # output_poem = 'Global iter: {}\n'.format(global_iter)
        output_poem = model.test(title_tensor=title_tensor,
                                 title_words=title_list,
                                 headers=headers)
        output_poems += output_poem
    logger.info(output_poems)

    print("Done testing")
Example #27
    def test(self, title_tensor, title_words, headers):
        self.seq_encoder.eval()
        self.discriminator.eval()
        self.decoder.eval()
        # tem is initialized to [2, 3, 0, 0, 0, 0, 0, 0, 0]

        tem = [[2, 3] + [0] * (self.maxlen - 2)]
        pred_poems = []

        title_tokens = [
            self.vocab[e] for e in title_words[0].tolist()
            if e not in [0, self.eos_id, self.go_id]
        ]
        pred_poems.append(title_tokens)
        for sent_id in range(4):
            tem = to_tensor(np.array(tem))
            context = tem

            # vec_context = np.zeros((batch_size, self.maxlen), dtype=np.int64)
            # for b_id in range(batch_size):
            #     vec_context[b_id, :] = np.array(context[b_id])
            # context = to_tensor(vec_context)

            title_last_hidden, _ = self.seq_encoder(
                title_tensor)  # (batch=1, 2*hidden)
            if sent_id == 0:
                context_last_hidden, _ = self.seq_encoder(
                    title_tensor)  # (batch=1, 2*hidden)
            else:
                context_last_hidden, _ = self.seq_encoder(
                    context)  # (batch=1, 2*hidden)
            c = torch.cat((title_last_hidden, context_last_hidden),
                          1)  # (batch, 4*hidden_size)
            # only one poem at a time (batch_size = 1), so no repeat is needed
            prior_z = self.sample_code_prior(c)

            # decode_words is one complete poem line
            decode_words = self.decoder.testing(
                init_hidden=self.init_decoder_hidden(torch.cat((prior_z, c),
                                                               1)),
                maxlen=self.maxlen,
                go_id=self.go_id,
                mode="greedy",
                header=headers[sent_id])

            decode_words = decode_words[0].tolist()
            # import pdb
            # pdb.set_trace()
            if len(decode_words) > self.maxlen:
                tem = [decode_words[0:self.maxlen]]
            else:
                tem = [[0] * (self.maxlen - len(decode_words)) + decode_words]

            pred_tokens = [
                self.vocab[e] for e in decode_words[:-1]
                if e != self.eos_id and e != 0
            ]
            pred_poems.append(pred_tokens)

        gen = ''
        for line in pred_poems:
            true_str = " ".join(line)
            gen = gen + true_str + '\n'

        return gen
Example #28
def main():

    # config for training
    config = Config()
    print("Normal train config:")
    pp(config)

    valid_config = Config()
    valid_config.dropout = 0
    valid_config.batch_size = 20

    # config for test
    test_config = Config()
    test_config.dropout = 0
    test_config.batch_size = 1

    with_sentiment = config.with_sentiment

    ###############################################################################
    # Load data
    ###############################################################################
    # sentiment data path: ../final_data/poem_with_sentiment.txt
    # this path must be passed to LoadPoem explicitly on the command line, since its default is None
    # prepare the pretraining data and the full poem data

    # api = LoadPoem(args.train_data_dir, args.test_data_dir, args.max_vocab_size)
    api = LoadPoem(corpus_path=args.train_data_dir,
                   test_path=args.test_data_dir,
                   max_vocab_cnt=config.max_vocab_cnt,
                   with_sentiment=with_sentiment)

    # alternating training: prepare the large dataset
    poem_corpus = api.get_tokenized_poem_corpus(
        type=1 + int(with_sentiment))  # corpus for training and validation
    test_data = api.get_tokenized_test_corpus()  # test data
    # three lists; every element is [topic, last_sentence, current_sentence]
    train_poem, valid_poem, test_poem = poem_corpus["train"], poem_corpus[
        "valid"], test_data["test"]

    train_loader = SWDADataLoader("Train", train_poem, config)
    valid_loader = SWDADataLoader("Valid", valid_poem, config)
    test_loader = SWDADataLoader("Test", test_poem, config)

    print("Finish Poem data loading, not pretraining or alignment test")

    if not args.forward_only:
        # LOG #
        log_start_time = str(datetime.now().strftime('%Y%m%d%H%M'))
        if not os.path.isdir('./output'):
            os.makedirs('./output')
        if not os.path.isdir('./output/{}'.format(args.expname)):
            os.makedirs('./output/{}'.format(args.expname))
        if not os.path.isdir('./output/{}/{}'.format(args.expname,
                                                     log_start_time)):
            os.makedirs('./output/{}/{}'.format(args.expname, log_start_time))

        # save arguments
        json.dump(
            vars(args),
            open(
                './output/{}/{}/args.json'.format(args.expname,
                                                  log_start_time), 'w'))

        logger = logging.getLogger(__name__)
        logging.basicConfig(level=logging.DEBUG, format="%(message)s")
        fh = logging.FileHandler("./output/{}/{}/logs.txt".format(
            args.expname, log_start_time))
        # add the handlers to the logger
        logger.addHandler(fh)
        logger.info(vars(args))

        tb_writer = SummaryWriter("./output/{}/{}/tb_logs".format(
            args.expname, log_start_time)) if args.visual else None

        if config.reload_model:
            model = load_model(config.model_name)
        else:
            if args.model == "mCVAE":
                model = CVAE_GMP(config=config, api=api)
            elif args.model == 'CVAE':
                model = CVAE(config=config, api=api)
            else:
                model = Seq2Seq(config=config, api=api)
            if use_cuda:
                model = model.cuda()

        # if corpus.word2vec is not None and args.reload_from<0:
        #     print("Loaded word2vec")
        #     model.embedder.weight.data.copy_(torch.from_numpy(corpus.word2vec))
        #     model.embedder.weight.data[0].fill_(0)

        ###############################################################################
        # Start training
        ###############################################################################
        # the model is still PoemWAE_GMP, unchanged; this data just force-trains one of its Gaussian priors
        # pretrain = True

        cur_best_score = {
            'min_valid_loss': 100,
            'min_global_itr': 0,
            'min_epoch': 0,
            'min_itr': 0
        }

        train_loader.epoch_init(config.batch_size, shuffle=True)

        # model = load_model(3, 3)
        epoch_id = 0
        global_t = 0
        while epoch_id < config.epochs:

            while True:  # loop through all batches in training data
                # train one batch
                model, finish_train, loss_records, global_t = \
                    train_process(global_t=global_t, model=model, train_loader=train_loader, config=config, sentiment_data=with_sentiment)
                if finish_train:
                    test_process(model=model,
                                 test_loader=test_loader,
                                 test_config=test_config,
                                 logger=logger)
                    # evaluate_process(model=model, valid_loader=valid_loader, log_start_time=log_start_time, global_t=global_t, epoch=epoch_id, logger=logger, tb_writer=tb_writer, api=api)
                    # save model after each epoch
                    save_model(model=model,
                               epoch=epoch_id,
                               global_t=global_t,
                               log_start_time=log_start_time)
                    logger.info(
                        'Finish epoch %d, current min valid loss: %.4f '
                        'correspond epoch: %d  itr: %d \n\n' %
                        (epoch_id, cur_best_score['min_valid_loss'],
                         cur_best_score['min_epoch'],
                         cur_best_score['min_itr']))
                    # initialize training for the next unlabeled-data epoch
                    # unlabeled_epoch += 1
                    epoch_id += 1
                    train_loader.epoch_init(config.batch_size, shuffle=True)
                    break
                # elif batch_idx >= start_batch + config.n_batch_every_iter:
                #     print("Finish unlabel epoch %d batch %d to %d" %
                #           (unlabeled_epoch, start_batch, start_batch + config.n_batch_every_iter))
                #     start_batch += config.n_batch_every_iter
                #     break

                # write a log entry
                if global_t % config.log_every == 0:
                    log = 'Epoch id %d: step: %d/%d: ' \
                          % (epoch_id, global_t % train_loader.num_batch, train_loader.num_batch)
                    for loss_name, loss_value in loss_records:
                        if loss_name == 'avg_lead_loss':
                            continue
                        log = log + loss_name + ':%.4f ' % loss_value
                        if args.visual:
                            tb_writer.add_scalar(loss_name, loss_value,
                                                 global_t)
                    logger.info(log)

                # valid
                if global_t % config.valid_every == 0:
                    # test_process(model=model, test_loader=test_loader, test_config=test_config, logger=logger)
                    valid_process(
                        global_t=global_t,
                        model=model,
                        valid_loader=valid_loader,
                        valid_config=valid_config,
                        unlabeled_epoch=epoch_id,  # if sample_rate_unlabeled is not 1, add 1 here at the end
                        tb_writer=tb_writer,
                        logger=logger,
                        cur_best_score=cur_best_score)
                # if batch_idx % (train_loader.num_batch // 3) == 0:
                #     test_process(model=model, test_loader=test_loader, test_config=test_config, logger=logger)
                if global_t % config.test_every == 0:
                    test_process(model=model,
                                 test_loader=test_loader,
                                 test_config=test_config,
                                 logger=logger)

    # forward_only: run testing only
    else:
        expname = 'sentInput'
        time = '202101191105'

        model = load_model(
            './output/{}/{}/model_global_t_13596_epoch3.pckl'.format(
                expname, time))
        test_loader.epoch_init(test_config.batch_size, shuffle=False)
        if not os.path.exists('./output/{}/{}/test/'.format(expname, time)):
            os.mkdir('./output/{}/{}/test/'.format(expname, time))
        output_file = [
            open('./output/{}/{}/test/output_0.txt'.format(expname, time),
                 'w'),
            open('./output/{}/{}/test/output_1.txt'.format(expname, time),
                 'w'),
            open('./output/{}/{}/test/output_2.txt'.format(expname, time), 'w')
        ]

        poem_count = 0
        predict_results = {0: [], 1: [], 2: []}
        titles = {0: [], 1: [], 2: []}
        sentiment_result = {0: [], 1: [], 2: []}
        # Get all poem predictions
        while True:
            model.eval()
            batch = test_loader.next_batch_test()  # test data uses its own batch format
            if batch is None:
                break
            poem_count += 1
            if poem_count % 10 == 0:
                print("Predicted {} poems".format(poem_count))
            title_list = batch  # batch size is 1; each batch produces one poem
            title_tensor = to_tensor(title_list)
            # model.test decodes the poem for the current batch; each decoding step's input context is the previous step's result
            for i in range(3):
                sentiment_label = np.zeros(1, dtype=np.int64)
                sentiment_label[0] = int(i)
                sentiment_label = to_tensor(sentiment_label)
                output_poem, output_tokens = model.test(
                    title_tensor, title_list, sentiment_label=sentiment_label)

                titles[i].append(output_poem.strip().split('\n')[0])
                predict_results[i] += (np.array(output_tokens)[:, :7].tolist())

        # Predict sentiment use the sort net
        from collections import defaultdict
        neg = defaultdict(int)
        neu = defaultdict(int)
        pos = defaultdict(int)
        total = defaultdict(int)
        for i in range(3):
            _, neg[i], neu[i], pos[i] = test_sentiment(predict_results[i])
            total[i] = neg[i] + neu[i] + pos[i]

        for i in range(3):
            print("%d%%\t%d%%\t%d%%" %
                  (neg * 100 / total, neu * 100 / total, pos * 100 / total))

        for i in range(3):
            write_predict_result_to_file(titles[i], predict_results[i],
                                         sentiment_result[i], output_file[i])
            output_file[i].close()

        print("Done testing")
Example #29
def main():
    # config for training
    config = Config()
    print("Normal train config:")
    pp(config)

    valid_config = Config()
    valid_config.dropout = 0
    valid_config.batch_size = 20

    # config for test
    test_config = Config()
    test_config.dropout = 0
    test_config.batch_size = 1

    with_sentiment = config.with_sentiment

    pretrain = False

    ###############################################################################
    # Logs
    ###############################################################################
    log_start_time = str(datetime.now().strftime('%Y%m%d%H%M'))
    if not os.path.isdir('./output'):
        os.makedirs('./output')
    if not os.path.isdir('./output/{}'.format(args.expname)):
        os.makedirs('./output/{}'.format(args.expname))
    if not os.path.isdir('./output/{}/{}'.format(args.expname,
                                                 log_start_time)):
        os.makedirs('./output/{}/{}'.format(args.expname, log_start_time))

    # save arguments
    json.dump(
        vars(args),
        open('./output/{}/{}/args.json'.format(args.expname, log_start_time),
             'w'))

    logger = logging.getLogger(__name__)
    logging.basicConfig(level=logging.DEBUG, format="%(message)s")
    fh = logging.FileHandler("./output/{}/{}/logs.txt".format(
        args.expname, log_start_time))
    # add the handlers to the logger
    logger.addHandler(fh)
    logger.info(vars(args))

    tb_writer = SummaryWriter("./output/{}/{}/tb_logs".format(
        args.expname, log_start_time)) if args.visual else None

    ###############################################################################
    # Model
    ###############################################################################
    # vocab and rev_vocab
    with open(args.vocab_path) as vocab_file:
        vocab = vocab_file.read().strip().split('\n')
        rev_vocab = {vocab[idx]: idx for idx in range(len(vocab))}

    if not pretrain:
        pass
        # assert config.reload_model
        # model = load_model(config.model_name)
    else:
        if args.model == "multiVAE":
            model = multiVAE(config=config, vocab=vocab, rev_vocab=rev_vocab)
        else:
            model = CVAE(config=config, vocab=vocab, rev_vocab=rev_vocab)
        if use_cuda:
            model = model.cuda()
    ###############################################################################
    # Load data
    ###############################################################################

    if pretrain:
        from collections import defaultdict
        api = LoadPretrainPoem(corpus_path=args.pretrain_data_dir,
                               vocab_path="data/vocab.txt")

        train_corpus, valid_corpus = defaultdict(list), defaultdict(list)
        divide = 50000
        train_corpus['pos'], valid_corpus['pos'] = api.data[
            'pos'][:divide], api.data['pos'][divide:]
        train_corpus['neu'], valid_corpus['neu'] = api.data[
            'neu'][:divide], api.data['neu'][divide:]
        train_corpus['neg'], valid_corpus['neg'] = api.data[
            'neg'][:divide], api.data['neg'][divide:]

        token_corpus = defaultdict(dict)
        token_corpus['pos'], token_corpus['neu'], token_corpus['neg'] = \
            api.get_tokenized_poem_corpus(train_corpus['pos'], valid_corpus['pos']), \
            api.get_tokenized_poem_corpus(train_corpus['neu'], valid_corpus['neu']), \
            api.get_tokenized_poem_corpus(train_corpus['neg'], valid_corpus['neg']),
        # train_loader_dict = {'pos': }

        train_loader = {
            'pos': SWDADataLoader("Train", token_corpus['pos']['train'],
                                  config),
            'neu': SWDADataLoader("Train", token_corpus['neu']['train'],
                                  config),
            'neg': SWDADataLoader("Train", token_corpus['neg']['train'],
                                  config)
        }

        valid_loader = {
            'pos': SWDADataLoader("Train", token_corpus['pos']['valid'],
                                  config),
            'neu': SWDADataLoader("Train", token_corpus['neu']['valid'],
                                  config),
            'neg': SWDADataLoader("Train", token_corpus['neg']['valid'],
                                  config)
        }
        ###############################################################################
        # Pretrain three VAEs
        ###############################################################################

        epoch_id = 0
        global_t = 0
        init_train_loaders(train_loader, config)
        while epoch_id < config.epochs:

            while True:  # loop through all batches in training data
                # train one batch

                model, finish_train, loss_records, global_t = \
                    pre_train_process(global_t=global_t, model=model, train_loader=train_loader)
                if finish_train:
                    if epoch_id > 5:
                        save_model(model=model,
                                   epoch=epoch_id,
                                   global_t=global_t,
                                   log_start_time=log_start_time)
                    epoch_id += 1
                    init_train_loaders(train_loader, config)
                    break
                # write a log entry
                if global_t % config.log_every == 0:
                    pre_log_process(epoch_id=epoch_id,
                                    global_t=global_t,
                                    train_loader=train_loader,
                                    loss_records=loss_records,
                                    logger=logger,
                                    tb_writer=tb_writer)

                # valid
                if global_t % config.valid_every == 0:
                    # test_process(model=model, test_loader=test_loader, test_config=test_config, logger=logger)
                    pre_valid_process(global_t=global_t,
                                      model=model,
                                      valid_loader=valid_loader,
                                      valid_config=valid_config,
                                      tb_writer=tb_writer,
                                      logger=logger)
                if global_t % config.test_every == 0:
                    pre_test_process(model=model, logger=logger)
    ###############################################################################
    # Train the big model
    ###############################################################################
    api = LoadPoem(corpus_path=args.train_data_dir,
                   vocab_path="data/vocab.txt",
                   test_path=args.test_data_dir,
                   max_vocab_cnt=config.max_vocab_cnt,
                   with_sentiment=with_sentiment)
    from collections import defaultdict
    token_corpus = defaultdict(dict)
    token_corpus['pos'], token_corpus['neu'], token_corpus['neg'] = \
        api.get_tokenized_poem_corpus(api.train_corpus['pos'], api.valid_corpus['pos']), \
        api.get_tokenized_poem_corpus(api.train_corpus['neu'], api.valid_corpus['neu']), \
        api.get_tokenized_poem_corpus(api.train_corpus['neg'], api.valid_corpus['neg']),

    train_loader = {
        'pos': SWDADataLoader("Train", token_corpus['pos']['train'], config),
        'neu': SWDADataLoader("Train", token_corpus['neu']['train'], config),
        'neg': SWDADataLoader("Train", token_corpus['neg']['train'], config)
    }

    valid_loader = {
        'pos': SWDADataLoader("Train", token_corpus['pos']['valid'], config),
        'neu': SWDADataLoader("Train", token_corpus['neu']['valid'], config),
        'neg': SWDADataLoader("Train", token_corpus['neg']['valid'], config)
    }
    test_poem = api.get_tokenized_test_corpus()['test']  # test data
    test_loader = SWDADataLoader("Test", test_poem, config)

    print("Finish Poem data loading, not pretraining or alignment test")

    if not args.forward_only:
        # the model is still PoemWAE_GMP, unchanged; this data just force-trains one of its Gaussian priors
        # pretrain = True

        cur_best_score = {
            'min_valid_loss': 100,
            'min_global_itr': 0,
            'min_epoch': 0,
            'min_itr': 0
        }

        # model = load_model(3, 3)
        epoch_id = 0
        global_t = 0
        init_train_loaders(train_loader, config)
        while epoch_id < config.epochs:

            while True:  # loop through all batches in training data
                # train one batch
                model, finish_train, loss_records, global_t = \
                    train_process(global_t=global_t, model=model, train_loader=train_loader)
                if finish_train:
                    if epoch_id > 5:
                        save_model(model=model,
                                   epoch=epoch_id,
                                   global_t=global_t,
                                   log_start_time=log_start_time)
                    epoch_id += 1
                    init_train_loaders(train_loader, config)
                    break

                # write a log entry
                if global_t % config.log_every == 0:
                    pre_log_process(epoch_id=epoch_id,
                                    global_t=global_t,
                                    train_loader=train_loader,
                                    loss_records=loss_records,
                                    logger=logger,
                                    tb_writer=tb_writer)

                # valid
                if global_t % config.valid_every == 0:
                    valid_process(global_t=global_t,
                                  model=model,
                                  valid_loader=valid_loader,
                                  valid_config=valid_config,
                                  tb_writer=tb_writer,
                                  logger=logger)
                # if batch_idx % (train_loader.num_batch // 3) == 0:
                #     test_process(model=model, test_loader=test_loader, test_config=test_config, logger=logger)
                if global_t % config.test_every == 0:
                    test_process(model=model,
                                 test_loader=test_loader,
                                 test_config=test_config,
                                 logger=logger)

        # forward_only: run inference/testing only
    else:
        expname = 'trainVAE'
        time = '202101231631'

        model = load_model(
            './output/{}/{}/model_global_t_26250_epoch9.pckl'.format(
                expname, time))
        test_loader.epoch_init(test_config.batch_size, shuffle=False)
        if not os.path.exists('./output/{}/{}/test/'.format(expname, time)):
            os.mkdir('./output/{}/{}/test/'.format(expname, time))
        output_file = [
            open('./output/{}/{}/test/output_0.txt'.format(expname, time),
                 'w'),
            open('./output/{}/{}/test/output_1.txt'.format(expname, time),
                 'w'),
            open('./output/{}/{}/test/output_2.txt'.format(expname, time), 'w')
        ]
        poem_count = 0
        predict_results = {0: [], 1: [], 2: []}
        titles = {0: [], 1: [], 2: []}
        sentiment_result = {0: [], 1: [], 2: []}
        # sent_dict = {0: ['0', '1', '1', '0'], 1: ['2', '1', '2', '2'], 2: ['1', '0', '1', '2']}
        sent_dict = {
            0: ['0', '0', '0', '0'],
            1: ['1', '1', '1', '1'],
            2: ['2', '2', '2', '2']
        }
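        # sent_dict holds a fixed per-line sentiment-label template for each of the
        # three target sentiments; four more labels equal to the target index are
        # appended per poem in the loop below.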
        # Get all poem predictions
        while True:
            model.eval()
            batch = test_loader.next_batch_test()  # test data uses a dedicated batch routine
            if batch is None:
                break
            poem_count += 1
            if poem_count % 10 == 0:
                print("Predicted {} poems".format(poem_count))
            title_list = batch  # batch size is 1: each batch generates one poem
            title_tensor = to_tensor(title_list)
            # model.test decodes this poem; each line is decoded with the previous output as its context
            for i in range(3):
                # copy the template so repeated poems do not keep growing sent_dict[i]
                sent_labels = list(sent_dict[i])
                for _ in range(4):
                    sent_labels.append(str(i))

                output_poem, output_tokens = model.test(
                    title_tensor, title_list, sent_labels=sent_labels)

                titles[i].append(output_poem.strip().split('\n')[0])
                predict_results[i] += (np.array(output_tokens)[:, :7].tolist())

        # Predict sentiment using the sort net
        from collections import defaultdict
        neg = defaultdict(int)
        neu = defaultdict(int)
        pos = defaultdict(int)
        total = defaultdict(int)
        for i in range(3):
            cur_sent_result, neg[i], neu[i], pos[i] = test_sentiment(
                predict_results[i])
            sentiment_result[i] = cur_sent_result
            total[i] = neg[i] + neu[i] + pos[i]

        for i in range(3):
            print("%d%%\t%d%%\t%d%%" % (neg[i] * 100 / total[i], neu[i] * 100 /
                                        total[i], pos[i] * 100 / total[i]))

        for i in range(3):
            write_predict_result_to_file(titles[i], predict_results[i],
                                         sentiment_result[i], output_file[i])
            output_file[i].close()

        print("Done testing")
Ejemplo n.º 30
0
    def forward(self, context, sentiment_mask=None, mask_type=None):
        batch_size, _ = context.size()
        context = self.fc(context)
        mus = self.context_to_mus(context)
        logsigmas = self.context_to_logsigmas(context)
        stds = torch.exp(0.5 * logsigmas)  # (batch, n_components * z_size)
        # epsilons: (batch, n_components * z_size)
        epsilons = to_tensor(
            torch.randn([batch_size, self.n_components * self.z_size]))
        zi = (epsilons * stds + mus).view(batch_size, self.n_components,
                                          self.z_size)  # (batch, n_components, z_size)
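        # Reparameterisation trick: one z sample is drawn for every mixture component.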
        pi = None
        pi_final = None

        if sentiment_mask is None:
            # import pdb
            # pdb.set_trace()
            if mask_type is not None:
                # pi = torch.zeros(batch_size, 5)
                pi = torch.zeros(batch_size, 3)

                # if mask_type == "negative":
                #     pi[:, 0:2] = torch.zeros(batch_size, 2)
                # # mask正向
                # elif mask_type == "positive":
                #     pi[:, 3:5] = torch.zeros(batch_size, 2)
                # # 强行去掉中性情感
                # # mask中性
                # elif mask_type == "neutral":
                #     pi[:, 2] = torch.zeros(batch_size)
                if mask_type == "0":
                    pi[:, 0] = 1
                elif mask_type == "1":
                    pi[:, 1] = 1
                elif mask_type == "2":
                    pi[:, 2] = 1
                # elif mask_type == "3":
                #     pi[:, 3] = 1
                # elif mask_type == "4":
                #     pi[:, 4] = 1
                else:
                    print("Invalid mask type: {}".format(mask_type))
                pi_final = pi.cuda()

            else:
                pi = self.pi_net(context)  # (batch, n_components)

                pi_hard = F.gumbel_softmax(pi,
                                           tau=self.gumbel_temp,
                                           hard=True,
                                           eps=1e-10)
                pi_soft = F.gumbel_softmax(pi,
                                           tau=self.gumbel_temp,
                                           hard=False,
                                           eps=1e-10)
                # straight-through: the forward value is the hard one-hot sample,
                # while gradients also flow through the soft sample
                pi_final = pi_hard - pi_soft.detach() + pi_soft

            pi_final = pi_final.unsqueeze(1)  # (batch, 1, n_components)
            z = torch.bmm(pi_final, zi).squeeze(
                1)  # (batch, 1, z_size) --> (batch, z_size)
            mu = torch.bmm(pi_final,
                           mus.view(batch_size, self.n_components,
                                    self.z_size))  # (batch, z_size)
            logsigma = torch.bmm(
                pi_final,
                logsigmas.view(batch_size, self.n_components,
                               self.z_size))  # (batch, z_size)
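            # With a (near) one-hot pi_final, these bmm calls effectively select the
            # z, mu and logsigma of a single mixture component per example.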

        else:
            # mu = self.candidates_mu[force_choice](context)  # (batch, z_size)
            # logsigma = self.candidates_sigma[force_choice](context)  # (batch, z_size)
            # std = torch.exp(0.5 * logsigma)  # (batch, z_size)
            # epsilon = to_tensor(torch.randn([batch_size, self.z_size]))
            # z = epsilon * std + mu  # (batch, z_size)
            # import pdb
            # pdb.set_trace()
            sentiment_mask = sentiment_mask.unsqueeze(1)
            z = torch.bmm(sentiment_mask.float(), zi).squeeze(1)
            mu = torch.bmm(
                sentiment_mask.float(),
                mus.view(batch_size, self.n_components, self.z_size))
            logsigma = torch.bmm(
                sentiment_mask.float(),
                logsigmas.view(batch_size, self.n_components,
                               self.z_size))  # (batch, z_size)

        return z, mu, logsigma, pi, pi_final
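
A hedged usage sketch of this mixture prior (all names below are illustrative; it assumes `prior` is an instance of this module with `n_components=3` and that `c` is the concatenated context encoding of shape `(batch, 2 * hidden_size * 2)`; the `mask_type` branch moves `pi` to CUDA, so that path assumes a GPU):

import torch

# 1) Default path: the Gumbel-softmax gate over pi_net(c) picks a component.
z, mu, logsigma, pi, pi_final = prior(c)

# 2) Force a specific component at inference time via mask_type ('0', '1' or '2').
z_forced, _, _, _, _ = prior(c, mask_type="2")

# 3) Supervised path: a one-hot (batch, n_components) sentiment mask selects the
#    component directly, bypassing the gate.
sentiment_mask = torch.zeros(c.size(0), 3)
sentiment_mask[:, 1] = 1
z_sup, mu_sup, logsigma_sup, _, _ = prior(c, sentiment_mask=sentiment_mask)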