Code example #1
def evaluate(data_source):
    model.eval()
    total_loss = 0
    total_kld = 0
    count = 0
    truth_res = []
    pred_res = []
    for batch in data_source:
        data, label = batch.text, batch.label
        data, label = data.cuda(device_id), label.cuda(device_id)
        label.data.sub_(2)
        truth_res += list(label.data)
        args.batch_size = data.size(1)
        model.decoder.bsz = args.batch_size
        model.encoder.bsz = data.size(1)
        model.label.bsz = data.size(1)
        out_ix = data[1:, :].contiguous().view(-1)
        row = range(args.batch_size)
        label_2 = Variable(torch.zeros(args.batch_size, 2).cuda(device_id),
                           requires_grad=False)
        label_2[row, label] = 1
        recon_batch, z, fake_label = model(data[:-1, :])
        _, pred_label = torch.max(fake_label, 1)
        pred_res += list(pred_label.data)
        count += 1
    acc = get_accuracy(truth_res, pred_res)
    print(' acc :%g ' % (acc))
    return acc
Code example #2
def train():
    model.train()
    total_loss = 0
    start_time = time.time()
    model.decoder.bsz = args.batch_size
    truth_res = []
    pred_res = []
    count = 0.0
    iterator = zip(unsup_data, itertools.cycle(train_data))
    for (unbatch, lbatch) in iterator:
        data, label = lbatch.text, lbatch.label
        undata = unbatch.text
        undata = undata.cuda(device_id)
        data, label = data.cuda(device_id), label.cuda(device_id)
        data.volatile = False
        label.volatile = False
        label.data.sub_(2)
        truth_res += list(label.data)
        args.batch_size = data.size(1)
        model.decoder.bsz = args.batch_size
        seq_len = data.size(0) - 1
        out_ix = data[1:, :].contiguous().view(-1)
        unout_ix = undata[1:, :].contiguous().view(-1)
        row = range(args.batch_size)
        label_2 = Variable(torch.zeros(args.batch_size, 2).cuda(device_id),
                           requires_grad=False)
        label_2[row, label] = 1
        model.zero_grad()
        recon_batch, mu, logvar, fake_label = model(data[:-1, :], label_2)
        BCE, KLD = loss_function(recon_batch, out_ix, mu, logvar)
        label_loss = loss_label(fake_label, label_2)
        loss = label_loss + BCE + KLD

        model.decoder.bsz = undata.size(1)
        recon_batch, mu, logvar, _ = model(undata[:-1, :])
        unBCE, unKLD = loss_function(recon_batch, unout_ix, mu, logvar)
        loss += unBCE + unKLD
        if args.model == "bvae":
            noise_loss = model.noise_loss(lr, alpha)
            noise_loss /= args.bptt * len(train_data)
            loss += noise_loss
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        count += 1
        total_loss += loss.data
        _, pred_label = torch.max(torch.exp(fake_label), 1)
        pred_res += list(pred_label.data)
        if count % args.log_interval == 0 and count > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | lr {:5.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f}  | kld {:5.9f}'.format(
                      epoch, lr, elapsed * 1000 / args.log_interval, cur_loss,
                      KLD.data.item()))
            total_loss = 0
            start_time = time.time()
    print('epoch: %d done!\n acc:%g' %
          (epoch, get_accuracy(truth_res, pred_res)))
Code example #3
def get_nll_lm_char(model, en_msg, word2idx, idx2word, nhid=800):
    # en_msg: a list of batch_size messages (each a sequence of characters)
    with torch.no_grad():
        batch_size = len(en_msg)
        num_chars = cuda(torch.FloatTensor([len(msg) for msg in en_msg]))
        #assert (min([len(msg) for msg in en_msg]) > 1)
        #msgs = [msg.split() + ["<eos>"] for msg in msgs]
        msgs_idx = [[
            word2idx[tok] if tok in word2idx else word2idx["<unk>"]
            for tok in msg
        ] for msg in en_msg]
        msgs_len = [len(msg) for msg in msgs_idx]
        msgs_idx = [
            np.lib.pad(msg, (0, max(msgs_len) - ln),
                       'constant',
                       constant_values=(0, 0))
            for (msg, ln) in zip(msgs_idx, msgs_len)
        ]
        data = cuda(torch.LongTensor(msgs_idx)).t()  # (seq_len, batch_size)
        seq_len = data.size(0)

        input, target = data[:-1, :].contiguous(), data[1:, :].contiguous()
        hidden = ( cuda( torch.FloatTensor( 2, batch_size, nhid ).zero_() ), \
                   cuda( torch.FloatTensor( 2, batch_size, nhid ).zero_() ) )
        output, _ = lm_forward(model, input, hidden)
        # output : (seq_len-1, batch_size, voc_size)
        logits = output.contiguous().view(-1, output.size()[-1])
        nll = F.cross_entropy(logits,
                              target.view(-1),
                              ignore_index=0,
                              reduction='none')  # ((seq_len-1) * batch_size,)
        nll = nll.view(
            -1, data.size(1)).t().contiguous()  # (batch_size, seq_len-1)
        nll = nll.sum(dim=-1) / num_chars  # (batch_size)
        return nll
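
A minimal sketch (toy tensors only, no language model) of why ignore_index=0 pairs with the zero-padding above: padded target positions contribute exactly zero loss, so dividing each message's summed loss by num_chars yields an average NLL per character. The tensor values here are illustrative.

import torch
import torch.nn.functional as F

logits = torch.randn(6, 10)                 # (positions, vocab)
target = torch.tensor([4, 2, 0, 7, 0, 0])   # 0 marks padding
nll = F.cross_entropy(logits, target, ignore_index=0, reduction='none')
print(nll)                                  # entries at padded positions are 0.0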
Code example #4
def evaluate(data_source):
    model.eval()
    total_loss = 0
    total_kld = 0
    count = 0
    truth_res = []
    pred_res = []
    for batch in data_source:
        data, label = batch.text, batch.label
        data, label = data.cuda(device_id), label.cuda(device_id)
        label.data.sub_(2)

        truth_res += list(label.data)
        args.batch_size = data.size(1)
        model.decoder.bsz = args.batch_size
        seq_len = data.size(0) - 1
        out_ix = data[1:, :].contiguous().view(-1)
        row = range(args.batch_size)
        label_2 = Variable(torch.zeros(args.batch_size, 2).cuda(device_id),
                           requires_grad=False)
        label_2[row, label] = 1
        recon_batch, mu, logvar, fake_label = model(data[:-1, :], label_2)
        BCE, KLD = loss_function(recon_batch, out_ix, mu, logvar)
        loss = BCE + KLD
        _, pred_label = torch.max(fake_label, 1)
        pred_res += list(pred_label.data)
        total_loss += loss.data.item()
        total_kld += KLD.data.item()
        count += 1
    avg = total_loss / count
    avg_kld = total_kld / count
    acc = get_accuracy(truth_res, pred_res)
    print(' acc :%g avg_loss:%g kld:%g' % (acc, avg, avg_kld))
    return acc
Code example #5
def eval_epoch(model, data_iter, criterion):
    total_loss = 0.
    total_words = 0.
    # Optionally wrap data_iter in tqdm(data_iter, mininterval=2, desc=' - Training', leave=False)
    for (data, target) in data_iter:
        data = Variable(data, volatile=True)
        target = Variable(target, volatile=True)
        if opt.cuda:
            data, target = data.cuda(), target.cuda()
        target = target.contiguous().view(-1)
        pred = model.forward(data)
        loss = criterion(pred, target)
        total_loss += loss.data.item()
        total_words += data.size(0) * data.size(1)
    data_iter.reset()
    return math.exp(total_loss / total_words)
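
The returned value is a perplexity: the exponential of the average loss per word, assuming the criterion sums (rather than averages) the cross-entropy over tokens. A toy arithmetic sketch with made-up numbers:

import math

total_loss, total_words = 520.0, 100.0
perplexity = math.exp(total_loss / total_words)   # exp(5.2) ~ 181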
Code example #6
    def train_step(self, optimizer, start_time):

        accuracies = torch.zeros(self.log_interval)
        total_loss = 0

        for i, batch in enumerate(self.train_iterator):
            #CLEARING HISTORY
            optimizer.zero_grad()  # note the parentheses: without the call, gradients are never cleared

            #GETTING TENSORS
            data, targets = batch.text, batch.label.view(-1)
            targets = targets - 1  # shift labels from one-based to zero-based
            data, lengths = data[0], data[1]

            #CONVERTING TO CUDA IF NEEDED
            if self.cuda:
                data = data.cuda()
                targets = targets.cuda()
                lengths = lengths.cuda()

            if data.size(0) == self.batch_size:
                #GETTING PREDICTIONS
                output, h, A = self.model(data, lengths = lengths)
                predictions = output.view(-1, self.num_classes)

                #GET ACCURACY
                preds = torch.max(predictions, dim = 1)[1]
                pct_correct = float((targets == preds).sum().item()) / predictions.size(0)
                accuracies[i % self.log_interval] = pct_correct

                if self.weight_saving:
                    #SAVING ATTENTION WEIGHTS
                    self.save_weights(i, data, A, h, preds, targets, 'train')

                #CALCULATING AND PROPAGATING LOSS
                loss = self.objective(predictions, targets)
                loss.backward()

                if self.clip is not None:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
                if self.optim in ['adam', 'SGD']:
                    optimizer.step()
                elif self.optim == 'vanilla_grad':
                    parameters = filter(lambda p: p.requires_grad, self.model.parameters())
                    for p in parameters:
                        p.data.add_(p.grad.data, alpha=-self.lr)

                total_loss += loss.data


                if i % self.log_interval == 0 and i != 0:
                    current_accuracy = float(torch.sum(accuracies)) / float(torch.nonzero(accuracies).size(0))
                    current_loss = total_loss.item() / self.log_interval
                    total_loss = 0
                    elapsed = time() - start_time
                    accuracies = torch.zeros(self.log_interval)
                    print('At time: {elapsed} accuracy is {current_accuracy} and loss is {loss}'\
                            .format(elapsed=elapsed, current_accuracy = current_accuracy, loss = current_loss))

        return optimizer
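
The 'vanilla_grad' branch above applies a plain gradient-descent update by hand. A standalone sketch of the same update outside the class (the tensor, loss, and learning rate are illustrative):

import torch

w = torch.randn(3, requires_grad=True)
loss = (w ** 2).sum()
loss.backward()
with torch.no_grad():
    w -= 0.1 * w.grad          # same effect as p.data.add_(p.grad.data, alpha=-lr)
    w.grad.zero_()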
Code example #7
def batchify(data, bsz):
    data = TEXT.numericalize([data.examples[0].text])
    # Divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
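
A quick illustration of the reshaping batchify performs, using a toy token stream in place of the TEXT and device globals above:

import torch

stream = torch.arange(26)          # 26 token ids
bsz = 4
nbatch = stream.size(0) // bsz     # 6 full columns; the 2 leftover tokens are trimmed
trimmed = stream.narrow(0, 0, nbatch * bsz)
batched = trimmed.view(bsz, -1).t().contiguous()
print(batched.shape)               # torch.Size([6, 4]); each column is an independent stream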
Code example #8
    def make_std_mask(data, pad):
        """
        Create a mask to hide padding and future words.
        """
        data_mask = (data != pad).unsqueeze(-2)
        data_mask = data_mask & Variable(
            subsequent_mask(data.size(-1)).type_as(data_mask.data))

        return data_mask
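
make_std_mask relies on a subsequent_mask helper that is not shown here. A common definition in annotated-Transformer-style code looks like the sketch below; treat it as an assumption rather than this project's exact helper:

import numpy as np
import torch

def subsequent_mask(size):
    """Upper-triangular mask hiding future positions; shape (1, size, size)."""
    attn_shape = (1, size, size)
    mask = np.triu(np.ones(attn_shape), k=1).astype('uint8')
    return torch.from_numpy(mask) == 0   # True where attention is allowed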
Code example #9
def evaluate(model, data):
    model.eval()
    total_loss = 0.
    it = iter(data)
    total_count = 0.
    with torch.no_grad():

        hidden = model.init_hidden(BATCH_SIZE, requires_grad=False)
        for i, batch in enumerate(it):
            data, target = batch.text, batch.target
            if USE_CUDA:
                data, target = data.cuda(), target.cuda()
            hidden = repackage_hidden(hidden)
            with torch.no_grad():
                output, hidden = model(data, hidden)
            loss = loss_fn(output.view(-1, VOCAB_SIZE), target.view(-1))
            total_count += np.multiply(*data.size())
            total_loss += loss.item() * np.multiply(*data.size())

    loss = total_loss / total_count
    model.train()
    return loss
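
repackage_hidden is assumed to be the usual word-language-model helper that detaches the hidden state so gradients never flow into earlier batches; a typical definition (an assumption, not necessarily this repository's exact code) is:

import torch

def repackage_hidden(h):
    """Detach hidden states (a tensor or tuple of tensors) from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)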
Code example #10
def train_generator(model, data_iter, criterion, optimizer):
    total_loss = 0.
    total_words = 0.
    if not opt.server:
        data_iter = tqdm(data_iter,
                         mininterval=2,
                         desc=' - Generator Training',
                         leave=False)
    for each in data_iter:
        data, target = each.text[:, :-1], each.text[:, 1:]
        if opt.cuda:
            data, target = data.cuda(), target.cuda()
        target = target.contiguous().view(-1)
        pred = model.forward(data)
        if len(pred.shape) > 2:
            pred = torch.reshape(pred, (pred.shape[0] * pred.shape[1], -1))
        loss = criterion(pred, target)
        total_loss += loss.data.item()
        total_words += data.size(0) * data.size(1)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return math.exp(total_loss / total_words)
Code example #11
def evaluate(data_source):
    model.eval()
    total_loss = 0
    total_kld = 0
    count = 0
    truth_res = []
    pred_res = []
    pred = []
    for batch in data_source:
        data, label = batch.text, batch.label
        data, label = data.cuda(device_id), label.cuda(device_id)
        label.data.sub_(2)

        truth_res += list(label.data)
        args.batch_size = data.size(1)
        model.decoder.bsz = args.batch_size
        seq_len = data.size(0) - 1
        out_ix = data[1:, :].contiguous().view(-1)
        row = range(args.batch_size)
        label_2 = Variable(torch.zeros(args.batch_size, 2).cuda(device_id),
                           requires_grad=False)
        label_2[row, label] = 1
        if args.model == 'bae' or args.model == 'baeg':
            model.encoder.bsz = data.size(1)
            model.label.bsz = data.size(1)
            recon_batch, z, fake_label = model(data[:-1, :])
        else:
            recon_batch, mu, logvar, fake_label = model(data[:-1, :], label_2)

        _, pred_label = torch.max(fake_label, 1)
        pred_res += list(pred_label.data)
        pred.append(fake_label)
        count += 1
    pred = torch.cat(pred, 0)
    acc = get_accuracy(truth_res, pred_res)
    print(' acc :%g' % (acc))
    return pred, truth_res
Code example #12
File: IO.py  Project: fenfei/OpenNMT-py
def make_contexts(batch, window_size=5, word_padding_idx=0, data_type='text'):
    """
        Args:
                batch (Variable): a batch of source or target data.
                data_type (str): type of the source input.
                        Options are [text|img|audio].
        Returns:
                A sequence of context tensors of size (len x batch).
        """
    #def get_context(data):
    #"""
    #Args:
    #data (Tensor): input tensor of size (len x batch).
    #Returns:
    #An iterable of context tensors of size (len x batch).
    #"""
    #padding = [word_padding_idx] * data.size()[1]
    #L = data.size()[0]
    #if L
    #for c in range(1, window_size + 1):
    #yield torch.cat([Variable(torch.LongTensor([padding] * c).cuda()), data[:-c]], 0)
    #for c in range(1, window_size + 1):
    #yield torch.cat([data[c:], Variable(torch.LongTensor([padding] * c).cuda())], 0)

    side = 'src'
    if isinstance(batch.__dict__[side], tuple):
        data = batch.__dict__[side][0]
    else:
        data = batch.__dict__[side]

    if data_type == 'text':
        L, B = data.size()
        pad = [[word_padding_idx] * B] * window_size
        pdata = torch.cat([
            Variable(torch.LongTensor(pad).cuda()), data,
            Variable(torch.LongTensor(pad).cuda())
        ], 0).transpose(1, 0)
        return torch.cat([
            pdata[:,
                  list(range(i, i + window_size)) +
                  list(range(i + window_size, i +
                             2 * window_size))].unsqueeze(0) for i in range(L)
        ], 0)
    else:
        return data
Code example #13
def evaluate_twin(model, data_iter, loss_function):
    model.eval()

    loss_meter = meter.AverageValueMeter()
    loss_meter.reset()

    for batch in tqdm.tqdm(data_iter):
        data = batch.text
        model.batch_size = data.size(1)
        hidden = model.init_hidden()
        if opt.use_gpu:
            data = data.cuda()
        input_, target = Variable(data[:-1, :]), Variable(data[1:, :])
        output = model.work(input_, hidden)

        loss = loss_function(output[0], target.view(-1))
        loss_meter.add(loss.item())

    return loss_meter.value()[0]
Code example #14
    def evaluate(self):
        self.model.eval()
        i = 0
        accuracies = torch.zeros(len(self.test_iterator))
        total_loss = 0
        for i, batch in enumerate(self.test_iterator):
            #GETTING TENSORS
            data, targets = batch.text, batch.label.view(-1)
            data, lengths = data[0], data[1]
            targets = targets - 1

            #CONVERTING TO CUDA IF NEEDED
            if self.cuda:
                data = data.cuda()
                targets = targets.cuda()
                lengths = lengths.cuda()

            if data.size(0) == self.batch_size:

                #GETTING PREDICTIONS
                output, h, A = self.model(data, lengths = lengths)
                predictions = output.view(-1, self.num_classes)

                #GET ACCURACY
                preds = torch.max(predictions, dim = 1)[1]
                pct_correct = float((targets == preds).sum().item()) / predictions.size(0)
                accuracies[i] = pct_correct

                if self.weight_saving:
                    #SAVING ATTENTION WEIGHTS
                    self.save_weights(i, data, A, h, preds, targets, "test")

                #CALCULATING LOSS
                loss = self.objective(predictions, targets)
                total_loss += loss.data


        self.eval_accuracy = float(torch.sum(accuracies)) / float(torch.nonzero(accuracies).size(0))
        print('Done Evaluating: Achieved accuracy of {}'
                .format(self.eval_accuracy))
Code example #15
    def word_dropout(self, inputs):
        """
        Do word dropout: with prob `p_word_dropout`, set the word to '<unk>'.
        """
        if isinstance(inputs, Variable):
            data = inputs.data.clone()
        else:
            data = inputs.clone()

        # Sample masks: elems with val 1 will be set to <unk>
        mask = torch.from_numpy(
            np.random.binomial(1,
                               p=self.p_word_dropout,
                               size=tuple(data.size())).astype('uint8'))

        if self.gpu:
            mask = mask.cuda()

        # Set to <unk>
        data[mask] = self.UNK_IDX

        return Variable(data)
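
A standalone, functional sketch of the same word-dropout idea on a toy index tensor (the dropout probability and <unk> index below are illustrative):

import numpy as np
import torch

def word_dropout(inputs, p_word_dropout=0.3, unk_idx=1):
    data = inputs.clone()
    mask = torch.from_numpy(
        np.random.binomial(1, p=p_word_dropout, size=tuple(data.size())).astype(bool))
    data[mask] = unk_idx
    return data

tokens = torch.tensor([[2, 5, 7, 9, 3]])
print(word_dropout(tokens))   # roughly 30% of positions replaced by unk_idx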
Code example #16
def train_twin(**kwargs):

    for k, v in kwargs.items():
        setattr(opt, k, v)
        # setattr(object, name, value) sets the attribute on opt

    vis = Visualizer(env=opt.env)  # set the visdom environment

    # Load the data
    train_iter, valid_iter, test_iter, field = load_data()
    word2ix = field.vocab.stoi
    ix2word = field.vocab.itos
    # Define the model
    model = lstm_twin(len(word2ix), 300, 150)

    best_model = model
    best_valid_loss = float("inf")

    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    scheduler = t.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       'min',
                                                       min_lr=1e-5)
    # CrossEntropyLoss averages the loss over every character, so the loss stays below ~10;
    # with size_average=False it would be a value in the thousands (roughly 2000x larger).
    # To report the loss per sentence, multiply by seq_len.
    criterion = nn.CrossEntropyLoss()

    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))

    if opt.use_gpu:
        model.cuda()
        criterion.cuda()

    count = 0
    for epoch in range(opt.epoch):
        model.train()
        logging.info("这是第{0}次epoch".format(count + 1))
        cnt = 0

        b_fwd_loss, b_bwd_loss, b_twin_loss, b_all_loss = 0., 0., 0., 0.

        for batch in tqdm.tqdm(
                train_iter
        ):  # tqdm is a progress-bar library wrapping the iterator; it/s means iterations per second
            # Training step
            data = batch.text
            seq_len = data.size(0)
            # Build a reversed index, since tensors do not support negative strides
            idx = np.arange(seq_len)[::-1].tolist()
            idx = t.LongTensor(idx)
            idx = Variable(idx).cuda()
            model.batch_size = data.size(1)
            hidden1 = model.init_hidden()
            hidden2 = model.init_hidden()
            if opt.use_gpu: data = data.cuda()
            optimizer.zero_grad()

            # Offset input and target by one step, as in a CharRNN
            f_input, f_target = Variable(data[:-1, :]), Variable(data[1:, :])
            bx = data.index_select(0, idx)
            b_input, b_target = Variable(bx[:-1, :]), Variable(bx[1:, :])
            # print(f_input.size(),b_input.size())
            f_out, b_out, f_h, b_h = model(f_input, b_input, hidden1, hidden2)

            f_loss = criterion(f_out, f_target.view(-1))
            b_loss = criterion(b_out, b_target.view(-1))
            b_h_inv = b_h.index_select(0, idx[1:])
            b_h_inv = b_h_inv[1:]  # drop <sos>
            # print(f_h.size(), b_h_inv.size())
            b_h_inv = b_h_inv.detach()
            f_h = f_h[:-1]  # drop <eos>
            twin_loss = ((f_h - b_h_inv)**2).mean()
            twin_loss *= 1.5
            all_loss = f_loss + b_loss + twin_loss
            all_loss.backward()

            t.nn.utils.clip_grad_norm_(model.parameters(), 5.)
            optimizer.step()

            # Accumulate losses for logging
            b_all_loss += all_loss.item()
            b_fwd_loss += f_loss.item()
            b_bwd_loss += b_loss.item()
            b_twin_loss += twin_loss.item()

            # Visualization
            if (1 + cnt) % opt.plot_every == 0:
                vis.plot('all_loss', b_all_loss / opt.plot_every)
                vis.plot('twin_loss', b_twin_loss / opt.plot_every)
                vis.plot('loss', b_fwd_loss / opt.plot_every)
                # logging.info("训练第{}个plot的all_loss:{:f}, f_loss: {:f}, b_loss: {:f}, twin_loss: {:f}"
                #              .format(int((cnt + 1) / opt.plot_every), b_all_loss / opt.plot_every,
                #                      b_fwd_loss / opt.plot_every,
                #                      b_bwd_loss / opt.plot_every, b_twin_loss / opt.plot_every))

                b_fwd_loss, b_bwd_loss, b_twin_loss, b_all_loss = 0., 0., 0., 0.

            cnt += 1
        count += 1

        valid_loss = evaluate_twin(model, valid_iter, criterion)
        scheduler.step(valid_loss)
        logging.info("第%d次验证集的loss为: %f" % (count, valid_loss))
        if valid_loss < best_valid_loss:
            # os.system('rm ' + opt.model_prefix +opt.model + '.pth')
            best_valid_loss = valid_loss
            best_model = model
            t.save(best_model.state_dict(),
                   '%s%s_%d.pth' % (opt.model_prefix, opt.model, count))

        test_loss = evaluate_twin(best_model, test_iter, criterion)
        logging.info("测试集的loss为: %f" % test_loss)

        # Halve the learning rate
        if epoch in [5, 10, 15]:
            for param_group in optimizer.param_groups:
                lr = param_group['lr']
                lr *= 0.5
                param_group['lr'] = lr
Code example #17
def Bleu(**kwargs):

    for k, v in kwargs.items():
        setattr(opt, k, v)
        # setattr(object, name, value) sets the attribute on opt
    print('Loading model from {}'.format(opt.model_path))
    # Load the vocabulary
    if os.path.exists(opt.pickle_path):
        data = np.load(opt.pickle_path)
        word2ix, ix2word = data['word2ix'].item(), data['ix2word']
    else:
        train_iter, valid_iter, test_iter, field = load_data()
        word2ix = field.vocab.stoi
        ix2word = field.vocab.itos
    # Load the model
    if opt.model == 'lstm':
        model = lstm(len(word2ix), 300, 150)
    elif opt.model == 'lstm_twin':
        model = lstm_twin(len(word2ix), 300, 150)

    map_location = lambda s, l: s
    state_dict = t.load(opt.model_path, map_location=map_location)
    model.load_state_dict(state_dict)
    if opt.use_gpu:
        model.cuda()
    print("加载完毕")

    # model.eval()
    hypothesis = []
    references = []
    cnt = 0
    for batch in tqdm.tqdm(test_iter):
        cnt += 1
        # batch = next(iter(test_iter))
        data = batch.text
        if opt.model == 'lstm_twin':
            model.batch_size = data.size(1)
            hidden = model.init_hidden()
        if opt.use_gpu:
            data = data.cuda()
        input_, target = Variable(data[:-1, :]), Variable(data[1:, :])
        tmp = target.transpose(0, 1).cpu().numpy()
        # print(tmp)
        print('=========== Input ==========')
        for ii in tmp:
            ii_ = list(ii)
            for i in ii_:
                print(ix2word[i], end='')
            print('')
            ii_ = ii_[:ii_.index(3) + 1]  # truncate at <eos> (index 3)
            references.append([ii_])

        print('=========== Output ==========')
        # print(references)

        if opt.model == 'lstm':
            output, _ = model(input_)
            output = output.view(data.size(0) - 1, data.size(1), -1)
        elif opt.model == 'lstm_twin':
            output = model.work(input_, hidden)
            output = output[0].view(data.size(0) - 1, data.size(1), -1)

        # print(output.size())
        top = output.topk(1, dim=2)[1].squeeze().transpose(0, 1)
        top = top.cpu().numpy()
        for ii in top:
            ii_ = list(ii)
            for i in ii_:
                print(ix2word[i], end='')
            print('')
            eos_pos = ii_.index(3) if 3 in ii_ else None  # position of <eos>, if any
            if eos_pos is not None:
                ii_ = ii_[:eos_pos + 1]
            hypothesis.append(ii_)

        # if cnt > 10:
        #     break

        # print(hypothesis)
    bleu1 = corpus_bleu(references, hypothesis, weights=(1, 0, 0, 0))
    bleu2 = corpus_bleu(references,
                        hypothesis,
                        weights=(1. / 2., 1. / 2., 0, 0))
    bleu3 = corpus_bleu(references,
                        hypothesis,
                        weights=(1. / 3., 1. / 3., 1. / 3., 0))
    bleu4 = corpus_bleu(references, hypothesis)
    print("bleu1: ", bleu1, "bleu2: ", bleu2, "bleu3: ", bleu3, "bleu4: ",
          bleu4)
Code example #18
def train():
    model.train()
    total_loss = 0
    start_time = time.time()
    model.decoder.bsz = args.batch_size
    truth_res = []
    pred_res = []
    count = 0.0
    iterator = zip(unsup_data, itertools.cycle(train_data))
    for (unbatch, lbatch) in iterator:
        data, label = lbatch.text, lbatch.label
        undata = unbatch.text
        undata = undata.cuda(device_id)
        data, label = data.cuda(device_id), label.cuda(device_id)
        data.volatile = False
        label.volatile = False
        label.data.sub_(2)

        truth_res += list(label.data)
        args.bptt = (data.size(0) + undata.size(0)) / 2
        out_ix = data[1:, :].contiguous().view(-1)
        unout_ix = undata[1:, :].contiguous().view(-1)
        row = range(data.size(1))
        label_2 = Variable(torch.zeros(data.size(1), 2).cuda(device_id),
                           requires_grad=False)
        label_2[row, label] = 1
        model.zero_grad()
        for j in range(J):
            if j == 0:
                model.zero_grad()
                model.decoder.bsz = data.size(1)
                model.encoder.bsz = data.size(1)
                model.label.bsz = data.size(1)
                recon_batch, z, fake_label = model(data[:-1, :])
                model.decoder.bsz = undata.size(1)
                model.encoder.bsz = undata.size(1)
                model.label.bsz = undata.size(1)
                unrecon_batch, unz, _ = model(undata[:-1, :])
                z_sample = Variable(z.data, requires_grad=True)
                z_optimizer = z_opt(z_sample)
                z_optimizer.zero_grad()
                unz_sample = Variable(unz.data, requires_grad=True)
                unz_optimizer = z_opt(unz_sample)
                unz_optimizer.zero_grad()
            else:
                model.zero_grad()
                emb = model.embed(data[:-1, :])
                model.decoder.bsz = data.size(1)
                model.label.bsz = data.size(1)
                fake_label = model.label(emb, z_sample)
                recon_batch = model.decoder(emb, z_sample)
                model.decoder.bsz = undata.size(1)
                model.label.bsz = undata.size(1)
                unemb = model.embed(undata[:-1, :])
                unrecon_batch = model.decoder(unemb, unz_sample)

            BCE = loss_function(recon_batch, out_ix)
            unBCE = loss_function(unrecon_batch, unout_ix)
            label_loss = loss_label(fake_label, label_2)
            noise_loss = model.noise_loss(lr, alpha)
            noise_loss /= args.bptt * len(train_data)
            prior_loss_z = z_prior_loss(z_sample)
            noise_loss_z = z_noise_loss(z_sample)
            prior_loss_z /= args.bptt * len(train_data)
            noise_loss_z /= args.bptt * len(train_data)
            unprior_loss_z = z_prior_loss(unz_sample)
            unnoise_loss_z = z_noise_loss(unz_sample)
            unprior_loss_z /= args.bptt * len(train_data)
            unnoise_loss_z /= args.bptt * len(train_data)
            loss = BCE + unBCE + label_loss + noise_loss + prior_loss_z + noise_loss_z + unprior_loss_z + unnoise_loss_z
            if j > burnin + 1:
                loss_en = en_loss(z_sample, z)
                unloss_en = en_loss(unz_sample, unz)
                loss += loss_en + unloss_en
            if j % 2 == 0:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
                optimizer.step()
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
                z_optimizer.step()
                unz_optimizer.step()
        count += 1

        total_loss += label_loss.data + BCE.data + unBCE.data
        _, pred_label = torch.max(torch.exp(fake_label), 1)
        pred_res += list(pred_label.data)
        if count % args.log_interval == 0 and count > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | lr {:5.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f}  '.format(epoch, lr,
                                          elapsed * 1000 / args.log_interval,
                                          cur_loss))
            total_loss = 0
            start_time = time.time()
    print('epoch: %d done!\n acc:%g' %
          (epoch, get_accuracy(truth_res, pred_res)))