def check_generic(comm, length, bs):
    assert bs > 0
    assert length > 0
    a = list(range(comm.rank, length, comm.size))
    b = list(range(comm.rank, length, comm.size))
    c = list(range(comm.rank, length, comm.size))

    model = ExampleModel()
    dataset = TupleDataset(a, b, c)
    iterator = SerialIterator(dataset, bs, shuffle=False, repeat=False)
    evaluator = GenericMultiNodeEvaluator(comm, iterator, model)

    results = evaluator(None)

    # Make expected answer
    iterator.reset()
    s = [
        [
            aa + bb + cc  # Same calculation as model
            for aa, bb, cc in batch
        ] for batch in iterator
    ]
    s = comm.gather_obj(s)

    if comm.rank == 0:
        # flatten list of lists gathered
        expected = []
        for e in zip(*s):
            expected.extend(e)

        for e, r in zip(expected, results):
            chainer.testing.assert_allclose(e, r)

    else:
        assert results is None
Example #2
class UnpairedIterator(iterator.Iterator):
    """An iterator for unpaired dataset which wraps two SerialIterator.
    """
    def __init__(self, dataset1, dataset2, batch_size, repeat=True):
        if len(dataset2) < len(dataset1):
            self._main_iter = SerialIterator(dataset1,
                                             batch_size=batch_size,
                                             repeat=repeat,
                                             shuffle=True)
            self._sub_iter = SerialIterator(dataset2,
                                            batch_size=batch_size,
                                            repeat=True,
                                            shuffle=True)
            self._rev = False
        else:
            self._main_iter = SerialIterator(dataset2,
                                             batch_size=batch_size,
                                             repeat=repeat,
                                             shuffle=True)
            self._sub_iter = SerialIterator(dataset1,
                                            batch_size=batch_size,
                                            repeat=True,
                                            shuffle=True)
            self._rev = True

    def __next__(self):
        if self._rev:
            return [
                x for x in zip(self._sub_iter.next(), self._main_iter.next())
            ]
        else:
            return [
                x for x in zip(self._main_iter.next(), self._sub_iter.next())
            ]

    next = __next__

    @property
    def epoch(self):
        return self._main_iter.epoch

    @property
    def epoch_detail(self):
        return self._main_iter.epoch_detail

    @property
    def previous_epoch_detail(self):
        return self._main_iter.previous_epoch_detail

    def reset(self):
        self._main_iter.reset()
        self._sub_iter.reset()

    @property
    def repeat(self):
        return self._main_iter.repeat

    @property
    def is_new_epoch(self):
        return self._main_iter.is_new_epoch
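
A minimal usage sketch for the class above, assuming two hypothetical toy list datasets and the SerialIterator import already used in this file; note that the `_rev` flag keeps each pair ordered as (dataset1 item, dataset2 item) regardless of which dataset is longer:

# Hypothetical toy datasets; any sequences accepted by SerialIterator would do.
dataset1 = list(range(10))   # shorter dataset
dataset2 = list(range(25))   # longer dataset becomes the "main" iterator (_rev=True)

pair_iter = UnpairedIterator(dataset1, dataset2, batch_size=4)
batch = pair_iter.next()     # list of (dataset1 item, dataset2 item) tuples
print(len(batch), batch[0])  # e.g. 4 (7, 19), depending on shuffling
pair_iter.reset()
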
def get_trainer_and_reporter(
    trial: Trial,
    model: CbLossClassifier,
    iter_test: iterators.SerialIterator,
    iter_train: iterators.SerialIterator,
    batch_converter,
    args,
    device=0,
    best_params={}):

    if best_params != {}:  # reuse previously found best_params if given
        learning_rate = best_params['learning_rate']
    else:
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)

    grad_clipping = trial.suggest_uniform('grad_clipping', 0, 1.0)

    optimizer = optimizers.SGD(lr=learning_rate)
    optimizer.setup(model)
    optimizer.add_hook(optimizer_hooks.GradientClipping(threshold=grad_clipping))

    updater = training.StandardUpdater(
        iter_train,
        optimizer,
        device=device,
        converter=batch_converter
    )
    
    early_trigger = training.triggers.EarlyStoppingTrigger(
        check_trigger=(1, "epoch"),
        monitor="validation/main/accuracy",
        patients=3,
        mode="max",
        max_trigger=(args.epoch, "epoch")
    )

    trainer = training.Trainer(updater, early_trigger, out='optuna')
    trainer.extend(extensions.Evaluator(
        iter_test, model, device=device, converter=batch_converter))
    snapshot_writer = training.extensions.snapshot_writers.ThreadQueueWriter()
    trainer.extend(training.extensions.snapshot_object(
        target=model,
        filename='model_{}.npz'.format(args.desc),
        writer=snapshot_writer), trigger=(10, 'epoch'))

    reporter = extensions.LogReport()
    trainer.extend(reporter)

    trainer.extend(integration.ChainerPruningExtension(
        trial, args.pruning_key, (args.pruning_trigger_epoch, 'epoch')))

    iter_test.reset()
    iter_train.reset()

    return trainer, reporter
Example #4
def train():
    batchsize = 128
    max_epoch = 10
    device = 0
    train_data, test_data = mnist.get_mnist(withlabel=True, ndim=1)
    train_iter = SerialIterator(train_data, batchsize)
    test_iter = SerialIterator(test_data, batchsize, repeat=False, shuffle=False)
    model = MyNetwork()
    if chainer.cuda.available and device >= 0:
        model.to_gpu(device)
    else:
        device = -1
    optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    while train_iter.epoch < max_epoch:
        train_batch = train_iter.next()
        image_train, target_train = concat_examples(train_batch, device)
        prediction_train = model(image_train)
        loss = F.softmax_cross_entropy(prediction_train, target_train)
        model.cleargrads()
        loss.backward()
        optimizer.update()
        if train_iter.is_new_epoch:
            loss_array = float(chainer.backends.cuda.to_cpu(loss.array))
            print("epoch{:2d} train_loss:{:.04f}".format(train_iter.epoch, loss_array))
            test_losses = []
            test_accs = []
            while True:
                test_batch = test_iter.next()
                image_test, target_test = concat_examples(test_batch, device)
                prediction_test = model(image_test)
                loss_test = F.softmax_cross_entropy(prediction_test, target_test)
                test_losses.append(chainer.backends.cuda.to_cpu(loss_test.array))
                acc = F.accuracy(prediction_test, target_test)
                test_accs.append(chainer.backends.cuda.to_cpu(acc.array))
                if test_iter.is_new_epoch:
                    test_iter.reset()
                    break
            mean_loss = np.mean(test_losses)
            mean_acc = np.mean(test_accs)
            print("val_loss:{:.04f} val_accuracy:{:.04f}".format(mean_loss, mean_acc))

    chainer.serializers.save_npz("model.npz", model)
def check_custom(comm, length, bs):
    assert bs > 0
    assert length > 0
    a = list(range(comm.rank, length, comm.size))
    b = list(range(comm.rank, length, comm.size))
    c = list(range(comm.rank, length, comm.size))

    model = ExampleModel()
    dataset = TupleDataset(a, b, c)
    iterator = SerialIterator(dataset, bs, shuffle=False, repeat=False)
    evaluator = CustomMultiNodeEvaluator(comm, iterator, model)

    result = evaluator(None)

    iterator.reset()
    expected = comm.allreduce_obj(sum(2 for batch in iterator))

    if comm.rank == 0:
        assert expected == result

    else:
        assert result is None
optimizer.setup(net)
for param in net.params():
    if param.name != 'b':  # skip bias parameters
        param.update_rule.add_hook(WeightDecay(0.0001))  # weight decay

## Train the network
gpu_id = 0  # GPU ID to use
n_batch = 64  # batch size
n_epoch = 50  # number of epochs
### Transfer the network to GPU memory
net.to_gpu(gpu_id)
### Logs
results_train, results_valid = {}, {}
results_train['loss'], results_train['accuracy'] = [], []
results_valid['loss'], results_valid['accuracy'] = [], []
train_iter.reset()  # reset because next() has already been called once above
count = 1
for epoch in range(n_epoch):
    while True:
        #### Fetch a mini-batch
        train_batch = train_iter.next()
        #### Split into x and t
        ##### Pass gpu_id to concat_examples so the data is transferred to the GPU
        x_train, t_train = chainer.dataset.concat_examples(train_batch, gpu_id)
        ##### Compute the predictions and the objective function
        y_train = net(x_train)
        loss_train = F.softmax_cross_entropy(y_train, t_train)
        acc_train = F.accuracy(y_train, t_train)
        ##### Clear the gradients and compute new ones
        net.cleargrads()
        loss_train.backward()
Example #7
def train(batch_size, epoch_count, lamda, datasetA_folder_path,
          datasetB_folder_path, output_path):
    dataset_A = data_io.dataset_load(datasetA_folder_path)
    train_iter_A = SerialIterator(dataset_A,
                                  batch_size,
                                  repeat=True,
                                  shuffle=True)
    dataset_B = data_io.dataset_load(datasetB_folder_path)
    train_iter_B = SerialIterator(dataset_B,
                                  batch_size,
                                  repeat=True,
                                  shuffle=True)

    g_ab = Generator()
    g_ba = Generator()
    d_a = Discriminator()
    d_b = Discriminator()

    g_ab.to_gpu(0)
    g_ba.to_gpu(0)
    d_a.to_gpu(0)
    d_b.to_gpu(0)

    opt_g_ab = chainer.optimizers.Adam(alpha=0.0002, beta1=0.5)
    opt_g_ab.setup(g_ab)
    opt_g_ba = chainer.optimizers.Adam(alpha=0.0002, beta1=0.5)
    opt_g_ba.setup(g_ba)
    opt_d_a = chainer.optimizers.Adam(alpha=0.0002, beta1=0.5)
    opt_d_a.setup(d_a)
    opt_d_b = chainer.optimizers.Adam(alpha=0.0002, beta1=0.5)
    opt_d_b.setup(d_b)

    iteration = 0
    train_iter_A.reset()
    train_iter_B.reset()

    log_list = []
    image_path = output_path + "image/"
    disA_model_path = output_path + "dis_A/"
    disB_model_path = output_path + "dis_B/"
    genAB_model_path = output_path + "gen_AB/"
    genBA_model_path = output_path + "gen_BA/"
    os.mkdir(output_path)
    os.mkdir(image_path)
    os.mkdir(disA_model_path)
    os.mkdir(disB_model_path)
    os.mkdir(genAB_model_path)
    os.mkdir(genBA_model_path)

    for epoch in range(epoch_count):
        d_a_loss_list = []
        d_b_loss_list = []
        g_AB_loss_list = []
        g_BA_loss_list = []
        while True:
            mini_batch_images_A = train_iter_A.next()
            mini_batch_images_A = np.array(mini_batch_images_A)
            mini_batch_images_A = (mini_batch_images_A - 128.0) / 128.0
            real_a = Variable(np.array(mini_batch_images_A))
            real_a.to_gpu(0)

            mini_batch_images_B = train_iter_B.next()
            mini_batch_images_B = np.array(mini_batch_images_B)
            mini_batch_images_B = (mini_batch_images_B - 128.0) / 128.0
            real_b = Variable(np.array(mini_batch_images_B))
            real_b.to_gpu(0)

            fake_b = g_ab(real_a)
            fake_a = g_ba(real_b)

            reconstr_a = g_ba(fake_b)
            reconstr_b = g_ab(fake_a)

            d_a_real_result = d_a(real_a)
            d_a_fake_result = d_a(fake_a)
            loss_d_a = loss_dis(batch_size, d_a_real_result, d_a_fake_result)

            d_b_real_result = d_b(real_b)
            d_b_fake_result = d_b(fake_b)
            loss_d_b = loss_dis(batch_size, d_b_real_result, d_b_fake_result)

            d_a.cleargrads()
            loss_d_a.backward()
            opt_d_a.update()

            d_b.cleargrads()
            loss_d_b.backward()
            opt_d_b.update()
            """generatorのloss計算"""
            loss_g_ab = loss_gen(batch_size, d_b_fake_result, real_a,
                                 reconstr_a, lamda)
            loss_g_ba = loss_gen(batch_size, d_a_fake_result, real_b,
                                 reconstr_b, lamda)

            g_ab.cleargrads()
            loss_g_ab.backward()
            opt_g_ab.update()

            g_ba.cleargrads()
            loss_g_ba.backward()
            opt_g_ba.update()

            loss_d_a.to_cpu()
            loss_d_b.to_cpu()
            loss_g_ab.to_cpu()
            loss_g_ba.to_cpu()

            iteration += batch_size
            d_a_loss_list.append(loss_d_a.array)
            d_b_loss_list.append(loss_d_b.array)
            g_AB_loss_list.append(loss_g_ab.array)
            g_BA_loss_list.append(loss_g_ba.array)

            if train_iter_A.is_new_epoch or train_iter_B.is_new_epoch:
                break

        real_a.to_cpu()
        fake_b.to_cpu()
        reconstr_a.to_cpu()
        real_b.to_cpu()
        fake_a.to_cpu()
        reconstr_b.to_cpu()
        real_a_images = real_a.array.transpose(0, 2, 3, 1)
        fake_b_images = fake_b.array.transpose(0, 2, 3, 1)
        reconstr_a_images = reconstr_a.array.transpose(0, 2, 3, 1)
        real_b_images = real_b.array.transpose(0, 2, 3, 1)
        fake_a_images = fake_a.array.transpose(0, 2, 3, 1)
        reconstr_b_images = reconstr_b.array.transpose(0, 2, 3, 1)
        data_io.output_images(image_path + str(epoch), real_a_images,
                              fake_b_images, reconstr_a_images, real_b_images,
                              fake_a_images, reconstr_b_images)

        print("epoch: " + str(epoch) + ", interation: " + str(iteration) + \
            ", d_A_loss: " + str(np.mean(d_a_loss_list)) + ", d_B_loss: " + str(np.mean(d_b_loss_list)) + \
            ", g_AB_loss: " + str(np.mean(g_AB_loss_list)) + ", g_BA_loss: " + str(np.mean(g_BA_loss_list)))

        log_json = {"epoch": str(epoch), "interation": str(iteration), \
            "d_A_loss": str(np.mean(d_a_loss_list)), "d_B_loss": str(np.mean(d_b_loss_list)), \
            "g_AB_loss": str(np.mean(g_AB_loss_list)), "g_BA_loss": str(np.mean(g_BA_loss_list))}
        log_list.append(log_json)
        with open(output_path + 'log.json', 'w') as log_file:
            json.dump(log_list, log_file, indent=4)

        if (epoch % 100 == 0):
            g_ab.to_cpu()
            g_ba.to_cpu()
            d_a.to_cpu()
            d_b.to_cpu()
            save_npz(genAB_model_path + str(epoch) + '.npz', g_ab)
            save_npz(genBA_model_path + str(epoch) + '.npz', g_ba)
            save_npz(disA_model_path + str(epoch) + '.npz', d_a)
            save_npz(disB_model_path + str(epoch) + '.npz', d_b)
            g_ab.to_gpu(0)
            g_ba.to_gpu(0)
            d_a.to_gpu(0)
            d_b.to_gpu(0)

    g_ab.to_cpu()
    g_ba.to_cpu()
    d_a.to_cpu()
    d_b.to_cpu()
    save_npz(genAB_model_path + 'last.npz', g_ab)
    save_npz(genBA_model_path + 'last.npz', g_ba)
    save_npz(disA_model_path + 'last.npz', d_a)
    save_npz(disB_model_path + 'last.npz', d_b)
Example #8
def train(batch_size, epoch_count, dataset_folder_path, n_hidden, output_path):
    dataset = data_io.dataset_load(dataset_folder_path)
    train_iter = SerialIterator(dataset, batch_size, repeat=True, shuffle=True)

    gen = Generator(n_hidden=n_hidden)
    dis = Discriminator()

    gen.to_gpu(0)
    dis.to_gpu(0)

    opt_gen = chainer.optimizers.Adam(alpha=0.0002, beta1=0.5)
    opt_gen.setup(gen)
    opt_dis = chainer.optimizers.Adam(alpha=0.0002, beta1=0.5)
    opt_dis.setup(dis)

    iteration = 0
    train_iter.reset()

    log_list = []
    image_path = output_path + "image/"
    dis_model_path = output_path + "dis/"
    gen_model_path = output_path + "gen/"
    os.mkdir(output_path)
    os.mkdir(image_path)
    os.mkdir(dis_model_path)
    os.mkdir(gen_model_path)

    for epoch in range(epoch_count):
        d_loss_list = []
        g_loss_list = []
        while True:
            mini_batch_images = train_iter.next()
            mini_batch_images = np.array(mini_batch_images)
            mini_batch_images = (mini_batch_images - 128.0) / 128.0
            x_real = Variable(np.array(mini_batch_images))

            x_real.to_gpu(0)
            y_real = dis(x_real)

            noise = xp.random.uniform(-1,
                                      1, (batch_size, n_hidden),
                                      dtype=np.float32)
            z = Variable(noise)
            x_fake = gen(z, batch_size)
            y_fake = dis(x_fake)

            d_loss = loss_dis(batch_size, y_real, y_fake)
            g_loss = loss_gen(batch_size, y_fake)

            dis.cleargrads()
            d_loss.backward()
            opt_dis.update()

            gen.cleargrads()
            g_loss.backward()
            opt_gen.update()

            d_loss.to_cpu()
            g_loss.to_cpu()

            iteration += batch_size
            d_loss_list.append(d_loss.array)
            g_loss_list.append(g_loss.array)

            if train_iter.is_new_epoch:
                break

        x_fake.to_cpu()
        generated_images = x_fake.array
        generated_images = generated_images.transpose(0, 2, 3, 1)
        Image.fromarray(
            np.clip(generated_images[0] * 255, 0.0, 255.0).astype(
                np.uint8)).save(image_path + str(epoch) + ".png")

        print("epoch: " + str(epoch) + ", interation: " + str(iteration) +
              ", d_loss: " + str(np.mean(d_loss_list)) + ", g_loss: " +
              str(np.mean(g_loss_list)))

        log_json = {
            "epoch": str(epoch),
            "iteration": str(iteration),
            "d_loss": str(np.mean(d_loss_list)),
            "g_loss": str(np.mean(g_loss_list))
        }
        log_list.append(log_json)
        with open(output_path + 'log.json', 'w') as log_file:
            json.dump(log_list, log_file, indent=4)

        if (epoch % 100 == 0):
            dis.to_cpu()
            save_npz(dis_model_path + str(epoch) + '.npz', dis)
            gen.to_cpu()
            save_npz(gen_model_path + str(epoch) + '.npz', gen)
            gen.to_gpu(0)
            dis.to_gpu(0)

    logGraph.save_log_graph(output_path + 'log.json',
                            output_path + "lossGraph.png")
    dis.to_cpu()
    save_npz(dis_model_path + 'last.npz', dis)
    gen.to_cpu()
    save_npz(gen_model_path + 'last.npz', gen)
            f'train mean loss:  {sum_loss / train_count}, accuracy: {sum_accuracy / train_count}'
        )

        # evaluation: run over the test dataset once after each training epoch
        sum_accuracy = 0
        sum_loss = 0
        # enable evaluation mode
        with configuration.using_config('train', False):
            # this is optional but can reduce computational overhead
            with chainer.using_config('enable_backprop', False):
                for batch in test_iter:
                    x, t = convert.concat_examples(batch, gpuid)
                    x = chainer.Variable(x)
                    t = chainer.Variable(t)
                    loss = model(x, t)
                    sum_loss += float(loss.data) * len(t.data)
                    sum_accuracy += (float(model.accuracy.data) * len(t.data))
        # restore the iterator state
        test_iter.reset()
        print(
            f'test mean loss: {sum_loss / test_count}, accuracy: {sum_accuracy / test_count}'
        )
        sum_accuracy = 0
        sum_loss = 0

# save the model and the optimizer
print('save the model')
serializers.save_npz('{}/mlp.model'.format(outdir), model)

# load the model
# serializers.load_npz('{}/mlp.model'.format(outdir),model)
Example #10
def main(args):
    random.seed(0)
    np.random.seed(0)
    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        cuda.cupy.random.seed(0)

    dataset, id2ene = load_dataset(args.dataset, args.features, args.redirects)
    print(f'# of examples in dataset: {len(dataset)}')

    def batch2tensors(batch, device):
        xp = cuda.cupy if device >= 0 else np

        xf = xp.zeros((len(batch), args.n_feature), dtype='f')
        xe = xp.zeros((len(batch), args.embed_size), dtype='f')
        t = xp.zeros((len(batch), len(id2ene)), dtype='i')

        for i, item in enumerate(batch):
            for feature_id in item['feature_ids']:
                if feature_id < args.n_feature:
                    xf[i, feature_id] = 1.0

            if item['embedding']:
                xe[i] = xp.array(item['embedding'], dtype='f')

            for ene_id in item['ene_ids']:
                t[i, ene_id] = 1

        x = xp.concatenate((xf, xe), axis=1)

        return x, t

    cv_datasets = get_cross_validation_datasets(dataset, args.cv)
    ys = []
    ts = []
    for split_idx, cv_dataset in enumerate(cv_datasets):
        print(f'cross validation ({split_idx + 1}/{len(cv_datasets)})')
        train, test = cv_dataset
        train_iter = SerialIterator(train, batch_size=args.batch)
        test_iter = SerialIterator(test,
                                   batch_size=args.batch,
                                   repeat=False,
                                   shuffle=False)

        model = ENEClassifier(in_size=args.n_feature + args.embed_size,
                              hidden_size=args.hidden_size,
                              out_size=len(id2ene))

        if args.gpu >= 0:
            model.to_gpu(args.gpu)

        optimizer = optimizers.Adam()
        optimizer.setup(model)
        updater = StandardUpdater(train_iter,
                                  optimizer,
                                  converter=batch2tensors,
                                  device=args.gpu)

        trainer = Trainer(updater, (args.epoch, 'epoch'), out=args.out_dir)
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.snapshot_object(
                model, filename='epoch_{.updater.epoch}.model'))
        trainer.extend(
            extensions.Evaluator(test_iter,
                                 model,
                                 converter=batch2tensors,
                                 device=args.gpu))
        trainer.extend(
            extensions.PrintReport(
                ['epoch', 'main/loss', 'validation/main/loss',
                 'elapsed_time']))
        trainer.extend(extensions.ProgressBar(update_interval=1))

        trainer.run()

        test_iter.reset()
        for batch in test_iter:
            x, t = batch2tensors(batch, device=args.gpu)
            with chainer.using_config('train', False):
                y = model.predict(x)

            ys.append(y)
            ts.append(t)

    y_all = F.concat(ys, axis=0)
    t_all = F.concat(ts, axis=0)

    prediction_matrix = (y_all.data >= 0.5).astype('f')
    reference_matrix = (t_all.data == 1).astype('f')
    accuracy_matrix = prediction_matrix * reference_matrix

    eb_pred = prediction_matrix.sum(
        axis=1)  # entity-based num. of predicted classes
    eb_ref = reference_matrix.sum(
        axis=1)  # entity-based num. of reference classes
    eb_acc = accuracy_matrix.sum(
        axis=1)  # entity-based num. of accurate classes

    eb_nopred = (eb_pred == 0.).astype('f')  # for avoiding zero-division
    eb_precision = (eb_acc / (eb_pred + eb_nopred)).mean()
    eb_recall = (eb_acc / eb_ref).mean()
    eb_f1 = (2 * eb_acc / (eb_pred + eb_ref)).mean()

    cb_pred = prediction_matrix.sum(
        axis=0)  # class-based num. of predicted examples
    cb_ref = reference_matrix.sum(
        axis=0)  # class-based num. of reference examples
    cb_acc = accuracy_matrix.sum(
        axis=0)  # class-based num. of accurate examples

    cb_nopred = (cb_pred == 0.).astype('f')  # for avoiding zero-division
    cb_macro_precision = (cb_acc / (cb_pred + cb_nopred)).mean()
    cb_macro_recall = (cb_acc / cb_ref).mean()
    cb_macro_f1 = (2 * cb_acc / (cb_pred + cb_ref)).mean()

    cb_micro_precision = cb_acc.sum() / cb_pred.sum()
    cb_micro_recall = cb_acc.sum() / cb_ref.sum()
    cb_micro_f1 = (2 * cb_acc.sum()) / (cb_pred.sum() + cb_ref.sum())

    print(f'Entity-based Precision:      {float(eb_precision):.2%}')
    print(f'Entity-based Recall:         {float(eb_recall):.2%}')
    print(f'Entity-based F1 score:       {float(eb_f1):.2%}')

    print(f'Class-based macro Precision: {float(cb_macro_precision):.2%}')
    print(f'Class-based macro Recall:    {float(cb_macro_recall):.2%}')
    print(f'Class-based macro F1 score:  {float(cb_macro_f1):.2%}')

    print(f'Class-based micro Precision: {float(cb_micro_precision):.2%}')
    print(f'Class-based micro Recall:    {float(cb_micro_recall):.2%}')
    print(f'Class-based micro F1 score:  {float(cb_micro_f1):.2%}')

    print(f'writing out classification results')
    with open(Path(args.out_dir) / 'classification_result.json', 'w') as fo:
        for i, item in tqdm(enumerate(dataset)):
            title = item['title']
            predicted_classes = [
                id2ene[j] for j, v in enumerate(prediction_matrix[i])
                if v == 1.0
            ]
            reference_classes = [
                id2ene[j] for j, v in enumerate(reference_matrix[i])
                if v == 1.0
            ]
            out = {
                'title': title,
                'prediction': predicted_classes,
                'reference': reference_classes
            }
            print(json.dumps(out, ensure_ascii=False), file=fo)