Beispiel #1
0
def main(evaluation_set,
         model,
         model_seed,
         domain='alarm',
         data_seed=0,
         batchsize=20,
         max_epoch=100,
         patience=20,
         tot_acq=160,
         use_gpus='all',
         workers_per_gpu=1,
         num_random_samples=100):
    """Compute and store the ``'random'`` acquisition baseline.

    Evaluates ``num_random_samples`` random permutations of the pool indices
    through a ``TrainScheduler`` and stores the element-wise mean of the
    returned curves with ``store_baseline`` (the stored order is ``None``).

    Args:
        evaluation_set: Forwarded to the scheduler and to ``store_baseline``.
        model: Model identifier, forwarded unchanged.
        model_seed: Model seed, forwarded unchanged.
        domain: Data domain, forwarded unchanged.
        data_seed: Data seed, forwarded unchanged.
        batchsize: Forwarded unchanged.
        max_epoch: Forwarded unchanged.
        patience: Forwarded unchanged.
        tot_acq: Forwarded unchanged.
        use_gpus: Forwarded to the scheduler only.
        workers_per_gpu: Forwarded to the scheduler only.
        num_random_samples: Number of random orderings to evaluate.
    """
    # Hard-coded number of pool indices to permute.
    # NOTE(review): presumably the size of the dataset's pool split - confirm.
    pool_size = 800

    scheduler_config = {
        'evaluation_set': evaluation_set,
        'model': model,
        'model_seed': model_seed,
        'domain': domain,
        'data_seed': data_seed,
        'batchsize': batchsize,
        'max_epoch': max_epoch,
        'patience': patience,
        'tot_acq': tot_acq,
        'use_gpus': use_gpus,
        'workers_per_gpu': workers_per_gpu,
    }
    scheduler = TrainScheduler(Namespace(**scheduler_config))

    pool_order = [*range(pool_size)]
    sampled_curves = []
    for _ in trange(num_random_samples):
        # Shuffled in place: every draw permutes the previous ordering and the
        # same list object is handed to the scheduler each time.
        random.shuffle(pool_order)
        sampled_curves.append(scheduler.evaluate_order(pool_order))

    mean_curve = np.mean(sampled_curves, axis=0)
    store_baseline(mean_curve, None, 'random', evaluation_set, model,
                   model_seed, domain, data_seed, batchsize, max_epoch,
                   patience, tot_acq)
def main(criterion, model='lstm', model_seed=0, domain='alarm', data_seed=0,
         batchsize=20, max_epoch=100, patience=20, tot_acq=160, evaluation_set='test', gpu_idx=0, log_dir='logs'):
    """Load or compute the ``idmr-<criterion>`` baseline and plot it.

    The stored curve is looked up with ``load_baseline``. If that raises
    ``KeyError``, sentence-length buckets are built from the warmstart,
    train_valid and pool sentences, ``idmr`` is run with those buckets and the
    result is stored. The curves are then plotted against the other baselines
    and saved to ``../figures/intent_classification/idmr_<criterion>.pdf``.

    Args:
        criterion: Acquisition criterion; the IDMR curve is added to the plot
            only for ``'max-entropy'`` and ``'bald'``.
        model: Model identifier; ``'roberta'`` is rejected.
        model_seed: Model seed, forwarded unchanged.
        domain: Key into the top-level dict of ``data/TOP.pkl``.
        data_seed: Key into that domain's ``'seeds'`` entry.
        batchsize: Also the step between x-axis positions.
        max_epoch: Forwarded unchanged.
        patience: Forwarded unchanged.
        tot_acq: Total number of acquired points covered by the x axis.
        evaluation_set: Forwarded unchanged.
        gpu_idx: Forwarded to ``idmr``.
        log_dir: Forwarded to ``load_optimal``.

    Raises:
        AssertionError: If ``model == 'roberta'``.
    """
    assert model != 'roberta', 'IDMR for RoBERTa model is not implemented'
    # Use a context manager so the pickle file handle is closed promptly
    # instead of being left to the garbage collector.
    with open('data/TOP.pkl', 'rb') as data_file:
        data = pickle.load(data_file)[domain]['seeds'][data_seed]
    N_warmstart = len(data['warmstart'])

    # Arguments shared, in this order, by load_baseline/store_baseline/
    # load_optimal and (after the leading positionals) idmr.
    run_args = (model, model_seed, domain, data_seed, batchsize, max_epoch, patience, tot_acq)
    try:
        idmr_curve = load_baseline(f'idmr-{criterion}', evaluation_set, *run_args)['curve']
    except KeyError:
        # No stored result: derive length buckets and their proportions from
        # every sentence in the warmstart, train_valid and pool splits.
        accessible_set = data['warmstart'] + data['train_valid'] + data['pool']
        accessible_sents, _ = zip(*accessible_set)
        lens = [len(nltk.word_tokenize(s)) for s in accessible_sents]
        lens_ct = Counter(lens)
        # Dense histogram over 0..max length (missing lengths count as 0).
        cts = np.array([lens_ct[l] for l in range(max(lens_ct) + 1)])
        props = cts / cts.sum()
        # NOTE(review): 0.08 is presumably the minimum mass per group - confirm
        # against group_proportions.
        groups = group_proportions(props, 0.08)
        lens_proportions = dict()
        for i, g in enumerate(groups):
            lo, hi = min(g), max(g)
            sum_props = sum(props[p] for p in range(lo, hi + 1))
            if i == len(groups) - 1:
                # The last bucket's upper bound is widened to 100.
                hi = 100
            lens_proportions[(lo, hi)] = sum_props
        idmr_curve, idmr_order = idmr(data, criterion, evaluation_set, lens_proportions, *run_args, gpu_idx)
        print(idmr_curve)
        print(np.mean(idmr_curve))
        store_baseline(idmr_curve, idmr_order, f'idmr-{criterion}', evaluation_set, *run_args)

    plt.figure()
    xs = list(range(N_warmstart, N_warmstart + tot_acq + 1, batchsize))
    baselines = [('max-entropy', 'Max-Entropy', 0), ('bald', 'BALD', 1), ('random', 'Random', 4)]
    if criterion == 'max-entropy':
        baselines.append(('idmr-max-entropy', 'IDMR Max-Ent.', 6))
    elif criterion == 'bald':
        baselines.append(('idmr-bald', 'IDMR BALD', 6))
    optimal_order, _, _ = load_optimal(log_dir, *run_args)
    # model_seed is intentionally passed twice, as in the original call.
    plot_curves(optimal_order, xs, evaluation_set, model, model_seed, model_seed, domain, data_seed,
                batchsize, max_epoch, patience, tot_acq, None, None, baselines)
    plt.xticks(np.linspace(N_warmstart, tot_acq + N_warmstart, 5))
    plt.xlabel('# Data Points')
    plt.ylabel('F1')
    plt.title('Input Distribution-Matching Regularization')
    plt.savefig(f'../figures/intent_classification/idmr_{criterion}.pdf', bbox_inches='tight')
Beispiel #3
0
def main(criterion,
         evaluation_set,
         model_seed=0,
         data_seed=0,
         batchsize=25,
         max_epoch=100,
         patience=20,
         tot_acq=250,
         gpu_idx=0):
    """Run an active-learning loop and store the resulting F1 curve.

    Trains on the warmstart set, then repeatedly acquires ``batchsize`` pool
    items with ``acquire``, moves them into the training set, retrains a fresh
    trainer and records the F1 on ``evaluation_set``. The curve and the
    acquisition order are stored with ``store_baseline`` under ``criterion``.

    Args:
        criterion: Acquisition criterion passed to ``acquire``; also the name
            under which the result is stored.
        evaluation_set: Key of the split in the loaded data to evaluate on.
        model_seed: Seed passed to ``get_trainer``.
        data_seed: Key into the ``'seeds'`` entry of ``data/restaurant.pkl``.
        batchsize: Passed to ``trainer.train`` and used as acquisition size.
        max_epoch: Passed to ``trainer.train``.
        patience: Passed to ``trainer.train``.
        tot_acq: Total number of items to acquire.
        gpu_idx: CUDA device index.
    """
    # Context manager closes the data file instead of leaking the handle.
    with open('data/restaurant.pkl', 'rb') as data_file:
        data = pickle.load(data_file)['seeds'][data_seed]
    train_set = copy(data['warmstart'])
    model_sel_set = copy(data['train_valid'])
    # Pool items keyed by their original position so acquired indices stay
    # valid as items are removed.
    pool_dict = dict(enumerate(data['pool']))
    eval_sents, eval_tags = zip(*data[evaluation_set])
    device = f'cuda:{gpu_idx}'

    def train_and_score():
        """Train a fresh trainer on the current train_set; return (trainer, f1)."""
        fresh_trainer = get_trainer(model_seed, device=device)
        fresh_trainer.train(train_set,
                            model_sel_set,
                            batchsize,
                            max_epoch,
                            patience,
                            verbose=False)
        score = fresh_trainer.evaluate_f1(fresh_trainer.best_model, eval_sents,
                                          eval_tags)
        return fresh_trainer, score

    trainer, f1 = train_and_score()
    curve = [f1]
    data_order = []
    for _ in trange(int(tot_acq / batchsize)):
        acquire_idxs = acquire(trainer.best_model, pool_dict, criterion,
                               batchsize)
        data_order.extend(acquire_idxs)
        for idx in acquire_idxs:
            # Move the acquired item from the pool into the training set.
            train_set.append(pool_dict.pop(idx))
        trainer, f1 = train_and_score()
        curve.append(f1)
    print(curve)
    print(np.mean(curve))
    store_baseline(curve, data_order, criterion, evaluation_set, model_seed,
                   data_seed, batchsize, max_epoch, patience, tot_acq)
Beispiel #4
0
def main(criterion, evaluation_set, model_seed, data_seed, batchsize, max_epoch, patience, tot_acq, log_dir, gpu_idx):
    """Load or compute the ``idmr-<criterion>`` baseline and plot it.

    The stored curve is looked up with ``load_baseline``. If that raises
    ``KeyError``, length buckets are built from the warmstart, train_valid and
    pool sentences, ``idmr`` is run and its result stored. The curve is
    printed, plotted next to the plain criterion and the random baseline, and
    saved to ``../figures/named_entity_recognition/idmr_<criterion>.pdf``.

    Args:
        criterion: One of ``'min-confidence'``,
            ``'normalized-min-confidence'`` or ``'longest'``.
        evaluation_set: Forwarded unchanged.
        model_seed: Forwarded unchanged.
        data_seed: Key into the ``'seeds'`` entry of ``data/restaurant.pkl``.
        batchsize: Also the step between x-axis positions.
        max_epoch: Forwarded unchanged.
        patience: Forwarded unchanged.
        tot_acq: Total number of acquired points covered by the x axis.
        log_dir: Forwarded to ``load_optimal``.
        gpu_idx: Forwarded to ``idmr``.

    Raises:
        KeyError: If ``criterion`` has no display name (raised at plot time).
    """
    # Context manager closes the data file instead of leaking the handle.
    with open('data/restaurant.pkl', 'rb') as data_file:
        data = pickle.load(data_file)['seeds'][data_seed]
    N_warmstart = len(data['warmstart'])

    # Arguments shared, in this order, by load_baseline/store_baseline/load_optimal.
    run_args = (model_seed, data_seed, batchsize, max_epoch, patience, tot_acq)
    try:
        idmr_curve = load_baseline(f'idmr-{criterion}', evaluation_set, *run_args)['curve']
    except KeyError:
        accessible_set = data['warmstart'] + data['train_valid'] + data['pool']
        accessible_sents, _ = zip(*accessible_set)
        # NOTE(review): len(s) is the token count only if sentences are stored
        # as token sequences - confirm against the data format.
        lens = [len(s) for s in accessible_sents]
        lens_ct = Counter(lens)
        max_len = max(lens_ct)
        # Dense histogram over 0..max_len (missing lengths count as 0).
        cts = np.array([lens_ct[l] for l in range(max_len + 1)])
        props = cts / cts.sum()
        # NOTE(review): 0.13 is presumably the minimum mass per group - confirm
        # against group_proportions.
        groups = group_proportions(props, 0.13)
        lens_proportions = dict()
        for i, g in enumerate(groups):
            lo, hi = min(g), max(g)
            sum_props = sum(props[p] for p in range(lo, hi + 1))
            if i == len(groups) - 1:
                # The last bucket's upper bound is widened to 100.
                hi = 100
            lens_proportions[(lo, hi)] = sum_props
        # idmr takes no data_seed here, so run_args cannot be splatted.
        idmr_curve, idmr_order = idmr(data, evaluation_set, criterion, lens_proportions, model_seed,
                                      batchsize, max_epoch, patience, tot_acq, gpu_idx)
        store_baseline(idmr_curve, idmr_order, f'idmr-{criterion}', evaluation_set, *run_args)
    print(idmr_curve)
    print(np.mean(idmr_curve))

    plt.figure()
    optimal_order, _, _ = load_optimal(log_dir, *run_args)
    xs = list(range(N_warmstart, N_warmstart + tot_acq + 1, batchsize))
    display_name = {'min-confidence': 'Min-Confidence', 'normalized-min-confidence': 'Norm.-Min-Conf.',
                    'longest': 'Longest'}[criterion]
    # "Min-Confidence" is shortened to "Min-Conf." in the IDMR legend entry.
    baselines = [(criterion, display_name, 0), (f'idmr-{criterion}', f'IDMR-{display_name.replace("idence", ".")}', 6),
                 ('random', 'Random', 4)]
    # model_seed is intentionally passed twice, as in the original call.
    plot_curves(optimal_order, xs, evaluation_set, model_seed, model_seed, data_seed, batchsize, max_epoch, patience,
                tot_acq, None, None, baselines)
    plt.tight_layout()
    plt.savefig(f'../figures/named_entity_recognition/idmr_{criterion}.pdf', bbox_inches='tight')
def main(model_seed=0,
         data_seed=0,
         batchsize=25,
         max_epoch=100,
         patience=20,
         tot_acq=300,
         evaluation_set='test',
         log_dir='logs',
         tsne_dim=3,
         num_clusters=5,
         gpu_idx=0):
    """Load or compute the ``idmr-max-entropy`` baseline and plot it.

    The stored curve is looked up with ``load_baseline``. If that raises
    ``KeyError``, ``idmr`` is run and its result stored. The curve is printed,
    plotted together with the other baselines, and saved to
    ``../figures/object_classification/idmr.pdf``.

    Args:
        model_seed: Forwarded unchanged.
        data_seed: Passed to ``load_data`` and the baseline helpers.
        batchsize: Also the step between x-axis positions.
        max_epoch: Forwarded unchanged.
        patience: Forwarded unchanged.
        tot_acq: Total number of acquired points covered by the x axis.
        evaluation_set: Forwarded unchanged.
        log_dir: Forwarded to ``load_optimal``.
        tsne_dim: Forwarded to ``idmr``.
        num_clusters: Forwarded to ``idmr``.
        gpu_idx: Forwarded to ``idmr``.
    """
    data = load_data(data_seed)
    N_warmstart = len(data['warmstart'])

    # Arguments shared, in this order, by load_baseline/store_baseline/load_optimal.
    run_args = (model_seed, data_seed, batchsize, max_epoch, patience, tot_acq)
    try:
        idmr_curve = load_baseline('idmr-max-entropy', evaluation_set,
                                   *run_args)['curve']
    except KeyError:
        # `data` loaded above is still unmodified (only its warmstart length
        # was read), so it is reused rather than loading the dataset again.
        idmr_curve, idmr_order = idmr(data, evaluation_set, model_seed,
                                      batchsize, max_epoch, patience, tot_acq,
                                      tsne_dim, num_clusters, gpu_idx)
        store_baseline(idmr_curve, idmr_order, 'idmr-max-entropy',
                       evaluation_set, *run_args)
    print(idmr_curve)
    print(np.mean(idmr_curve))

    plt.figure()
    xs = list(range(N_warmstart, N_warmstart + tot_acq + 1, batchsize))
    baselines = [('max-entropy', 'Max-Entropy', 0), ('bald', 'BALD', 1),
                 ('random', 'Random', 4),
                 ('idmr-max-entropy', 'IDMR Max-Ent.', 6)]
    optimal_order, _, _ = load_optimal(log_dir, *run_args)
    # model_seed is intentionally passed twice, as in the original call.
    plot_curves(optimal_order, xs, evaluation_set, model_seed, model_seed,
                data_seed, batchsize, max_epoch, patience, tot_acq, None, None,
                baselines)
    plt.title('IDMR Performance Curve')
    plt.savefig('../figures/object_classification/idmr.pdf',
                bbox_inches='tight')
def main(criterion,
         evaluation_set,
         model,
         model_seed,
         domain='alarm',
         data_seed=0,
         batchsize=20,
         max_epoch=100,
         patience=20,
         tot_acq=160,
         gpu_idx=0):
    """Run one active-learning baseline and store its curve and order.

    Dispatches to ``active_learn_roberta`` when ``model == 'roberta'`` and to
    ``active_learn`` otherwise, prints the curve with its mean, and stores the
    result with ``store_baseline`` under ``criterion``.

    Args:
        criterion: Acquisition criterion; also the stored baseline name.
        evaluation_set: Forwarded unchanged.
        model: Model identifier; selects the learning routine.
        model_seed: Forwarded unchanged.
        domain: Forwarded unchanged.
        data_seed: Forwarded unchanged.
        batchsize: Forwarded unchanged.
        max_epoch: Forwarded unchanged.
        patience: Forwarded unchanged.
        tot_acq: Forwarded unchanged.
        gpu_idx: Forwarded to the learning routine.
    """
    # Both routines take the same positional arguments, so only the callee
    # differs between the two model families.
    learn = active_learn_roberta if model == 'roberta' else active_learn
    curve, order = learn(criterion, evaluation_set, model, model_seed, domain,
                         data_seed, batchsize, max_epoch, patience, tot_acq,
                         gpu_idx)
    # Message typo fixed ("Peformance" -> "Performance").
    print(f'Performance curve {curve}, quality {np.mean(curve):0.3f}')
    store_baseline(curve, order, criterion, evaluation_set, model, model_seed,
                   domain, data_seed, batchsize, max_epoch, patience, tot_acq)
def main(criterion,
         evaluation_set='test',
         model_seed=0,
         data_seed=0,
         batchsize=25,
         max_epoch=100,
         patience=20,
         tot_acq=300,
         gpu_idx=0,
         num_inference_steps=100,
         num_joint_entropy_samples=100000,
         num_eval_steps=5):
    """Run an active-learning loop and store the resulting accuracy curve.

    Trains on the warmstart set, then repeatedly acquires ``batchsize`` pool
    items with ``acquire``, appends them to the training arrays, retrains a
    fresh trainer and records the accuracy on ``evaluation_set``. The curve and
    the acquisition order are stored with ``store_baseline``.

    Args:
        criterion: Acquisition criterion passed to ``acquire``; also the name
            under which the result is stored.
        evaluation_set: Key of the split in the loaded data to evaluate on.
        model_seed: Seed passed to ``get_trainer``.
        data_seed: Seed passed to ``load_data``.
        batchsize: Passed to ``trainer.train`` and used as acquisition size.
        max_epoch: Passed to ``trainer.train``.
        patience: Passed to ``trainer.train``.
        tot_acq: Total number of items to acquire.
        gpu_idx: Device argument for ``Tensor.to``, ``get_trainer`` and
            ``acquire``.
        num_inference_steps: Forwarded to ``acquire``.
        num_joint_entropy_samples: Forwarded to ``acquire``.
        num_eval_steps: Passed to training/evaluation; replaced by ``None``
            unless ``criterion`` is ``'bald'`` or ``'batchbald'``.
    """
    data = load_data(data_seed)

    def to_device(features, labels):
        """Convert an (X, y) pair to float/long tensors on ``gpu_idx``."""
        return (torch.tensor(features).float().to(gpu_idx),
                torch.tensor(labels).long().to(gpu_idx))

    # The training arrays are only ever rebound (np.concatenate returns new
    # arrays), so no defensive copy of the warmstart arrays is needed.
    train_X, train_y = map(np.array, zip(*data['warmstart']))
    model_sel_X, model_sel_y = to_device(
        *map(np.array, zip(*data['model_sel'])))
    eval_X, eval_y = to_device(*map(np.array, zip(*data[evaluation_set])))

    # Pool items keyed by their original position so acquired indices stay
    # valid as items are removed.
    pool_dict = {i: (img, y) for i, (img, y) in enumerate(data['pool'])}

    # The original assigned mcdropout in an if/else and then overwrote it
    # unconditionally; only the num_eval_steps reset had any effect.
    if criterion not in ('bald', 'batchbald'):
        num_eval_steps = None
    # Third positional argument of get_trainer.
    # NOTE(review): presumably enables MC-dropout in the model - confirm.
    mcdropout = 'bald' in criterion

    def train_and_score():
        """Train a fresh trainer on the current arrays; return (trainer, acc)."""
        fresh_trainer = get_trainer(model_seed, gpu_idx, mcdropout)
        fresh_trainer.train(to_device(train_X, train_y),
                            (model_sel_X, model_sel_y),
                            batchsize,
                            max_epoch,
                            patience,
                            num_eval_steps,
                            verbose=False)
        score = fresh_trainer.evaluate_acc(fresh_trainer.best_model, eval_X,
                                           eval_y, num_eval_steps)
        return fresh_trainer, score

    trainer, acc = train_and_score()
    curve = [acc]
    data_order = []
    for _ in trange(int(tot_acq / batchsize)):
        idxs = acquire(trainer.best_model, pool_dict, criterion, batchsize,
                       gpu_idx, num_inference_steps, num_joint_entropy_samples)
        # extend() avoids re-copying the whole order list every round.
        data_order.extend(idxs)
        new_X = np.stack([pool_dict[i][0] for i in idxs], axis=0)
        new_y = np.array([pool_dict[i][1] for i in idxs])
        train_X = np.concatenate((train_X, new_X), axis=0)
        train_y = np.concatenate((train_y, new_y), axis=0)
        for idx in idxs:
            del pool_dict[idx]
        trainer, acc = train_and_score()
        curve.append(acc)
    print(curve)
    print(np.mean(curve))
    store_baseline(curve, data_order, criterion, evaluation_set, model_seed,
                   data_seed, batchsize, max_epoch, patience, tot_acq)
def main(model_seed=0,
         data_seed=0,
         batchsize=25,
         max_epoch=100,
         patience=20,
         tot_acq=300,
         evaluation_set='test',
         gpu_idx=0,
         log_dir='logs'):
    """Load or compute the four ODMR max-entropy variants and plot them.

    For each of ``odmr-l1..l4-max-entropy`` the stored result is looked up with
    ``load_baseline``. A variant whose lookup raises ``KeyError`` is computed
    with ``odmr`` and stored. The figure has one panel of performance curves
    followed by one label-proportion panel per variant, and is saved to
    ``../figures/object_classification/odmr.pdf``.

    Args:
        model_seed: Forwarded unchanged.
        data_seed: Passed to ``load_data`` and the baseline helpers.
        batchsize: Also the step between x-axis positions.
        max_epoch: Forwarded unchanged.
        patience: Forwarded unchanged.
        tot_acq: Total number of acquired points covered by the x axis.
        evaluation_set: Split used for evaluation and for the reference labels.
        gpu_idx: Forwarded to ``odmr``.
        log_dir: Forwarded to ``load_optimal``.
    """
    data = load_data(data_seed)
    N_warmstart = len(data['warmstart'])

    _, warmstart_y = zip(*data['warmstart'])
    _, pool_y = zip(*data['pool'])
    _, eval_y = zip(*data[evaluation_set])

    # Arguments shared, in this order, by load_baseline/store_baseline/load_optimal.
    run_args = (model_seed, data_seed, batchsize, max_epoch, patience, tot_acq)

    # (baseline name, 3rd odmr argument, 4th odmr argument, panel title).
    # NOTE(review): per the panel titles, the string selects the reference
    # distribution and the flag selects ground-truth vs. predicted labels -
    # confirm against odmr.
    variants = [
        ('odmr-l1-max-entropy', 'test', True, 'Test + Groundtruth'),
        ('odmr-l2-max-entropy', 'accessible', True, 'Accessible + Groundtruth'),
        ('odmr-l3-max-entropy', 'test', False, 'Test + Predicted'),
        ('odmr-l4-max-entropy', 'accessible', False, 'Accessible + Predicted'),
    ]

    orders = []
    for name, reference, use_true_labels, _ in variants:
        # Each variant is cached independently: a single missing entry no
        # longer forces recomputing (and overwriting) the other three.
        try:
            stored = load_baseline(name, evaluation_set, *run_args)
            curve, order = stored['curve'], stored['order']
        except KeyError:
            curve, order = odmr(data,
                                evaluation_set,
                                reference,
                                use_true_labels,
                                model_seed,
                                batchsize,
                                max_epoch,
                                patience,
                                tot_acq,
                                gpu_idx,
                                smoothing=1)
            print(curve, np.mean(curve))
            store_baseline(curve, order, name, evaluation_set, *run_args)
        else:
            print(curve, np.mean(curve))
        orders.append(order)

    plt.figure(figsize=[20, 4])
    # Wide columns hold plots; narrow columns are a spacer (1) and the
    # reference meters (3, 5, 7).
    gs = GridSpec(ncols=9,
                  nrows=1,
                  width_ratios=[10, 0.3, 10, 0.7, 10, 0.7, 10, 0.7, 10],
                  wspace=0.05)
    plt.subplot(gs[0, 0])
    baselines = [('max-entropy', 'Max-Entropy', 0),
                 ('odmr-l1-max-entropy', 'Test + True', 1),
                 ('odmr-l2-max-entropy', 'Acce + True', 2),
                 ('odmr-l3-max-entropy', 'Test + Pred', 6),
                 ('odmr-l4-max-entropy', 'Acce + Pred', 8),
                 ('random', 'Random', 4)]
    xs = list(range(N_warmstart, N_warmstart + tot_acq + 1, batchsize))
    optimal_order, _, _ = load_optimal(log_dir, *run_args)
    # model_seed is intentionally passed twice, as in the original call.
    plot_curves(optimal_order, xs, evaluation_set, model_seed, model_seed,
                data_seed, batchsize, max_epoch, patience, tot_acq, None, None,
                baselines)
    plt.xlabel('# Data Points')
    plt.ylabel('Accuracy')
    plt.title('Object Classification')

    panel_cols = (2, 4, 6, 8)
    for col, order, (_, _, _, title) in zip(panel_cols, orders, variants):
        if col != panel_cols[0]:
            # A reference meter sits in the narrow column between two panels.
            plt.subplot(gs[0, col - 1])
            plot_ref_meter(eval_y)
        plt.subplot(gs[0, col])
        plot_label_proportion(order, warmstart_y, pool_y, eval_y)
        plt.title(title)
        plt.xlabel('# Data Points')
        plt.yticks([])

    plt.savefig('../figures/object_classification/odmr.pdf',
                bbox_inches='tight')
Beispiel #9
0
def main(criterion,
         model='lstm',
         model_seed=0,
         domain='alarm',
         data_seed=0,
         batchsize=20,
         max_epoch=100,
         patience=20,
         tot_acq=160,
         evaluation_set='test',
         gpu_idx=0,
         log_dir='logs'):
    """Load or compute the four ODMR variants of ``criterion`` and plot them.

    For each of ``odmr-l1..l4-<criterion>`` the stored result is looked up with
    ``load_baseline``. A variant whose lookup raises ``KeyError`` is computed
    with ``odmr`` and stored. The figure has one panel of performance curves
    followed by one label-distribution panel per variant, and is saved to
    ``../figures/intent_classification/odmr_<criterion>.pdf``.

    Args:
        criterion: Acquisition criterion. ``'max-entropy'`` and ``'bald'`` get
            dedicated legend labels; any other value is used as its own label.
        model: Forwarded unchanged.
        model_seed: Forwarded unchanged.
        domain: Key into the top-level dict of ``data/TOP.pkl``.
        data_seed: Key into that domain's ``'seeds'`` entry.
        batchsize: Also the step between x-axis positions of the curve panel.
        max_epoch: Forwarded unchanged.
        patience: Forwarded unchanged.
        tot_acq: Total number of acquired points covered by the x axes.
        evaluation_set: Forwarded unchanged.
        gpu_idx: Forwarded to ``odmr``.
        log_dir: Forwarded to ``load_optimal``.
    """
    # Context manager closes the data file instead of leaking the handle.
    with open('data/TOP.pkl', 'rb') as data_file:
        domain_data = pickle.load(data_file)[domain]
    # NOTE(review): the mapping presumably stores both label->id and id->label,
    # hence the halving - confirm against the data format.
    num_labels = len(domain_data['intent_label_mapping']) // 2
    data = domain_data['seeds'][data_seed]
    _, warmstart_labels = zip(*data['warmstart'])
    _, pool_labels = zip(*data['pool'])
    N_warmstart = len(data['warmstart'])

    # Arguments shared, in this order, by load_baseline/store_baseline/load_optimal.
    run_args = (model, model_seed, domain, data_seed, batchsize, max_epoch,
                patience, tot_acq)

    # (variant tag, 4th odmr argument, 5th odmr argument, panel title).
    # NOTE(review): per the panel titles, the string selects the reference
    # distribution and the flag selects ground-truth vs. predicted labels -
    # confirm against odmr.
    variants = [
        ('l1', 'test', True, 'Test + Groundtruth'),
        ('l2', 'accessible', True, 'Accessible + Groundtruth'),
        ('l3', 'test', False, 'Test + Predicted'),
        ('l4', 'accessible', False, 'Accessible + Predicted'),
    ]

    orders = []
    for tag, reference, use_true_labels, _ in variants:
        name = f'odmr-{tag}-{criterion}'
        # Each variant is cached independently: a single missing entry no
        # longer forces recomputing (and overwriting) the other three.
        try:
            stored = load_baseline(name, evaluation_set, *run_args)
            curve, order = stored['curve'], stored['order']
        except KeyError:
            # odmr takes no data_seed here, so run_args cannot be splatted.
            curve, order = odmr(data,
                                criterion,
                                evaluation_set,
                                reference,
                                use_true_labels,
                                model,
                                model_seed,
                                domain,
                                batchsize,
                                max_epoch,
                                patience,
                                tot_acq,
                                gpu_idx,
                                num_labels,
                                smoothing=1)
            print(curve, np.mean(curve))
            store_baseline(curve, order, name, evaluation_set, *run_args)
        else:
            print(curve, np.mean(curve))
        orders.append(order)

    plt.figure(figsize=[20, 4])
    # Wide columns hold plots; narrow columns are a spacer (1) and the
    # reference meters (3, 5, 7).
    gs = GridSpec(ncols=9,
                  nrows=1,
                  width_ratios=[10, 0.3, 10, 0.7, 10, 0.7, 10, 0.7, 10],
                  wspace=0.05)
    plt.subplot(gs[0, 0])
    # Built from `criterion` so an unlisted criterion no longer hits an
    # unbound `baselines` (NameError) after all the expensive work is done.
    criterion_label = {'max-entropy': 'Max-Entropy',
                       'bald': 'BALD'}.get(criterion, criterion)
    baselines = [(criterion, criterion_label, 0),
                 (f'odmr-l1-{criterion}', 'Test + True', 1),
                 (f'odmr-l2-{criterion}', 'Acce + True', 2),
                 (f'odmr-l3-{criterion}', 'Test + Pred', 6),
                 (f'odmr-l4-{criterion}', 'Acce + Pred', 8),
                 ('random', 'Random', 4)]
    curve_xs = list(range(N_warmstart, N_warmstart + tot_acq + 1, batchsize))
    optimal_order, _, _ = load_optimal(log_dir, *run_args)
    # model_seed is intentionally passed twice, as in the original call.
    plot_curves(optimal_order, curve_xs, evaluation_set, model, model_seed,
                model_seed, domain, data_seed, batchsize, max_epoch, patience,
                tot_acq, None, None, baselines)
    plt.xlabel('# Data Points')
    plt.ylabel('F1')
    plt.title('Intent Classification')

    # The label-distribution panels use one x position per data point.
    label_xs = list(range(N_warmstart, N_warmstart + tot_acq + 1))
    # The reference label distribution always comes from the 'test' split,
    # independent of `evaluation_set`.
    _, test_labels = zip(*data['test'])
    label_counter = Counter(test_labels)
    test_label_counts = np.array(
        [label_counter[i] for i in range(num_labels)])
    # Cumulative label proportions with a leading 0.
    label_ref_cdf = [
        0, *np.cumsum(test_label_counts / test_label_counts.sum()).flat
    ]

    panel_cols = (2, 4, 6, 8)
    for col, order, (_, _, _, title) in zip(panel_cols, orders, variants):
        if col != panel_cols[0]:
            # A reference meter sits in the narrow column between two panels.
            plt.subplot(gs[0, col - 1])
            plot_ref_meter(label_ref_cdf)
        plt.subplot(gs[0, col])
        plot_label_distribution(order, warmstart_labels, pool_labels,
                                num_labels, label_ref_cdf, label_xs)
        plt.title(title)
        plt.xlabel('# Data Points')
        plt.yticks([])

    plt.savefig(f'../figures/intent_classification/odmr_{criterion}.pdf',
                bbox_inches='tight')