def main(model_seeds=(0, 1, 2, 3, 4),
         data_seed=0,
         batchsize=25,
         max_epoch=100,
         patience=20,
         tot_acq=300,
         criterions=('max-entropy', 'bald', 'batchbald')):
    """Print mean curve quality for the optimal order, each heuristic
    criterion, and random acquisition, averaged over ``model_seeds``.

    Fix: the mutable list defaults were replaced with tuples so the default
    objects cannot be mutated across calls; iteration behavior is unchanged
    and callers passing their own lists are unaffected.
    """
    # Optimal quality: read cached per-seed test curves keyed by the run spec.
    optimal = []
    with shelve.open('statistics/perf_curves.shv') as curves:
        for seed in model_seeds:
            spec = f'{seed} {seed} {data_seed} {batchsize} {max_epoch} {patience} {tot_acq}'
            curve = curves[spec]['test']
            optimal.append(np.mean(curve))
    print(f'optimal quality: {np.mean(optimal)}')

    # Each heuristic criterion: average its cached baseline curve per seed.
    for criterion in criterions:
        heuristic = []
        for seed in model_seeds:
            curve = load_baseline(criterion, 'test', seed, data_seed,
                                  batchsize, max_epoch, patience,
                                  tot_acq)['curve']
            heuristic.append(np.mean(curve))
        print(f'{criterion} quality: {np.mean(heuristic)}')

    # Random acquisition baseline, same averaging.
    random = []
    for seed in model_seeds:
        curve = load_baseline('random', 'test', seed, data_seed, batchsize,
                              max_epoch, patience, tot_acq)['curve']
        random.append(np.mean(curve))
    print(f'Random AUC: {np.mean(random)}')
def main(model_seed=0, data_seed=0, batchsize=25, max_epoch=100, patience=20,
         tot_acq=250, criterions=('min-confidence', 'normalized-min-confidence', 'longest')):
    """Print curve quality for the optimal order, each heuristic criterion,
    and random acquisition, for a single model seed.

    Fix: the mutable list default for ``criterions`` was replaced with a
    tuple so the default cannot be mutated across calls; iteration behavior
    is unchanged.
    """
    # Optimal quality from the shelve cache of performance curves.
    with shelve.open('statistics/perf_curves.shv') as curves:
        spec = f'{model_seed} {model_seed} {data_seed} {batchsize} {max_epoch} {patience} {tot_acq}'
        curve = curves[spec]['test']
        print(f'optimal quality: {np.mean(curve)}')

    # Each heuristic criterion's cached baseline curve.
    for criterion in criterions:
        curve = load_baseline(criterion, 'test', model_seed, data_seed, batchsize, max_epoch, patience, tot_acq)['curve']
        print(f'{criterion} quality: {np.mean(curve)}')

    # Random acquisition baseline.
    curve = load_baseline('random', 'test', model_seed, data_seed, batchsize, max_epoch, patience, tot_acq)['curve']
    print(f'Random AUC: {np.mean(curve)}')
# Esempio n. 3
# 0
def plot_curves(order, xs, evaluation_set, search_model_seed, eval_model_seed,
                data_seed, batchsize, max_epoch, patience, tot_acq, use_gpus,
                workers_per_gpu, baselines):
    """Plot the optimal performance curve for ``order`` plus every available
    baseline curve, and return the optimal curve.

    The optimal curve is read from the perf_curves shelve cache when a fresh
    entry exists (same spec AND the stored order matches ``order``);
    otherwise it is recomputed by evaluating the order with a TrainScheduler.

    baselines -- iterable of (name, display_name, color) triples; a baseline
    missing from the cache is skipped with a console message.

    Fix: the bare ``except:`` (which also swallowed KeyboardInterrupt and
    genuine programming errors) is narrowed to KeyError, the exception
    ``load_baseline`` raises on a cache miss elsewhere in this project.
    """
    legends = []
    spec = f'{search_model_seed} {eval_model_seed} {data_seed} {batchsize} {max_epoch} {patience} {tot_acq}'
    with shelve.open(abs_path('statistics/perf_curves.shv')) as curves:
        if spec in curves and curves[spec]['order'] == order:
            # Fresh cached copy: stored order matches the requested one.
            optimal_curve = curves[spec][evaluation_set]
        else:
            # Missing or stale entry: re-evaluate the order from scratch.
            eval_args = Namespace(model_seed=eval_model_seed,
                                  data_seed=data_seed,
                                  batchsize=batchsize,
                                  max_epoch=max_epoch,
                                  patience=patience,
                                  evaluation_set=evaluation_set,
                                  tot_acq=tot_acq,
                                  use_gpus=use_gpus,
                                  workers_per_gpu=workers_per_gpu)
            scheduler = TrainScheduler(eval_args)
            optimal_curve = scheduler.evaluate_order(order)
    plt.plot(xs, optimal_curve, 'C3-o')
    legends.append(f'Optimal: {np.mean(optimal_curve):0.3f}')
    for name, display_name, color in baselines:
        try:
            curve = load_baseline(name, evaluation_set, eval_model_seed,
                                  data_seed, batchsize, max_epoch, patience,
                                  tot_acq)['curve']
            plt.plot(xs, curve, f'C{color}-o')
            legends.append(f'{display_name}: {np.mean(curve):0.3f}')
        except KeyError:
            # Baseline not cached yet; skip it rather than abort the plot.
            print(f'{display_name} not found')
    plt.legend(legends)
    return optimal_curve
# Esempio n. 4
# 0
def get_baseline_model(args):
    """Construct (or resume) a baseline question-answering model.

    Depending on ``args``, either loads a checkpoint via
    ``utils.load_baseline`` or builds one of three architectures
    (LSTM, CNN+LSTM, CNN+LSTM+SA) from hyperparameters in ``args``.
    Returns ``(model, kwargs)`` where ``kwargs`` are the constructor
    arguments (useful for re-saving the model).

    Raises:
        ValueError: if ``args.model_type`` is not one of the known types.

    Fix: previously an unrecognized ``model_type`` (with no checkpoint to
    resume) fell through every branch and crashed later with a confusing
    ``NameError`` on ``model``; it now fails fast with a clear message.
    """
    vocab = utils.load_vocab(args.vocab_json)
    if args.baseline_start_from is not None:
        # Resume from a checkpoint; kwargs come back alongside the model.
        model, kwargs = utils.load_baseline(args.baseline_start_from)
    elif args.model_type == 'LSTM':
        kwargs = {
            'vocab': vocab,
            'rnn_wordvec_dim': args.rnn_wordvec_dim,
            'rnn_dim': args.rnn_hidden_dim,
            'rnn_num_layers': args.rnn_num_layers,
            'rnn_dropout': args.rnn_dropout,
            'fc_dims': parse_int_list(args.classifier_fc_dims),
            'fc_use_batchnorm': args.classifier_batchnorm == 1,
            'fc_dropout': args.classifier_dropout,
        }
        model = LstmModel(**kwargs)
    elif args.model_type == 'CNN+LSTM':
        kwargs = {
            'vocab': vocab,
            'rnn_wordvec_dim': args.rnn_wordvec_dim,
            'rnn_dim': args.rnn_hidden_dim,
            'rnn_num_layers': args.rnn_num_layers,
            'rnn_dropout': args.rnn_dropout,
            'cnn_feat_dim': parse_int_list(args.feature_dim),
            'cnn_num_res_blocks': args.cnn_num_res_blocks,
            'cnn_res_block_dim': args.cnn_res_block_dim,
            'cnn_proj_dim': args.cnn_proj_dim,
            'cnn_pooling': args.cnn_pooling,
            'fc_dims': parse_int_list(args.classifier_fc_dims),
            'fc_use_batchnorm': args.classifier_batchnorm == 1,
            'fc_dropout': args.classifier_dropout,
        }
        model = CnnLstmModel(**kwargs)
    elif args.model_type == 'CNN+LSTM+SA':
        kwargs = {
            'vocab': vocab,
            'rnn_wordvec_dim': args.rnn_wordvec_dim,
            'rnn_dim': args.rnn_hidden_dim,
            'rnn_num_layers': args.rnn_num_layers,
            'rnn_dropout': args.rnn_dropout,
            'cnn_feat_dim': parse_int_list(args.feature_dim),
            'stacked_attn_dim': args.stacked_attn_dim,
            'num_stacked_attn': args.num_stacked_attn,
            'fc_dims': parse_int_list(args.classifier_fc_dims),
            'fc_use_batchnorm': args.classifier_batchnorm == 1,
            'fc_dropout': args.classifier_dropout,
        }
        model = CnnLstmSaModel(**kwargs)
    else:
        raise ValueError(f'Unknown model_type: {args.model_type!r}')
    if model.rnn.token_to_idx != vocab['question_token_to_idx']:
        # Make sure new vocab is superset of old
        for k, v in model.rnn.token_to_idx.items():
            assert k in vocab['question_token_to_idx']
            assert vocab['question_token_to_idx'][k] == v
        # Adopt the (larger) vocab and grow the embedding table accordingly.
        for token, idx in vocab['question_token_to_idx'].items():
            model.rnn.token_to_idx[token] = idx
        kwargs['vocab'] = vocab
        model.rnn.expand_vocab(vocab['question_token_to_idx'])
    model.cuda()
    model.train()
    return model, kwargs
# Esempio n. 5
# 0
def main(model1='lstm', criterion1='bald', model2='cnn', criterion2='bald', model_seed=0, 
         domain='alarm', data_seed=0, batchsize=20, max_epoch=100, patience=20, tot_acq=160, log_dir='logs'):
    """Plot pairwise comparisons of acquisition orders between two models.

    Produces two figures: the relative ordering of the two models' optimal
    orders, and of their heuristic (criterion-driven) orders.
    """
    model_names = {'lstm': 'LSTM', 'cnn': 'CNN', 'aoe': 'AOE', 'roberta': 'RoBERTa'}
    criterion_names = {'max-entropy': 'Max-Entropy', 'bald': 'BALD'}
    # Shared trailing arguments for load_optimal / load_baseline.
    common = (domain, data_seed, batchsize, max_epoch, patience, tot_acq)

    # Optimal orders of the two models, plotted against each other.
    order_a, _, _ = load_optimal(log_dir, model1, model_seed, *common)
    order_b, _, _ = load_optimal(log_dir, model2, model_seed, *common)
    plot_relative_order(order_a, order_b,
                        f'{model_names[model1]}\nOptimal',
                        f'{model_names[model2]}\nOptimal')
    plt.savefig(f'../figures/intent_classification/relative_orders/{model1}_{model2}_optimal.pdf', 
        bbox_inches='tight')

    # Heuristic orders produced by each model's acquisition criterion.
    h_order_a = load_baseline(criterion1, 'test', model1, model_seed, *common)['order']
    h_order_b = load_baseline(criterion2, 'test', model2, model_seed, *common)['order']
    plot_relative_order(h_order_a, h_order_b,
                        f'{model_names[model1]}\n{criterion_names[criterion1]}',
                        f'{model_names[model2]}\n{criterion_names[criterion2]}')
    plt.savefig(f'../figures/intent_classification/relative_orders/{model1}_{model2}_heuristic.pdf', 
        bbox_inches='tight')
def compute_quality(criterion, search_model, search_seed, eval_model,
                    eval_seed, domain, data_seed, batchsize, max_epoch,
                    patience, tot_acq, use_gpus, workers_per_gpu, log_dir):
    """Return the mean performance-curve value of an acquisition order found
    under (criterion, search_model, search_seed) when evaluated with
    (eval_model, eval_seed) on the given domain/training configuration.

    Results of cross-model evaluations are cached in the model_transfer
    shelve; an entry is reused only if its stored order matches the one
    requested now (otherwise it is considered stale and recomputed).
    """
    # Random baseline: no search involved, just average the cached curve.
    if criterion == 'random':
        curve = load_baseline('random', 'test', eval_model, eval_seed, domain,
                              data_seed, batchsize, max_epoch, patience,
                              tot_acq)['curve']
        return np.mean(curve)
    # Cache key: search config | evaluation config | shared data/train config.
    spec1 = f'{criterion} {search_model} {search_seed}'
    spec2 = f'{eval_model} {eval_seed}'
    spec3 = f'{domain} {data_seed} {batchsize} {max_epoch} {patience} {tot_acq}'
    spec = f'{spec1} | {spec2} | {spec3}'
    if criterion == 'optimal':
        order, _, _ = load_optimal(log_dir, search_model, search_seed, domain,
                                   data_seed, batchsize, max_epoch, patience,
                                   tot_acq)
    else:
        # NOTE(review): this passes eval_seed while the cache key spec1 is
        # built from search_seed — it looks like search_seed was intended
        # here; confirm before changing, since existing cache entries were
        # produced under the current behavior.
        order = load_baseline(criterion, 'test', search_model, eval_seed,
                              domain, data_seed, batchsize, max_epoch,
                              patience, tot_acq)['order']
    with shelve.open('statistics/model_transfer.shv') as transfer:
        if spec in transfer and transfer[spec][
                'order'] == order:  # fresh copy of cache
            return np.mean(transfer[spec]['curve'])
        # not present or stale copy of cache
        # Re-train along the order with the evaluation model and refresh
        # the cache entry.
        train_args = Namespace(model=eval_model,
                               model_seed=eval_seed,
                               domain=domain,
                               data_seed=data_seed,
                               evaluation_set='test',
                               batchsize=batchsize,
                               max_epoch=max_epoch,
                               patience=patience,
                               tot_acq=tot_acq,
                               use_gpus=use_gpus,
                               workers_per_gpu=workers_per_gpu)
        scheduler = TrainScheduler(train_args)
        curve = scheduler.evaluate_order(order)
        transfer[spec] = {'curve': curve, 'order': order}
        return np.mean(curve)
# Esempio n. 7
# 0
def main(criterion, model='lstm', model_seed=0, domain='alarm', data_seed=0,
         batchsize=20, max_epoch=100, patience=20, tot_acq=160, evaluation_set='test', gpu_idx=0, log_dir='logs'):
    """Compute (or load from cache) the IDMR curve for ``criterion`` on an
    intent-classification domain and plot it against the standard baselines.

    Fixes: removed the dead assignment ``lens_proportions = None`` (it was
    unconditionally reassigned), the unused local ``N``, and the unused
    unpacking of ``plt.axis()``.
    """
    assert model != 'roberta', 'IDMR for RoBERTa model is not implemented'
    data = pickle.load(open('data/TOP.pkl', 'rb'))[domain]['seeds'][data_seed]
    N_warmstart = len(data['warmstart'])
    try:
        # Cached IDMR curve from a previous run, if any.
        idmr_curve = load_baseline(f'idmr-{criterion}', evaluation_set, model, model_seed, domain, data_seed,
                                   batchsize, max_epoch, patience, tot_acq)['curve']
    except KeyError:
        # Cache miss: build sentence-length group proportions over all
        # accessible data, run IDMR, and store the result.
        accessible_set = data['warmstart'] + data['train_valid'] + data['pool']
        accessible_sents, _ = zip(*accessible_set)
        lens = [len(nltk.word_tokenize(s)) for s in accessible_sents]
        lens_ct = Counter(lens)
        cts = np.array([lens_ct[l] for l in range(max(lens_ct.keys()) + 1)])
        props = cts / sum(cts)
        # Group adjacent lengths until each group carries >= 8% of the mass.
        groups = group_proportions(props, 0.08)
        lens_proportions = dict()
        for i, g in enumerate(groups):
            lo, hi = min(g), max(g)
            sum_props = sum(props[p] for p in range(lo, hi + 1))
            if i == len(groups) - 1:
                hi = 100  # last bucket is open-ended to absorb longer sentences
            lens_proportions[(lo, hi)] = sum_props
        idmr_curve, idmr_order = idmr(data, criterion, evaluation_set, lens_proportions, model, model_seed,
                                      domain, data_seed, batchsize, max_epoch, patience, tot_acq, gpu_idx)
        print(idmr_curve)
        print(np.mean(idmr_curve))
        store_baseline(idmr_curve, idmr_order, f'idmr-{criterion}', evaluation_set, model, model_seed,
                       domain, data_seed, batchsize, max_epoch, patience, tot_acq)
    # Plot IDMR against heuristic / random baselines and the optimal order.
    plt.figure()
    xs = list(range(N_warmstart, N_warmstart + tot_acq + 1, batchsize))
    baselines = [('max-entropy', 'Max-Entropy', 0), ('bald', 'BALD', 1), ('random', 'Random', 4)]
    if criterion == 'max-entropy':
        baselines.append(('idmr-max-entropy', 'IDMR Max-Ent.', 6))
    elif criterion == 'bald':
        baselines.append(('idmr-bald', 'IDMR BALD', 6))
    optimal_order, _, _ = load_optimal(log_dir, model, model_seed, domain, data_seed,
                                       batchsize, max_epoch, patience, tot_acq)
    plot_curves(optimal_order, xs, evaluation_set, model, model_seed, model_seed, domain, data_seed,
                batchsize, max_epoch, patience, tot_acq, None, None, baselines)
    plt.xticks(np.linspace(N_warmstart, tot_acq + N_warmstart, 5))
    plt.xlabel('# Data Points')
    plt.ylabel('F1')
    plt.title('Input Distribution-Matching Regularization')
    plt.savefig(f'../figures/intent_classification/idmr_{criterion}.pdf', bbox_inches='tight')
# Esempio n. 8
# 0
def main(criterion, evaluation_set, model_seed, data_seed, batchsize, max_epoch, patience, tot_acq, log_dir, gpu_idx):
    """Compute (or load from cache) the IDMR curve for ``criterion`` on the
    restaurant NER data and plot it against the heuristic and random
    baselines plus the optimal order.

    Fix: removed the unused local ``N_pool``.
    """
    data = pickle.load(open('data/restaurant.pkl', 'rb'))['seeds'][data_seed]
    N_warmstart = len(data['warmstart'])
    try:
        # Cached IDMR curve from a previous run, if any.
        idmr_curve = load_baseline(f'idmr-{criterion}', evaluation_set, model_seed, data_seed,
                                   batchsize, max_epoch, patience, tot_acq)['curve']
    except KeyError:
        # Cache miss: build sentence-length group proportions over all
        # accessible data, run IDMR, and store the result.
        accessible_set = data['warmstart'] + data['train_valid'] + data['pool']
        accessible_sents, _ = zip(*accessible_set)
        lens = [len(s) for s in accessible_sents]
        lens_ct = Counter(lens)
        max_len = max(lens_ct.keys())
        cts = np.array([lens_ct[l] for l in range(max_len + 1)])
        props = cts / sum(cts)
        # Group adjacent lengths until each group carries >= 13% of the mass.
        groups = group_proportions(props, 0.13)
        lens_proportions = dict()
        for i, g in enumerate(groups):
            lo, hi = min(g), max(g)
            sum_props = sum(props[p] for p in range(lo, hi + 1))
            if i == len(groups) - 1:
                hi = 100  # last bucket is open-ended to absorb longer sentences
            lens_proportions[(lo, hi)] = sum_props
        idmr_curve, idmr_order = idmr(data, evaluation_set, criterion, lens_proportions, model_seed,
                                    batchsize, max_epoch, patience, tot_acq, gpu_idx)
        store_baseline(idmr_curve, idmr_order, f'idmr-{criterion}', evaluation_set, model_seed, data_seed,
                    batchsize, max_epoch, patience, tot_acq)
    print(idmr_curve)
    print(np.mean(idmr_curve))

    # Plot IDMR against the chosen criterion and the random baseline.
    plt.figure()
    optimal_order, _, _ = load_optimal(log_dir, model_seed, data_seed, batchsize, max_epoch, patience, tot_acq)
    xs = list(range(N_warmstart, N_warmstart + tot_acq + 1, batchsize))
    display_name = {'min-confidence': 'Min-Confidence', 'normalized-min-confidence': 'Norm.-Min-Conf.',
                    'longest': 'Longest'}[criterion]
    baselines = [(criterion, display_name, 0), (f'idmr-{criterion}', f'IDMR-{display_name.replace("idence", ".")}', 6),
                 ('random', 'Random', 4)]
    plot_curves(optimal_order, xs, evaluation_set, model_seed, model_seed, data_seed, batchsize, max_epoch, patience,
                tot_acq, None, None, baselines)
    plt.tight_layout()
    plt.savefig(f'../figures/named_entity_recognition/idmr_{criterion}.pdf', bbox_inches='tight')
# Esempio n. 9
# 0
def main(model_seed=0,
         data_seed=0,
         batchsize=25,
         max_epoch=100,
         patience=20,
         tot_acq=300,
         evaluation_set='test',
         log_dir='logs',
         tsne_dim=3,
         num_clusters=5,
         gpu_idx=0):
    """Compute (or load from cache) the IDMR max-entropy curve for object
    classification and plot it against the standard baselines.

    Fix: removed a redundant second ``load_data(data_seed)`` call in the
    cache-miss branch — ``data`` was already loaded with identical
    arguments at the top of the function and not modified in between.
    """
    data = load_data(data_seed)
    N_warmstart = len(data['warmstart'])
    try:
        # Cached IDMR curve from a previous run, if any.
        idmr_curve = load_baseline('idmr-max-entropy', evaluation_set,
                                   model_seed, data_seed, batchsize, max_epoch,
                                   patience, tot_acq)['curve']
    except KeyError:
        # Cache miss: run IDMR and store the result.
        idmr_curve, idmr_order = idmr(data, evaluation_set, model_seed,
                                      batchsize, max_epoch, patience, tot_acq,
                                      tsne_dim, num_clusters, gpu_idx)
        store_baseline(idmr_curve, idmr_order, 'idmr-max-entropy',
                       evaluation_set, model_seed, data_seed, batchsize,
                       max_epoch, patience, tot_acq)
    print(idmr_curve)
    print(np.mean(idmr_curve))

    # Plot IDMR against heuristic / random baselines and the optimal order.
    plt.figure()
    xs = list(range(N_warmstart, N_warmstart + tot_acq + 1, batchsize))
    baselines = [('max-entropy', 'Max-Entropy', 0), ('bald', 'BALD', 1),
                 ('random', 'Random', 4),
                 ('idmr-max-entropy', 'IDMR Max-Ent.', 6)]
    optimal_order, _, _ = load_optimal(log_dir, model_seed, data_seed,
                                       batchsize, max_epoch, patience, tot_acq)
    plot_curves(optimal_order, xs, evaluation_set, model_seed, model_seed,
                data_seed, batchsize, max_epoch, patience, tot_acq, None, None,
                baselines)
    plt.title('IDMR Performance Curve')
    plt.savefig('../figures/object_classification/idmr.pdf',
                bbox_inches='tight')
def main(criterion, model_seed, data_seed, batchsize, max_epoch, patience, tot_acq, log_dir):
    """Visualize how the optimal, heuristic (``criterion``) and random
    acquisition orders shift the sentence-length and tag distributions of
    the acquired NER data relative to the test set.

    Produces a 2x3 grid (length distribution on top, tag distribution below;
    one column per order, separated by reference-meter strips) and saves it
    to ../figures/named_entity_recognition/distribution_vis_{criterion}.pdf.

    Fixes: removed the unused locals ``num_groups`` and ``fig``, and a
    redundant re-unpacking of ``data['test']`` that recomputed ``test_tags``
    with the same value it already had.
    """
    data = pickle.load(open('data/restaurant.pkl', 'rb'))['seeds'][data_seed]
    N_warmstart = len(data['warmstart'])
    N_pool = len(data['pool'])
    warmstart_sents, warmstart_tags = zip(*data['warmstart'])
    pool_sents, pool_tags = zip(*data['pool'])
    test_sents, test_tags = zip(*data['test'])
    display_name = {'min-confidence': 'Min-Confidence', 'normalized-min-confidence': 'Norm.-Min-Confidence',
                    'longest': 'Longest'}[criterion]

    # The three acquisition orders under comparison.
    optimal_order, _, _ = load_optimal(log_dir, model_seed, data_seed, batchsize, max_epoch, patience, tot_acq)
    heuristic_order = load_baseline(criterion, 'test', model_seed, data_seed, batchsize, max_epoch, patience, tot_acq)['order']
    random.seed(0)  # fixed seed keeps the random order reproducible
    random_order = random.sample(range(N_pool), N_pool)[:tot_acq]

    # Group adjacent sentence lengths until each group holds >= 13% of the
    # test mass; cumulative group sizes give the length reference CDF.
    lens_ct = Counter([len(t) for t in test_sents])
    max_len = max(lens_ct.keys())
    lens_props = [lens_ct[i] / sum(lens_ct.values()) for i in range(max_len + 1)]
    len_groups = group_proportions(lens_props, 0.13)
    group_sizes = np.array([sum([lens_ct[l] for l in len_group]) for len_group in len_groups])
    len_ref = np.cumsum(group_sizes / sum(group_sizes))
    len_ref = list(len_ref.flat)
    len_ref.insert(0, 0)

    # Collapse the raw tag ids into 9 coarse groups of adjacent ids, then
    # build the test-set tag reference CDF over those groups.
    tag_groups = [[0], [1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
    tag_groups_dict = {v: i for i, vs in enumerate(tag_groups) for v in vs}
    test_tags = [tag_groups_dict[t] for t in sum(map(list, test_tags), [])]
    test_tag_counts = Counter(test_tags)
    test_tag_counts = np.array([test_tag_counts[i] for i in range(len(tag_groups))])
    tag_ref = np.cumsum(test_tag_counts / sum(test_tag_counts))
    tag_ref = list(tag_ref.flat)
    tag_ref.insert(0, 0)

    plt.figure(figsize=[9, 6])
    gs = GridSpec(ncols=5, nrows=2, width_ratios=[10, 0.7, 10, 0.7, 10], wspace=0.05, hspace=0.05)
    xs = np.linspace(N_warmstart, N_warmstart + tot_acq, 6)

    # Top row: length distributions for optimal / heuristic / random orders.
    plt.subplot(gs[0, 0])
    plot_len_proportion(optimal_order, warmstart_sents, pool_sents, len_groups, len_ref, tot_acq)
    plt.xticks([])
    plt.ylabel('Length Distribution')
    plt.title('Optimal Order')

    plt.subplot(gs[0, 1])
    plot_ref_meter(len_ref)

    plt.subplot(gs[0, 2])
    plot_len_proportion(heuristic_order, warmstart_sents, pool_sents, len_groups, len_ref, tot_acq)
    plt.xticks([])
    plt.yticks([])
    plt.title(f'{display_name} Order')

    plt.subplot(gs[0, 3])
    plot_ref_meter(len_ref)

    plt.subplot(gs[0, 4])
    plot_len_proportion(random_order, warmstart_sents, pool_sents, len_groups, len_ref, tot_acq)
    plt.xticks([])
    plt.yticks([])
    plt.title('Random Order')

    # Bottom row: tag distributions for the same three orders.
    plt.subplot(gs[1, 0])
    plot_tag_proportion(optimal_order, warmstart_tags, pool_tags, tag_ref, tag_groups_dict, tot_acq)
    plt.ylabel('Tag Distribution')
    plt.xlabel('# Data Points')

    plt.subplot(gs[1, 1])
    plot_ref_meter(tag_ref)

    plt.subplot(gs[1, 2])
    plot_tag_proportion(heuristic_order, warmstart_tags, pool_tags, tag_ref, tag_groups_dict, tot_acq)
    plt.yticks([])
    plt.xlabel('# Data Points')

    plt.subplot(gs[1, 3])
    plot_ref_meter(tag_ref)

    plt.subplot(gs[1, 4])
    plot_tag_proportion(random_order, warmstart_tags, pool_tags, tag_ref, tag_groups_dict, tot_acq)
    plt.xticks(xs)
    plt.yticks([])
    plt.xlabel('# Data Points')

    plt.savefig(f'../figures/named_entity_recognition/distribution_vis_{criterion}.pdf', bbox_inches='tight')
def main(model='lstm',
         model_seed=0,
         domain='alarm',
         data_seed=0,
         batchsize=20,
         max_epoch=100,
         patience=20,
         tot_acq=160,
         log_dir='logs'):
    """Visualize how the optimal, Max-Entropy, BALD and random acquisition
    orders shift the sentence-length and label distributions of acquired
    intent-classification data relative to the test set.

    Produces a 2x4 grid (length distribution on top, label distribution
    below; one column per order, separated by reference-meter strips) and
    saves it to ../figures/intent_classification/distribution_vis.pdf.
    """
    data = pickle.load(open('data/TOP.pkl', 'rb'))[domain]
    # Halved because the mapping appears to store entries in both directions
    # (label->id and id->label) — TODO confirm against the data builder.
    num_labels = int(len(data['intent_label_mapping']) / 2)
    data = data['seeds'][data_seed]
    N = len(data['pool'])
    N_warmstart = len(data['warmstart'])

    # The four acquisition orders under comparison.
    optimal_order, _, _ = load_optimal(log_dir, model, model_seed, domain,
                                       data_seed, batchsize, max_epoch,
                                       patience, tot_acq)
    criterions = [('max-entropy', 'Max-Entropy'), ('bald', 'BALD')]
    heuristic_orders = [
        load_baseline(c, 'test', model, model_seed, domain, data_seed,
                      batchsize, max_epoch, patience, tot_acq)['order']
        for c, _ in criterions
    ]
    # Fixed seed keeps the random order reproducible across runs.
    random.seed(0)
    random_order = random.sample(range(N), N)[:tot_acq]

    # Tokenize every split so sentence lengths are measured in tokens.
    warmstart_sents, warmstart_labels = zip(*data['warmstart'])
    warmstart_sents = [nltk.word_tokenize(sent) for sent in warmstart_sents]
    pool_sents, pool_labels = zip(*data['pool'])
    pool_sents = [nltk.word_tokenize(sent) for sent in pool_sents]
    test_sents, test_labels = zip(*data['test'])
    test_sents = [nltk.word_tokenize(sent) for sent in test_sents]
    # Group adjacent sentence lengths until each group carries >= 8% of the
    # test mass; the groups define the stacked bands in the length plots.
    lens_ct = Counter([len(t) for t in test_sents])
    max_len = max(lens_ct.keys())
    lens_props = [
        lens_ct[i] / sum(lens_ct.values()) for i in range(max_len + 1)
    ]
    len_groups = group_adjacent(lens_props, 0.08)
    print('Length grouping:', len_groups)

    # Reference CDF of the test-set length distribution (meter strips).
    group_sizes = np.array(
        [sum([lens_ct[l] for l in len_group]) for len_group in len_groups])
    len_ref_cdf = np.cumsum(group_sizes / sum(group_sizes))
    len_ref_cdf = list(len_ref_cdf.flat)
    len_ref_cdf.insert(0, 0)

    # Reference CDF of the test-set label distribution.
    test_label_counts = Counter(test_labels)
    test_label_counts = np.array(
        [test_label_counts[i] for i in range(num_labels)])
    label_ref_cdf = np.cumsum(test_label_counts / sum(test_label_counts))
    label_ref_cdf = list(label_ref_cdf.flat)
    label_ref_cdf.insert(0, 0)

    fig = plt.figure(figsize=[12, 5.5])
    gs = GridSpec(ncols=7,
                  nrows=2,
                  width_ratios=[10, 0.7, 10, 0.7, 10, 0.7, 10],
                  wspace=0.05,
                  hspace=0.05)
    xs = range(N_warmstart, tot_acq + N_warmstart + 1)

    # Top row: sentence-length distributions for each of the four orders.
    fig.add_subplot(gs[0, 0])
    plot_length_distribution(optimal_order, warmstart_sents, pool_sents,
                             len_groups, len_ref_cdf, xs)
    plt.title('Optimal')
    plt.xticks([])
    plt.ylabel('Sentence Length Distribution')

    fig.add_subplot(gs[0, 1])
    plot_ref_meter(len_ref_cdf)

    fig.add_subplot(gs[0, 2])
    plot_length_distribution(heuristic_orders[0], warmstart_sents, pool_sents,
                             len_groups, len_ref_cdf, xs)
    plt.title(f'{criterions[0][1]}')
    plt.xticks([])
    plt.yticks([])

    fig.add_subplot(gs[0, 3])
    plot_ref_meter(len_ref_cdf)

    fig.add_subplot(gs[0, 4])
    plot_length_distribution(heuristic_orders[1], warmstart_sents, pool_sents,
                             len_groups, len_ref_cdf, xs)
    plt.title(f'{criterions[1][1]}')
    plt.xticks([])
    plt.yticks([])

    fig.add_subplot(gs[0, 5])
    plot_ref_meter(len_ref_cdf)

    fig.add_subplot(gs[0, 6])
    plot_length_distribution(random_order, warmstart_sents, pool_sents,
                             len_groups, len_ref_cdf, xs)
    plt.title('Random')
    plt.xticks([])
    plt.yticks([])

    # Bottom row: label distributions for the same four orders.
    fig.add_subplot(gs[1, 0])
    plot_label_distribution(optimal_order, warmstart_labels, pool_labels,
                            num_labels, label_ref_cdf, xs)
    plt.xlabel('# Data Points')
    plt.xticks(np.linspace(N_warmstart, N_warmstart + tot_acq, 5))
    plt.ylabel('Label Distribution')

    fig.add_subplot(gs[1, 1])
    plot_ref_meter(label_ref_cdf)

    fig.add_subplot(gs[1, 2])
    plot_label_distribution(heuristic_orders[0], warmstart_labels, pool_labels,
                            num_labels, label_ref_cdf, xs)
    plt.xlabel('# Data Points')
    plt.xticks(np.linspace(N_warmstart, N_warmstart + tot_acq, 5))
    plt.yticks([])

    fig.add_subplot(gs[1, 3])
    plot_ref_meter(label_ref_cdf)

    fig.add_subplot(gs[1, 4])
    plot_label_distribution(heuristic_orders[1], warmstart_labels, pool_labels,
                            num_labels, label_ref_cdf, xs)
    plt.xlabel('# Data Points')
    plt.xticks(np.linspace(N_warmstart, N_warmstart + tot_acq, 5))
    plt.yticks([])

    fig.add_subplot(gs[1, 5])
    plot_ref_meter(label_ref_cdf)

    fig.add_subplot(gs[1, 6])
    plot_label_distribution(random_order, warmstart_labels, pool_labels,
                            num_labels, label_ref_cdf, xs)
    plt.xlabel('# Data Points')
    plt.xticks(np.linspace(N_warmstart, N_warmstart + tot_acq, 5))
    plt.yticks([])

    plt.savefig('../figures/intent_classification/distribution_vis.pdf',
                bbox_inches='tight')
# Esempio n. 12
# 0
def main(model_seed=0,
         data_seed=0,
         batchsize=25,
         max_epoch=100,
         patience=20,
         tot_acq=300,
         evaluation_set='test',
         gpu_idx=0,
         log_dir='logs'):
    """Compare the four ODMR max-entropy variants against the optimal,
    max-entropy, and random acquisition orders for object classification.

    The variants cross two choices: which pool the density matching targets
    ('test' vs 'accessible') and whether groundtruth labels (True) or model
    predictions (False) are used. Results are cached via store_baseline;
    only variants missing from the cache are recomputed (previously a single
    cache miss triggered recomputing — and re-storing — all four).

    Saves the figure to ../figures/object_classification/odmr.pdf.
    """
    data = load_data(data_seed)

    N_warmstart = len(data['warmstart'])

    _, warmstart_y = zip(*data['warmstart'])
    _, pool_y = zip(*data['pool'])
    _, eval_y = zip(*data[evaluation_set])

    # (cache tag, ODMR reference pool, use groundtruth labels?)
    variants = [('l1', 'test', True), ('l2', 'accessible', True),
                ('l3', 'test', False), ('l4', 'accessible', False)]
    orders = {}
    for tag, ref_pool, use_true in variants:
        name = f'odmr-{tag}-max-entropy'
        try:
            cached = load_baseline(name, evaluation_set, model_seed,
                                   data_seed, batchsize, max_epoch, patience,
                                   tot_acq)
            curve, order = cached['curve'], cached['order']
        except KeyError:
            # Cache miss: run ODMR for this variant and persist the result.
            curve, order = odmr(data,
                                evaluation_set,
                                ref_pool,
                                use_true,
                                model_seed,
                                batchsize,
                                max_epoch,
                                patience,
                                tot_acq,
                                gpu_idx,
                                smoothing=1)
            store_baseline(curve, order, name, evaluation_set, model_seed,
                           data_seed, batchsize, max_epoch, patience, tot_acq)
        print(curve, np.mean(curve))
        orders[tag] = order

    plt.figure(figsize=[20, 4])
    # Wide columns hold the plots; the thin columns between them hold the
    # reference label-distribution meters.
    gs = GridSpec(ncols=9,
                  nrows=1,
                  width_ratios=[10, 0.3, 10, 0.7, 10, 0.7, 10, 0.7, 10],
                  wspace=0.05)
    plt.subplot(gs[0, 0])
    # (baseline cache name, legend label, color index)
    baselines = [('max-entropy', 'Max-Entropy', 0),
                 ('odmr-l1-max-entropy', 'Test + True', 1),
                 ('odmr-l2-max-entropy', 'Acce + True', 2),
                 ('odmr-l3-max-entropy', 'Test + Pred', 6),
                 ('odmr-l4-max-entropy', 'Acce + Pred', 8),
                 ('random', 'Random', 4)]
    xs = list(range(N_warmstart, N_warmstart + tot_acq + 1, batchsize))
    optimal_order, _, _ = load_optimal(log_dir, model_seed, data_seed,
                                       batchsize, max_epoch, patience,
                                       tot_acq)
    plot_curves(optimal_order, xs, evaluation_set, model_seed, model_seed,
                data_seed, batchsize, max_epoch, patience, tot_acq, None, None,
                baselines)
    plt.xlabel('# Data Points')
    plt.ylabel('Accuracy')
    plt.title('Object Classification')

    # One label-proportion panel per variant; each but the last is followed
    # by a reference meter of the evaluation-set label distribution.
    panels = [('l1', 'Test + Groundtruth', 2),
              ('l2', 'Accessible + Groundtruth', 4),
              ('l3', 'Test + Predicted', 6),
              ('l4', 'Accessible + Predicted', 8)]
    for tag, title, col in panels:
        plt.subplot(gs[0, col])
        plot_label_proportion(orders[tag], warmstart_y, pool_y, eval_y)
        plt.title(title)
        plt.xlabel('# Data Points')
        plt.yticks([])
        if col < 8:
            plt.subplot(gs[0, col + 1])
            plot_ref_meter(eval_y)

    plt.savefig('../figures/object_classification/odmr.pdf',
                bbox_inches='tight')
Esempio n. 13
0
    def render(self, mode='human', smooth_kernel_size=5, baseline=None):
        """Render the current training state as a three-panel figure.

        Panels: log training loss, log validation loss, and learning rate,
        for the auto-learned schedule plus a baseline. The baseline comes
        from `baseline.alrs.info_list` when given, otherwise it is loaded
        from disk via utils.load_baseline. On the final training step the
        figure is additionally saved to results/experiment.png.

        Args:
            mode: Only 'human' is supported.
            smooth_kernel_size: Kernel size used when smoothing the curves.
            baseline: Optional experiment object exposing `alrs.info_list`.
        """
        assert mode == 'human'
        sns.set(style='whitegrid')
        colors = [
            'tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple'
        ]
        plt.ion()
        plt.figure(0, dpi=40)
        plt.clf()

        experiments = [
            self._info_list_to_plot_metrics(
                self.info_list,
                label='Auto-learned',
                smooth_kernel_size=smooth_kernel_size)
        ]

        if baseline is None:
            try:
                experiments.append(
                    self._info_list_to_plot_metrics(
                        utils.load_baseline(self.dataset + '_' +
                                            self.architecture),
                        label='Baseline',
                        smooth_kernel_size=smooth_kernel_size))
            except Exception:
                # Narrowed from a bare `except:` so KeyboardInterrupt and
                # SystemExit are no longer swallowed while rendering.
                if not self.displayed_load_error:
                    print(
                        'Error: failed to load baseline experiment data. Run baselines.py to generate.'
                    )
                    self.displayed_load_error = True
        else:
            experiments.append(
                self._info_list_to_plot_metrics(
                    baseline.alrs.info_list,
                    label='Baseline',
                    smooth_kernel_size=smooth_kernel_size))

        # Metric tuple layout (from _info_list_to_plot_metrics):
        # (timeline, train_losses, val_losses, learning_rates,
        #  smoothed_train_losses, smoothed_val_losses,
        #  smoothed_learning_rates, label)
        def _panel(position, raw_idx, smooth_idx, ylabel, use_log=True,
                   smooth_first_only=False):
            # Plot one metric panel: raw curve (faded) plus a smoothed
            # overlay when a smoothed version is available.
            plt.subplot(1, 3, position)
            for i, exp in enumerate(experiments):
                timeline, label = exp[0], exp[7]
                raw, smooth = exp[raw_idx], exp[smooth_idx]
                ys = np.log(raw) if use_log else raw
                # The learning-rate panel only overlays smoothing for the
                # first (auto-learned) experiment, as in the original.
                if smooth is not None and (i == 0 or not smooth_first_only):
                    plt.plot(timeline, ys, color=colors[i], alpha=0.25)
                    ys_smooth = np.log(smooth) if use_log else smooth
                    plt.plot(timeline, ys_smooth, color=colors[i],
                             label=label)
                else:
                    plt.plot(timeline, ys, color=colors[i], label=label)
            plt.xlabel('Train steps')
            plt.ylabel(ylabel)
            plt.legend(loc='upper right')

        _panel(1, 1, 4, 'Log training loss')
        _panel(2, 2, 5, 'Log validation loss')
        _panel(3, 3, 6, 'Learning rate', use_log=False,
               smooth_first_only=True)

        # Final step when the info list has one entry per update interval.
        last_step = len(self.info_list) == (self.num_train_steps //
                                            self.update_freq)

        plt.tight_layout()

        if last_step:
            path = 'results/'
            if not os.path.exists(path):
                os.makedirs(path)
            plt.savefig(path + 'experiment.png')

        plt.show()
        plt.draw()
        # Long pause on the last step so the final figure stays visible.
        plt.pause(5 if last_step else 0.001)
Esempio n. 14
0
def main(criterion,
         model='lstm',
         model_seed=0,
         domain='alarm',
         data_seed=0,
         batchsize=20,
         max_epoch=100,
         patience=20,
         tot_acq=160,
         evaluation_set='test',
         gpu_idx=0,
         log_dir='logs'):
    """Compare the four ODMR variants of `criterion` against the optimal,
    heuristic, and random acquisition orders for intent classification.

    The variants cross the density-matching reference pool ('test' vs
    'accessible') with groundtruth (True) vs predicted (False) labels.
    Results are cached via store_baseline; only variants missing from the
    cache are recomputed (previously one cache miss recomputed all four).
    Fixes a NameError: `baselines` used to be assigned only for
    'max-entropy' and 'bald'; other criterions now get a generic legend.

    Saves the figure to ../figures/intent_classification/odmr_{criterion}.pdf.
    """
    data = pickle.load(open('data/TOP.pkl', 'rb'))[domain]
    # The mapping stores both directions (label -> id and id -> label),
    # hence the division by two.
    num_labels = int(len(data['intent_label_mapping']) / 2)
    data = data['seeds'][data_seed]
    _, warmstart_labels = zip(*data['warmstart'])
    _, pool_labels = zip(*data['pool'])
    N_warmstart = len(data['warmstart'])

    # (cache tag, ODMR reference pool, use groundtruth labels?)
    variants = [('l1', 'test', True), ('l2', 'accessible', True),
                ('l3', 'test', False), ('l4', 'accessible', False)]
    orders = {}
    for tag, ref_pool, use_true in variants:
        name = f'odmr-{tag}-{criterion}'
        try:
            cached = load_baseline(name, evaluation_set, model, model_seed,
                                   domain, data_seed, batchsize, max_epoch,
                                   patience, tot_acq)
            curve, order = cached['curve'], cached['order']
        except KeyError:
            # Cache miss: run ODMR for this variant and persist the result.
            curve, order = odmr(data,
                                criterion,
                                evaluation_set,
                                ref_pool,
                                use_true,
                                model,
                                model_seed,
                                domain,
                                batchsize,
                                max_epoch,
                                patience,
                                tot_acq,
                                gpu_idx,
                                num_labels,
                                smoothing=1)
            store_baseline(curve, order, name, evaluation_set, model,
                           model_seed, domain, data_seed, batchsize,
                           max_epoch, patience, tot_acq)
        print(curve, np.mean(curve))
        orders[tag] = order

    plt.figure(figsize=[20, 4])
    # Wide columns hold the plots; the thin columns between them hold the
    # reference label-distribution meters.
    gs = GridSpec(ncols=9,
                  nrows=1,
                  width_ratios=[10, 0.3, 10, 0.7, 10, 0.7, 10, 0.7, 10],
                  wspace=0.05)
    plt.subplot(gs[0, 0])
    # Human-readable legend label for the base heuristic; fall back to the
    # raw criterion name so unlisted criterions no longer raise NameError.
    display = {'max-entropy': 'Max-Entropy', 'bald': 'BALD'}.get(
        criterion, criterion)
    # (baseline cache name, legend label, color index)
    baselines = [(criterion, display, 0),
                 (f'odmr-l1-{criterion}', 'Test + True', 1),
                 (f'odmr-l2-{criterion}', 'Acce + True', 2),
                 (f'odmr-l3-{criterion}', 'Test + Pred', 6),
                 (f'odmr-l4-{criterion}', 'Acce + Pred', 8),
                 ('random', 'Random', 4)]
    xs = list(range(N_warmstart, N_warmstart + tot_acq + 1, batchsize))
    optimal_order, _, _ = load_optimal(log_dir, model, model_seed, domain,
                                       data_seed, batchsize, max_epoch,
                                       patience, tot_acq)
    plot_curves(optimal_order, xs, evaluation_set, model, model_seed,
                model_seed, domain, data_seed, batchsize, max_epoch, patience,
                tot_acq, None, None, baselines)
    plt.xlabel('# Data Points')
    plt.ylabel('F1')
    plt.title('Intent Classification')

    # Reference CDF of the test-set label distribution, used as the
    # yardstick in the distribution panels below.
    xs = list(range(N_warmstart, N_warmstart + tot_acq + 1))
    _, test_labels = zip(*data['test'])
    test_label_counts = Counter(test_labels)
    test_label_counts = np.array(
        [test_label_counts[i] for i in range(num_labels)])
    label_ref_cdf = np.cumsum(test_label_counts / sum(test_label_counts))
    label_ref_cdf = list(label_ref_cdf.flat)
    label_ref_cdf.insert(0, 0)

    # One label-distribution panel per variant; each but the last is
    # followed by a reference meter.
    panels = [('l1', 'Test + Groundtruth', 2),
              ('l2', 'Accessible + Groundtruth', 4),
              ('l3', 'Test + Predicted', 6),
              ('l4', 'Accessible + Predicted', 8)]
    for tag, title, col in panels:
        plt.subplot(gs[0, col])
        plot_label_distribution(orders[tag], warmstart_labels, pool_labels,
                                num_labels, label_ref_cdf, xs)
        plt.title(title)
        plt.xlabel('# Data Points')
        plt.yticks([])
        if col < 8:
            plt.subplot(gs[0, col + 1])
            plot_ref_meter(label_ref_cdf)

    plt.savefig(f'../figures/intent_classification/odmr_{criterion}.pdf',
                bbox_inches='tight')
Esempio n. 15
0
# NOTE(review): `warnings` and `np` are used below but not imported here —
# presumably imported earlier in the file; confirm against the full source.
import setproctitle
import pickle as pkl
import utils

# Import TensorFlow with FutureWarnings suppressed, and silence its logger.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=FutureWarning)
    import tensorflow as tf
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

if __name__ == '__main__':
    args = utils.parse_args()
    setproctitle.setproctitle('PPO2-ALRS')

    # Build the ALRS environment in baseline/test mode and load the recorded
    # baseline learning-rate schedule for this dataset/architecture pair.
    env = utils.make_alrs_env(args, test=True, baseline=True)
    baseline = utils.values_from_list_of_dicts(
        utils.load_baseline(args.dataset + '_' + args.architecture), 'lr')

    test_loss, test_acc = utils.AvgLoss(), utils.AvgLoss()
    num_runs = 10

    for run in range(num_runs):

        env.reset()
        done = False
        step = 0
        alrs = env.venv.envs[0].env

        while not done:

            # Replay the baseline schedule: the action is the multiplicative
            # factor mapping the current lr onto the baseline lr at `step`.
            # NOTE(review): `step` is never incremented in this visible loop,
            # so index 0 is replayed every iteration — confirm against the
            # full file whether that is intentional.
            action = np.array(baseline[step] / alrs.lr).reshape(1, )
            _, _, done, _ = env.step(action)
def main(model_seed, data_seed, batchsize, max_epoch, patience, tot_acq,
         log_dir):
    """Visualize BALD vs. BatchBALD acquisition orders for object
    classification: t-SNE scatter plots of the acquired points plus
    label-distribution panels for the optimal, BALD, BatchBALD, and random
    orders. Saves the figure as a PDF.
    """
    data = load_data(data_seed)
    warmstart = data['warmstart']
    pool = data['pool']
    test = data['test']

    # Acquisition orders: optimal (precomputed), BALD/BatchBALD baselines
    # (from the baseline cache), and a fixed-seed random order.
    optimal_order, _, _ = load_optimal(log_dir, model_seed, data_seed,
                                       batchsize, max_epoch, patience, tot_acq)
    bald_order = load_baseline('bald', 'test', model_seed, data_seed,
                               batchsize, max_epoch, patience,
                               tot_acq)['order']
    batchbald_order = load_baseline('batchbald', 'test', model_seed, data_seed,
                                    batchsize, max_epoch, patience,
                                    tot_acq)['order']
    random.seed(0)
    random_order = random.sample(range(len(pool)), len(pool))[:tot_acq]

    warmstart_X, warmstart_y = zip(*warmstart)
    warmstart_X = np.array(warmstart_X)
    pool_X, pool_y = zip(*pool)
    pool_X = np.array(pool_X)
    test_X, test_y = zip(*test)
    test_X = np.array(test_X)

    # Embed warmstart + pool + the first 2000 test points together:
    # PCA to 100 dims first (standard t-SNE speed-up), then 2-D t-SNE.
    # Both use fixed random states so the layout is reproducible.
    all_X = np.vstack((warmstart_X, pool_X, test_X[:2000]))
    all_X = all_X.reshape(len(all_X), -1)
    pca = PCA(n_components=100, random_state=np.random.RandomState(0))
    tsne = TSNE(n_components=2,
                n_jobs=-1,
                random_state=np.random.RandomState(0))
    all_X_2d = tsne.fit_transform(pca.fit_transform(all_X))
    # Split the joint embedding back into the three original subsets.
    warmstart_X_2d, pool_X_2d, test_X_2d = np.array_split(
        all_X_2d,
        [len(warmstart_X), len(warmstart_X) + len(pool_X)])

    plt.figure(figsize=[10, 7])
    # Row 0: two wide t-SNE panels. Row 1: four label-distribution panels
    # with thin reference-meter columns between them.
    gs = GridSpec(ncols=7,
                  nrows=2,
                  width_ratios=[10, 0.7, 10, 0.7, 10, 0.7, 10],
                  wspace=0.05,
                  height_ratios=[3, 2],
                  hspace=0.15)

    plt.subplot(gs[0, :3])
    plot_tsne(bald_order, warmstart_X_2d, warmstart_y, pool_X_2d, pool_y,
              test_X_2d, test_y[:2000], 5, batchsize)
    plt.title('BALD Order')

    plt.subplot(gs[0, 4:])
    plot_tsne(batchbald_order, warmstart_X_2d, warmstart_y, pool_X_2d, pool_y,
              test_X_2d, test_y[:2000], 5, batchsize)
    plt.title('BatchBALD Order')

    plt.subplot(gs[1, 0])
    plot_label_proportion(optimal_order, warmstart_y, pool_y, test_y)
    plt.title('Optimal Order')
    plt.ylabel('Label Distribution')

    # Reference meters show the test-set label distribution as a yardstick.
    plt.subplot(gs[1, 1])
    plot_ref_meter(test_y)

    plt.subplot(gs[1, 2])
    plot_label_proportion(bald_order, warmstart_y, pool_y, test_y)
    plt.title(f'BALD Order')
    plt.yticks([])

    plt.subplot(gs[1, 3])
    plot_ref_meter(test_y)

    plt.subplot(gs[1, 4])
    plot_label_proportion(batchbald_order, warmstart_y, pool_y, test_y)
    plt.title(f'BatchBALD Order')
    plt.yticks([])

    plt.subplot(gs[1, 5])
    plot_ref_meter(test_y)

    plt.subplot(gs[1, 6])
    plot_label_proportion(random_order, warmstart_y, pool_y, test_y)
    plt.title('Random Order')
    plt.yticks([])

    plt.savefig(
        f'../figures/object_classification/distribution_vis_bald_batchbald.pdf',
        bbox_inches='tight')