def main():
    """
    MNIST example
    weight norm reparameterized MLP with prior on rescaling parameters
    """

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--perdatapoint', action='store_true')
    parser.add_argument('--coupling', action='store_true')
    parser.add_argument('--size', default=10000, type=int)
    parser.add_argument('--lrdecay', action='store_true')
    parser.add_argument('--lr0', default=0.1, type=float)
    parser.add_argument('--lbda', default=0.01, type=float)
    parser.add_argument('--bs', default=50, type=int)
    args = parser.parse_args()
    print args

    perdatapoint = args.perdatapoint
    coupling = 1  # hard-coded on; args.coupling is ignored in this example
    lr0 = args.lr0
    lrdecay = args.lrdecay
    lbda = np.cast[floatX](args.lbda)
    bs = args.bs
    size = max(10, min(50000, args.size))
    clip_grad = 100
    max_norm = 100

    # load dataset
    filename = '/data/lisa/data/mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)
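    # load_mnist is assumed to return the usual 50k/10k/10k MNIST split, with
    # inputs flattened to 784-d float32 rows and labels one-hot encoded
    # (target_var below is a T.matrix, and later examples call train_y.argmax(-1)).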

    input_var = T.matrix('input_var')
    target_var = T.matrix('target_var')
    dataset_size = T.scalar('dataset_size')
    lr = T.scalar('lr')

    # 784 -> 200 -> 10
    weight_shapes = [(784, 200), (200, 10)]

    num_params = sum(ws[1] for ws in weight_shapes)
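    # Weight-norm reparameterization: the hypernet only generates the per-unit
    # rescaling parameters (ws[1] scales per layer), so num_params counts output
    # units rather than full weight matrices.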
    if perdatapoint:
        wd1 = input_var.shape[0]
    else:
        wd1 = 1

    # stochastic hypernet
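    # The hypernet is a small normalizing flow: base noise `ep` is pushed through
    # LinearFlowLayer (plus optional coupling layers); each flow layer returns its
    # output and the log|det Jacobian| needed to evaluate the density of the
    # generated weights, and PermuteLayer reshuffles coordinates between couplings
    # so that both halves of the parameter vector get transformed.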
    ep = srng.normal(std=0.01, size=(wd1, num_params), dtype=floatX)
    logdets_layers = []
    h_layer = lasagne.layers.InputLayer([None, num_params])

    layer_temp = LinearFlowLayer(h_layer)
    h_layer = IndexLayer(layer_temp, 0)
    logdets_layers.append(IndexLayer(layer_temp, 1))

    if coupling:
        layer_temp = CoupledDenseLayer(h_layer, 200)
        h_layer = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

        h_layer = PermuteLayer(h_layer, num_params)

        layer_temp = CoupledDenseLayer(h_layer, 200)
        h_layer = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

    weights = lasagne.layers.get_output(h_layer, ep)

    # primary net
    t = np.cast['int32'](0)
    layer = lasagne.layers.InputLayer([None, 784])
    inputs = {layer: input_var}
    for ws in weight_shapes:
        num_param = ws[1]
        w_layer = lasagne.layers.InputLayer((None, ws[1]))
        weight = weights[:, t:t + num_param].reshape((wd1, ws[1]))
        inputs[w_layer] = weight
        layer = stochasticDenseLayer2([layer, w_layer], ws[1])
        print layer.output_shape
        t += num_param

    layer.nonlinearity = nonlinearities.softmax
    y = T.clip(get_output(layer, inputs), 0.001, 0.999)  # stability

    # loss terms
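    # Single-sample ELBO estimate. By the change-of-variables formula,
    # log q(w) = log N(ep; 0, I) - sum_l log|det J_l|, which is what `logqw`
    # computes; `kl` is then a one-sample Monte Carlo estimate of KL(q(w)||p(w)),
    # and the loss is -(E[log p(y|x,w)] - KL / dataset_size).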
    logdets = sum([get_output(logdet, ep) for logdet in logdets_layers])
    logqw = -(0.5 *
              (ep**2).sum(1) + 0.5 * T.log(2 * np.pi) * num_params + logdets)
    #logpw = log_normal(weights,0.,-T.log(lbda)).sum(1)
    logpw = log_stdnormal(weights).sum(1)
    kl = (logqw - logpw).mean()
    logpyx = -cc(y, target_var).mean()
    loss = -(logpyx - kl / T.cast(dataset_size, floatX))

    params = lasagne.layers.get_all_params([h_layer, layer])
    grads = T.grad(loss, params)
    mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
    cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]
    updates = lasagne.updates.adam(cgrads, params, learning_rate=lr)

    train = theano.function([input_var, target_var, dataset_size, lr],
                            loss,
                            updates=updates)
    predict = theano.function([input_var], y.argmax(1))

    records = train_model(train, predict, train_x[:size], train_y[:size],
                          valid_x, valid_y, lr0, lrdecay, bs)
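
    # (Optional) sanity check, mirroring the Monte Carlo evaluation blocks in the
    # later examples: average predictive probabilities over a few weight samples
    # and score the averaged prediction on the first 1000 training points.
    output_probs = theano.function([input_var], y)
    MCt = np.zeros((10, 1000, 10))
    for i in range(10):
        MCt[i] = output_probs(train_x[:1000])
    print "train perf=", np.equal(MCt.mean(0).argmax(-1),
                                  train_y[:1000].argmax(-1)).mean()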
Example #2
    # note: locals().update(...) does not reliably rebind local names in CPython;
    # the explicit assignments used in the other examples are safer
    locals().update(args.__dict__)
    print args

    size = max(10, min(50000, args.size))
    print "size", size
    # TODO: these seem large!
    clip_grad = 100
    max_norm = 1000
    

    ###########################
    # load dataset
    # TODO
    #get_dataset(dataset)
    filename = '/data/lisa/data/mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)

    ###########################
    # theano variables
    input_var = T.matrix('input_var')
    target_var = T.matrix('target_var')
    dataset_size = T.scalar('dataset_size')
    lr = T.scalar('lr') 
    
    ###########################
    # primary net architecture
    ninp, nout = X.shape[1], Y.shape[1]
Example #3
def active_learning(acquisition_iterations):

    bh_iterations = 100
    nb_classes = 10
    Queries = 10
    all_accuracy = 0

    acquisition_iterations = 98  # overrides the function argument

    filename = '../../mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)
    train_x = train_x.reshape(50000, 1, 28, 28)
    valid_x = valid_x.reshape(10000, 1, 28, 28)
    test_x = test_x.reshape(10000, 1, 28, 28)

    train_x, train_y, pool_x, pool_y = split_train_pool_data(train_x, train_y)

    train_y_multiclass = train_y.argmax(1)

    train_x, train_y = get_initial_training_data(train_x, train_y_multiclass)

    print("Initial Training Data", train_x.shape)

    model = HyperCNN(lbda=lbda,
                     perdatapoint=perdatapoint,
                     prior=prior,
                     kernel_width=4,
                     pad='valid',
                     stride=1,
                     coupling=coupling)

    recs = train_model(model.train_func, model.predict, train_x[:size],
                       train_y[:size], valid_x, valid_y, lr0, lrdecay, bs,
                       epochs)

    test_accuracy = test_model(model.predict_proba, test_x, test_y)

    all_accuracy = test_accuracy

    print("Training Set Size", train_x.shape)
    print("Test Accuracy", test_accuracy)

    for i in range(acquisition_iterations):

        print('POOLING ITERATION', i)
        pool_subset = 2000

        pool_subset_dropout = np.asarray(
            random.sample(range(0, pool_x.shape[0]), pool_subset))

        X_pool_Dropout = pool_x[pool_subset_dropout, :, :, :]
        y_pool_Dropout = pool_y[pool_subset_dropout]
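
        # Variation-ratio acquisition: draw bh_iterations stochastic forward passes
        # (each uses a fresh weight sample), record the predicted class of every
        # pass, and score each pool point as 1 - (modal class count) / bh_iterations;
        # high scores mean the sampled networks disagree about the label.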

        All_BH_Classes = np.zeros(shape=(X_pool_Dropout.shape[0], 1))

        for d in range(bh_iterations):
            bh_score = model.predict(X_pool_Dropout)
            bh_score = np.array([bh_score]).T
            All_BH_Classes = np.append(All_BH_Classes, bh_score, axis=1)

        Variation = np.zeros(shape=(X_pool_Dropout.shape[0]))

        for t in range(X_pool_Dropout.shape[0]):
            L = np.array([0])
            for d_iter in range(bh_iterations):
                L = np.append(L, All_BH_Classes[t, d_iter + 1])
            Predicted_Class, Mode = mode(L[1:])
            v = np.array([1 - Mode / float(bh_iterations)])
            Variation[t] = v

        sort_values = Variation.flatten()
        x_pool_index = sort_values.argsort()[-Queries:][::-1]

        Pooled_X = X_pool_Dropout[x_pool_index, :, :, :]
        Pooled_Y = y_pool_Dropout[x_pool_index]

        delete_Pool_X = np.delete(pool_x, (pool_subset_dropout), axis=0)
        delete_Pool_Y = np.delete(pool_y, (pool_subset_dropout), axis=0)

        delete_Pool_X_Dropout = np.delete(X_pool_Dropout, (x_pool_index),
                                          axis=0)
        delete_Pool_Y_Dropout = np.delete(y_pool_Dropout, (x_pool_index),
                                          axis=0)

        # rebuild the pool from the unsampled points plus the unqueried part of
        # the subset, so the acquired examples are removed from the pool
        pool_x = np.concatenate((delete_Pool_X, delete_Pool_X_Dropout), axis=0)
        pool_y = np.concatenate((delete_Pool_Y, delete_Pool_Y_Dropout), axis=0)

        train_x = np.concatenate((train_x, Pooled_X), axis=0)
        train_y = np.concatenate((train_y, Pooled_Y), axis=0)

        if 0:  # don't warm start
            model = HyperCNN(lbda=lbda,
                             perdatapoint=perdatapoint,
                             prior=prior,
                             kernel_width=4,
                             pad='valid',
                             stride=1,
                             coupling=coupling)

        recs = train_model(model.train_func, model.predict, train_x[:size],
                           train_y[:size], valid_x, valid_y, lr0, lrdecay, bs,
                           epochs)

        test_accuracy = test_model(model.predict_proba, test_x, test_y)

        print("Training Set Size", train_x.shape)
        print("Test Accuracy", test_accuracy)

        all_accuracy = np.append(all_accuracy, test_accuracy)

    return all_accuracy
Example #4
def main():
    """
    MNIST example
    """

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--perdatapoint', action='store_true')
    parser.add_argument('--coupling', action='store_true')
    parser.add_argument('--size', default=10000, type=int)
    parser.add_argument('--lrdecay', action='store_true')
    parser.add_argument('--lr0', default=0.1, type=float)
    parser.add_argument('--lbda', default=10, type=float)
    parser.add_argument('--bs', default=50, type=int)
    args = parser.parse_args()
    print args

    perdatapoint = args.perdatapoint
    coupling = args.coupling
    size = max(10, min(50000, args.size))
    clip_grad = 10
    max_norm = 1000

    # load dataset
    filename = '/data/lisa/data/mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)

    input_var = T.matrix('input_var')
    target_var = T.matrix('target_var')
    dataset_size = T.scalar('dataset_size')
    lr = T.scalar('lr')

    # 784 -> 20 -> 20 -> 10
    weight_shapes = [(784, 20), (20, 20), (20, 10)]

    num_params = sum(np.prod(ws) for ws in weight_shapes)
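    # Unlike the weight-norm variant, this hypernet generates every entry of each
    # weight matrix, so num_params is the sum of prod(shape) over the layers.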
    if perdatapoint:
        wd1 = input_var.shape[0]
    else:
        wd1 = 1

    # stochastic hypernet
    ep = srng.normal(size=(wd1, num_params), dtype=floatX)
    logdets_layers = []
    h_layer = lasagne.layers.InputLayer([None, num_params])

    layer_temp = LinearFlowLayer(h_layer)
    h_layer = IndexLayer(layer_temp, 0)
    logdets_layers.append(IndexLayer(layer_temp, 1))

    if coupling:
        layer_temp = CoupledConv1DLayer(h_layer, 16, 5)
        h_layer = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

        h_layer = PermuteLayer(h_layer, num_params)

        layer_temp = CoupledConv1DLayer(h_layer, 16, 5)
        h_layer = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

    weights = lasagne.layers.get_output(h_layer, ep)

    # primary net
    t = np.cast['int32'](0)
    layer = lasagne.layers.InputLayer([None, 784])
    inputs = {layer: input_var}
    for ws in weight_shapes:
        num_param = np.prod(ws)
        print t, t + num_param
        w_layer = lasagne.layers.InputLayer((None, ) + ws)
        weight = weights[:, t:t + num_param].reshape((wd1, ) + ws)
        inputs[w_layer] = weight
        layer = stochasticDenseLayer([layer, w_layer], ws[1])
        t += num_param

    layer.nonlinearity = nonlinearities.softmax
    y = T.clip(get_output(layer, inputs), 0.001, 0.999)  # stability

    # loss terms
    logdets = sum([get_output(logdet, ep) for logdet in logdets_layers])
    logqw = -(0.5 *
              (ep**2).sum(1) + 0.5 * T.log(2 * np.pi) * num_params + logdets)
    logpw = log_stdnormal(weights).sum(1)
    kl = (logqw - logpw).mean()
    logpyx = -cc(y, target_var).mean()
    loss = -(logpyx - kl / T.cast(dataset_size, floatX))

    params = lasagne.layers.get_all_params([h_layer, layer])
    grads = T.grad(loss, params)
    mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
    cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]
    updates = lasagne.updates.nesterov_momentum(cgrads,
                                                params,
                                                learning_rate=lr)

    train = theano.function([input_var, target_var, dataset_size, lr],
                            loss,
                            updates=updates)
    predict = theano.function([input_var], y.argmax(1))

    records = train_model(train, predict, train_x[:size], train_y[:size],
                          valid_x, valid_y)

    output_probs = theano.function([input_var], y)
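    # Monte Carlo predictive check: average the class probabilities of 100
    # independent weight samples and compare the argmax of the averaged predictive
    # distribution against the labels of the first 1000 train/valid examples.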
    MCt = np.zeros((100, 1000, 10))
    MCv = np.zeros((100, 1000, 10))
    for i in range(100):
        MCt[i] = output_probs(train_x[:1000])
        MCv[i] = output_probs(valid_x[:1000])

    tr = np.equal(MCt.mean(0).argmax(-1), train_y[:1000].argmax(-1)).mean()
    va = np.equal(MCv.mean(0).argmax(-1), valid_y[:1000].argmax(-1)).mean()
    print "train perf=", tr
    print "valid perf=", va

    for ii in range(15):
        print np.round(MCt[ii][0] * 1000)
Example #5
def main():
    """
    MNIST example
    weight norm reparameterized MLP with prior on rescaling parameters
    """

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--coupling', action='store_true')
    parser.add_argument('--size', default=10000, type=int)
    parser.add_argument('--lrdecay', action='store_true')
    parser.add_argument('--lr0', default=0.1, type=float)
    parser.add_argument('--lbda', default=0.01, type=float)
    parser.add_argument('--bs', default=50, type=int)
    args = parser.parse_args()
    print args

    coupling = args.coupling
    lr0 = args.lr0
    lrdecay = args.lrdecay
    lbda = np.cast[floatX](args.lbda)
    bs = args.bs
    size = max(10, min(50000, args.size))
    clip_grad = 5
    max_norm = 10

    # load dataset
    filename = '/data/lisa/data/mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)
    train_x = train_x.reshape(50000, 1, 28, 28)
    valid_x = valid_x.reshape(10000, 1, 28, 28)
    test_x = test_x.reshape(10000, 1, 28, 28)

    input_var = T.tensor4('input_var')
    target_var = T.matrix('target_var')
    dataset_size = T.scalar('dataset_size')
    lr = T.scalar('lr')

    # 1x28x28 input -> three strided 5x5 conv layers -> 10-way softmax readout
    weight_shapes = [
        (16, 1, 5, 5),   # -> (None, 16, 14, 14)
        (16, 16, 5, 5),  # -> (None, 16,  7,  7)
        (16, 16, 5, 5),  # -> (None, 16,  4,  4)
    ]

    num_params = sum(np.prod(ws) for ws in weight_shapes) + 10
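    # The extra 10 parameters are consumed after the conv loop by the final
    # 10-way dense softmax readout (the w_layer block below).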
    wd1 = 1

    # stochastic hypernet
    ep = srng.normal(std=0.01, size=(wd1, num_params), dtype=floatX)
    logdets_layers = []
    h_layer = lasagne.layers.InputLayer([None, num_params])

    layer_temp = LinearFlowLayer(h_layer)
    h_layer = IndexLayer(layer_temp, 0)
    logdets_layers.append(IndexLayer(layer_temp, 1))

    if coupling:
        layer_temp = CoupledDenseLayer(h_layer, 200)
        h_layer = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

        h_layer = PermuteLayer(h_layer, num_params)

        layer_temp = CoupledDenseLayer(h_layer, 200)
        h_layer = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

    weights = lasagne.layers.get_output(h_layer, ep)

    # primary net
    t = np.cast['int32'](0)
    layer = lasagne.layers.InputLayer([None, 1, 28, 28])
    inputs = {layer: input_var}
    for ws in weight_shapes:
        num_param = np.prod(ws)
        weight = weights[:, t:t + num_param].reshape(ws)
        num_filters = ws[0]
        filter_size = ws[2]
        stride = 2
        pad = 'same'
        layer = stochasticConv2DLayer([layer, weight], num_filters,
                                      filter_size, stride, pad)
        print layer.output_shape
        t += num_param

    w_layer = lasagne.layers.InputLayer((None, 10))
    weight = weights[:, t:t + 10].reshape((wd1, 10))
    inputs[w_layer] = weight
    layer = stochasticDenseLayer2([layer, w_layer],
                                  10,
                                  nonlinearity=nonlinearities.softmax)

    y = T.clip(get_output(layer, inputs), 0.001, 0.999)

    # loss terms
    logdets = sum([get_output(logdet, ep) for logdet in logdets_layers])
    logqw = -(0.5 *
              (ep**2).sum(1) + 0.5 * T.log(2 * np.pi) * num_params + logdets)
    logpw = log_normal(weights, 0., -T.log(lbda)).sum(1)
    #logpw = log_stdnormal(weights).sum(1)
    kl = (logqw - logpw).mean()
    logpyx = -cc(y, target_var).mean()
    loss = -(logpyx - kl / T.cast(dataset_size, floatX))

    params = lasagne.layers.get_all_params([layer])[1:]  # excluding rand state
    grads = T.grad(loss, params)

    mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
    cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]
    updates = lasagne.updates.adam(cgrads, params, learning_rate=lr)

    train = theano.function([input_var, target_var, dataset_size, lr],
                            loss,
                            updates=updates)
    predict = theano.function([input_var], y.argmax(1))

    records = train_model(train, predict, train_x[:size], train_y[:size],
                          valid_x, valid_y, lr0, lrdecay, bs)

    output_probs = theano.function([input_var], y)
    MCt = np.zeros((100, 1000, 10))
    MCv = np.zeros((100, 1000, 10))
    for i in range(100):
        MCt[i] = output_probs(train_x[:1000])
        MCv[i] = output_probs(valid_x[:1000])

    tr = np.equal(MCt.mean(0).argmax(-1), train_y[:1000].argmax(-1)).mean()
    va = np.equal(MCv.mean(0).argmax(-1), valid_y[:1000].argmax(-1)).mean()
    print "train perf=", tr
    print "valid perf=", va

    for ii in range(15):
        print np.round(MCt[ii][0] * 1000)
Example #6
def active_learning(acquisition_iterations):

    bh_iterations = 1000
    nb_classes = 10
    Queries = 10
    all_accuracy = 0

    filename = '../../mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)

    train_x, train_y, valid_x, valid_y, pool_x, pool_y = split_train_pool_data(
        train_x, train_y, valid_x, valid_y)
    train_x, train_y = get_initial_training_data(train_x, train_y)

    print("Training Set Size", train_x.shape)

    model = MLPWeightNorm_BHN(lbda=lbda,
                              perdatapoint=perdatapoint,
                              prior=prior,
                              coupling=coupling)

    recs = train_model(model.train_func, model.predict, train_x[:size],
                       train_y[:size], valid_x, valid_y, lr0, lrdecay, bs,
                       epochs)

    test_accuracy = test_model(model.predict_proba, test_x, test_y)

    print("Test Accuracy", test_accuracy)

    all_accuracy = test_accuracy

    for i in range(acquisition_iterations):

        print('POOLING ITERATION', i)
        pool_subset = 2000

        pool_subset_dropout = np.asarray(
            random.sample(range(0, pool_x.shape[0]), pool_subset))

        X_pool_Dropout = pool_x[pool_subset_dropout, :]
        y_pool_Dropout = pool_y[pool_subset_dropout]
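
        # Max-entropy acquisition: average the predictive probabilities over
        # bh_iterations stochastic passes, then score each pool point by the
        # entropy H = -sum_c p_c * log2(p_c) of that averaged distribution.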

        score_All = np.zeros(shape=(X_pool_Dropout.shape[0], nb_classes))

        for d in range(bh_iterations):
            bh_score = model.predict_proba(X_pool_Dropout)
            score_All = score_All + bh_score

        Avg_Pi = np.divide(score_All, bh_iterations)
        Log_Avg_Pi = np.log2(Avg_Pi)
        Entropy_Avg_Pi = -np.multiply(Avg_Pi, Log_Avg_Pi)
        Entropy_Average_Pi = np.sum(Entropy_Avg_Pi, axis=1)

        U_X = Entropy_Average_Pi

        sort_values = U_X.flatten()
        x_pool_index = sort_values.argsort()[-Queries:][::-1]

        Pooled_X = X_pool_Dropout[x_pool_index, :]
        Pooled_Y = y_pool_Dropout[x_pool_index]

        delete_Pool_X = np.delete(pool_x, (pool_subset_dropout), axis=0)
        delete_Pool_Y = np.delete(pool_y, (pool_subset_dropout), axis=0)

        delete_Pool_X_Dropout = np.delete(X_pool_Dropout, (x_pool_index),
                                          axis=0)
        delete_Pool_Y_Dropout = np.delete(y_pool_Dropout, (x_pool_index),
                                          axis=0)

        # rebuild the pool from the unsampled points plus the unqueried part of
        # the subset, so the acquired examples are removed from the pool
        pool_x = np.concatenate((delete_Pool_X, delete_Pool_X_Dropout), axis=0)
        pool_y = np.concatenate((delete_Pool_Y, delete_Pool_Y_Dropout), axis=0)

        train_x = np.concatenate((train_x, Pooled_X), axis=0)
        train_y = np.concatenate((train_y, Pooled_Y), axis=0)

        print("Training Set Size", train_x.shape)

        recs = train_model(model.train_func, model.predict, train_x[:size],
                           train_y[:size], valid_x, valid_y, lr0, lrdecay, bs,
                           epochs)

        test_accuracy = test_model(model.predict_proba, test_x, test_y)

        print("Test Accuracy", test_accuracy)

        all_accuracy = np.append(all_accuracy, test_accuracy)

    return all_accuracy
Example #7
def active_learning(acquisition_iterations):

    t0 = time.time()

    bh_iterations = 100
    nb_classes = 10
    Queries = 10
    all_accuracy = 0

    acquisition_iterations = 98

    filename = '../../mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)
    train_x = train_x.reshape(50000,1,28,28)
    valid_x = valid_x.reshape(10000,1,28,28)
    test_x = test_x.reshape(10000,1,28,28)
    train_x, train_y, pool_x, pool_y = split_train_pool_data(train_x, train_y)
    train_y_multiclass = train_y.argmax(1)
    train_x, train_y = get_initial_training_data(train_x, train_y_multiclass)
    train_y = train_y.astype('float32')
    print "Initial Training Data", train_x.shape

    # select model
    if arch == 'hyperCNN':
        model = HyperCNN(lbda=lbda,
                         perdatapoint=perdatapoint,
                         prior=prior,
                         coupling=coupling,
                         kernel_width=4,
                         pad='valid',
                         stride=1,
                         extra_linear=extra_linear)
                         #dataset=dataset)
    elif arch == 'CNN':
        model = MCdropoutCNN(kernel_width=4,
                         pad='valid',
                         stride=1)
    elif arch == 'CNN_spatial_dropout':
        model = MCdropoutCNN(dropout='spatial',
                         kernel_width=4,
                         pad='valid',
                         stride=1)
    elif arch == 'CNN_dropout':
        model = MCdropoutCNN(dropout=1,
                         kernel_width=4,
                         pad='valid',
                         stride=1)
    else:
        raise Exception('no model named `{}`'.format(arch))
        
    if save:
        model.save(save_path + '_params_init.npy')

    # TODO: pretraining
    if params_reset == 'pretrained': # train the model to 100% train accuracy on the initial train set 
        # TODO: we could also try training to 100% every time...
        # TODO: and the last time, we should train until overfitting
        # TODO: we also need to consider cross-validating the prior (do we even USE a prior for the dropout net?? we're supposed to!!!)
        # TODO: ...and we could also use the validation set for early-stopping after every acquisition
        tr_acc = 0.
        epochs = 0
        print "pretraining..."
        while tr_acc < 1.:
            epochs += 1
            print "                     epoch", epochs
            tr_acc = train_epoch(model.train_func,model.predict,
                       train_x[:size],train_y[:size],
                       valid_x,valid_y,
                       lr0,lrdecay,bs)
        
        model.add_reset('pretrained')
        if save:
            model.save(save_path + '_params_pretrained.npy')
        print "pretraining completed"

    else:
        recs = train_model(model.train_func, model.predict,
                           train_x[:size], train_y[:size],
                           valid_x, valid_y,
                           lr0, lrdecay, bs, epochs)

    valid_accuracy = test_model(model.predict_proba, valid_x, valid_y)
    print "                                                          valid Accuracy", valid_accuracy
    all_valid_accuracy = valid_accuracy

    test_accuracy = test_model(model.predict_proba, test_x, test_y)
    print "                                                          Test Accuracy", test_accuracy
    all_accuracy = test_accuracy

    for i in range(acquisition_iterations):

        print 'time', time.time() - t0
        print 'POOLING ITERATION', i
        pool_subset = pool_size

        pool_subset_dropout = np.asarray(
            random.sample(range(0, pool_x.shape[0]), pool_subset))

        X_pool_Dropout = pool_x[pool_subset_dropout, :, :, :]
        y_pool_Dropout = pool_y[pool_subset_dropout]



        #####################################
        # BEGIN ACQUISITION
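        # Acquisition scores supported below:
        #   bald      - mutual information U = H[mean_d p_d] - mean_d H[p_d]
        #   max_ent   - entropy of the averaged predictive distribution
        #   var_ratio - 1 - (modal class count) / bh_iterations
        #   mean_std  - summed per-class std of the sampled probabilities
        #   random    - uniform sampling from the pool subset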
        if acq == 'bald':
            score_All = np.zeros(shape=(X_pool_Dropout.shape[0], nb_classes))
            All_Entropy_BH = np.zeros(shape=X_pool_Dropout.shape[0])
            all_bh_classes = np.zeros(shape=(X_pool_Dropout.shape[0], bh_iterations))


            for d in range(bh_iterations):
                bh_score = model.predict_proba(X_pool_Dropout)
                score_All = score_All + bh_score

                bh_score_log = np.log2(bh_score)
                Entropy_Compute = - np.multiply(bh_score, bh_score_log)

                Entropy_Per_BH = np.sum(Entropy_Compute, axis=1)

                All_Entropy_BH = All_Entropy_BH + Entropy_Per_BH

                bh_classes = np.max(bh_score, axis=1)
                all_bh_classes[:, d] = bh_classes



            ### for plotting uncertainty
            predicted_class = np.max(all_bh_classes, axis=1)
            predicted_class_std = np.std(all_bh_classes, axis=1)

            Avg_Pi = np.divide(score_All, bh_iterations)
            Log_Avg_Pi = np.log2(Avg_Pi)
            Entropy_Avg_Pi = - np.multiply(Avg_Pi, Log_Avg_Pi)
            Entropy_Average_Pi = np.sum(Entropy_Avg_Pi, axis=1)

            G_X = Entropy_Average_Pi

            Average_Entropy = np.divide(All_Entropy_BH, bh_iterations)
            F_X = Average_Entropy
            U_X = G_X - F_X
            sort_values = U_X.flatten()
            x_pool_index = sort_values.argsort()[-Queries:][::-1]

        elif acq == 'max_ent':
            score_All = np.zeros(shape=(X_pool_Dropout.shape[0], nb_classes))
            for d in range(bh_iterations):
                bh_score = model.predict_proba(X_pool_Dropout)
                score_All = score_All + bh_score

            Avg_Pi = np.divide(score_All, bh_iterations)
            Log_Avg_Pi = np.log2(Avg_Pi)
            Entropy_Avg_Pi = - np.multiply(Avg_Pi, Log_Avg_Pi)
            Entropy_Average_Pi = np.sum(Entropy_Avg_Pi, axis=1)

            U_X = Entropy_Average_Pi
            sort_values = U_X.flatten()
            x_pool_index = sort_values.argsort()[-Queries:][::-1]

        elif acq == 'var_ratio':
            All_BH_Classes = np.zeros(shape=(X_pool_Dropout.shape[0],1))

            for d in range(bh_iterations):
                bh_score = model.predict(X_pool_Dropout)
                bh_score = np.array([bh_score]).T
                All_BH_Classes = np.append(All_BH_Classes, bh_score, axis=1)


            Variation = np.zeros(shape=(X_pool_Dropout.shape[0]))

            for t in range(X_pool_Dropout.shape[0]):
                L = np.array([0])
                for d_iter in range(bh_iterations):
                    L = np.append(L, All_BH_Classes[t, d_iter + 1])
                Predicted_Class, Mode = mode(L[1:])
                v = np.array([1 - Mode / float(bh_iterations)])
                Variation[t] = v

            sort_values = Variation.flatten()
            x_pool_index = sort_values.argsort()[-Queries:][::-1]

        elif acq == 'mean_std':
            # Mean-std acquisition, following the Gal-style dropout baseline; names
            # are adapted to this script (X_pool_Dropout and bh_iterations replace
            # the original X_Pool_Dropout / dropout_iterations, and predict_proba
            # provides one stochastic forward pass as in the branches above).
            All_Dropout_Scores = np.zeros(shape=(X_pool_Dropout.shape[0], nb_classes))

            for d in range(bh_iterations):
                dropout_score = model.predict_proba(X_pool_Dropout)
                All_Dropout_Scores = np.append(All_Dropout_Scores, dropout_score, axis=1)

            All_Std = np.zeros(shape=(X_pool_Dropout.shape[0], nb_classes))
            BayesSegnet_Sigma = np.zeros(shape=(X_pool_Dropout.shape[0], 1))

            for t in range(X_pool_Dropout.shape[0]):
                for r in range(nb_classes):
                    # class-r probabilities across samples sit in columns
                    # r + nb_classes*(d+1); the first nb_classes columns are the zero init
                    L = np.array([0])
                    for d_iter in range(bh_iterations):
                        L = np.append(L, All_Dropout_Scores[t, r + nb_classes * (d_iter + 1)])
                    All_Std[t, r] = np.std(L[1:])
                BayesSegnet_Sigma[t, 0] = All_Std[t, :].sum()

            a_1d = BayesSegnet_Sigma.flatten()
            x_pool_index = a_1d.argsort()[-Queries:][::-1]
            
        elif acq == 'random':
            #x_pool_index = np.asarray(random.sample(range(0, 38000), Queries))
            x_pool_index = np.random.choice(range(pool_size), Queries, replace=False)


        # END ACQUISITION
        #####################################


        Pooled_X = X_pool_Dropout[x_pool_index, :, :, :]
        Pooled_Y = y_pool_Dropout[x_pool_index] 
        delete_Pool_X = np.delete(pool_x, (pool_subset_dropout), axis=0)
        delete_Pool_Y = np.delete(pool_y, (pool_subset_dropout), axis=0)        
        delete_Pool_X_Dropout = np.delete(X_pool_Dropout, (x_pool_index), axis=0)
        delete_Pool_Y_Dropout = np.delete(y_pool_Dropout, (x_pool_index), axis=0)
        # rebuild the pool without the acquired points
        pool_x = np.concatenate((delete_Pool_X, delete_Pool_X_Dropout), axis=0)
        pool_y = np.concatenate((delete_Pool_Y, delete_Pool_Y_Dropout), axis=0)
        train_x = np.concatenate((train_x, Pooled_X), axis=0)
        train_y = np.concatenate((train_y, Pooled_Y), axis=0).astype('float32')


        if params_reset == 'random':  # don't warm start (TODO!)
            if arch == 'hyperCNN':
                model = HyperCNN(lbda=lbda,
                                 perdatapoint=perdatapoint,
                                 prior=prior,
                                 coupling=coupling,
                                 kernel_width=4,
                                 pad='valid',
                                 stride=1,
                                 extra_linear=extra_linear)
                                 #dataset=dataset)
            elif arch == 'CNN':
                model = MCdropoutCNN(kernel_width=4,
                                 pad='valid',
                                 stride=1)
            elif arch == 'CNN_spatial_dropout':
                model = MCdropoutCNN(dropout='spatial',
                                 kernel_width=4,
                                 pad='valid',
                                 stride=1)
            elif arch == 'CNN_dropout':
                model = MCdropoutCNN(dropout=1,
                                 kernel_width=4,
                                 pad='valid',
                                 stride=1)
            else:
                raise Exception('no model named `{}`'.format(arch))
        elif params_reset == 'deterministic':
            model.call_reset('init')
        elif params_reset == 'pretrained':
            model.call_reset('pretrained')
    
        recs = train_model(model.train_func, model.predict,
                           train_x[:size], train_y[:size],
                           valid_x, valid_y,
                           lr0, lrdecay, bs, epochs)
   

        valid_accuracy = test_model(model.predict_proba, valid_x, valid_y)   
        print "                                                          Valid Accuracy", valid_accuracy
        all_valid_accuracy = np.append(all_valid_accuracy, valid_accuracy)

        if test_eval:
            test_accuracy = test_model(model.predict_proba, test_x, test_y)   
            print "                                                          Test Accuracy", test_accuracy
            all_accuracy = np.append(all_accuracy, test_accuracy)

    return all_accuracy, all_valid_accuracy
    dataset = args.dataset
    anneal = args.anneal
    if args.prior == 'log_normal':
        prior = log_normal
    elif args.prior == 'log_laplace':
        prior = log_laplace
    else:
        raise Exception('no prior named `{}`'.format(args.prior))
    size = max(10, min(50000, args.size))

    print '\tloading dataset'
    if 1:
        if dataset == 'mnist':
            filename = '/data/lisa/data/mnist.pkl.gz'
            train_x, train_y, valid_x, valid_y, test_x, test_y = \
                load_mnist(filename)
            train_x = train_x.reshape((-1, 1, 28, 28))
            valid_x = valid_x.reshape((-1, 1, 28, 28))
            test_x = test_x.reshape((-1, 1, 28, 28))
            input_channels = 1
            input_shape = (1, 28, 28)
            n_classes = 10
            n_convlayers = 2
            n_channels = [20, 50]
            kernel_size = 5
            n_mlplayers = 1
            n_units = 500
            stride = 1
            pad = 'valid'
            nonl = rectify
            pool_per = 1
def active_learning(acquisition_iterations):

    bh_iterations = 100
    nb_classes = 10
    Queries = 10
    all_accuracy = 0

    acquisition_iterations = 98

    filename = '../../mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)
    train_x = train_x.reshape(50000,1,28,28)
    valid_x = valid_x.reshape(10000,1,28,28)
    test_x = test_x.reshape(10000,1,28,28)
        
    train_x, train_y, pool_x, pool_y = split_train_pool_data(train_x, train_y)

    train_y_multiclass = train_y.argmax(1)


    train_x, train_y = get_initial_training_data(train_x, train_y_multiclass)

    print ("Initial Training Data", train_x.shape)


    model = HyperCNN(lbda=lbda,
                     perdatapoint=perdatapoint,
                     prior=prior,
                     kernel_width=4,
                     pad='valid',
                     stride=1,
                     coupling=coupling)
    
    
    train_y = train_y.astype('float32')
    recs = train_model(model.train_func,model.predict,
                       train_x[:size],train_y[:size],
                       valid_x,valid_y,
                       lr0,lrdecay,bs,epochs)
   
    test_accuracy = test_model(model.predict_proba, test_x, test_y)

    print ("Test Accuracy", test_accuracy)

    all_accuracy = test_accuracy


    for i in range(acquisition_iterations):

        print('POOLING ITERATION', i)
        pool_subset = 2000

        pool_subset_dropout = np.asarray(
            random.sample(range(0, pool_x.shape[0]), pool_subset))

        X_pool_Dropout = pool_x[pool_subset_dropout, :, :, :]
        y_pool_Dropout = pool_y[pool_subset_dropout]

        score_All = np.zeros(shape=(X_pool_Dropout.shape[0], nb_classes))
        All_Entropy_BH = np.zeros(shape=X_pool_Dropout.shape[0])

        all_bh_classes = np.zeros(shape=(X_pool_Dropout.shape[0], bh_iterations))

        for d in range(bh_iterations):
            bh_score = model.predict_proba(X_pool_Dropout)
            score_All = score_All + bh_score

            bh_score_log = np.log2(bh_score)
            Entropy_Compute = -np.multiply(bh_score, bh_score_log)

            Entropy_Per_BH = np.sum(Entropy_Compute, axis=1)

            All_Entropy_BH = All_Entropy_BH + Entropy_Per_BH

            bh_classes = np.max(bh_score, axis=1)
            all_bh_classes[:, d] = bh_classes



        ### for plotting uncertainty
        predicted_class = np.max(all_bh_classes, axis=1)
        predicted_class_std = np.std(all_bh_classes, axis=1)

        Avg_Pi = np.divide(score_All, bh_iterations)
        Log_Avg_Pi = np.log2(Avg_Pi)
        Entropy_Avg_Pi = - np.multiply(Avg_Pi, Log_Avg_Pi)
        Entropy_Average_Pi = np.sum(Entropy_Avg_Pi, axis=1)

        G_X = Entropy_Average_Pi

        Average_Entropy = np.divide(All_Entropy_BH, bh_iterations)
        F_X = Average_Entropy
        U_X = G_X - F_X

        sort_values = U_X.flatten()
        x_pool_index = sort_values.argsort()[-Queries:][::-1]
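        # argsort is ascending, so the last `Queries` indices have the highest
        # BALD scores; [::-1] just reorders them to descending.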


        Pooled_X = X_pool_Dropout[x_pool_index, :, :, :]
        Pooled_Y = y_pool_Dropout[x_pool_index] 

        delete_Pool_X = np.delete(pool_x, (pool_subset_dropout), axis=0)
        delete_Pool_Y = np.delete(pool_y, (pool_subset_dropout), axis=0)        

        delete_Pool_X_Dropout = np.delete(X_pool_Dropout, (x_pool_index), axis=0)
        delete_Pool_Y_Dropout = np.delete(y_pool_Dropout, (x_pool_index), axis=0)


        # rebuild the pool without the acquired points
        pool_x = np.concatenate((delete_Pool_X, delete_Pool_X_Dropout), axis=0)
        pool_y = np.concatenate((delete_Pool_Y, delete_Pool_Y_Dropout), axis=0)



        train_x = np.concatenate((train_x, Pooled_X), axis=0)
        train_y = np.concatenate((train_y, Pooled_Y), axis=0).astype('float32')


        if 0:  # don't warm start
            model = HyperCNN(lbda=lbda,
                             perdatapoint=perdatapoint,
                             prior=prior,
                             kernel_width=4,
                             pad='valid',
                             stride=1,
                             coupling=coupling)
    
        recs = train_model(model.train_func, model.predict,
                           train_x[:size], train_y[:size],
                           valid_x, valid_y,
                           lr0, lrdecay, bs, epochs)
   

        test_accuracy = test_model(model.predict_proba, test_x, test_y)   

        print ("Test Accuracy", test_accuracy)

        all_accuracy = np.append(all_accuracy, test_accuracy)


    return all_accuracy