def main():
    """
    MNIST example
    weight norm reparameterized MLP with prior on rescaling parameters
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--perdatapoint', action='store_true')
    parser.add_argument('--coupling', action='store_true')
    parser.add_argument('--size', default=10000, type=int)
    parser.add_argument('--lrdecay', action='store_true')
    parser.add_argument('--lr0', default=0.1, type=float)
    parser.add_argument('--lbda', default=0.01, type=float)
    parser.add_argument('--bs', default=50, type=int)
    args = parser.parse_args()
    print args

    perdatapoint = args.perdatapoint
    coupling = 1  # args.coupling
    lr0 = args.lr0
    lrdecay = args.lrdecay
    lbda = np.cast[floatX](args.lbda)
    bs = args.bs
    size = max(10, min(50000, args.size))
    clip_grad = 100
    max_norm = 100

    # load dataset
    filename = '/data/lisa/data/mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)

    input_var = T.matrix('input_var')
    target_var = T.matrix('target_var')
    dataset_size = T.scalar('dataset_size')
    lr = T.scalar('lr')

    # 784 -> 200 -> 10; only the rescaling parameters (one per output unit)
    # are generated by the hypernet, hence num_params sums the output widths
    weight_shapes = [(784, 200), (200, 10)]
    num_params = sum(ws[1] for ws in weight_shapes)

    if perdatapoint:
        wd1 = input_var.shape[0]
    else:
        wd1 = 1

    # stochastic hypernet
    ep = srng.normal(std=0.01, size=(wd1, num_params), dtype=floatX)
    logdets_layers = []
    h_layer = lasagne.layers.InputLayer([None, num_params])

    layer_temp = LinearFlowLayer(h_layer)
    h_layer = IndexLayer(layer_temp, 0)
    logdets_layers.append(IndexLayer(layer_temp, 1))

    if coupling:
        layer_temp = CoupledDenseLayer(h_layer, 200)
        h_layer = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

        h_layer = PermuteLayer(h_layer, num_params)

        layer_temp = CoupledDenseLayer(h_layer, 200)
        h_layer = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

    weights = lasagne.layers.get_output(h_layer, ep)

    # primary net
    t = np.cast['int32'](0)
    layer = lasagne.layers.InputLayer([None, 784])
    inputs = {layer: input_var}
    for ws in weight_shapes:
        num_param = ws[1]
        w_layer = lasagne.layers.InputLayer((None, ws[1]))
        weight = weights[:, t:t + num_param].reshape((wd1, ws[1]))
        inputs[w_layer] = weight
        layer = stochasticDenseLayer2([layer, w_layer], ws[1])
        print layer.output_shape
        t += num_param

    layer.nonlinearity = nonlinearities.softmax
    y = T.clip(get_output(layer, inputs), 0.001, 0.999)  # stability

    # loss terms
    logdets = sum([get_output(logdet, ep) for logdet in logdets_layers])
    logqw = -(0.5 * (ep**2).sum(1) + 0.5 * T.log(2 * np.pi) * num_params + logdets)
    #logpw = log_normal(weights, 0., -T.log(lbda)).sum(1)
    logpw = log_stdnormal(weights).sum(1)
    kl = (logqw - logpw).mean()
    logpyx = -cc(y, target_var).mean()
    loss = -(logpyx - kl / T.cast(dataset_size, floatX))

    params = lasagne.layers.get_all_params([h_layer, layer])
    grads = T.grad(loss, params)
    mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
    cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]
    updates = lasagne.updates.adam(cgrads, params, learning_rate=lr)

    train = theano.function([input_var, target_var, dataset_size, lr],
                            loss, updates=updates)
    predict = theano.function([input_var], y.argmax(1))

    records = train_model(train, predict,
                          train_x[:size], train_y[:size],
                          valid_x, valid_y,
                          lr0, lrdecay, bs)
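# Note on the loss above: it is the negative ELBO. With w = f(eps) given by
# the flow, the change-of-variables formula yields (D = num_params, matching
# `logqw` in the code):
#
#     log q(w) = log N(eps; 0, I) - sum_k log|det J_k|
#              = -0.5*||eps||^2 - 0.5*D*log(2*pi) - logdets
#
# and the objective minimized is
#
#     loss = -( E_q[log p(y|x,w)] - (E_q[log q(w) - log p(w)]) / N )
#
# where N = dataset_size rescales the KL term for minibatch training.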
    locals().update(args.__dict__)  # NB: updating locals() inside a function has no reliable effect
    print args
    size = max(10, min(50000, args.size))
    print "size", size

    # TODO: these seem large!
    clip_grad = 100
    max_norm = 1000

    ###########################
    # load dataset
    # TODO: get_dataset(dataset)
    filename = '/data/lisa/data/mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)

    if 1:
        ###########################
        # theano variables
        input_var = T.matrix('input_var')
        target_var = T.matrix('target_var')
        dataset_size = T.scalar('dataset_size')
        lr = T.scalar('lr')

        ###########################
        # primary net architecture
        ninp, nout = train_x.shape[1], train_y.shape[1]
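# For reference, a minimal sketch of the `load_mnist` helper used throughout,
# assuming the standard mnist.pkl.gz layout and one-hot float32 targets (to
# match `target_var = T.matrix(...)` and the categorical cross-entropy `cc`).
# The repo's actual helper may differ in detail.
import gzip
import cPickle as pickle

import numpy as np


def load_mnist(filename):
    # mnist.pkl.gz stores three (images, labels) pairs
    with gzip.open(filename, 'rb') as f:
        (tr_x, tr_y), (va_x, va_y), (te_x, te_y) = pickle.load(f)

    def one_hot(y, n_classes=10):
        out = np.zeros((len(y), n_classes), dtype='float32')
        out[np.arange(len(y)), y] = 1.
        return out

    return (tr_x.astype('float32'), one_hot(tr_y),
            va_x.astype('float32'), one_hot(va_y),
            te_x.astype('float32'), one_hot(te_y))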
def active_learning(acquisition_iterations):
    bh_iterations = 100
    nb_classes = 10
    Queries = 10
    all_accuracy = 0
    acquisition_iterations = 98

    filename = '../../mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)
    train_x = train_x.reshape(50000, 1, 28, 28)
    valid_x = valid_x.reshape(10000, 1, 28, 28)
    test_x = test_x.reshape(10000, 1, 28, 28)

    train_x, train_y, pool_x, pool_y = split_train_pool_data(train_x, train_y)
    train_y_multiclass = train_y.argmax(1)
    train_x, train_y = get_initial_training_data(train_x, train_y_multiclass)
    print("Initial Training Data", train_x.shape)

    model = HyperCNN(lbda=lbda,
                     perdatapoint=perdatapoint,
                     prior=prior,
                     kernel_width=4,
                     pad='valid',
                     stride=1,
                     coupling=coupling)

    recs = train_model(model.train_func, model.predict,
                       train_x[:size], train_y[:size],
                       valid_x, valid_y,
                       lr0, lrdecay, bs, epochs)

    test_accuracy = test_model(model.predict_proba, test_x, test_y)
    all_accuracy = test_accuracy
    print("Training Set Size", train_x.shape)
    print("Test Accuracy", test_accuracy)

    for i in range(acquisition_iterations):
        print('POOLING ITERATION', i)
        pool_subset = 2000
        pool_subset_dropout = np.asarray(
            random.sample(range(0, pool_x.shape[0]), pool_subset))
        X_pool_Dropout = pool_x[pool_subset_dropout, :, :, :]
        y_pool_Dropout = pool_y[pool_subset_dropout]

        # variation-ratio acquisition
        All_BH_Classes = np.zeros(shape=(X_pool_Dropout.shape[0], 1))
        for d in range(bh_iterations):
            bh_score = model.predict(X_pool_Dropout)
            bh_score = np.array([bh_score]).T
            All_BH_Classes = np.append(All_BH_Classes, bh_score, axis=1)

        Variation = np.zeros(shape=(X_pool_Dropout.shape[0]))
        for t in range(X_pool_Dropout.shape[0]):
            L = np.array([0])
            for d_iter in range(bh_iterations):
                L = np.append(L, All_BH_Classes[t, d_iter + 1])
            Predicted_Class, Mode = mode(L[1:])
            v = np.array([1 - Mode / float(bh_iterations)])
            Variation[t] = v

        sort_values = Variation.flatten()
        x_pool_index = sort_values.argsort()[-Queries:][::-1]

        Pooled_X = X_pool_Dropout[x_pool_index, :, :, :]
        Pooled_Y = y_pool_Dropout[x_pool_index]

        # remove the sampled subset from the pool, then put back its
        # un-acquired points
        delete_Pool_X = np.delete(pool_x, (pool_subset_dropout), axis=0)
        delete_Pool_Y = np.delete(pool_y, (pool_subset_dropout), axis=0)
        delete_Pool_X_Dropout = np.delete(X_pool_Dropout, (x_pool_index), axis=0)
        delete_Pool_Y_Dropout = np.delete(y_pool_Dropout, (x_pool_index), axis=0)
        pool_x = np.concatenate((delete_Pool_X, delete_Pool_X_Dropout), axis=0)
        pool_y = np.concatenate((delete_Pool_Y, delete_Pool_Y_Dropout), axis=0)

        train_x = np.concatenate((train_x, Pooled_X), axis=0)
        train_y = np.concatenate((train_y, Pooled_Y), axis=0)

        if 0:  # don't warm start
            model = HyperCNN(lbda=lbda,
                             perdatapoint=perdatapoint,
                             prior=prior,
                             kernel_width=4,
                             pad='valid',
                             stride=1,
                             coupling=coupling)

        recs = train_model(model.train_func, model.predict,
                           train_x[:size], train_y[:size],
                           valid_x, valid_y,
                           lr0, lrdecay, bs, epochs)

        test_accuracy = test_model(model.predict_proba, test_x, test_y)
        print("Training Set Size", train_x.shape)
        print("Test Accuracy", test_accuracy)
        all_accuracy = np.append(all_accuracy, test_accuracy)

    return all_accuracy
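# Sketches of the pool-splitting helpers assumed above (two-argument variant;
# the split point and seed-set size are hypothetical, the real helpers live
# elsewhere in the repo):
def split_train_pool_data(train_x, train_y, n_train=10000):
    # keep the first n_train points for training, the rest as acquisition pool
    return (train_x[:n_train], train_y[:n_train],
            train_x[n_train:], train_y[n_train:])


def get_initial_training_data(train_x, train_y_multiclass, per_class=2):
    # start from a small class-balanced seed set, returned with one-hot labels
    idx = np.concatenate([np.where(train_y_multiclass == c)[0][:per_class]
                          for c in range(10)])
    one_hot = np.zeros((len(idx), 10), dtype='float32')
    one_hot[np.arange(len(idx)), train_y_multiclass[idx]] = 1.
    return train_x[idx], one_hot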
def main():
    """
    MNIST example
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--perdatapoint', action='store_true')
    parser.add_argument('--coupling', action='store_true')
    parser.add_argument('--size', default=10000, type=int)
    parser.add_argument('--lrdecay', action='store_true')
    parser.add_argument('--lr0', default=0.1, type=float)
    parser.add_argument('--lbda', default=10, type=float)
    parser.add_argument('--bs', default=50, type=int)
    args = parser.parse_args()
    print args

    perdatapoint = args.perdatapoint
    coupling = args.coupling
    size = max(10, min(50000, args.size))
    clip_grad = 10
    max_norm = 1000

    # load dataset
    filename = '/data/lisa/data/mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)

    input_var = T.matrix('input_var')
    target_var = T.matrix('target_var')
    dataset_size = T.scalar('dataset_size')
    lr = T.scalar('lr')

    # 784 -> 20 -> 20 -> 10; here the hypernet generates the full weight
    # matrices, hence num_params sums the full shapes
    weight_shapes = [(784, 20), (20, 20), (20, 10)]
    num_params = sum(np.prod(ws) for ws in weight_shapes)

    if perdatapoint:
        wd1 = input_var.shape[0]
    else:
        wd1 = 1

    # stochastic hypernet
    ep = srng.normal(size=(wd1, num_params), dtype=floatX)
    logdets_layers = []
    h_layer = lasagne.layers.InputLayer([None, num_params])

    layer_temp = LinearFlowLayer(h_layer)
    h_layer = IndexLayer(layer_temp, 0)
    logdets_layers.append(IndexLayer(layer_temp, 1))

    if coupling:
        layer_temp = CoupledConv1DLayer(h_layer, 16, 5)
        h_layer = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

        h_layer = PermuteLayer(h_layer, num_params)

        layer_temp = CoupledConv1DLayer(h_layer, 16, 5)
        h_layer = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

    weights = lasagne.layers.get_output(h_layer, ep)

    # primary net
    t = np.cast['int32'](0)
    layer = lasagne.layers.InputLayer([None, 784])
    inputs = {layer: input_var}
    for ws in weight_shapes:
        num_param = np.prod(ws)
        print t, t + num_param
        w_layer = lasagne.layers.InputLayer((None,) + ws)
        weight = weights[:, t:t + num_param].reshape((wd1,) + ws)
        inputs[w_layer] = weight
        layer = stochasticDenseLayer([layer, w_layer], ws[1])
        t += num_param

    layer.nonlinearity = nonlinearities.softmax
    y = T.clip(get_output(layer, inputs), 0.001, 0.999)  # stability

    # loss terms
    logdets = sum([get_output(logdet, ep) for logdet in logdets_layers])
    logqw = -(0.5 * (ep**2).sum(1) + 0.5 * T.log(2 * np.pi) * num_params + logdets)
    logpw = log_stdnormal(weights).sum(1)
    kl = (logqw - logpw).mean()
    logpyx = -cc(y, target_var).mean()
    loss = -(logpyx - kl / T.cast(dataset_size, floatX))

    params = lasagne.layers.get_all_params([h_layer, layer])
    grads = T.grad(loss, params)
    mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
    cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]
    updates = lasagne.updates.nesterov_momentum(cgrads, params,
                                                learning_rate=lr)

    train = theano.function([input_var, target_var, dataset_size, lr],
                            loss, updates=updates)
    predict = theano.function([input_var], y.argmax(1))

    records = train_model(train, predict,
                          train_x[:size], train_y[:size],
                          valid_x, valid_y)

    # MC evaluation of the predictive distribution
    output_probs = theano.function([input_var], y)
    MCt = np.zeros((100, 1000, 10))
    MCv = np.zeros((100, 1000, 10))
    for i in range(100):
        MCt[i] = output_probs(train_x[:1000])
        MCv[i] = output_probs(valid_x[:1000])

    tr = np.equal(MCt.mean(0).argmax(-1), train_y[:1000].argmax(-1)).mean()
    va = np.equal(MCv.mean(0).argmax(-1), valid_y[:1000].argmax(-1)).mean()
    print "train perf=", tr
    print "valid perf=", va

    for ii in range(15):
        print np.round(MCt[ii][0] * 1000)
def main():
    """
    MNIST example
    convolutional network with hypernet-generated filters
    and prior on the weights
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--coupling', action='store_true')
    parser.add_argument('--size', default=10000, type=int)
    parser.add_argument('--lrdecay', action='store_true')
    parser.add_argument('--lr0', default=0.1, type=float)
    parser.add_argument('--lbda', default=0.01, type=float)
    parser.add_argument('--bs', default=50, type=int)
    args = parser.parse_args()
    print args

    coupling = args.coupling
    lr0 = args.lr0
    lrdecay = args.lrdecay
    lbda = np.cast[floatX](args.lbda)
    bs = args.bs
    size = max(10, min(50000, args.size))
    clip_grad = 5
    max_norm = 10

    # load dataset
    filename = '/data/lisa/data/mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)
    train_x = train_x.reshape(50000, 1, 28, 28)
    valid_x = valid_x.reshape(10000, 1, 28, 28)
    test_x = test_x.reshape(10000, 1, 28, 28)

    input_var = T.tensor4('input_var')
    target_var = T.matrix('target_var')
    dataset_size = T.scalar('dataset_size')
    lr = T.scalar('lr')

    # conv stack, then a 10-way softmax read-out
    weight_shapes = [(16, 1, 5, 5),   # -> (None, 16, 14, 14)
                     (16, 16, 5, 5),  # -> (None, 16,  7,  7)
                     (16, 16, 5, 5)]  # -> (None, 16,  4,  4)
    num_params = sum(np.prod(ws) for ws in weight_shapes) + 10

    wd1 = 1

    # stochastic hypernet
    ep = srng.normal(std=0.01, size=(wd1, num_params), dtype=floatX)
    logdets_layers = []
    h_layer = lasagne.layers.InputLayer([None, num_params])

    layer_temp = LinearFlowLayer(h_layer)
    h_layer = IndexLayer(layer_temp, 0)
    logdets_layers.append(IndexLayer(layer_temp, 1))

    if coupling:
        layer_temp = CoupledDenseLayer(h_layer, 200)
        h_layer = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

        h_layer = PermuteLayer(h_layer, num_params)

        layer_temp = CoupledDenseLayer(h_layer, 200)
        h_layer = IndexLayer(layer_temp, 0)
        logdets_layers.append(IndexLayer(layer_temp, 1))

    weights = lasagne.layers.get_output(h_layer, ep)

    # primary net
    t = np.cast['int32'](0)
    layer = lasagne.layers.InputLayer([None, 1, 28, 28])
    inputs = {layer: input_var}
    for ws in weight_shapes:
        num_param = np.prod(ws)
        weight = weights[:, t:t + num_param].reshape(ws)
        num_filters = ws[0]
        filter_size = ws[2]
        stride = 2
        pad = 'same'
        layer = stochasticConv2DLayer([layer, weight],
                                      num_filters, filter_size, stride, pad)
        print layer.output_shape
        t += num_param

    w_layer = lasagne.layers.InputLayer((None, 10))
    weight = weights[:, t:t + 10].reshape((wd1, 10))
    inputs[w_layer] = weight
    layer = stochasticDenseLayer2([layer, w_layer], 10,
                                  nonlinearity=nonlinearities.softmax)

    y = T.clip(get_output(layer, inputs), 0.001, 0.999)  # stability

    # loss terms
    logdets = sum([get_output(logdet, ep) for logdet in logdets_layers])
    logqw = -(0.5 * (ep**2).sum(1) + 0.5 * T.log(2 * np.pi) * num_params + logdets)
    logpw = log_normal(weights, 0., -T.log(lbda)).sum(1)
    #logpw = log_stdnormal(weights).sum(1)
    kl = (logqw - logpw).mean()
    logpyx = -cc(y, target_var).mean()
    loss = -(logpyx - kl / T.cast(dataset_size, floatX))

    params = lasagne.layers.get_all_params([layer])[1:]  # excluding rand state
    grads = T.grad(loss, params)
    mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
    cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]
    updates = lasagne.updates.adam(cgrads, params, learning_rate=lr)

    train = theano.function([input_var, target_var, dataset_size, lr],
                            loss, updates=updates)
    predict = theano.function([input_var], y.argmax(1))

    records = train_model(train, predict,
                          train_x[:size], train_y[:size],
                          valid_x, valid_y,
                          lr0, lrdecay, bs)
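# The Gaussian prior above calls `log_normal(x, mean, log_var)`, so passing
# `-T.log(lbda)` as the log-variance gives a prior variance of 1/lbda. A
# sketch of the assumed helper (the repo's actual definition may differ):
def log_normal(x, mean, log_var, eps=0.0):
    c = -0.5 * T.log(2 * np.pi)
    return c - log_var / 2. - (x - mean) ** 2 / (2. * T.exp(log_var) + eps)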
    # MC evaluation: average 100 stochastic forward passes, then take the argmax
    output_probs = theano.function([input_var], y)
    MCt = np.zeros((100, 1000, 10))
    MCv = np.zeros((100, 1000, 10))
    for i in range(100):
        MCt[i] = output_probs(train_x[:1000])
        MCv[i] = output_probs(valid_x[:1000])

    tr = np.equal(MCt.mean(0).argmax(-1), train_y[:1000].argmax(-1)).mean()
    va = np.equal(MCv.mean(0).argmax(-1), valid_y[:1000].argmax(-1)).mean()
    print "train perf=", tr
    print "valid perf=", va

    for ii in range(15):
        print np.round(MCt[ii][0] * 1000)
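# The same evaluation wrapped as a reusable helper (a sketch; it assumes
# `output_probs` re-samples the hypernet noise on every call, as the
# `srng.normal` node inside the graph does):
def mc_predict(output_probs, x, n_samples=100):
    # Monte Carlo predictive mean over n_samples weight draws
    probs = np.mean([output_probs(x) for _ in range(n_samples)], axis=0)
    return probs.argmax(-1)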
def active_learning(acquisition_iterations):
    bh_iterations = 1000
    nb_classes = 10
    Queries = 10
    all_accuracy = 0

    filename = '../../mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)

    train_x, train_y, valid_x, valid_y, pool_x, pool_y = split_train_pool_data(
        train_x, train_y, valid_x, valid_y)
    train_x, train_y = get_initial_training_data(train_x, train_y)
    print("Training Set Size", train_x.shape)

    model = MLPWeightNorm_BHN(lbda=lbda,
                              perdatapoint=perdatapoint,
                              prior=prior,
                              coupling=coupling)

    recs = train_model(model.train_func, model.predict,
                       train_x[:size], train_y[:size],
                       valid_x, valid_y,
                       lr0, lrdecay, bs, epochs)

    test_accuracy = test_model(model.predict_proba, test_x, test_y)
    print("Test Accuracy", test_accuracy)
    all_accuracy = test_accuracy

    for i in range(acquisition_iterations):
        print('POOLING ITERATION', i)
        pool_subset = 2000
        pool_subset_dropout = np.asarray(
            random.sample(range(0, pool_x.shape[0]), pool_subset))
        X_pool_Dropout = pool_x[pool_subset_dropout, :]
        y_pool_Dropout = pool_y[pool_subset_dropout]

        # max-entropy acquisition
        score_All = np.zeros(shape=(X_pool_Dropout.shape[0], nb_classes))
        for d in range(bh_iterations):
            bh_score = model.predict_proba(X_pool_Dropout)
            score_All = score_All + bh_score

        Avg_Pi = np.divide(score_All, bh_iterations)
        Log_Avg_Pi = np.log2(Avg_Pi)
        Entropy_Avg_Pi = -np.multiply(Avg_Pi, Log_Avg_Pi)
        Entropy_Average_Pi = np.sum(Entropy_Avg_Pi, axis=1)
        U_X = Entropy_Average_Pi

        sort_values = U_X.flatten()
        x_pool_index = sort_values.argsort()[-Queries:][::-1]

        Pooled_X = X_pool_Dropout[x_pool_index, :]
        Pooled_Y = y_pool_Dropout[x_pool_index]

        # remove the sampled subset from the pool, then put back its
        # un-acquired points
        delete_Pool_X = np.delete(pool_x, (pool_subset_dropout), axis=0)
        delete_Pool_Y = np.delete(pool_y, (pool_subset_dropout), axis=0)
        delete_Pool_X_Dropout = np.delete(X_pool_Dropout, (x_pool_index), axis=0)
        delete_Pool_Y_Dropout = np.delete(y_pool_Dropout, (x_pool_index), axis=0)
        pool_x = np.concatenate((delete_Pool_X, delete_Pool_X_Dropout), axis=0)
        pool_y = np.concatenate((delete_Pool_Y, delete_Pool_Y_Dropout), axis=0)

        train_x = np.concatenate((train_x, Pooled_X), axis=0)
        train_y = np.concatenate((train_y, Pooled_Y), axis=0)
        print("Training Set Size", train_x.shape)

        recs = train_model(model.train_func, model.predict,
                           train_x[:size], train_y[:size],
                           valid_x, valid_y,
                           lr0, lrdecay, bs, epochs)

        test_accuracy = test_model(model.predict_proba, test_x, test_y)
        print("Test Accuracy", test_accuracy)
        all_accuracy = np.append(all_accuracy, test_accuracy)

    return all_accuracy
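# The entropy computation above, as a compact helper (a sketch; the small
# additive guard avoids the log2(0) NaNs the inline version can produce):
def predictive_entropy(mc_mean_probs):
    # mc_mean_probs: (n_points, n_classes) MC-averaged predictive probabilities
    return -np.sum(mc_mean_probs * np.log2(mc_mean_probs + 1e-12), axis=1)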
def active_learning(acquisition_iterations):
    t0 = time.time()
    bh_iterations = 100
    nb_classes = 10
    Queries = 10
    all_accuracy = 0
    acquisition_iterations = 98

    filename = '../../mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)
    train_x = train_x.reshape(50000, 1, 28, 28)
    valid_x = valid_x.reshape(10000, 1, 28, 28)
    test_x = test_x.reshape(10000, 1, 28, 28)

    train_x, train_y, pool_x, pool_y = split_train_pool_data(train_x, train_y)
    train_y_multiclass = train_y.argmax(1)
    train_x, train_y = get_initial_training_data(train_x, train_y_multiclass)
    train_y = train_y.astype('float32')
    print "Initial Training Data", train_x.shape

    # select model
    if arch == 'hyperCNN':
        model = HyperCNN(lbda=lbda,
                         perdatapoint=perdatapoint,
                         prior=prior,
                         coupling=coupling,
                         kernel_width=4,
                         pad='valid',
                         stride=1,
                         extra_linear=extra_linear)
                         #dataset=dataset)
    elif arch == 'CNN':
        model = MCdropoutCNN(kernel_width=4, pad='valid', stride=1)
    elif arch == 'CNN_spatial_dropout':
        model = MCdropoutCNN(dropout='spatial',
                             kernel_width=4, pad='valid', stride=1)
    elif arch == 'CNN_dropout':
        model = MCdropoutCNN(dropout=1,
                             kernel_width=4, pad='valid', stride=1)
    else:
        raise Exception('no model named `{}`'.format(arch))

    if save:
        model.save(save_path + '_params_init.npy')

    # TODO: pretraining
    if params_reset == 'pretrained':
        # train the model to 100% train accuracy on the initial train set
        # TODO: we could also try training to 100% every time...
        # TODO: and the last time, we should train until overfitting
        # TODO: we also need to consider cross-validating the prior (do we
        #       even USE a prior for the dropout net?? we're supposed to!!!)
        # TODO: ...and we could also use the validation set for
        #       early-stopping after every acquisition
        tr_acc = 0.
        epochs = 0
        print "pretraining..."
        while tr_acc < 1.:
            epochs += 1
            print " epoch", epochs
            tr_acc = train_epoch(model.train_func, model.predict,
                                 train_x[:size], train_y[:size],
                                 valid_x, valid_y,
                                 lr0, lrdecay, bs)
        model.add_reset('pretrained')
        if save:
            model.save(save_path + '_params_pretrained.npy')
        print "pretraining completed"
    else:
        recs = train_model(model.train_func, model.predict,
                           train_x[:size], train_y[:size],
                           valid_x, valid_y,
                           lr0, lrdecay, bs, epochs)

    valid_accuracy = test_model(model.predict_proba, valid_x, valid_y)
    print " valid Accuracy", valid_accuracy
    all_valid_accuracy = valid_accuracy

    test_accuracy = test_model(model.predict_proba, test_x, test_y)
    print " Test Accuracy", test_accuracy
    all_accuracy = test_accuracy

    for i in range(acquisition_iterations):
        print 'time', time.time() - t0
        print 'POOLING ITERATION', i

        pool_subset = pool_size
        pool_subset_dropout = np.asarray(
            random.sample(range(0, pool_x.shape[0]), pool_subset))
        X_pool_Dropout = pool_x[pool_subset_dropout, :, :, :]
        y_pool_Dropout = pool_y[pool_subset_dropout]

        #####################################
        # BEGIN ACQUISITION
        if acq == 'bald':
            score_All = np.zeros(shape=(X_pool_Dropout.shape[0], nb_classes))
            All_Entropy_BH = np.zeros(shape=X_pool_Dropout.shape[0])
            all_bh_classes = np.zeros(shape=(X_pool_Dropout.shape[0],
                                             bh_iterations))

            for d in range(bh_iterations):
                bh_score = model.predict_proba(X_pool_Dropout)
                score_All = score_All + bh_score

                bh_score_log = np.log2(bh_score)
                Entropy_Compute = -np.multiply(bh_score, bh_score_log)
                Entropy_Per_BH = np.sum(Entropy_Compute, axis=1)
                All_Entropy_BH = All_Entropy_BH + Entropy_Per_BH

                bh_classes = np.max(bh_score, axis=1)
                all_bh_classes[:, d] = bh_classes

            # for plotting uncertainty
            predicted_class = np.max(all_bh_classes, axis=1)
            predicted_class_std = np.std(all_bh_classes, axis=1)

            Avg_Pi = np.divide(score_All, bh_iterations)
            Log_Avg_Pi = np.log2(Avg_Pi)
            Entropy_Avg_Pi = -np.multiply(Avg_Pi, Log_Avg_Pi)
            Entropy_Average_Pi = np.sum(Entropy_Avg_Pi, axis=1)
            G_X = Entropy_Average_Pi

            Average_Entropy = np.divide(All_Entropy_BH, bh_iterations)
            F_X = Average_Entropy

            U_X = G_X - F_X
            sort_values = U_X.flatten()
            x_pool_index = sort_values.argsort()[-Queries:][::-1]
            #print x_pool_index.shape  # 10
            #assert False

        elif acq == 'max_ent':
            score_All = np.zeros(shape=(X_pool_Dropout.shape[0], nb_classes))
            for d in range(bh_iterations):
                bh_score = model.predict_proba(X_pool_Dropout)
                score_All = score_All + bh_score

            Avg_Pi = np.divide(score_All, bh_iterations)
            Log_Avg_Pi = np.log2(Avg_Pi)
            Entropy_Avg_Pi = -np.multiply(Avg_Pi, Log_Avg_Pi)
            Entropy_Average_Pi = np.sum(Entropy_Avg_Pi, axis=1)
            U_X = Entropy_Average_Pi

            sort_values = U_X.flatten()
            x_pool_index = sort_values.argsort()[-Queries:][::-1]

        elif acq == 'var_ratio':
            All_BH_Classes = np.zeros(shape=(X_pool_Dropout.shape[0], 1))
            for d in range(bh_iterations):
                bh_score = model.predict(X_pool_Dropout)
                bh_score = np.array([bh_score]).T
                All_BH_Classes = np.append(All_BH_Classes, bh_score, axis=1)

            Variation = np.zeros(shape=(X_pool_Dropout.shape[0]))
            for t in range(X_pool_Dropout.shape[0]):
                L = np.array([0])
                for d_iter in range(bh_iterations):
                    L = np.append(L, All_BH_Classes[t, d_iter + 1])
                Predicted_Class, Mode = mode(L[1:])
                v = np.array([1 - Mode / float(bh_iterations)])
                Variation[t] = v

            sort_values = Variation.flatten()
            x_pool_index = sort_values.argsort()[-Queries:][::-1]

        elif acq == 'mean_std':
            # Bayes-SegNet-style mean-std heuristic; scores for iteration
            # d_iter, class r are assumed to sit at column
            # r + nb_classes * (d_iter + 1) after the appends below
            All_Dropout_Scores = np.zeros(shape=(X_pool_Dropout.shape[0],
                                                 nb_classes))
            for d in range(bh_iterations):
                dropout_score = model.predict_stochastic(X_pool_Dropout)
                All_Dropout_Scores = np.append(All_Dropout_Scores,
                                               dropout_score, axis=1)

            All_Std = np.zeros(shape=(X_pool_Dropout.shape[0], nb_classes))
            BayesSegnet_Sigma = np.zeros(shape=(X_pool_Dropout.shape[0], 1))
            for t in range(X_pool_Dropout.shape[0]):
                for r in range(nb_classes):
                    L = np.array([0])
                    for d_iter in range(bh_iterations):
                        L = np.append(
                            L, All_Dropout_Scores[t, r + nb_classes * (d_iter + 1)])
                    L_std = np.std(L[1:])
                    All_Std[t, r] = L_std
                E = All_Std[t, :]
                BayesSegnet_Sigma[t, 0] = sum(E)

            a_1d = BayesSegnet_Sigma.flatten()
            x_pool_index = a_1d.argsort()[-Queries:][::-1]

        elif acq == 'random':
            #x_pool_index = np.asarray(random.sample(range(0, 38000), Queries))
            x_pool_index = np.random.choice(range(pool_size), Queries,
                                            replace=False)
        # END ACQUISITION
        #####################################

        Pooled_X = X_pool_Dropout[x_pool_index, :, :, :]
        Pooled_Y = y_pool_Dropout[x_pool_index]

        # remove the sampled subset from the pool, then put back its
        # un-acquired points
        delete_Pool_X = np.delete(pool_x, (pool_subset_dropout), axis=0)
        delete_Pool_Y = np.delete(pool_y, (pool_subset_dropout), axis=0)
        delete_Pool_X_Dropout = np.delete(X_pool_Dropout, (x_pool_index), axis=0)
        delete_Pool_Y_Dropout = np.delete(y_pool_Dropout, (x_pool_index), axis=0)
        pool_x = np.concatenate((delete_Pool_X, delete_Pool_X_Dropout), axis=0)
        pool_y = np.concatenate((delete_Pool_Y, delete_Pool_Y_Dropout), axis=0)

        train_x = np.concatenate((train_x, Pooled_X), axis=0)
        train_y = np.concatenate((train_y, Pooled_Y), axis=0).astype('float32')
        #print pool_x.shape, Pooled_X.shape, train_x.shape
        #assert False

        if params_reset == 'random':  # don't warm start (TODO!)
            if arch == 'hyperCNN':
                model = HyperCNN(lbda=lbda,
                                 perdatapoint=perdatapoint,
                                 prior=prior,
                                 coupling=coupling,
                                 kernel_width=4,
                                 pad='valid',
                                 stride=1,
                                 extra_linear=extra_linear)
                                 #dataset=dataset)
            elif arch == 'CNN':
                model = MCdropoutCNN(kernel_width=4, pad='valid', stride=1)
            elif arch == 'CNN_spatial_dropout':
                model = MCdropoutCNN(dropout='spatial',
                                     kernel_width=4, pad='valid', stride=1)
            elif arch == 'CNN_dropout':
                model = MCdropoutCNN(dropout=1,
                                     kernel_width=4, pad='valid', stride=1)
            else:
                raise Exception('no model named `{}`'.format(arch))
        elif params_reset == 'deterministic':
            model.call_reset('init')
        elif params_reset == 'pretrained':
            model.call_reset('pretrained')

        recs = train_model(model.train_func, model.predict,
                           train_x[:size], train_y[:size],
                           valid_x, valid_y,
                           lr0, lrdecay, bs, epochs)

        valid_accuracy = test_model(model.predict_proba, valid_x, valid_y)
        print " Valid Accuracy", valid_accuracy
        all_valid_accuracy = np.append(all_valid_accuracy, valid_accuracy)

        if test_eval:
            test_accuracy = test_model(model.predict_proba, test_x, test_y)
            print " Test Accuracy", test_accuracy
            all_accuracy = np.append(all_accuracy, test_accuracy)

    return all_accuracy, all_valid_accuracy
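# For reference, the BALD score computed by the 'bald' branch above,
# vectorized over a stack of MC probability samples (a sketch):
def bald_scores(mc_probs):
    # mc_probs: (n_samples, n_points, n_classes) stochastic forward passes
    avg = mc_probs.mean(axis=0)
    entropy_of_avg = -np.sum(avg * np.log2(avg + 1e-12), axis=1)        # G_X
    avg_of_entropy = np.mean(
        -np.sum(mc_probs * np.log2(mc_probs + 1e-12), axis=2), axis=0)  # F_X
    return entropy_of_avg - avg_of_entropy                              # U_X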
    dataset = args.dataset
    anneal = args.anneal
    if args.prior == 'log_normal':
        prior = log_normal
    elif args.prior == 'log_laplace':
        prior = log_laplace
    else:
        raise Exception('no prior named `{}`'.format(args.prior))
    size = max(10, min(50000, args.size))

    print '\tloading dataset'
    if 1:
        if dataset == 'mnist':
            filename = '/data/lisa/data/mnist.pkl.gz'
            train_x, train_y, valid_x, valid_y, test_x, test_y = \
                load_mnist(filename)
            train_x = train_x.reshape((-1, 1, 28, 28))
            valid_x = valid_x.reshape((-1, 1, 28, 28))
            test_x = test_x.reshape((-1, 1, 28, 28))

            input_channels = 1
            input_shape = (1, 28, 28)
            n_classes = 10

            n_convlayers = 2
            n_channels = [20, 50]
            kernel_size = 5
            n_mlplayers = 1
            n_units = 500
            stride = 1
            pad = 'valid'
            nonl = rectify
            pool_per = 1
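# A minimal sketch of how the architecture fields above could be consumed
# (hypothetical builder, written against the plain Lasagne API; the actual
# constructor lives elsewhere in the repo):
def build_primary_net(input_var):
    net = lasagne.layers.InputLayer((None,) + input_shape, input_var)
    for i in range(n_convlayers):
        net = lasagne.layers.Conv2DLayer(net, num_filters=n_channels[i],
                                         filter_size=kernel_size,
                                         stride=stride, pad=pad,
                                         nonlinearity=nonl)
        if (i + 1) % pool_per == 0:  # pool every `pool_per` conv layers
            net = lasagne.layers.MaxPool2DLayer(net, pool_size=2)
    for _ in range(n_mlplayers):
        net = lasagne.layers.DenseLayer(net, n_units, nonlinearity=nonl)
    return lasagne.layers.DenseLayer(net, n_classes,
                                     nonlinearity=lasagne.nonlinearities.softmax)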
def active_learning(acquisition_iterations):
    bh_iterations = 100
    nb_classes = 10
    Queries = 10
    all_accuracy = 0
    acquisition_iterations = 98

    filename = '../../mnist.pkl.gz'
    train_x, train_y, valid_x, valid_y, test_x, test_y = load_mnist(filename)
    train_x = train_x.reshape(50000, 1, 28, 28)
    valid_x = valid_x.reshape(10000, 1, 28, 28)
    test_x = test_x.reshape(10000, 1, 28, 28)

    train_x, train_y, pool_x, pool_y = split_train_pool_data(train_x, train_y)
    train_y_multiclass = train_y.argmax(1)
    train_x, train_y = get_initial_training_data(train_x, train_y_multiclass)
    print("Initial Training Data", train_x.shape)

    model = HyperCNN(lbda=lbda,
                     perdatapoint=perdatapoint,
                     prior=prior,
                     kernel_width=4,
                     pad='valid',
                     stride=1,
                     coupling=coupling)

    train_y = train_y.astype('float32')
    recs = train_model(model.train_func, model.predict,
                       train_x[:size], train_y[:size],
                       valid_x, valid_y,
                       lr0, lrdecay, bs, epochs)

    test_accuracy = test_model(model.predict_proba, test_x, test_y)
    print("Test Accuracy", test_accuracy)
    all_accuracy = test_accuracy

    for i in range(acquisition_iterations):
        print('POOLING ITERATION', i)
        pool_subset = 2000
        pool_subset_dropout = np.asarray(
            random.sample(range(0, pool_x.shape[0]), pool_subset))
        X_pool_Dropout = pool_x[pool_subset_dropout, :, :, :]
        y_pool_Dropout = pool_y[pool_subset_dropout]

        # BALD acquisition
        score_All = np.zeros(shape=(X_pool_Dropout.shape[0], nb_classes))
        All_Entropy_BH = np.zeros(shape=X_pool_Dropout.shape[0])
        all_bh_classes = np.zeros(shape=(X_pool_Dropout.shape[0],
                                         bh_iterations))

        for d in range(bh_iterations):
            bh_score = model.predict_proba(X_pool_Dropout)
            score_All = score_All + bh_score

            bh_score_log = np.log2(bh_score)
            Entropy_Compute = -np.multiply(bh_score, bh_score_log)
            Entropy_Per_BH = np.sum(Entropy_Compute, axis=1)
            All_Entropy_BH = All_Entropy_BH + Entropy_Per_BH

            bh_classes = np.max(bh_score, axis=1)
            all_bh_classes[:, d] = bh_classes

        # for plotting uncertainty
        predicted_class = np.max(all_bh_classes, axis=1)
        predicted_class_std = np.std(all_bh_classes, axis=1)

        Avg_Pi = np.divide(score_All, bh_iterations)
        Log_Avg_Pi = np.log2(Avg_Pi)
        Entropy_Avg_Pi = -np.multiply(Avg_Pi, Log_Avg_Pi)
        Entropy_Average_Pi = np.sum(Entropy_Avg_Pi, axis=1)
        G_X = Entropy_Average_Pi

        Average_Entropy = np.divide(All_Entropy_BH, bh_iterations)
        F_X = Average_Entropy

        U_X = G_X - F_X
        sort_values = U_X.flatten()
        x_pool_index = sort_values.argsort()[-Queries:][::-1]

        Pooled_X = X_pool_Dropout[x_pool_index, :, :, :]
        Pooled_Y = y_pool_Dropout[x_pool_index]

        # remove the sampled subset from the pool, then put back its
        # un-acquired points
        delete_Pool_X = np.delete(pool_x, (pool_subset_dropout), axis=0)
        delete_Pool_Y = np.delete(pool_y, (pool_subset_dropout), axis=0)
        delete_Pool_X_Dropout = np.delete(X_pool_Dropout, (x_pool_index), axis=0)
        delete_Pool_Y_Dropout = np.delete(y_pool_Dropout, (x_pool_index), axis=0)
        pool_x = np.concatenate((delete_Pool_X, delete_Pool_X_Dropout), axis=0)
        pool_y = np.concatenate((delete_Pool_Y, delete_Pool_Y_Dropout), axis=0)

        train_x = np.concatenate((train_x, Pooled_X), axis=0)
        train_y = np.concatenate((train_y, Pooled_Y), axis=0).astype('float32')

        if 0:  # don't warm start
            model = HyperCNN(lbda=lbda,
                             perdatapoint=perdatapoint,
                             prior=prior,
                             kernel_width=4,
                             pad='valid',
                             stride=1,
                             coupling=coupling)

        recs = train_model(model.train_func, model.predict,
                           train_x[:size], train_y[:size],
                           valid_x, valid_y,
                           lr0, lrdecay, bs, epochs)

        test_accuracy = test_model(model.predict_proba, test_x, test_y)
        print("Test Accuracy", test_accuracy)
        all_accuracy = np.append(all_accuracy, test_accuracy)

    return all_accuracy
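# Example driver (hypothetical; `lbda`, `perdatapoint`, `prior`, `coupling`,
# `size`, `lr0`, `lrdecay`, `bs`, and `epochs` are module-level globals
# normally set from argparse before this is called):
if __name__ == '__main__':
    accuracies = active_learning(acquisition_iterations=98)
    np.save('active_learning_accuracies.npy', accuracies)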