def create_embedder_fn(glimpses):
    # symbolic input: a batch of 32x32 single-channel images
    X = T.tensor4("input")
    l_in = InputLayer(shape=(None, 1, 32, 32), input_var=X)
    l_arc = SimpleARC(l_in, lstm_states=512, image_size=32, attn_win=4,
                      glimpses=glimpses, fg_bias_init=0.0)
    embedding = get_output(l_arc, deterministic=True)
    embedding_fn = theano.function([X], outputs=embedding)

    # restore the pretrained ARC weights (first two tensors in the file)
    params = deserialize('ARC_OS' + '.params')
    helper.set_all_param_values(l_arc, params[:2])

    return embedding_fn
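
# Usage sketch (an assumption, not part of the original script): random
# input stands in for real Omniglot batches, and glimpses=8 is illustrative.
import numpy as np

embed = create_embedder_fn(glimpses=8)
X_batch = np.random.rand(20, 1, 32, 32).astype(np.float32)  # stand-in batch
E = embed(X_batch)  # deterministic ARC embeddings for the batch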
# learning-rate schedule (epoch -> learning rate); the dict's original
# name is cut off in this snippet, LR_SCHEDULE is assumed here
LR_SCHEDULE = {
    0: 0.0001,
    10: 0.00001,
    20: 0.000001,
}

"""
Set up all theano functions
"""
X = T.tensor4('X')
Y = T.ivector('y')

# set up theano functions to generate output by feeding data through the
# network; any test outputs should be deterministic
net = inception_v3(X)

# load network weights
d = pickle.load(open('data/pre_trained_weights/inception_v3.pkl'))
helper.set_all_param_values(net['softmax'], d['param values'])

# stack our own softmax onto the final layer
output_layer = DenseLayer(net['pool3'], num_units=10,
                          W=lasagne.init.HeNormal(), nonlinearity=softmax)

# standard output functions
output_train = lasagne.layers.get_output(output_layer)
output_test = lasagne.layers.get_output(output_layer, deterministic=True)

# set up the loss that we aim to minimize; with categorical cross-entropy
# our Y should be class indices (ints), not one-hot vectors
loss = lasagne.objectives.categorical_crossentropy(output_train, Y)
loss = loss.mean()

# set up loss functions for validation dataset
valid_loss = lasagne.objectives.categorical_crossentropy(output_test, Y)
valid_loss = valid_loss.mean()
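
# A hedged sketch of how the schedule above could drive the updates; the
# shared learning_rate variable and the nesterov_momentum choice are
# assumptions, not necessarily what the original script used.
params = lasagne.layers.get_all_params(output_layer, trainable=True)
learning_rate = theano.shared(np.float32(LR_SCHEDULE[0]))
updates = lasagne.updates.nesterov_momentum(loss, params,
                                            learning_rate=learning_rate,
                                            momentum=0.9)
train_fn = theano.function([X, Y], loss, updates=updates)

# inside the epoch loop, drop the rate at the scheduled epochs:
#     if epoch in LR_SCHEDULE:
#         learning_rate.set_value(np.float32(LR_SCHEDULE[epoch]))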
if width > 1:
    output_layer = ResNet(X, n=depth, k=width)
else:
    output_layer = ResNet(X, n=depth)

output_test = lasagne.layers.get_output(output_layer, deterministic=True)

# set up training and prediction functions
features = theano.function(inputs=[X], outputs=output_test)

output_caffe = lasagne.layers.get_output(resnet['output'])
features_caffe = theano.function(inputs=[X], outputs=output_caffe)

# load network weights
f = gzip.open('models/data/weights/resnet164_fullpreactivation.pklz', 'r')
all_params = pickle.load(f)
f.close()
helper.set_all_param_values(output_layer, all_params)

test_imgs, train_imgs = get_seperation()
#train_imgs = train_imgs[0:37]
#test_imgs = test_imgs[0:37]
img_train, img_eval = get_traineval_seperation(train_imgs)

class_ = 'sheep'

# small random initial weights for the three linear models
w1 = 0.01 * np.random.rand(10)
w2 = 0.01 * np.random.rand(4096)
w3 = 0.01 * np.random.rand(1000)

scaler1 = MinMaxScaler()
scaler2 = MinMaxScaler()
scaler3 = MinMaxScaler()

# normalize
for img_nr in img_train:
    # load image
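
# A small usage sketch for the feature extractor (assumptions: CIFAR-sized
# inputs for this ResNet; the random batch is a stand-in for real images).
X_batch = np.random.rand(8, 3, 32, 32).astype(np.float32)
feats = features(X_batch)  # deterministic forward pass
print feats.shape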
output_class = T.argmax(output_test, axis=1)

# set up training and prediction functions
predict_proba = theano.function(inputs=[X], outputs=output_test)
predict_class = theano.function(inputs=[X], outputs=output_class)

'''
Load data and make predictions
'''
test_X, test_y = load_pickle_data_test()

# load network weights
f = gzip.open('data/weights/%s%d_resnet.pklz' % (variant, depth), 'rb')
all_params = pickle.load(f)
f.close()
helper.set_all_param_values(output_layer, all_params)

# make predictions in batches so large test sets fit in memory
pred_labels = []
for j in range((test_X.shape[0] + BATCHSIZE - 1) // BATCHSIZE):
    sl = slice(j * BATCHSIZE, (j + 1) * BATCHSIZE)
    X_batch = test_X[sl]
    pred_labels.extend(predict_class(X_batch))

pred_labels = np.array(pred_labels)
print pred_labels.shape

'''
Compare differences
'''
same = 0
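
# With labels at hand, test accuracy follows directly from the batched
# predictions above (a sketch; the original continues with a manual
# `same` counter instead).
acc = np.mean(pred_labels == np.array(test_y))
print 'test accuracy:', acc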
center_y = gp[:, 0].dimshuffle([0, 'x'])
center_x = gp[:, 1].dimshuffle([0, 'x'])
delta = 1.0 - T.abs_(gp[:, 2]).dimshuffle([0, 'x'])
gamma = T.exp(1.0 - 2 * T.abs_(gp[:, 2])).dimshuffle([0, 'x', 'x'])

# map the attention parameters from [-1, 1] to image coordinates
center_y = (image_size - 1) * (center_y + 1.0) / 2.0
center_x = (image_size - 1) * (center_x + 1.0) / 2.0
delta = image_size / attn_win * delta

GPs.extend([center_y, center_x, delta])

embedding_fn = theano.function([X], outputs=GPs)

params = deserialize(expt_name + '.params')
helper.set_all_param_values(l_arc, params[:2])

worker = OmniglotOS(image_size=image_size, batch_size=batch_size)

while True:
    X_sample, _ = worker.fetch_batch('val')
    G = embedding_fn(X_sample)
    G = np.array(G)
    G = G.reshape(2 * glimpses, 3, batch_size)
    g = G[:, :, 0]
    I1 = X_sample[0, 0]
    I2 = X_sample[2, 0]
    fig_axs = plt.subplots(2, glimpses)
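    # a plotting sketch for the rest of the loop body (assumptions: even
    # rows of g attend to I1 and odd rows to I2, and the box side scales
    # with attn_win taps spaced delta apart around the center)
    fig, axs = fig_axs
    for i in range(glimpses):
        axs[0, i].imshow(I1, cmap='gray')
        axs[1, i].imshow(I2, cmap='gray')
        cy, cx, d = g[2 * i]  # glimpse parameters on the first image
        extent = (attn_win - 1) / 2.0 * d
        axs[0, i].add_patch(plt.Rectangle((cx - extent, cy - extent),
                                          2 * extent, 2 * extent,
                                          fill=False, edgecolor='red'))
    plt.show()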
def main():
    # load serialized parameters
    f = gzip.open('data/weights.pklz', 'rb')
    all_params = pickle.load(f)
    f.close()

    X = T.ftensor4()
    Y = T.fmatrix()

    # set up theano functions to generate output by feeding data through the network
    output_layer = lasagne_model()
    output_train = lasagne.layers.get_output(output_layer, X)
    output_valid = lasagne.layers.get_output(output_layer, X, deterministic=True)

    # set up the loss that we aim to minimize
    loss_train = T.mean(T.nnet.categorical_crossentropy(output_train, Y))
    loss_valid = T.mean(T.nnet.categorical_crossentropy(output_valid, Y))

    # prediction functions for classifications
    pred = T.argmax(output_train, axis=1)
    pred_valid = T.argmax(output_valid, axis=1)

    # load the saved weights into the network and set up SGD with
    # Nesterov momentum to update parameters
    helper.set_all_param_values(output_layer, all_params)
    params = lasagne.layers.get_all_params(output_layer)
    updates = nesterov_momentum(loss_train, params, learning_rate=0.0001, momentum=0.9)

    # set up training and prediction functions
    train = theano.function(inputs=[X, Y], outputs=loss_train, updates=updates, allow_input_downcast=True)
    valid = theano.function(inputs=[X, Y], outputs=loss_valid, allow_input_downcast=True)
    predict_valid = theano.function(inputs=[X], outputs=pred_valid, allow_input_downcast=True)

    # fine tune network
    train_X, test_X, train_y, test_y = load_data_cv('data/train.csv')
    train_eval = []
    valid_eval = []
    valid_acc = []
    try:
        for i in range(5):
            train_loss = batch_iterator_no_aug(train_X, train_y, BATCHSIZE, train)
            train_eval.append(train_loss)
            valid_loss = valid(test_X, test_y)
            valid_eval.append(valid_loss)
            acc = np.mean(np.argmax(test_y, axis=1) == predict_valid(test_X))
            valid_acc.append(acc)
            print 'iter:', i, '| Tloss:', train_loss, '| Vloss:', valid_loss, '| valid acc:', acc
    except KeyboardInterrupt:
        pass

    # after training, create output for Kaggle in batches
    testing_inputs = load_test_data('data/test.csv')
    predictions = []
    for j in range((testing_inputs.shape[0] + BATCHSIZE - 1) // BATCHSIZE):
        sl = slice(j * BATCHSIZE, (j + 1) * BATCHSIZE)
        X_batch = testing_inputs[sl]
        predictions.extend(predict_valid(X_batch))

    out = pd.read_csv('data/convnet_preds.csv')
    out['Label'] = predictions
    out.to_csv('preds/convnet_preds.csv', index=False)
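
# A natural follow-up at the end of main() (a sketch; the output file
# name is an assumption): persist the fine-tuned weights the same way
# they were loaded.
#
#     fine_tuned = helper.get_all_param_values(output_layer)
#     f = gzip.open('data/weights_finetuned.pklz', 'wb')
#     pickle.dump(fine_tuned, f)
#     f.close()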
PIXELS = 224
imageSize = PIXELS * PIXELS
num_features = imageSize

"""
Set up all theano functions
"""
X = T.tensor4('X')
Y = T.ivector('y')

# set up theano functions to generate output by feeding data through the
# network; any test outputs should be deterministic
net = bvlc_googlenet(X)

# load network weights
d = pickle.load(open('data/pre_trained_weights/blvc_googlenet.pkl'))
helper.set_all_param_values(net['prob'], d['param values'])

# stack our own softmax onto the final layer
output_layer = DenseLayer(net['pool5/7x7_s1'], num_units=10,
                          W=lasagne.init.HeNormal(), nonlinearity=softmax)

# standard output functions
output_train = lasagne.layers.get_output(output_layer)
output_test = lasagne.layers.get_output(output_layer, deterministic=True)

# set up the loss that we aim to minimize; with categorical cross-entropy
# our Y should be class indices (ints), not one-hot vectors
loss = lasagne.objectives.categorical_crossentropy(output_train, Y)
loss = loss.mean()

# L2 regularization
all_layers = lasagne.layers.get_all_layers(output_layer)
l2_penalty = lasagne.regularization.regularize_layer_params(all_layers, lasagne.regularization.l2) * 0.0001
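
# Optional sketch (an assumption, not in the original script): when only the
# new softmax head should train at first, the pretrained GoogLeNet weights
# can be frozen by collecting trainable params from the head layer alone.
head_params = output_layer.get_params(trainable=True)
updates = lasagne.updates.adam(loss + l2_penalty, head_params,
                               learning_rate=0.0001)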
loss = loss + l2_penalty

params = get_all_params(l_y, trainable=True)
updates = adam(loss, params, learning_rate=learning_rate)

meta_data["num_param"] = lasagne.layers.count_params(l_y)
print "number of parameters: ", meta_data["num_param"]

print "... compiling"
train_fn = theano.function(inputs=[X, y], outputs=loss, updates=updates)
val_fn = theano.function(inputs=[X, y], outputs=[loss, accuracy])
op_fn = theano.function([X], outputs=prediction_clean)

print "... loading dataset"
if meta_data["dataset"] == 'omniglot':
    worker = OmniglotOS(image_size=image_size, batch_size=batch_size)
elif meta_data["dataset"] == 'lfw':
    worker = LFWVerif(image_size=image_size, batch_size=batch_size)

meta_data, best_params = train(train_fn, val_fn, worker, meta_data,
                               get_params=lambda: helper.get_all_param_values(l_y))

if meta_data["testing"]:
    print "... testing"
    helper.set_all_param_values(l_y, best_params)
    meta_data = test(val_fn, worker, meta_data)

# serialize the best parameter *values*, not the shared variables
serialize(best_params, expt_name + '.params')
serialize(meta_data, expt_name + '.mtd')
serialize(op_fn, expt_name + '.opf')
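
# Later reuse sketch: the serialized artifacts round-trip through the same
# helpers used above (an assumed snippet, mirroring the loading pattern in
# create_embedder_fn elsewhere in this repo).
best_params = deserialize(expt_name + '.params')
helper.set_all_param_values(l_y, best_params)
meta_data = deserialize(expt_name + '.mtd')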