def load_cifar10():
    """Load the GCN/ZCA-whitened CIFAR-10 splits as Theano shared variables.

    Reads ``train.pkl``, ``test.pkl`` and ``preprocessor.pkl`` from
    ``$PYLEARN2_DATA_PATH/cifar10_cpu/pylearn2_gcn_whitened`` and wraps them
    in ``ZCA_Dataset`` objects: rows 0-45000 of the training pickle become the
    training split, rows 45000-50000 the validation split, and the test
    pickle is used whole.  Every sample is flattened to a 1-D vector.

    Returns:
        list: ``[train, valid, test]``.  Each entry is an ``(X, y)`` tuple
        where ``X`` is a Theano shared variable holding the flattened samples
        as ``theano.config.floatX`` and ``y`` is the flattened label vector,
        stored as a floatX shared variable and cast to ``int32``.

    Raises:
        EnvironmentError: if ``PYLEARN2_DATA_PATH`` is not set.
    """
    from pylearn2.utils import serial
    from pylearn2.datasets.zca_dataset import ZCA_Dataset
    import theano
    # shared_y_cast() below needs T; import it here instead of relying on a
    # module-level alias that may not exist.
    import theano.tensor as T

    def rotate_and_convert_grayscale(img):
        # Currently unused (see transform); kept so the grayscale experiment
        # can be switched back on.
        reshaped = img.reshape(32, 32, 3, order="F")
        rotated = np.rot90(reshaped, k=3)
        # ITU-R BT.601 luma weights.  The blue weight is 0.114; the previous
        # value 0.144 was a typo that made the weights sum to 1.03.
        return np.dot(rotated[:, :, :3], [0.299, 0.587, 0.114])

    def transform(img_set):
        # Only flattens each sample.  To train on grayscale images instead,
        # use rotate_and_convert_grayscale(img).ravel() here.
        return np.array([img.ravel() for img in img_set])

    def shared_y_cast(y):
        # Labels are stored as floatX and exposed as int32 -- presumably so
        # the shared storage can live on the GPU (TODO confirm).
        shared_y = theano.shared(np.asarray(y, dtype=theano.config.floatX), borrow=True)
        return T.cast(shared_y, "int32")

    def to_shared_pair(dataset):
        flat_x = transform(dataset.X)
        dataset.X = flat_x
        return (
            theano.shared(np.array(flat_x, dtype=theano.config.floatX), borrow=True),
            shared_y_cast(dataset.y.ravel()),
        )

    data_path = os.getenv("PYLEARN2_DATA_PATH")
    if data_path is None:
        # Fail with an actionable message instead of an obscure error raised
        # from inside os.path.join(None, ...).
        raise EnvironmentError("PYLEARN2_DATA_PATH environment variable is not set")

    whitened_path = os.path.join(data_path, "cifar10_cpu", "pylearn2_gcn_whitened")
    preprocessed_train_dataset = serial.load(os.path.join(whitened_path, "train.pkl"))
    preprocessed_test_dataset = serial.load(os.path.join(whitened_path, "test.pkl"))
    preprocessor = serial.load(os.path.join(whitened_path, "preprocessor.pkl"))

    train_set = ZCA_Dataset(preprocessed_train_dataset, preprocessor, start=0, stop=45000)
    valid_set = ZCA_Dataset(preprocessed_train_dataset, preprocessor, start=45000, stop=50000)
    test_set = ZCA_Dataset(preprocessed_test_dataset, preprocessor)

    return [to_shared_pair(train_set), to_shared_pair(valid_set), to_shared_pair(test_set)]
def load_cifar10(cifar_path, confidence_ascending=None):
    """Load the GCN/ZCA-whitened CIFAR-10 splits as Theano shared variables.

    Args:
        cifar_path: Directory containing the ``pylearn2_gcn_whitened``
            sub-directory with ``train.pkl``, ``test.pkl`` and
            ``preprocessor.pkl``.
        confidence_ascending: Optional mapping/sequence indexed by integer
            label; each entry is an ordered collection of training-set row
            indices.  When given, position ``i`` of the training split is
            filled with the next not-yet-used index taken from the entry of
            the label currently at position ``i``.  Presumably each entry
            lists the rows of that label sorted by confidence -- verify
            against the caller.  The argument is NOT modified.

    Returns:
        list: ``[train, valid, test]``.  Each entry is an ``(X, y)`` tuple
        where ``X`` is a Theano shared variable of flattened samples in
        ``theano.config.floatX`` and ``y`` is the flattened label vector
        cast to ``int32``.

    Raises:
        IndexError: if ``confidence_ascending`` runs out of indices for a
            label before every training position has been filled.
    """
    from pylearn2.datasets.zca_dataset import ZCA_Dataset
    from pylearn2.utils import serial
    import theano
    import theano.tensor as T

    def flatten(img_set):
        return np.array([img.ravel() for img in img_set])

    def shared_y_cast(y):
        shared_y = theano.shared(np.asarray(y, dtype=theano.config.floatX), borrow=True)
        return T.cast(shared_y, "int32")

    def to_shared_pair(dataset):
        dataset.X = flatten(dataset.X)
        return (
            theano.shared(np.array(dataset.X, dtype=theano.config.floatX), borrow=True),
            shared_y_cast(dataset.y.ravel()),
        )

    whitened_path = os.path.join(cifar_path, "pylearn2_gcn_whitened")
    preprocessed_train_dataset = serial.load(os.path.join(whitened_path, "train.pkl"))
    preprocessed_test_dataset = serial.load(os.path.join(whitened_path, "test.pkl"))
    preprocessor = serial.load(os.path.join(whitened_path, "preprocessor.pkl"))

    train_set = ZCA_Dataset(preprocessed_train_dataset, preprocessor, start=0, stop=45000)
    valid_set = ZCA_Dataset(preprocessed_train_dataset, preprocessor, start=45000, stop=50000)
    test_set = ZCA_Dataset(preprocessed_test_dataset, preprocessor)

    if confidence_ascending is not None:
        # Walk each label's index list with its own iterator.  The previous
        # list.pop(0) emptied the caller's lists as a side effect and cost
        # O(n) per pop.
        cursors = {}
        order = []
        for i in range(len(train_set.X)):
            label = int(train_set.y[i])
            if label not in cursors:
                cursors[label] = iter(confidence_ascending[label])
            try:
                order.append(next(cursors[label]))
            except StopIteration:
                # Same exception type that pop(0) on an empty list raised.
                raise IndexError(
                    "confidence_ascending[%d] has too few indices" % label
                )
        # Gather all rows at once; shapes and dtypes match the originals.
        train_set.X = train_set.X[order]
        train_set.y = train_set.y[order]

    return [to_shared_pair(train_set), to_shared_pair(valid_set), to_shared_pair(test_set)]
"${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"), preprocessor=preprocessor, start=0, stop=45000) valid_set = ZCA_Dataset(preprocessed_dataset=serial.load( "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"), preprocessor=preprocessor, start=45000, stop=50000) test_set = ZCA_Dataset(preprocessed_dataset=serial.load( "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/test.pkl"), preprocessor=preprocessor) # bc01 format # print train_set.X.shape train_set.X = train_set.X.reshape(45000, 3, 32, 32) valid_set.X = valid_set.X.reshape(5000, 3, 32, 32) test_set.X = test_set.X.reshape(10000, 3, 32, 32) # flatten targets train_set.y = np.hstack(train_set.y) valid_set.y = np.hstack(valid_set.y) test_set.y = np.hstack(test_set.y) # Onehot the targets train_set.y = np.float32(np.eye(10)[train_set.y]) valid_set.y = np.float32(np.eye(10)[valid_set.y]) test_set.y = np.float32(np.eye(10)[test_set.y]) # for hinge loss train_set.y = 2 * train_set.y - 1.
# Fitted preprocessor shared by all three CIFAR-10 splits.
preprocessor = serial.load(
    "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/preprocessor.pkl")

# Training split: rows [0, 45000) of the whitened training pickle.
train_set = ZCA_Dataset(
    preprocessed_dataset=serial.load(
        "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"),
    preprocessor=preprocessor,
    start=0,
    stop=45000)

# Validation split: rows [45000, 50000) of the same pickle (loaded again).
valid_set = ZCA_Dataset(
    preprocessed_dataset=serial.load(
        "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"),
    preprocessor=preprocessor,
    start=45000,
    stop=50000)

# Test split: the whole whitened test pickle.
test_set = ZCA_Dataset(
    preprocessed_dataset=serial.load(
        "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/test.pkl"),
    preprocessor=preprocessor)

# Inputs: reshape to bc01 (batch, channel, row, column) with fixed batch sizes.
train_set.X = np.reshape(train_set.X, (45000, 3, 32, 32))
valid_set.X = np.reshape(valid_set.X, (5000, 3, 32, 32))
test_set.X = np.reshape(test_set.X, (10000, 3, 32, 32))

# Targets: flatten to a 1-D label vector, then one-hot encode as float32
# by picking rows of a 10x10 identity matrix.
train_set.y = np.eye(10)[np.hstack(train_set.y)].astype(np.float32)
valid_set.y = np.eye(10)[np.hstack(valid_set.y)].astype(np.float32)
test_set.y = np.eye(10)[np.hstack(test_set.y)].astype(np.float32)

# For the hinge loss: map the training targets from {0, 1} to {-1, +1}.
# NOTE(review): only the training targets are rescaled in this section --
# confirm the validation/test targets are rescaled elsewhere.
train_set.y = train_set.y * 2 - 1.
# print("Preprocessing the test data")
# test_set.apply_preprocessor(preprocessor=preprocessor, can_fit=False)
#
# print("Saving the test data")
# test_set.use_design_loc(output_dir+'/test.npy')
# serial.save(output_dir+'/test.pkl', test_set)

# Reload the pickled preprocessor and datasets from output_dir, then wrap
# the datasets so they are exposed through ZCA_Dataset.
preprocessor = serial.load(os.path.join(output_dir, 'preprocessor.pkl'))
train_set = ZCA_Dataset(
    serial.load(os.path.join(output_dir, 'train.pkl')), preprocessor, 0, 50000)
test_set = ZCA_Dataset(
    serial.load(os.path.join(output_dir, 'test.pkl')), preprocessor)

# Inputs to (batch, 3, 32, 32).
train_set.X = np.reshape(train_set.X, (-1, 3, 32, 32))
test_set.X = np.reshape(test_set.X, (-1, 3, 32, 32))

# Targets: flatten to a 1-D label vector, then one-hot encode as float32
# by picking rows of a 10x10 identity matrix.
train_set.y = np.eye(10)[np.hstack(train_set.y)].astype(np.float32)
test_set.y = np.eye(10)[np.hstack(test_set.y)].astype(np.float32)

# Write each array to its own compressed archive inside output_dir.
for stem, array in (
        ('x_train', train_set.X),
        ('y_train', train_set.y),
        ('x_test', test_set.X),
        ('y_test', test_set.y)):
    np.savez_compressed(os.path.join(output_dir, stem), array)
print("LR_decay = "+str(LR_decay))

print("Loading CIFAR-10 dataset...")

# Common prefix of the whitened CIFAR-10 pickles.
path = '${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/'

preprocessor = serial.load(path+'preprocessor.pkl')

# Rows [0, 45000) of the training pickle are used for training and rows
# [45000, 50000) for validation; the pickle is loaded once per split.
train_set = ZCA_Dataset(
    preprocessed_dataset=serial.load(path+'train.pkl'),
    preprocessor=preprocessor,
    start=0,
    stop=45000)
valid_set = ZCA_Dataset(
    preprocessed_dataset=serial.load(path+'train.pkl'),
    preprocessor=preprocessor,
    start=45000,
    stop=50000)
test_set = ZCA_Dataset(
    preprocessed_dataset=serial.load(path+'test.pkl'),
    preprocessor=preprocessor)

# Inputs: reshape to bc01 (batch, channel, row, column).
train_set.X = np.reshape(train_set.X, (-1, 3, 32, 32))
valid_set.X = np.reshape(valid_set.X, (-1, 3, 32, 32))
test_set.X = np.reshape(test_set.X, (-1, 3, 32, 32))

# Targets: flatten to a 1-D label vector, then one-hot encode as float32
# by picking rows of a 10x10 identity matrix.
train_set.y = np.eye(10)[np.hstack(train_set.y)].astype(np.float32)
valid_set.y = np.eye(10)[np.hstack(valid_set.y)].astype(np.float32)
test_set.y = np.eye(10)[np.hstack(test_set.y)].astype(np.float32)

# For the hinge loss: map the training targets from {0, 1} to {-1, +1}.
# NOTE(review): only the training targets are rescaled in this section --
# confirm the validation/test targets are rescaled elsewhere.
train_set.y = train_set.y * 2 - 1.
def read_cifar10_data():
    """Load whitened CIFAR-10 and prepare channel-last, hinge-loss arrays.

    Requires Theano==0.80 and pylearn2.

    Loads the preprocessor and the whitened train/test pickles from
    ``${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/``, splits the
    training pickle into rows ``[0, 45000)`` (train) and ``[45000, 50000)``
    (validation), and for every split:

    * reshapes the inputs to ``(batch, 3, 32, 32)`` float32,
    * one-hot encodes the labels over 10 classes and maps them from
      ``{0, 1}`` to ``{-1, +1}`` for the hinge loss (float32),
    * transposes the inputs to ``(batch, 32, 32, 3)``.

    NOTE(review): the visible body ends after the transposes without a
    ``return`` statement, so callers receive ``None``.  Confirm what the
    caller expects and return the six arrays accordingly.
    """
    from pylearn2.datasets.zca_dataset import ZCA_Dataset
    from pylearn2.utils import serial

    train_set_size = 45000
    data_dir = "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/"

    preprocessor = serial.load(data_dir + "preprocessor.pkl")
    train_set = ZCA_Dataset(
        preprocessed_dataset=serial.load(data_dir + "train.pkl"),
        preprocessor=preprocessor,
        start=0,
        stop=train_set_size)
    # Validation starts where training stops, so the two splits cannot
    # overlap or leave a gap if train_set_size is changed.
    valid_set = ZCA_Dataset(
        preprocessed_dataset=serial.load(data_dir + "train.pkl"),
        preprocessor=preprocessor,
        start=train_set_size,
        stop=50000)
    test_set = ZCA_Dataset(
        preprocessed_dataset=serial.load(data_dir + "test.pkl"),
        preprocessor=preprocessor)

    # The leftover `import pdb; pdb.set_trace()` breakpoint that used to sit
    # here was removed: it stopped every call in the interactive debugger.

    for dataset in (train_set, valid_set, test_set):
        # bc01 layout, float32.
        dataset.X = dataset.X.reshape(-1, 3, 32, 32).astype(np.float32)
        # Flatten targets, one-hot encode, then map {0, 1} -> {-1, +1}
        # for the hinge loss.
        one_hot = np.float32(np.eye(10)[np.hstack(dataset.y)])
        dataset.y = (2 * one_hot - 1.).astype(np.float32)

    # Reorder the axes from (batch, channel, row, col) to
    # (batch, row, col, channel).
    x_train = train_set.X.transpose([0, 2, 3, 1])
    y_train = train_set.y
    x_validate = valid_set.X.transpose([0, 2, 3, 1])
    y_validate = valid_set.y
    x_test = test_set.X.transpose([0, 2, 3, 1])
    y_test = test_set.y
def main(method, LR_start):
    """Train a VGG-style CNN on ZCA-whitened CIFAR-100 and log every epoch.

    The network is 2x128C3-P2, 2x256C3-P2, 2x512C3-P2, 2x1024FC and a
    100-way softmax output, built from ``laq`` convolution/dense layers with
    batch normalisation and ReLU.  It is trained with Adam on the
    categorical cross-entropy for 200 epochs with batch size 100; the
    learning rate is halved every 15 epochs.

    Args:
        method: Passed as ``method=`` to every ``laq`` layer.  Any value
            other than ``"FPN"`` enables the quantized-weight update path
            (separate Adam updates for quantized weights, clipping/scaling,
            quantized-weight and second-moment bookkeeping).  It is also the
            name of the directory the log file is written to.
        LR_start: Initial learning rate.

    Side effects:
        Prints the configuration and per-epoch statistics, and appends one
        line per epoch to ``<method>/cifar100_lr<LR_start>_<method>.txt``.
        The ``<method>`` directory must already exist.
    """
    name = "cifar100"
    print("dataset = "+str(name))
    print("Method = "+str(method))

    # alpha is the exponential moving average factor
    alpha = .1
    print("alpha = "+str(alpha))
    epsilon = 1e-4
    print("epsilon = "+str(epsilon))

    # Training parameters
    batch_size = 100
    print("batch_size = "+str(batch_size))
    num_epochs = 200
    print("num_epochs = "+str(num_epochs))
    print("LR_start = "+str(LR_start))
    LR_decay = 0.5
    print("LR_decay="+str(LR_decay))

    activation = lasagne.nonlinearities.rectify

    train_set_size = 45000
    print("train_set_size = "+str(train_set_size))

    print('Loading CIFAR-100 dataset...')

    preprocessor = serial.load(
        "${PYLEARN2_DATA_PATH}/cifar100/pylearn2_gcn_whitened/preprocessor.pkl")
    train_set = ZCA_Dataset(
        preprocessed_dataset=serial.load(
            "${PYLEARN2_DATA_PATH}/cifar100/pylearn2_gcn_whitened/train.pkl"),
        preprocessor=preprocessor,
        start=0, stop=train_set_size)
    # Validation starts exactly where the training split stops.
    valid_set = ZCA_Dataset(
        preprocessed_dataset=serial.load(
            "${PYLEARN2_DATA_PATH}/cifar100/pylearn2_gcn_whitened/train.pkl"),
        preprocessor=preprocessor,
        start=train_set_size, stop=50000)
    test_set = ZCA_Dataset(
        preprocessed_dataset=serial.load(
            "${PYLEARN2_DATA_PATH}/cifar100/pylearn2_gcn_whitened/test.pkl"),
        preprocessor=preprocessor)

    for dataset in (train_set, valid_set, test_set):
        # bc01 format
        dataset.X = dataset.X.reshape(-1, 3, 32, 32)
        # flatten targets to an int32 label vector
        dataset.y = np.int32(np.hstack(dataset.y))

    print('Building the CNN...')

    # Prepare Theano variables for inputs and targets
    inputs = T.tensor4('inputs')
    target = T.ivector('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    identity = lasagne.nonlinearities.identity

    def conv(incoming, num_filters):
        # 3x3 'same' convolution; the non-linearity is applied after BN.
        return laq.Conv2DLayer(
            incoming,
            num_filters=num_filters,
            filter_size=(3, 3),
            pad=1,
            nonlinearity=identity,
            method=method)

    def dense(incoming, num_units):
        return laq.DenseLayer(
            incoming,
            nonlinearity=identity,
            num_units=num_units,
            method=method)

    def pool(incoming):
        return lasagne.layers.MaxPool2DLayer(incoming, pool_size=(2, 2))

    def bn_act(incoming):
        # Batch normalisation followed by the activation.
        normed = batch_norm.BatchNormLayer(incoming, epsilon=epsilon, alpha=alpha)
        return lasagne.layers.NonlinearityLayer(normed, nonlinearity=activation)

    # Layers are created in the same order as in the unrolled original.
    net = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=inputs)

    # 128C3-128C3-P2
    net = bn_act(conv(net, 128))
    net = bn_act(pool(conv(net, 128)))

    # 256C3-256C3-P2
    net = bn_act(conv(net, 256))
    net = bn_act(pool(conv(net, 256)))

    # 512C3-512C3-P2
    net = bn_act(conv(net, 512))
    net = bn_act(pool(conv(net, 512)))

    # 1024FP-1024FP-100FP
    net = bn_act(dense(net, 1024))
    net = bn_act(dense(net, 1024))
    l_out = lasagne.layers.NonlinearityLayer(
        dense(net, 100), nonlinearity=lasagne.nonlinearities.softmax)

    train_output = lasagne.layers.get_output(l_out, deterministic=False)
    loss = categorical_crossentropy(train_output, target).mean()

    if method != "FPN":
        # W updates
        W = lasagne.layers.get_all_params(l_out, quantized=True)
        W_grads = laq.compute_grads(loss, l_out)
        updates = optimizer.adam(loss_or_grads=W_grads, params=W, learning_rate=LR)
        updates = laq.clipping_scaling(updates, l_out)

        # other parameters updates
        params = lasagne.layers.get_all_params(l_out, trainable=True, quantized=False)
        other_updates = optimizer.adam(loss_or_grads=loss, params=params, learning_rate=LR)
        # items() is wrapped in list() because Python 3 dict views do not
        # support "+"; on Python 2 this is a plain list copy.
        updates = OrderedDict(list(updates.items()) + list(other_updates.items()))

        # store the quantized weights in the parameters tagged tt=True
        ternary_weights = laq.get_quantized_weights(loss, l_out)
        updates2 = OrderedDict()
        tt_tag = lasagne.layers.get_all_params(l_out, tt=True)
        for idx, tt_tag_temp in enumerate(tt_tag):
            updates2[tt_tag_temp] = ternary_weights[idx]
        updates = OrderedDict(list(updates.items()) + list(updates2.items()))

        # update 2nd moment, can get from the adam optimizer also
        updates3 = OrderedDict()
        acc_tag = lasagne.layers.get_all_params(l_out, acc=True)
        beta2 = 0.999
        for idx, acc_tag_temp in enumerate(acc_tag):
            updates3[acc_tag_temp] = acc_tag_temp*beta2 + W_grads[idx]*W_grads[idx]*(1-beta2)
        updates = OrderedDict(list(updates.items()) + list(updates3.items()))
    else:
        params = lasagne.layers.get_all_params(l_out, trainable=True)
        updates = optimizer.adam(loss_or_grads=loss, params=params, learning_rate=LR)

    test_output = lasagne.layers.get_output(l_out, deterministic=True)
    test_loss = categorical_crossentropy(test_output, target).mean()
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1), target),
                      dtype=theano.config.floatX)

    train_fn = theano.function([inputs, target, LR], loss, updates=updates)
    val_fn = theano.function([inputs, target], [test_loss, test_err])

    print('Training...')

    X_train = train_set.X
    y_train = train_set.y
    X_val = valid_set.X
    y_val = valid_set.y
    X_test = test_set.X
    y_test = test_set.y

    # This function trains the model a full epoch (on the whole dataset)
    def train_epoch(X, y, LR):
        loss = 0
        # Floor division: range() needs an int on Python 3.  Samples beyond
        # the last full batch are skipped, as before.
        batches = len(X) // batch_size
        # Materialise the indices: np.random.shuffle cannot shuffle a
        # Python 3 range object in place.
        shuffled_range = list(range(len(X)))
        np.random.shuffle(shuffled_range)
        for i in range(batches):
            tmp_ind = shuffled_range[i*batch_size:(i+1)*batch_size]
            loss += train_fn(X[tmp_ind], y[tmp_ind], LR)
        loss /= batches
        return loss

    # This function tests the model a full epoch (on the whole dataset)
    def val_epoch(X, y):
        err = 0
        loss = 0
        batches = len(X) // batch_size
        for i in range(batches):
            new_loss, new_err = val_fn(X[i*batch_size:(i+1)*batch_size],
                                       y[i*batch_size:(i+1)*batch_size])
            err += new_err
            loss += new_loss
        err = err / batches * 100
        loss /= batches
        return err, loss

    best_val_err = 100
    best_epoch = 1
    LR = LR_start

    # We iterate over epochs:
    for epoch in range(1, num_epochs+1):
        start_time = time.time()
        train_loss = train_epoch(X_train, y_train, LR)

        val_err, val_loss = val_epoch(X_val, y_val)

        # test if validation error went down; the test set is evaluated only
        # then, so the reported test figures belong to the best epoch so far
        if val_err <= best_val_err:
            best_val_err = val_err
            best_epoch = epoch
            test_err, test_loss = val_epoch(X_test, y_test)

        epoch_duration = time.time() - start_time

        # Then we print the results for this epoch:
        print("Epoch "+str(epoch)+" of "+str(num_epochs)+" took "+str(epoch_duration)+"s")
        print(" LR: "+str(LR))
        print(" training loss: "+str(train_loss))
        print(" validation loss: "+str(val_loss))
        print(" validation error rate: "+str(val_err)+"%")
        print(" best epoch: "+str(best_epoch))
        print(" best validation error rate: "+str(best_val_err)+"%")
        print(" test loss: "+str(test_loss))
        print(" test error rate: "+str(test_err)+"%")

        with open("{0}/{1}_lr{2}_{3}.txt".format(method, name, LR_start, method), "a") as myfile:
            myfile.write("{0} {1:.5f} {2:.5f} {3:.5f} {4:.5f} {5:.5f} {6:.5f} {7:.5f}\n".format(
                epoch, train_loss, val_loss, test_loss, val_err, test_err, epoch_duration, LR))

        # Step decay of the learning rate every 15 epochs.
        if epoch % 15 == 0:
            LR *= LR_decay
def load_cifar10(cifar_path, confidence_ascending=None):
    """Load the GCN/ZCA-whitened CIFAR-10 splits as Theano shared variables.

    Args:
        cifar_path: Directory containing the ``pylearn2_gcn_whitened``
            sub-directory with ``train.pkl``, ``test.pkl`` and
            ``preprocessor.pkl``.
        confidence_ascending: Optional mapping/sequence indexed by integer
            label; each entry is an ordered collection of training-set row
            indices.  When given, position ``i`` of the training split is
            filled with the next not-yet-used index taken from the entry of
            the label currently at position ``i``.  Presumably each entry
            lists the rows of that label sorted by confidence -- verify
            against the caller.  The argument is NOT modified.

    Returns:
        list: ``[train, valid, test]``.  Each entry is an ``(X, y)`` tuple
        where ``X`` is a Theano shared variable of flattened samples in
        ``theano.config.floatX`` and ``y`` is the flattened label vector
        cast to ``int32``.

    Raises:
        IndexError: if ``confidence_ascending`` runs out of indices for a
            label before every training position has been filled.
    """
    from pylearn2.datasets.zca_dataset import ZCA_Dataset
    from pylearn2.utils import serial
    import theano
    import theano.tensor as T

    def flatten(img_set):
        return np.array([img.ravel() for img in img_set])

    def shared_y_cast(y):
        shared_y = theano.shared(np.asarray(y, dtype=theano.config.floatX),
                                 borrow=True)
        return T.cast(shared_y, 'int32')

    def to_shared_pair(dataset):
        dataset.X = flatten(dataset.X)
        return (
            theano.shared(np.array(dataset.X, dtype=theano.config.floatX),
                          borrow=True),
            shared_y_cast(dataset.y.ravel()),
        )

    whitened_path = os.path.join(cifar_path, 'pylearn2_gcn_whitened')
    preprocessed_train_dataset = serial.load(
        os.path.join(whitened_path, 'train.pkl'))
    preprocessed_test_dataset = serial.load(
        os.path.join(whitened_path, 'test.pkl'))
    preprocessor = serial.load(os.path.join(whitened_path, 'preprocessor.pkl'))

    train_set = ZCA_Dataset(preprocessed_train_dataset, preprocessor,
                            start=0, stop=45000)
    valid_set = ZCA_Dataset(preprocessed_train_dataset, preprocessor,
                            start=45000, stop=50000)
    test_set = ZCA_Dataset(preprocessed_test_dataset, preprocessor)

    if confidence_ascending is not None:
        # Walk each label's index list with its own iterator.  The previous
        # list.pop(0) emptied the caller's lists as a side effect and cost
        # O(n) per pop.
        cursors = {}
        order = []
        for i in range(len(train_set.X)):
            label = int(train_set.y[i])
            if label not in cursors:
                cursors[label] = iter(confidence_ascending[label])
            try:
                order.append(next(cursors[label]))
            except StopIteration:
                # Same exception type that pop(0) on an empty list raised.
                raise IndexError(
                    'confidence_ascending[%d] has too few indices' % label
                )
        # Gather all rows at once; shapes and dtypes match the originals.
        train_set.X = train_set.X[order]
        train_set.y = train_set.y[order]

    return [to_shared_pair(train_set),
            to_shared_pair(valid_set),
            to_shared_pair(test_set)]
def load_cifar10():
    """Load the GCN/ZCA-whitened CIFAR-10 splits as Theano shared variables.

    Reads ``train.pkl``, ``test.pkl`` and ``preprocessor.pkl`` from
    ``$PYLEARN2_DATA_PATH/cifar10_cpu/pylearn2_gcn_whitened`` and wraps them
    in ``ZCA_Dataset`` objects: rows 0-45000 of the training pickle become the
    training split, rows 45000-50000 the validation split, and the test
    pickle is used whole.  Every sample is flattened to a 1-D vector.

    Returns:
        list: ``[train, valid, test]``.  Each entry is an ``(X, y)`` tuple
        where ``X`` is a Theano shared variable holding the flattened samples
        as ``theano.config.floatX`` and ``y`` is the flattened label vector,
        stored as a floatX shared variable and cast to ``int32``.

    Raises:
        EnvironmentError: if ``PYLEARN2_DATA_PATH`` is not set.
    """
    from pylearn2.utils import serial
    from pylearn2.datasets.zca_dataset import ZCA_Dataset
    import theano
    # shared_y_cast() below needs T; import it here instead of relying on a
    # module-level alias that may not exist.
    import theano.tensor as T

    def rotate_and_convert_grayscale(img):
        # Currently unused (see transform); kept so the grayscale experiment
        # can be switched back on.
        reshaped = img.reshape(32, 32, 3, order='F')
        rotated = np.rot90(reshaped, k=3)
        # ITU-R BT.601 luma weights.  The blue weight is 0.114; the previous
        # value 0.144 was a typo that made the weights sum to 1.03.
        return np.dot(rotated[:, :, :3], [0.299, 0.587, 0.114])

    def transform(img_set):
        # Only flattens each sample.  To train on grayscale images instead,
        # use rotate_and_convert_grayscale(img).ravel() here.
        return np.array([img.ravel() for img in img_set])

    def shared_y_cast(y):
        # Labels are stored as floatX and exposed as int32 -- presumably so
        # the shared storage can live on the GPU (TODO confirm).
        shared_y = theano.shared(np.asarray(y, dtype=theano.config.floatX),
                                 borrow=True)
        return T.cast(shared_y, 'int32')

    def to_shared_pair(dataset):
        flat_x = transform(dataset.X)
        dataset.X = flat_x
        return (theano.shared(np.array(flat_x, dtype=theano.config.floatX),
                              borrow=True),
                shared_y_cast(dataset.y.ravel()))

    data_path = os.getenv('PYLEARN2_DATA_PATH')
    if data_path is None:
        # Fail with an actionable message instead of an obscure error raised
        # from inside os.path.join(None, ...).
        raise EnvironmentError('PYLEARN2_DATA_PATH environment variable is not set')

    whitened_path = os.path.join(data_path, 'cifar10_cpu',
                                 'pylearn2_gcn_whitened')
    preprocessed_train_dataset = serial.load(
        os.path.join(whitened_path, 'train.pkl'))
    preprocessed_test_dataset = serial.load(
        os.path.join(whitened_path, 'test.pkl'))
    preprocessor = serial.load(os.path.join(whitened_path, 'preprocessor.pkl'))

    train_set = ZCA_Dataset(preprocessed_train_dataset, preprocessor,
                            start=0, stop=45000)
    valid_set = ZCA_Dataset(preprocessed_train_dataset, preprocessor,
                            start=45000, stop=50000)
    test_set = ZCA_Dataset(preprocessed_test_dataset, preprocessor)

    return [to_shared_pair(train_set),
            to_shared_pair(valid_set),
            to_shared_pair(test_set)]
def main(method, LR_start, Binarize_weight_only):
    """Train a VGG-style binarized CNN on ZCA-whitened CIFAR-10.

    The network is 2x128C3-P2, 2x256C3-P2, 2x512C3-P2, 2x1024FC and a
    10-unit output followed by batch normalisation, built from ``lab``
    convolution/dense layers.  It is trained with Adam on the squared hinge
    loss against one-hot targets encoded as -1/+1; the epoch loop itself is
    delegated to ``lab.train``.

    Args:
        method: Passed as ``method=`` to every ``lab`` layer and on to
            ``lab.train``.  Any value other than ``"FPN"`` enables the
            binary-weight update path (separate Adam updates for binary
            weights, clipping/scaling and second-moment bookkeeping).
        LR_start: Initial learning rate, forwarded to ``lab.train``.
        Binarize_weight_only: ``"w"`` selects ReLU activations; any other
            value selects ``lab.binary_tanh_unit``.

    Side effects:
        Prints the configuration, then whatever ``lab.train`` does.
    """
    name = "cifar"
    print("dataset = "+str(name))
    print("Binarize_weight_only="+str(Binarize_weight_only))
    print("Method = "+str(method))

    # alpha is the exponential moving average factor
    alpha = .1
    print("alpha = "+str(alpha))
    epsilon = 1e-4
    print("epsilon = "+str(epsilon))

    # Training parameters
    batch_size = 50
    print("batch_size = "+str(batch_size))
    num_epochs = 200
    print("num_epochs = "+str(num_epochs))
    print("LR_start = "+str(LR_start))
    LR_decay = 0.5
    print("LR_decay="+str(LR_decay))

    if Binarize_weight_only == "w":
        activation = lasagne.nonlinearities.rectify
    else:
        activation = lab.binary_tanh_unit
    print("activation = "+ str(activation))

    train_set_size = 45000
    print("train_set_size = "+str(train_set_size))

    print('Loading CIFAR-10 dataset...')

    preprocessor = serial.load(
        "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/preprocessor.pkl")
    train_set = ZCA_Dataset(
        preprocessed_dataset=serial.load(
            "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"),
        preprocessor=preprocessor,
        start=0, stop=train_set_size)
    # Validation starts exactly where the training split stops.
    valid_set = ZCA_Dataset(
        preprocessed_dataset=serial.load(
            "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"),
        preprocessor=preprocessor,
        start=train_set_size, stop=50000)
    test_set = ZCA_Dataset(
        preprocessed_dataset=serial.load(
            "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/test.pkl"),
        preprocessor=preprocessor)

    for dataset in (train_set, valid_set, test_set):
        # bc01 format
        dataset.X = dataset.X.reshape(-1, 3, 32, 32)
        # flatten targets, then one-hot encode them as float32
        one_hot = np.float32(np.eye(10)[np.hstack(dataset.y)])
        # for hinge loss: map {0, 1} to {-1, +1}
        dataset.y = 2 * one_hot - 1.

    print('Building the CNN...')

    # Prepare Theano variables for inputs and targets
    inputs = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    identity = lasagne.nonlinearities.identity

    def conv(incoming, num_filters):
        # 3x3 'same' convolution; the non-linearity is applied after BN.
        return lab.Conv2DLayer(
            incoming,
            num_filters=num_filters,
            filter_size=(3, 3),
            pad=1,
            nonlinearity=identity,
            method=method)

    def dense(incoming, num_units):
        return lab.DenseLayer(
            incoming,
            nonlinearity=identity,
            num_units=num_units,
            method=method)

    def pool(incoming):
        return lasagne.layers.MaxPool2DLayer(incoming, pool_size=(2, 2))

    def bn(incoming):
        return batch_norm.BatchNormLayer(incoming, epsilon=epsilon, alpha=alpha)

    def bn_act(incoming):
        # Batch normalisation followed by the activation.
        return lasagne.layers.NonlinearityLayer(bn(incoming), nonlinearity=activation)

    # Layers are created in the same order as in the unrolled original.
    net = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=inputs)

    # 128C3-128C3-P2
    net = bn_act(conv(net, 128))
    net = bn_act(pool(conv(net, 128)))

    # 256C3-256C3-P2
    net = bn_act(conv(net, 256))
    net = bn_act(pool(conv(net, 256)))

    # 512C3-512C3-P2
    net = bn_act(conv(net, 512))
    net = bn_act(pool(conv(net, 512)))

    # 1024FP-1024FP-10FP
    net = bn_act(dense(net, 1024))
    net = bn_act(dense(net, 1024))
    # The output layer is batch-normalised but has no non-linearity.
    l_out = bn(dense(net, 10))

    train_output = lasagne.layers.get_output(l_out, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0., 1.-target*train_output)))

    if method != "FPN":
        # W updates
        W = lasagne.layers.get_all_params(l_out, binary=True)
        W_grads = lab.compute_grads(loss, l_out)
        updates = optimizer.adam(loss_or_grads=W_grads, params=W, learning_rate=LR)
        updates = lab.clipping_scaling(updates, l_out)

        # other parameters updates
        params = lasagne.layers.get_all_params(l_out, trainable=True, binary=False)
        other_updates = optimizer.adam(loss_or_grads=loss, params=params, learning_rate=LR)
        # items() is wrapped in list() because Python 3 dict views do not
        # support "+"; on Python 2 this is a plain list copy.
        updates = OrderedDict(list(updates.items()) + list(other_updates.items()))

        # update 2nd moment, can get from the adam optimizer also
        updates3 = OrderedDict()
        acc_tag = lasagne.layers.get_all_params(l_out, acc=True)
        beta2 = 0.999
        for idx, acc_tag_temp in enumerate(acc_tag):
            updates3[acc_tag_temp] = acc_tag_temp*beta2 + W_grads[idx]*W_grads[idx]*(1-beta2)
        updates = OrderedDict(list(updates.items()) + list(updates3.items()))
    else:
        params = lasagne.layers.get_all_params(l_out, trainable=True)
        updates = optimizer.adam(loss_or_grads=loss, params=params, learning_rate=LR)

    test_output = lasagne.layers.get_output(l_out, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0., 1.-target*test_output)))
    test_err = T.mean(
        T.neq(T.argmax(test_output, axis=1), T.argmax(target, axis=1)),
        dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([inputs, target, LR], loss, updates=updates)
    val_fn = theano.function([inputs, target], [test_loss, test_err])

    print('Training...')

    lab.train(
        name, method,
        train_fn, val_fn,
        batch_size,
        LR_start, LR_decay,
        num_epochs,
        train_set.X, train_set.y,
        valid_set.X, valid_set.y,
        test_set.X, test_set.y)