def test_zca_dataset():
    """
    Tests the ZCA_Dataset class.
    """
    # Preparation
    rng = np.random.RandomState([2014, 11, 4])
    start = 0
    stop = 990
    num_examples = 1000
    num_feat = 5
    num_classes = 2

    # random_dense_design_matrix has values that are centered and of
    # unit stdev, which is not useful to test the ZCA.
    # So, we replace its value with an uncentered uniform one.
    raw = random_dense_design_matrix(rng, num_examples, num_feat, num_classes)
    x = rng.uniform(low=-0.5, high=2.0, size=(num_examples, num_feat))
    x = x.astype(np.float32)
    raw.X = x

    zca = ZCA(filter_bias=0.0)
    zca.apply(raw, can_fit=True)
    zca_dataset = ZCA_Dataset(raw, zca, start, stop)

    # Testing general behaviour: whitened features should have zero mean
    # and unit standard deviation.
    mean = zca_dataset.X.mean(axis=0)
    std = zca_dataset.X.std(axis=0)
    assert_allclose(mean, np.zeros(num_feat), atol=1e-2)
    assert_allclose(std, np.ones(num_feat), atol=1e-2)

    # Testing mapback()
    y = zca_dataset.mapback(zca_dataset.X)
    assert_allclose(x[start:stop], y)

    # Testing mapback_for_viewer()
    y = zca_dataset.mapback_for_viewer(zca_dataset.X)
    z = x / np.abs(x).max(axis=0)
    assert_allclose(z[start:stop], y, rtol=1e-2)

    # Testing adjust_for_viewer()
    y = zca_dataset.adjust_for_viewer(x.T).T
    z = x / np.abs(x).max(axis=0)
    assert_allclose(z, y)

    # Testing adjust_to_be_viewed_with()
    y = zca_dataset.adjust_to_be_viewed_with(x, 2 * x, True)
    z = zca_dataset.adjust_for_viewer(x)
    assert_allclose(z / 2, y)
    y = zca_dataset.adjust_to_be_viewed_with(x, 2 * x, False)
    z = x / np.abs(x).max()
    assert_allclose(z / 2, y)

    # Testing has_targets()
    assert zca_dataset.has_targets()
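# For reference, a minimal NumPy sketch of the transform the test exercises
# (an approximation of ZCA whitening, not pylearn2's actual implementation):
# center the data, eigendecompose its covariance, and rotate back so the
# whitened result stays close to the original feature space.
import numpy as np

def zca_whiten(X, filter_bias=0.0):
    """Return a ZCA-whitened copy of X (rows are examples)."""
    X_centered = X - X.mean(axis=0)
    cov = np.cov(X_centered, rowvar=False)
    eigvals, eigvecs = np.linalg.eigh(cov)
    # W = E diag(1/sqrt(lambda + bias)) E^T is symmetric, unlike PCA whitening.
    W = eigvecs.dot(np.diag(1. / np.sqrt(eigvals + filter_bias))).dot(eigvecs.T)
    return X_centered.dot(W)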
def load_cifar10(cifar_path, confidence_ascending=None):
    from pylearn2.datasets.zca_dataset import ZCA_Dataset
    from pylearn2.utils import serial
    import theano
    import theano.tensor as T

    def flatten(img_set):
        result = []
        for img in img_set:
            result.append(img.ravel())
        return np.array(result)

    def shared_y_cast(y):
        shared_y = theano.shared(np.asarray(y, dtype=theano.config.floatX),
                                 borrow=True)
        return T.cast(shared_y, "int32")

    whitened_path = os.path.join(cifar_path, "pylearn2_gcn_whitened")
    preprocessed_train_dataset = serial.load(os.path.join(whitened_path, "train.pkl"))
    preprocessed_test_dataset = serial.load(os.path.join(whitened_path, "test.pkl"))
    preprocessor = serial.load(os.path.join(whitened_path, "preprocessor.pkl"))

    train_set = ZCA_Dataset(preprocessed_train_dataset, preprocessor,
                            start=0, stop=45000)
    valid_set = ZCA_Dataset(preprocessed_train_dataset, preprocessor,
                            start=45000, stop=50000)
    test_set = ZCA_Dataset(preprocessed_test_dataset, preprocessor)

    # Optionally reorder the training examples, consuming one index per
    # class from the confidence_ascending lists.
    if confidence_ascending is not None:
        X_new = np.empty_like(train_set.X)
        y_new = np.empty_like(train_set.y)
        for i in range(len(X_new)):
            label = int(train_set.y[i])
            index_new = confidence_ascending[label].pop(0)
            X_new[i] = train_set.X[index_new]
            y_new[i] = train_set.y[index_new]
        train_set.X = X_new
        train_set.y = y_new

    train_set.X = flatten(train_set.X)
    valid_set.X = flatten(valid_set.X)
    test_set.X = flatten(test_set.X)

    train_set_tuple = (
        theano.shared(np.array(train_set.X, dtype=theano.config.floatX), borrow=True),
        shared_y_cast(train_set.y.ravel()),
    )
    valid_set_tuple = (
        theano.shared(np.array(valid_set.X, dtype=theano.config.floatX), borrow=True),
        shared_y_cast(valid_set.y.ravel()),
    )
    test_set_tuple = (
        theano.shared(np.array(test_set.X, dtype=theano.config.floatX), borrow=True),
        shared_y_cast(test_set.y.ravel()),
    )
    return [train_set_tuple, valid_set_tuple, test_set_tuple]
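# A usage sketch (the path is hypothetical): each returned entry is a
# (shared X matrix, int32-cast y vector) pair ready for Theano indexing.
datasets = load_cifar10('/data/cifar10')
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = datasets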
def _load_batch_cifar10pre(dtype='float64'):
    """
    Load a batch in the CIFAR-10 format.
    """
    preproc = os.path.join(data_dir_cifar10pre, "preprocessor.pkl")
    preprocessor = serial.load(preproc)

    train = os.path.join(data_dir_cifar10pre, "train.pkl")
    train_set = ZCA_Dataset(preprocessed_dataset=serial.load(train),
                            preprocessor=preprocessor,
                            start=0, stop=50000)

    test = os.path.join(data_dir_cifar10pre, "test.pkl")
    test_set = ZCA_Dataset(preprocessed_dataset=serial.load(test),
                           preprocessor=preprocessor)

    return train_set, test_set
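# A minimal usage sketch, assuming data_dir_cifar10pre points at a directory
# produced by pylearn2's GCN+ZCA preprocessing script; the expected design
# matrices hold one flattened 32x32x3 image (3072 values) per row.
train_set, test_set = _load_batch_cifar10pre()
print(train_set.X.shape)  # expected: (50000, 3072)
print(test_set.X.shape)   # expected: (10000, 3072)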
def load_cifar10():
    from pylearn2.utils import serial
    from pylearn2.datasets.zca_dataset import ZCA_Dataset
    # from pylearn2.datasets.cifar10 import CIFAR10
    import theano
    import theano.tensor as T

    def rotate_and_convert_grayscale(img):
        reshaped = img.reshape(32, 32, 3, order="F")
        rotated = np.rot90(reshaped, k=3)
        # ITU-R BT.601 luma coefficients (the blue weight is 0.114).
        grayscaled = np.dot(rotated[:, :, :3], [0.299, 0.587, 0.114])
        return grayscaled

    def transform(img_set):
        result = []
        # Convert all images to grayscale and flatten the shape
        for img in img_set:
            # result.append(rotate_and_convert_grayscale(img).ravel())
            result.append(img.ravel())
        return np.array(result)

    # train_set = CIFAR10(which_set='train', start=0, stop=45000)
    # valid_set = CIFAR10(which_set='train', start=45000, stop=50000)
    # test_set = CIFAR10(which_set='test')
    data_path = os.getenv("PYLEARN2_DATA_PATH")
    whitened_path = os.path.join(data_path, "cifar10_cpu", "pylearn2_gcn_whitened")
    preprocessed_train_dataset = serial.load(os.path.join(whitened_path, "train.pkl"))
    preprocessed_test_dataset = serial.load(os.path.join(whitened_path, "test.pkl"))
    preprocessor = serial.load(os.path.join(whitened_path, "preprocessor.pkl"))

    train_set = ZCA_Dataset(preprocessed_train_dataset, preprocessor,
                            start=0, stop=45000)
    valid_set = ZCA_Dataset(preprocessed_train_dataset, preprocessor,
                            start=45000, stop=50000)
    test_set = ZCA_Dataset(preprocessed_test_dataset, preprocessor)

    # Convert the images to grayscale and flatten them
    train_set.X = transform(train_set.X)
    valid_set.X = transform(valid_set.X)
    test_set.X = transform(test_set.X)

    def shared_y_cast(y):
        shared_y = theano.shared(np.asarray(y, dtype=theano.config.floatX),
                                 borrow=True)
        return T.cast(shared_y, "int32")

    train_set_tuple = (
        theano.shared(np.array(train_set.X, dtype=theano.config.floatX), borrow=True),
        shared_y_cast(train_set.y.ravel()),
    )
    valid_set_tuple = (
        theano.shared(np.array(valid_set.X, dtype=theano.config.floatX), borrow=True),
        shared_y_cast(valid_set.y.ravel()),
    )
    test_set_tuple = (
        theano.shared(np.array(test_set.X, dtype=theano.config.floatX), borrow=True),
        shared_y_cast(test_set.y.ravel()),
    )
    return [train_set_tuple, valid_set_tuple, test_set_tuple]
def test_zca_dataset():
    """
    Test that a ZCA dataset can be constructed without crashing.
    No attempt to verify correctness of behavior.
    """
    rng = np.random.RandomState([2014, 11, 4])
    num_examples = 5
    dim = 3
    num_classes = 2
    raw = random_dense_design_matrix(rng, num_examples, dim, num_classes)
    zca = ZCA()
    zca.apply(raw, can_fit=True)
    zca_dataset = ZCA_Dataset(raw, zca, start=1, stop=4)
def main(method, LR_start, Binarize_weight_only):

    name = "cifar"
    print("dataset = " + str(name))
    print("Binarize_weight_only = " + str(Binarize_weight_only))
    print("Method = " + str(method))

    # alpha is the exponential moving average factor
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # Training parameters
    batch_size = 50
    print("batch_size = " + str(batch_size))
    num_epochs = 200
    print("num_epochs = " + str(num_epochs))

    print("LR_start = " + str(LR_start))
    LR_decay = 0.5
    print("LR_decay = " + str(LR_decay))

    if Binarize_weight_only == "w":
        activation = lasagne.nonlinearities.rectify
    else:
        activation = lab.binary_tanh_unit
    print("activation = " + str(activation))

    train_set_size = 45000
    print("train_set_size = " + str(train_set_size))

    print('Loading CIFAR-10 dataset...')

    preprocessor = serial.load("${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/preprocessor.pkl")
    train_set = ZCA_Dataset(
        preprocessed_dataset=serial.load("${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"),
        preprocessor=preprocessor,
        start=0, stop=train_set_size)
    valid_set = ZCA_Dataset(
        preprocessed_dataset=serial.load("${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"),
        preprocessor=preprocessor,
        start=45000, stop=50000)
    test_set = ZCA_Dataset(
        preprocessed_dataset=serial.load("${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/test.pkl"),
        preprocessor=preprocessor)

    # bc01 format
    train_set.X = train_set.X.reshape(-1, 3, 32, 32)
    valid_set.X = valid_set.X.reshape(-1, 3, 32, 32)
    test_set.X = test_set.X.reshape(-1, 3, 32, 32)

    # flatten targets
    train_set.y = np.hstack(train_set.y)
    valid_set.y = np.hstack(valid_set.y)
    test_set.y = np.hstack(test_set.y)

    # Onehot the targets
    train_set.y = np.float32(np.eye(10)[train_set.y])
    valid_set.y = np.float32(np.eye(10)[valid_set.y])
    test_set.y = np.float32(np.eye(10)[test_set.y])

    # for hinge loss
    train_set.y = 2 * train_set.y - 1.
    valid_set.y = 2 * valid_set.y - 1.
    test_set.y = 2 * test_set.y - 1.
    print('Building the CNN...')

    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    l_in = lasagne.layers.InputLayer(
        shape=(None, 3, 32, 32),
        input_var=input)

    # 128C3-128C3-P2
    l_cnn1 = lab.Conv2DLayer(
        l_in,
        num_filters=128,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity,
        method=method)

    l_bn1 = batch_norm.BatchNormLayer(
        l_cnn1,
        epsilon=epsilon,
        alpha=alpha)

    l_nl1 = lasagne.layers.NonlinearityLayer(
        l_bn1,
        nonlinearity=activation)

    l_cnn2 = lab.Conv2DLayer(
        l_nl1,
        num_filters=128,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity,
        method=method)

    l_mp1 = lasagne.layers.MaxPool2DLayer(l_cnn2, pool_size=(2, 2))

    l_bn2 = batch_norm.BatchNormLayer(
        l_mp1,
        epsilon=epsilon,
        alpha=alpha)

    l_nl2 = lasagne.layers.NonlinearityLayer(
        l_bn2,
        nonlinearity=activation)

    # 256C3-256C3-P2
    l_cnn3 = lab.Conv2DLayer(
        l_nl2,
        num_filters=256,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity,
        method=method)

    l_bn3 = batch_norm.BatchNormLayer(
        l_cnn3,
        epsilon=epsilon,
        alpha=alpha)

    l_nl3 = lasagne.layers.NonlinearityLayer(
        l_bn3,
        nonlinearity=activation)

    l_cnn4 = lab.Conv2DLayer(
        l_nl3,
        num_filters=256,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity,
        method=method)

    l_mp2 = lasagne.layers.MaxPool2DLayer(l_cnn4, pool_size=(2, 2))

    l_bn4 = batch_norm.BatchNormLayer(
        l_mp2,
        epsilon=epsilon,
        alpha=alpha)

    l_nl4 = lasagne.layers.NonlinearityLayer(
        l_bn4,
        nonlinearity=activation)

    # 512C3-512C3-P2
    l_cnn5 = lab.Conv2DLayer(
        l_nl4,
        num_filters=512,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity,
        method=method)

    l_bn5 = batch_norm.BatchNormLayer(
        l_cnn5,
        epsilon=epsilon,
        alpha=alpha)

    l_nl5 = lasagne.layers.NonlinearityLayer(
        l_bn5,
        nonlinearity=activation)

    l_cnn6 = lab.Conv2DLayer(
        l_nl5,
        num_filters=512,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity,
        method=method)

    l_mp3 = lasagne.layers.MaxPool2DLayer(l_cnn6, pool_size=(2, 2))

    l_bn6 = batch_norm.BatchNormLayer(
        l_mp3,
        epsilon=epsilon,
        alpha=alpha)

    l_nl6 = lasagne.layers.NonlinearityLayer(
        l_bn6,
        nonlinearity=activation)

    # print(cnn.output_shape)

    # 1024FP-1024FP-10FP
    l_dn1 = lab.DenseLayer(
        l_nl6,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=1024,
        method=method)

    l_bn7 = batch_norm.BatchNormLayer(
        l_dn1,
        epsilon=epsilon,
        alpha=alpha)

    l_nl7 = lasagne.layers.NonlinearityLayer(
        l_bn7,
        nonlinearity=activation)

    l_dn2 = lab.DenseLayer(
        l_nl7,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=1024,
        method=method)

    l_bn8 = batch_norm.BatchNormLayer(
        l_dn2,
        epsilon=epsilon,
        alpha=alpha)

    l_nl8 = lasagne.layers.NonlinearityLayer(
        l_bn8,
        nonlinearity=activation)

    l_dn3 = lab.DenseLayer(
        l_nl8,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=10,
        method=method)

    l_out = batch_norm.BatchNormLayer(
        l_dn3,
        epsilon=epsilon,
        alpha=alpha)

    train_output = lasagne.layers.get_output(l_out, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))

    if method != "FPN":
        # W updates
        W = lasagne.layers.get_all_params(l_out, binary=True)
        W_grads = lab.compute_grads(loss, l_out)
        updates = optimizer.adam(loss_or_grads=W_grads, params=W, learning_rate=LR)
        updates = lab.clipping_scaling(updates, l_out)

        # other parameters updates (Python 2: dict items concatenation)
        params = lasagne.layers.get_all_params(l_out, trainable=True, binary=False)
        updates = OrderedDict(updates.items() +
                              optimizer.adam(loss_or_grads=loss, params=params,
                                             learning_rate=LR).items())

        # update the 2nd moment (could also be taken from the adam optimizer)
        updates3 = OrderedDict()
        acc_tag = lasagne.layers.get_all_params(l_out, acc=True)
        idx = 0
        beta2 = 0.999
        for acc_tag_temp in acc_tag:
            updates3[acc_tag_temp] = (acc_tag_temp * beta2 +
                                      W_grads[idx] * W_grads[idx] * (1 - beta2))
            idx = idx + 1
        updates = OrderedDict(updates.items() + updates3.items())
    else:
        params = lasagne.layers.get_all_params(l_out, trainable=True)
        updates = optimizer.adam(loss_or_grads=loss, params=params, learning_rate=LR)

    test_output = lasagne.layers.get_output(l_out, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1),
                            T.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch
    # (by giving the updates dictionary) and returning the training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')

    lab.train(
        name, method,
        train_fn, val_fn,
        batch_size,
        LR_start, LR_decay,
        num_epochs,
        train_set.X, train_set.y,
        valid_set.X, valid_set.y,
        test_set.X, test_set.y)
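# A small NumPy sketch of the target encoding and loss used above: one-hot
# targets are mapped from {0, 1} to {-1, +1} so that the squared hinge loss
# mean(max(0, 1 - t * o)^2) penalizes outputs on the wrong side of the margin.
# (The values below are illustrative only.)
import numpy as np

labels = np.array([2, 0, 1])
onehot_targets = np.float32(np.eye(3)[labels])  # rows of {0, 1}
targets = 2 * onehot_targets - 1.               # rows of {-1, +1}
outputs = np.array([[0.2, -0.5, 0.9],
                    [1.2, 0.1, -0.3],
                    [-0.4, 0.8, 0.0]], dtype=np.float32)
loss = np.mean(np.square(np.maximum(0., 1. - targets * outputs)))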
    for item in zip(dataset.y, dataset.X):
        example = _convert_to_example_proto(np.squeeze(item[0]),
                                            item[1].tobytes())
        writer.write(example.SerializeToString())


if __name__ == '__main__':
    print("Generating .tfrecords files ...")

    preprocessor = serial.load("/datasets/pylearn2_gcn_whitened/preprocessor.pkl")
    train_set = ZCA_Dataset(
        preprocessed_dataset=serial.load("/datasets/pylearn2_gcn_whitened/train.pkl"),
        preprocessor=preprocessor,
        start=0, stop=45000)
    valid_set = ZCA_Dataset(
        preprocessed_dataset=serial.load("/datasets/pylearn2_gcn_whitened/train.pkl"),
        preprocessor=preprocessor,
        start=45000, stop=50000)
    test_set = ZCA_Dataset(
        preprocessed_dataset=serial.load("/datasets/pylearn2_gcn_whitened/test.pkl"),
        preprocessor=preprocessor)

    output_dir = '/datasets/cifar_10/pylearn2_tfrecords'
    create_tfrecords('train', train_set, output_dir)
    create_tfrecords('val', valid_set, output_dir)
    create_tfrecords('test', test_set, output_dir)
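# _convert_to_example_proto is not shown in this excerpt; a plausible sketch
# of such a helper (hypothetical name and feature keys), using TensorFlow's
# standard tf.train.Example protobuf API:
import tensorflow as tf

def _convert_to_example_proto(label, image_bytes):
    # Pack one (label, raw image bytes) pair into an Example proto.
    return tf.train.Example(features=tf.train.Features(feature={
        'label': tf.train.Feature(
            int64_list=tf.train.Int64List(value=[int(label)])),
        'image_raw': tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[image_bytes])),
    }))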
binary_training = False
stochastic_training = False
binary_test = False
stochastic_test = False
if BinaryConnect == True:
    binary_training = True
    if stochastic == True:
        stochastic_training = True
    else:
        binary_test = True

print 'Loading the dataset'

preprocessor = serial.load("${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/preprocessor.pkl")
train_set = ZCA_Dataset(
    preprocessed_dataset=serial.load("${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"),
    preprocessor=preprocessor,
    start=0, stop=45000)
valid_set = ZCA_Dataset(
    preprocessed_dataset=serial.load("${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"),
    preprocessor=preprocessor,
    start=45000, stop=50000)
test_set = ZCA_Dataset(
    preprocessed_dataset=serial.load("${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/test.pkl"),
    preprocessor=preprocessor)

# bc01 format
# print train_set.X.shape
train_set.X = train_set.X.reshape(45000, 3, 32, 32)
valid_set.X = valid_set.X.reshape(5000, 3, 32, 32)
test_set.X = test_set.X.reshape(10000, 3, 32, 32)
# print("Loading the test data") # test_set = CIFAR10(which_set='test') # # print("Preprocessing the test data") # test_set.apply_preprocessor(preprocessor=preprocessor, can_fit=False) # # print("Saving the test data") # test_set.use_design_loc(output_dir+'/test.npy') # serial.save(output_dir+'/test.pkl', test_set) train_set = serial.load(os.path.join(output_dir, 'train.pkl')) test_set = serial.load(os.path.join(output_dir, 'test.pkl')) preprocessor = serial.load(os.path.join(output_dir, 'preprocessor.pkl')) train_set = ZCA_Dataset(train_set, preprocessor, 0, 50000) test_set = ZCA_Dataset(test_set, preprocessor) train_set.X = train_set.X.reshape(-1, 3, 32, 32) test_set.X = test_set.X.reshape(-1, 3, 32, 32) # flatten targets train_set.y = np.hstack(train_set.y) test_set.y = np.hstack(test_set.y) # Onehot the targets train_set.y = np.float32(np.eye(10)[train_set.y]) test_set.y = np.float32(np.eye(10)[test_set.y]) np.savez_compressed(os.path.join(output_dir, 'x_train'), train_set.X) np.savez_compressed(os.path.join(output_dir, 'y_train'), train_set.y)
def load_dataset(which_set, dataset_types):

    # we need at least 2 types, otherwise this function is useless
    assert len(dataset_types) > 1

    print "loading.. ", which_set

    if which_set == 'test':
        start_set = 0
        stop_set = 10000
    elif which_set == 'valid':
        which_set = 'train'
        start_set = 40000
        stop_set = 50000
    else:  # train
        start_set = 0
        stop_set = 40000

    n_classes = 10
    data = []

    for prepro in dataset_types:
        if prepro == 'gcn':
            print "LOADING GCN..."
            input_data = CIFAR10(which_set=which_set,
                                 start=start_set,
                                 stop=stop_set,
                                 gcn=55.,
                                 axes=['b', 0, 1, 'c'])
            # gcn_data = input_data.get_topological_view()
            data.append(input_data.get_topological_view())

        if prepro == 'toronto':
            print "LOADING TOR..."
            input_data = CIFAR10(which_set=which_set,
                                 start=start_set,
                                 stop=stop_set,
                                 axes=['b', 0, 1, 'c'],
                                 toronto_prepro=1)
            # tor_data = input_data.get_topological_view()
            data.append(input_data.get_topological_view())

        if prepro == 'zca':
            print "LOADING ZCA..."
            data_dir = string_utils.preprocess('${PYLEARN2_DATA_PATH}/cifar10')
            input_data = ZCA_Dataset(
                preprocessed_dataset=serial.load(
                    data_dir + "/pylearn2_gcn_whitened/" + which_set + ".pkl"),
                preprocessor=serial.load(
                    data_dir + "/pylearn2_gcn_whitened/preprocessor.pkl"),
                start=start_set,
                stop=stop_set,
                axes=['b', 0, 1, 'c'])
            # zca_data = input_data.get_topological_view()
            data.append(input_data.get_topological_view())

    target_data = OneHotFormatter(n_classes).format(input_data.y,
                                                    mode="concatenate")
    data.append(target_data)

    data_source = []
    for i in range(len(dataset_types)):
        data_source.append('features' + str(i))
    data_source.append('targets')

    ################################## DEFINE SPACES ##################################
    spaces = []
    # add input spaces as b01c
    for i in range(0, len(dataset_types)):
        spaces.append(Conv2DSpace(shape=(32, 32),
                                  num_channels=3,
                                  axes=('b', 0, 1, 'c')))
    # add output space
    spaces.append(VectorSpace(n_classes))

    # 'set' would shadow the builtin, so use a descriptive name
    dataset = VectorSpacesDataset(tuple(data),
                                  (CompositeSpace(spaces), tuple(data_source)))

    return dataset
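# Hypothetical usage: build a two-view training set (GCN and ZCA topological
# views of the same 40000 examples) plus concatenated one-hot targets.
train_data = load_dataset('train', ['gcn', 'zca'])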
                      stop=60000)  # , center = True)
    test_set = MNIST(which_set='test')  # , center = True)

    # for both datasets, onehot the target
    train_set.y = np.float32(onehot(train_set.y))
    valid_set.y = np.float32(onehot(valid_set.y))
    test_set.y = np.float32(onehot(test_set.y))

elif dataset == "CIFAR10":

    preprocessor = serial.load(
        "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/preprocessor.pkl")
    train_set = ZCA_Dataset(
        preprocessed_dataset=serial.load(
            "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"),
        preprocessor=preprocessor,
        start=0, stop=45000)
    valid_set = ZCA_Dataset(
        preprocessed_dataset=serial.load(
            "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"),
        preprocessor=preprocessor,
        start=45000, stop=50000)
    test_set = ZCA_Dataset(
        preprocessed_dataset=serial.load(
            "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/test.pkl"),
        preprocessor=preprocessor)

    # for both datasets, onehot the target
    train_set.y = np.float32(onehot(train_set.y))
    valid_set.y = np.float32(onehot(valid_set.y))
    test_set.y = np.float32(onehot(test_set.y))
if __name__ == '__main__':

    # get dataset CIFAR10
    print "Loading gcn dataset.."
    cifar10_gcn = CIFAR10(which_set='test', gcn=55., axes=['c', 0, 1, 'b'])

    print "Loading torontoprepro dataset.."
    cifar10_toronto = CIFAR10(which_set='test', toronto_prepro=True,
                              axes=['c', 0, 1, 'b'])

    print "Loading zca dataset.."
    data_dir = string_utils.preprocess('${PYLEARN2_DATA_PATH}/cifar10')
    cifar10_zca = ZCA_Dataset(
        preprocessed_dataset=serial.load(
            data_dir + "/pylearn2_gcn_whitened/test.pkl"),
        preprocessor=serial.load(
            data_dir + "/pylearn2_gcn_whitened/preprocessor.pkl"),
        axes=['c', 0, 1, 'b'])

    # one entry per preprocessing: (dataset, pickled model path, 10000x10 output buffer)
    columns = {
        'gcn': (cifar10_gcn,
                'pkl/best/singlecolumn_complex_GCN_paper_best.pkl',
                np.zeros((10000, 10))),
        'toronto': (cifar10_toronto,
                    'pkl/best/singlecolumn_complex_TORONTO_paper_best.pkl',
                    np.zeros((10000, 10))),
        'zca': (cifar10_zca,
                'pkl/best/singlecolumn_complex_ZCA_paper_best.pkl',
                np.zeros((10000, 10)))
    }
batch_size = 50
print("batch_size = " + str(batch_size))

# Decaying LR
LR_start = 0.003
print("LR_start = " + str(LR_start))
LR_fin = 0.000002
print("LR_fin = " + str(LR_fin))
LR_decay = (LR_fin / LR_start) ** (1. / num_epochs)
print("LR_decay = " + str(LR_decay))

print("Loading CIFAR-10 dataset...")

path = '${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/'
preprocessor = serial.load(path + 'preprocessor.pkl')
train_set = ZCA_Dataset(preprocessed_dataset=serial.load(path + 'train.pkl'),
                        preprocessor=preprocessor,
                        start=0, stop=45000)
valid_set = ZCA_Dataset(preprocessed_dataset=serial.load(path + 'train.pkl'),
                        preprocessor=preprocessor,
                        start=45000, stop=50000)
test_set = ZCA_Dataset(preprocessed_dataset=serial.load(path + 'test.pkl'),
                       preprocessor=preprocessor)

# bc01 format
train_set.X = train_set.X.reshape(-1, 3, 32, 32)
valid_set.X = valid_set.X.reshape(-1, 3, 32, 32)
test_set.X = test_set.X.reshape(-1, 3, 32, 32)

# flatten targets
train_set.y = np.hstack(train_set.y)
valid_set.y = np.hstack(valid_set.y)
test_set.y = np.hstack(test_set.y)
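# The schedule above picks a per-epoch factor so that multiplying LR_start by
# LR_decay once per epoch lands exactly on LR_fin after num_epochs epochs:
# LR_start * LR_decay**num_epochs == LR_fin. A quick check (num_epochs = 500
# is an assumed value; it is not shown in this excerpt):
num_epochs = 500
LR_start, LR_fin = 0.003, 0.000002
LR_decay = (LR_fin / LR_start) ** (1. / num_epochs)
assert abs(LR_start * LR_decay ** num_epochs - LR_fin) < 1e-12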
def read_cifar10_data():
    '''
    Requires Theano 0.8.0 and pylearn2.
    '''
    from pylearn2.datasets.zca_dataset import ZCA_Dataset
    from pylearn2.utils import serial

    train_set_size = 45000

    preprocessor = serial.load(
        "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/preprocessor.pkl")
    train_set = ZCA_Dataset(
        preprocessed_dataset=serial.load(
            "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"),
        preprocessor=preprocessor,
        start=0, stop=train_set_size)
    valid_set = ZCA_Dataset(
        preprocessed_dataset=serial.load(
            "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"),
        preprocessor=preprocessor,
        start=45000, stop=50000)
    test_set = ZCA_Dataset(
        preprocessed_dataset=serial.load(
            "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/test.pkl"),
        preprocessor=preprocessor)

    # bc01 format
    train_set.X = train_set.X.reshape(-1, 3, 32, 32)
    valid_set.X = valid_set.X.reshape(-1, 3, 32, 32)
    test_set.X = test_set.X.reshape(-1, 3, 32, 32)

    # flatten targets
    train_set.y = np.hstack(train_set.y)
    valid_set.y = np.hstack(valid_set.y)
    test_set.y = np.hstack(test_set.y)

    # Onehot the targets
    train_set.y = np.float32(np.eye(10)[train_set.y])
    valid_set.y = np.float32(np.eye(10)[valid_set.y])
    test_set.y = np.float32(np.eye(10)[test_set.y])

    # for hinge loss
    train_set.y = 2 * train_set.y - 1.
    valid_set.y = 2 * valid_set.y - 1.
    test_set.y = 2 * test_set.y - 1.

    train_set.X = train_set.X.astype(np.float32)
    valid_set.X = valid_set.X.astype(np.float32)
    test_set.X = test_set.X.astype(np.float32)
    train_set.y = train_set.y.astype(np.float32)
    valid_set.y = valid_set.y.astype(np.float32)
    test_set.y = test_set.y.astype(np.float32)

    x_train = train_set.X
    y_train = train_set.y
    x_validate = valid_set.X
    y_validate = valid_set.y
    x_test = test_set.X
    y_test = test_set.y

    # Reorder from bc01 to b01c (channels-last).
    x_train = x_train.transpose([0, 2, 3, 1])
    x_validate = x_validate.transpose([0, 2, 3, 1])
    x_test = x_test.transpose([0, 2, 3, 1])
def main(method, LR_start):

    name = "cifar100"
    print("dataset = " + str(name))
    print("Method = " + str(method))

    # alpha is the exponential moving average factor
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # Training parameters
    batch_size = 100
    print("batch_size = " + str(batch_size))
    num_epochs = 200
    print("num_epochs = " + str(num_epochs))

    print("LR_start = " + str(LR_start))
    LR_decay = 0.5
    print("LR_decay = " + str(LR_decay))

    activation = lasagne.nonlinearities.rectify

    train_set_size = 45000
    print("train_set_size = " + str(train_set_size))

    print('Loading CIFAR-100 dataset...')

    preprocessor = serial.load("${PYLEARN2_DATA_PATH}/cifar100/pylearn2_gcn_whitened/preprocessor.pkl")
    train_set = ZCA_Dataset(
        preprocessed_dataset=serial.load("${PYLEARN2_DATA_PATH}/cifar100/pylearn2_gcn_whitened/train.pkl"),
        preprocessor=preprocessor,
        start=0, stop=train_set_size)
    valid_set = ZCA_Dataset(
        preprocessed_dataset=serial.load("${PYLEARN2_DATA_PATH}/cifar100/pylearn2_gcn_whitened/train.pkl"),
        preprocessor=preprocessor,
        start=45000, stop=50000)
    test_set = ZCA_Dataset(
        preprocessed_dataset=serial.load("${PYLEARN2_DATA_PATH}/cifar100/pylearn2_gcn_whitened/test.pkl"),
        preprocessor=preprocessor)

    # bc01 format
    train_set.X = train_set.X.reshape(-1, 3, 32, 32)
    valid_set.X = valid_set.X.reshape(-1, 3, 32, 32)
    test_set.X = test_set.X.reshape(-1, 3, 32, 32)

    # flatten targets
    train_set.y = np.int32(np.hstack(train_set.y))
    valid_set.y = np.int32(np.hstack(valid_set.y))
    test_set.y = np.int32(np.hstack(test_set.y))

    print('Building the CNN...')

    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.ivector('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    l_in = lasagne.layers.InputLayer(
        shape=(None, 3, 32, 32),
        input_var=input)

    # 128C3-128C3-P2
    l_cnn1 = laq.Conv2DLayer(
        l_in,
        num_filters=128,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity,
        method=method)

    l_bn1 = batch_norm.BatchNormLayer(
        l_cnn1,
        epsilon=epsilon,
        alpha=alpha)

    l_nl1 = lasagne.layers.NonlinearityLayer(
        l_bn1,
        nonlinearity=activation)

    l_cnn2 = laq.Conv2DLayer(
        l_nl1,
        num_filters=128,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity,
        method=method)

    l_mp1 = lasagne.layers.MaxPool2DLayer(l_cnn2, pool_size=(2, 2))

    l_bn2 = batch_norm.BatchNormLayer(
        l_mp1,
        epsilon=epsilon,
        alpha=alpha)

    l_nl2 = lasagne.layers.NonlinearityLayer(
        l_bn2,
        nonlinearity=activation)

    # 256C3-256C3-P2
    l_cnn3 = laq.Conv2DLayer(
        l_nl2,
        num_filters=256,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity,
        method=method)

    l_bn3 = batch_norm.BatchNormLayer(
        l_cnn3,
        epsilon=epsilon,
        alpha=alpha)

    l_nl3 = lasagne.layers.NonlinearityLayer(
        l_bn3,
        nonlinearity=activation)

    l_cnn4 = laq.Conv2DLayer(
        l_nl3,
        num_filters=256,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity,
        method=method)

    l_mp2 = lasagne.layers.MaxPool2DLayer(l_cnn4, pool_size=(2, 2))

    l_bn4 = batch_norm.BatchNormLayer(
        l_mp2,
        epsilon=epsilon,
        alpha=alpha)

    l_nl4 = lasagne.layers.NonlinearityLayer(
        l_bn4,
        nonlinearity=activation)

    # 512C3-512C3-P2
    l_cnn5 = laq.Conv2DLayer(
        l_nl4,
        num_filters=512,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity,
        method=method)

    l_bn5 = batch_norm.BatchNormLayer(
        l_cnn5,
        epsilon=epsilon,
        alpha=alpha)

    l_nl5 = lasagne.layers.NonlinearityLayer(
        l_bn5,
        nonlinearity=activation)

    l_cnn6 = laq.Conv2DLayer(
        l_nl5,
        num_filters=512,
        filter_size=(3, 3),
        pad=1,
        nonlinearity=lasagne.nonlinearities.identity,
        method=method)

    l_mp3 = lasagne.layers.MaxPool2DLayer(l_cnn6, pool_size=(2, 2))

    l_bn6 = batch_norm.BatchNormLayer(
        l_mp3,
        epsilon=epsilon,
        alpha=alpha)

    l_nl6 = lasagne.layers.NonlinearityLayer(
        l_bn6,
        nonlinearity=activation)

    # print(cnn.output_shape)

    # 1024FP-1024FP-10FP
    l_dn1 = laq.DenseLayer(
        l_nl6,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=1024,
        method=method)

    l_bn7 = batch_norm.BatchNormLayer(
        l_dn1,
        epsilon=epsilon,
        alpha=alpha)

    l_nl7 = lasagne.layers.NonlinearityLayer(
        l_bn7,
        nonlinearity=activation)

    l_dn2 = laq.DenseLayer(
        l_nl7,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=1024,
        method=method)

    l_bn8 = batch_norm.BatchNormLayer(
        l_dn2,
        epsilon=epsilon,
        alpha=alpha)

    l_nl8 = lasagne.layers.NonlinearityLayer(
        l_bn8,
        nonlinearity=activation)

    l_dn3 = laq.DenseLayer(
        l_nl8,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=100,
        method=method)

    l_out = lasagne.layers.NonlinearityLayer(
        l_dn3,
        nonlinearity=lasagne.nonlinearities.softmax)

    train_output = lasagne.layers.get_output(l_out, deterministic=False)
    loss = categorical_crossentropy(train_output, target).mean()

    if method != "FPN":
        # W updates
        W = lasagne.layers.get_all_params(l_out, quantized=True)
        W_grads = laq.compute_grads(loss, l_out)
        updates = optimizer.adam(loss_or_grads=W_grads, params=W, learning_rate=LR)
        updates = laq.clipping_scaling(updates, l_out)

        # other parameters updates (Python 2: dict items concatenation)
        params = lasagne.layers.get_all_params(l_out, trainable=True, quantized=False)
        updates = OrderedDict(updates.items() +
                              optimizer.adam(loss_or_grads=loss, params=params,
                                             learning_rate=LR).items())

        # update the ternary weights
        ternary_weights = laq.get_quantized_weights(loss, l_out)
        updates2 = OrderedDict()
        idx = 0
        tt_tag = lasagne.layers.get_all_params(l_out, tt=True)
        for tt_tag_temp in tt_tag:
            updates2[tt_tag_temp] = ternary_weights[idx]
            idx = idx + 1
        updates = OrderedDict(updates.items() + updates2.items())

        # update the 2nd moment (could also be taken from the adam optimizer)
        updates3 = OrderedDict()
        acc_tag = lasagne.layers.get_all_params(l_out, acc=True)
        idx = 0
        beta2 = 0.999
        for acc_tag_temp in acc_tag:
            updates3[acc_tag_temp] = (acc_tag_temp * beta2 +
                                      W_grads[idx] * W_grads[idx] * (1 - beta2))
            idx = idx + 1
        updates = OrderedDict(updates.items() + updates3.items())
    else:
        params = lasagne.layers.get_all_params(l_out, trainable=True)
        updates = optimizer.adam(loss_or_grads=loss, params=params, learning_rate=LR)

    test_output = lasagne.layers.get_output(l_out, deterministic=True)
    test_loss = categorical_crossentropy(test_output, target).mean()
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1), target),
                      dtype=theano.config.floatX)

    train_fn = theano.function([input, target, LR], loss, updates=updates)
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')

    X_train = train_set.X
    y_train = train_set.y
    X_val = valid_set.X
    y_val = valid_set.y
    X_test = test_set.X
    y_test = test_set.y

    # This function trains the model a full epoch (on the whole dataset)
    def train_epoch(X, y, LR):
        loss = 0
        batches = len(X) / batch_size  # integer division (Python 2)
        shuffled_range = range(len(X))
        np.random.shuffle(shuffled_range)
        for i in range(batches):
            tmp_ind = shuffled_range[i * batch_size:(i + 1) * batch_size]
            new_loss = train_fn(X[tmp_ind], y[tmp_ind], LR)
            loss += new_loss
        loss /= batches
        return loss

    # This function tests the model a full epoch (on the whole dataset)
    def val_epoch(X, y):
        err = 0
        loss = 0
        batches = len(X) / batch_size
        for i in range(batches):
            new_loss, new_err = val_fn(X[i * batch_size:(i + 1) * batch_size],
                                       y[i * batch_size:(i + 1) * batch_size])
            err += new_err
            loss += new_loss
        err = err / batches * 100
        loss /= batches
        return err, loss

    best_val_err = 100
    best_epoch = 1
    LR = LR_start

    # We iterate over epochs:
    for epoch in range(1, num_epochs + 1):

        start_time = time.time()
        train_loss = train_epoch(X_train, y_train, LR)
        val_err, val_loss = val_epoch(X_val, y_val)

        # test if validation error went down
        if val_err <= best_val_err:
            best_val_err = val_err
            best_epoch = epoch
            test_err, test_loss = val_epoch(X_test, y_test)

        epoch_duration = time.time() - start_time

        # Then we print the results for this epoch:
        print("Epoch " + str(epoch) + " of " + str(num_epochs) + " took " + str(epoch_duration) + "s")
        print(" LR: " + str(LR))
        print(" training loss: " + str(train_loss))
        print(" validation loss: " + str(val_loss))
        print(" validation error rate: " + str(val_err) + "%")
        print(" best epoch: " + str(best_epoch))
        print(" best validation error rate: " + str(best_val_err) + "%")
        print(" test loss: " + str(test_loss))
        print(" test error rate: " + str(test_err) + "%")

        with open("{0}/{1}_lr{2}_{3}.txt".format(method, name, LR_start, method), "a") as myfile:
            myfile.write("{0} {1:.5f} {2:.5f} {3:.5f} {4:.5f} {5:.5f} {6:.5f} {7:.5f}\n".format(
                epoch, train_loss, val_loss, test_loss, val_err, test_err,
                epoch_duration, LR))

        # halve the learning rate every 15 epochs (LR_decay = 0.5)
        if epoch % 15 == 0:
            LR *= LR_decay
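# The error metric above in NumPy terms: compare the argmax class of each
# output row against the integer target (a small illustrative check):
import numpy as np

outputs = np.array([[0.1, 0.7, 0.2],
                    [0.5, 0.3, 0.2]])
targets = np.array([1, 2])
err = np.mean(np.argmax(outputs, axis=1) != targets)  # 0.5: one of two wrong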
binary_training = False
stochastic_training = False
binary_test = False
stochastic_test = False
if BinaryConnect == True:
    binary_training = True
    if stochastic == True:
        stochastic_training = True
    else:
        binary_test = True

print 'Loading the dataset'

preprocessor = serial.load("${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/preprocessor.pkl")
train_set = ZCA_Dataset(
    preprocessed_dataset=serial.load("${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"),
    preprocessor=preprocessor,
    start=0, stop=40000)
valid_set = ZCA_Dataset(
    preprocessed_dataset=serial.load("${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/train.pkl"),
    preprocessor=preprocessor,
    start=40000, stop=50000)
test_set = ZCA_Dataset(
    preprocessed_dataset=serial.load("${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/test.pkl"),
    preprocessor=preprocessor)

# bc01 format
train_set.X = train_set.X.reshape(40000, 3, 32, 32)
valid_set.X = valid_set.X.reshape(10000, 3, 32, 32)
test_set.X = test_set.X.reshape(10000, 3, 32, 32)

# if using cross entropy, comment out this block.