def __init__(self, path, noise_dim=100, input_dim=(28, 28, 1), optimizer='adam_beta', batch_size=128, visualize=True):
    """Build the GAN: load MNIST, fix hyper-parameters, and assemble the
    discriminator, generator, and combined adversarial model.

    :param path: working directory for this run's outputs.
    :param noise_dim: dimensionality of the generator's latent noise vector.
    :param input_dim: image shape as (height, width, channels).
    :param optimizer: optimizer identifier — presumably consumed by the
        network builders (TODO confirm against _build_* implementations).
    :param batch_size: training batch size.
    :param visualize: when True, print the model summaries after building.
    """
    initialize_logger()
    configure_tf()
    self.path = path
    self.name = 'gan'
    self.input_dim = input_dim
    self.z_dim = noise_dim
    self.batch_size = batch_size
    self.train_x, self.train_y = load_mnist()
    logging.info('Dataset is loaded')
    self.optimizer = optimizer
    # Generator is deliberately trained at half the discriminator's rate.
    self.discriminator_lr = 1e-5
    self.generator_lr = self.discriminator_lr / 2
    # Small-stddev normal initializer, seeded for reproducibility.
    self.weight_initialization = RandomNormal(mean=0., stddev=0.02, seed=0)
    self.epochs = 6000
    # NOTE(review): name suggests a sampling interval in training steps —
    # confirm against the training loop.
    self.sample_every_n_steps = 200
    # Per-step loss histories, filled during training.
    self.discriminator_losses, self.generator_losses = [], []
    self._build_discriminator_network()
    logging.info('Discriminator model is built')
    self._build_generator_network()
    logging.info('Generator model is built')
    self._build_adversarial_network()
    logging.info('GAN is built')
    if visualize:
        print(self.model.summary())
        print(self.generator.summary())
        print(self.discriminator.summary())
def t_sne(size_datapoints):
    """Visualize the model's latent space for ``size_datapoints`` MNIST samples.

    Encodes one batch of images with the (module-level) VAE ``model``,
    projects the latent vectors to 2D with t-SNE, scatter-plots them colored
    by digit class, and saves the figure under ``t_sne_save_directory``.

    :param size_datapoints: batch size requested from the MNIST loader.
    """
    model.to('cpu')
    model.eval()
    with torch.no_grad():  # inference only — no gradients needed
        dataloader = load.load_mnist(size_datapoints)
        batch_iter = iter(dataloader)
        # Bug fix: iterator.next() is Python 2 only and was also removed
        # from torch DataLoader iterators; use the builtin next() instead.
        images, labels = next(batch_iter)
        mean, logvar = model.encoder(images)
        # Latent-space representation of the batch.
        z = model.reparameterize(mean, logvar)
        # Reduce latent vectors to 2 dimensions for plotting.
        tsne = TSNE(n_components=2, random_state=0)
        z_2d = tsne.fit_transform(z)
        target_ids = range(0, 10)
        # Detach labels from the computation graph before .numpy().
        y = labels.detach().numpy()
        plt.figure(figsize=(6, 5))
        colors = 'r', 'g', 'b', 'c', 'm', 'gold', 'k', 'gray', 'orange', 'chocolate'
        for i, c in zip(target_ids, colors):
            ind = np.where(y == i)  # indices of samples with digit label i
            plt.scatter(z_2d[ind, 0], z_2d[ind, 1], c=c)
        plt.savefig(t_sne_save_directory + 't_sne_visualization.png')
        plt.show()
        plt.close()
def main(args):
    """Build, train, and evaluate the requested MNIST network with MXNet."""
    # Model selection via a dispatch table rather than an if/elif chain.
    builders = {
        'mnist_cnn': mnist_cnn,
        'mnist_bwn': mnist_bwn,
        'mnist_xnor': mnist_xnor,
    }
    if args.network not in builders:
        raise Exception('Unknown network: ' + args.network)
    model = builders[args.network]()
    # Data iterators over the train/validation splits.
    batch_size = args.batch_size
    (x_train, y_train), (x_val, y_val) = load_mnist(args.data_path)
    train_iter = mx.io.NDArrayIter(x_train, y_train, batch_size, shuffle=True)
    test_iter = mx.io.NDArrayIter(x_val, y_val, batch_size)
    # Verbose logging during training.
    logging.basicConfig(level=logging.DEBUG)
    # Train the module on CPU.
    mod = mx.mod.Module(symbol=model, context=mx.cpu())
    mod.fit(train_data=train_iter,
            eval_data=test_iter,
            num_epoch=args.num_epoch,
            optimizer_params={'learning_rate': args.learning_rate,
                              'momentum': args.momentum},
            batch_end_callback=mx.callback.Speedometer(batch_size, 200))
    # Report final accuracy on the held-out split.
    metric = mx.metric.Accuracy()
    test_acc = mod.score(test_iter, metric)
    print('Testing accuracy: %.2f%%' % (test_acc[0][1] * 100, ))
def main():
    """Load a trained MNIST classifier checkpoint and visualize its
    predictions on the first 20 training samples."""
    model_fn = "./model.pth"
    # Prefer the GPU when one is available.
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    # Flattened training images as a single tensor pair.
    x, y = load_mnist(is_train=True, flatten=True)
    x = x.to(device)
    y = y.to(device)
    # Restore the classifier weights from disk.
    model = ImageClassifier(28 * 28, 10).to(device)
    model.load_state_dict(load(model_fn, device))
    # Evaluate and display the first 20 samples.
    test(model, x[:20], y[:20], to_be_shown=True)
def run_mnist_SGD(num_training=50000, gpu_id=None):
    """Train the MNIST symbol with plain SGD.

    :param num_training: number of training samples to load.
    :param gpu_id: device id forwarded to dev(); None selects the default.
    """
    X, Y, X_test, Y_test = load_mnist(num_training)
    minibatch_size = 100
    net = get_mnist_sym()
    ctx = dev(gpu_id)
    # Placeholder NDArrays matching one minibatch of inputs/labels.
    data_shape = (minibatch_size,) + X.shape[1:]
    data_inputs = {
        'data': nd.zeros(data_shape, ctx=ctx),
        'softmax_label': nd.zeros((minibatch_size,), ctx=ctx),
    }
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    # Run the optimizer; the executor and parameters are returned but unused here.
    exe, exe_params, _ = SGD(sym=net, dev=ctx, data_inputs=data_inputs,
                             X=X, Y=Y, X_test=X_test, Y_test=Y_test,
                             total_iter_num=1000000,
                             initializer=initializer,
                             lr=5E-6, prior_precision=1.0,
                             minibatch_size=100)
def precomputed_kernel():
    """Train an SVM with a precomputed (linear + RBF) kernel on MNIST and
    plot its support vectors in PCA space.

    Train/test Gram matrices are cached on disk as lists of libsvm-style
    sparse dicts, where key 0 holds the 1-based sample serial number that
    the precomputed-kernel format requires.
    """
    data = load_mnist()

    def RBF_kernel(x1, x2, gamma):
        return np.exp(-1.0 * gamma * np.sum((x1 - x2) ** 2, axis=0))

    def linear_kernel(x1, x2):
        return np.dot(x1, x2)

    def new_kernel(x1, x2, gamma):
        # Combined kernel: linear similarity plus an RBF term.
        return linear_kernel(x1, x2) + RBF_kernel(np.array(x1), np.array(x2), gamma)

    # Build (or load the cached) training Gram matrix in libsvm sparse format.
    if os.path.exists('data/gram_matrix_train.npy'):
        # allow_pickle=True is required: the file stores an object array of dicts.
        gram_matrix = np.load('data/gram_matrix_train.npy', allow_pickle=True)
    else:
        gram_matrix = []
        for i, x1 in enumerate(data['X_train']):
            print(i)
            tmp = {}
            tmp[0] = i + 1  # serial number: libsvm precomputed kernels are 1-based
            for j, x2 in enumerate(data['X_train']):
                tmp[j + 1] = new_kernel(x1, x2, gamma=1.0 / 28)
            gram_matrix.append(tmp)
        np.save('data/gram_matrix_train.npy', gram_matrix)
    # Same for the test Gram matrix.
    if os.path.exists('data/gram_matrix_test.npy'):
        test_gram_matrix = np.load('data/gram_matrix_test.npy', allow_pickle=True)
    else:
        test_gram_matrix = []
        for i, x1 in enumerate(data['X_test']):
            print(i)
            tmp = {}
            tmp[0] = i  # serial number of a test row is not used for training; any value works
            for j, x2 in enumerate(data['X_train']):
                tmp[j + 1] = new_kernel(x1, x2, gamma=1.0 / 28)
            test_gram_matrix.append(tmp)
        np.save('data/gram_matrix_test.npy', test_gram_matrix)
    # Train an SVM on the precomputed kernel (-t 4).
    prob = svm_problem(data['T_train'], gram_matrix)
    param = svm_parameter('-s 0 -t 4 -c 1')
    m = svm_train(prob, param)
    # Recover which training samples became support vectors.
    support_vectors = m.get_SV()
    indecis = np.zeros(shape=data['X_train'].shape[0], dtype=bool)
    for i, dict_ in enumerate(support_vectors):
        # Bug fix: key 0 stores the 1-based serial number assigned above,
        # so subtract 1 to index into X_train. The old code was off by one
        # (and raised IndexError when the last sample was a support vector).
        indecis[int(dict_[0]) - 1] = True
    # Visualize the support vectors against the full training set.
    pca_plot_with_svm(data['X_train'], data['T_train'],
                      data['X_train'][indecis], data['T_train'][indecis],
                      file_name='svm_pca_mode_precomputed')
def main():
    """Run spectral clustering on 5000 MNIST samples for every cut mode and
    kernel combination, saving a PCA plot of each result."""
    data = load_mnist()
    subset = data['X_train'][:5000:1]
    # Kernels are partially applied so they all share the same call signature.
    kernel_list = [
        partial(linear_kernel),
        partial(RBF_kernel, gamma=1.0 / 28),
        partial(new_kernel, gamma=1.0 / 28),
    ]
    for mode in ('RCUT', 'NCUT'):
        for kernel in kernel_list:
            result = spectral_clustering(subset,
                                         kernel_fn=kernel,
                                         num_clusters=5,
                                         mode=mode)
            # Shift cluster ids 0..4 to the label range 1..5.
            result = result + 1
            file_name = '{}_{}'.format(mode, kernel.func.__name__)
            pca_plot(subset, result, with_legend=False, file_name=file_name)
def run_mnist_DistilledSGLD(num_training=50000, gpu_id=None):
    """Run DistilledSGLD on mnist dataset

    A teacher network is trained with SGLD while a student network is
    distilled from it; hyper-parameters switch between a heavy regime
    (>= 10000 training samples) and a light one.

    :param num_training: number of training samples to load.
    :param gpu_id: device id forwarded to dev(); None selects the default.
    """
    X, Y, X_test, Y_test = load_mnist(num_training)
    minibatch_size = 100
    # Hyper-parameter regime depends on the dataset size.
    if num_training >= 10000:
        num_hidden = 800
        total_iter_num = 1000000
        teacher_learning_rate = 1E-6
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.1
    else:
        num_hidden = 400
        total_iter_num = 20000
        teacher_learning_rate = 4E-5
        student_learning_rate = 0.0001
        teacher_prior = 1
        student_prior = 0.1
        perturb_deviation = 0.001
    # Student outputs log-probabilities (LogSoftmax head); teacher uses the default head.
    teacher_net = get_mnist_sym(num_hidden=num_hidden)
    logsoftmax = LogSoftmax()
    student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
    data_shape = (minibatch_size, ) + X.shape[1::]
    teacher_data_inputs = {
        'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
        'softmax_label': nd.zeros((minibatch_size, ), ctx=dev(gpu_id))
    }
    # The student's label placeholder is 10-wide: it receives the teacher's
    # soft class probabilities rather than hard labels.
    student_data_inputs = {
        'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
        'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev(gpu_id))
    }
    teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_exe, student_params, _ = \
        DistilledSGLD(teacher_sym=teacher_net, student_sym=student_net,
                      teacher_data_inputs=teacher_data_inputs,
                      student_data_inputs=student_data_inputs,
                      X=X, Y=Y, X_test=X_test, Y_test=Y_test,
                      total_iter_num=total_iter_num,
                      student_initializer=student_initializer,
                      teacher_initializer=teacher_initializer,
                      student_optimizing_algorithm="adam",
                      teacher_learning_rate=teacher_learning_rate,
                      student_learning_rate=student_learning_rate,
                      teacher_prior_precision=teacher_prior,
                      student_prior_precision=student_prior,
                      perturb_deviation=perturb_deviation,
                      minibatch_size=100, dev=dev(gpu_id))
def __init__(self, path, noise_dim=100, input_dim=(28, 28, 1), optimizer='adam_beta', batch_size=128, visualize=True, clip_constant=0.01):
    """Build a WGAN on top of the base GAN class.

    :param path: working directory handed to the base class.
    :param noise_dim: dimensionality of the generator's latent noise vector.
    :param input_dim: image shape as (height, width, channels).
    :param optimizer: optimizer identifier understood by the base class.
    :param batch_size: training batch size.
    :param visualize: accepted for interface compatibility; base-class
        summaries are always suppressed because the networks are rebuilt here.
    :param clip_constant: weight-clipping bound applied to the critic
        (the WGAN Lipschitz trick).
    """
    # Bug fix: forward the caller's arguments to the base class instead of
    # re-passing the hard-coded defaults — previously noise_dim, input_dim,
    # optimizer, and batch_size were silently ignored.
    super().__init__(path, noise_dim=noise_dim, input_dim=input_dim,
                     optimizer=optimizer, batch_size=batch_size,
                     visualize=False)
    self.name = 'WGAN'
    # Train on a single digit class only.
    self.train_x, self.train_y = load_mnist(label=5)
    self.clip_constant = clip_constant
    # Ratio of critic updates per generator update.
    self.critic_epochs = 5
    # Same architecture as the GAN discriminator; in WGAN terminology this
    # network is called the 'critic'.
    self._build_discriminator_network()
    logging.info('Critic model is built')
    self._build_generator_network()
    logging.info('Generator model is built')
    self._build_adversarial_network()
    logging.info('WGAN is built')
def run_mnist_DistilledSGLD(num_training=50000, gpu_id=None):
    """Run DistilledSGLD on mnist dataset"""
    X, Y, X_test, Y_test = load_mnist(num_training)
    minibatch_size = 100
    ctx = dev(gpu_id)
    # Two hyper-parameter regimes: heavy for >= 10000 training samples,
    # light otherwise.
    big = num_training >= 10000
    num_hidden = 800 if big else 400
    total_iter_num = 1000000 if big else 20000
    teacher_learning_rate = 1E-6 if big else 4E-5
    student_learning_rate = 0.0001
    teacher_prior = 1
    student_prior = 0.1
    perturb_deviation = 0.1 if big else 0.001
    # Student outputs log-probabilities; teacher keeps the default head.
    teacher_net = get_mnist_sym(num_hidden=num_hidden)
    logsoftmax = LogSoftmax()
    student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
    data_shape = (minibatch_size,) + X.shape[1:]
    teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=ctx),
                           'softmax_label': nd.zeros((minibatch_size,), ctx=ctx)}
    # The student's 10-wide label placeholder holds the teacher's soft targets.
    student_data_inputs = {'data': nd.zeros(data_shape, ctx=ctx),
                           'softmax_label': nd.zeros((minibatch_size, 10), ctx=ctx)}
    teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_initializer = BiasXavier(factor_type="in", magnitude=1)
    student_exe, student_params, _ = DistilledSGLD(
        teacher_sym=teacher_net, student_sym=student_net,
        teacher_data_inputs=teacher_data_inputs,
        student_data_inputs=student_data_inputs,
        X=X, Y=Y, X_test=X_test, Y_test=Y_test,
        total_iter_num=total_iter_num,
        student_initializer=student_initializer,
        teacher_initializer=teacher_initializer,
        student_optimizing_algorithm="adam",
        teacher_learning_rate=teacher_learning_rate,
        student_learning_rate=student_learning_rate,
        teacher_prior_precision=teacher_prior,
        student_prior_precision=student_prior,
        perturb_deviation=perturb_deviation,
        minibatch_size=100, dev=ctx)
def test_kernel(mode):
    """Train an SVM with a built-in kernel and plot its support vectors.

    mode==0 : linear
    mode==1 : polynomial
    mode==2 : rbf
    """
    # Train on the libsvm-format training file with the requested kernel.
    y, x = svm_read_problem('data/training.csv')
    m = svm_train(y, x, '-s 0 -t {} -c 1'.format(mode))
    # Pull out the support vectors and predict labels for them.
    support_vectors = m.get_SV()
    p_labels, p_accs, p_vals = svm_predict(
        np.zeros(len(support_vectors)), support_vectors, m)
    # Convert the sparse dict representation to a dense (n_sv, 784) matrix.
    dense_sv = np.zeros(shape=[len(support_vectors), 28 * 28])
    for row, sv in enumerate(support_vectors):
        for col, value in sv.items():
            dense_sv[row, col] = value
    # Plot the support vectors against the full training set in PCA space.
    data = load_mnist()
    pca_plot_with_svm(data['X_train'], data['T_train'],
                      dense_sv, np.array(p_labels),
                      file_name='svm_pca_mode{}'.format(mode))
def run_mnist_SGD(num_training=50000, gpu_id=None):
    """Train the MNIST symbol with plain SGD.

    :param num_training: number of training samples to load.
    :param gpu_id: device id forwarded to dev(); None selects the default.
    """
    X, Y, X_test, Y_test = load_mnist(num_training)
    minibatch_size = 100
    net = get_mnist_sym()
    # Placeholder NDArrays shaped like one minibatch of inputs/labels.
    data_shape = (minibatch_size, ) + X.shape[1::]
    data_inputs = {
        'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
        'softmax_label': nd.zeros((minibatch_size, ), ctx=dev(gpu_id))
    }
    initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
    # Run the optimizer; the executor and parameters are returned but unused here.
    exe, exe_params, _ = SGD(sym=net, dev=dev(gpu_id), data_inputs=data_inputs,
                             X=X, Y=Y, X_test=X_test, Y_test=Y_test,
                             total_iter_num=1000000,
                             initializer=initializer,
                             lr=5E-6, prior_precision=1.0, minibatch_size=100)
os.makedirs(log_dir) # callbacks lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=1e-5) csv_logger = CSVLogger(os.path.join(log_dir, 'history.csv')) checkpointer = ModelCheckpoint(filepath=os.path.join( log_dir, 'weights.{epoch:03d}.hdf5'), verbose=1, save_best_only=False, period=1) # load data X_train, X_test, Y_train, Y_test, y_train, y_test, data_info = load_mnist( model_name) [n_classes, img_rows, img_cols, img_channels] = data_info input_shape = (img_rows, img_cols, img_channels) # get the model model = get_mnist_net(model_name, input_shape) # save the random weights model.save_weights(os.path.join(log_dir, 'weights.%.3d.hdf5' % (0))) # Fit the model on the batches generated by datagen.flow(). model.fit(X_train, Y_train, shuffle=True, batch_size=batch_size, validation_data=(X_test, Y_test), epochs=n_epochs,
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ''' import numpy as np from latte import * from latte.solvers import sgd_update import random from data_loader import load_mnist from latte.math import compute_softmax_loss, softmax_loss_backprop, compute_accuracy train_data, train_label = load_mnist(dataset="training", path="./data") test_data, test_label = load_mnist(dataset="testing", path="./data") num_train = train_data.shape[0] train_data = np.pad(train_data.reshape(num_train, 1, 28, 28), [(0, 0), (0, 7), (0, 0), (0, 0)], mode='constant') train_label = train_label.reshape(num_train, 1) num_test = test_data.shape[0] test_data = np.pad(test_data.reshape(num_test, 1, 28, 28), [(0, 0), (0, 7), (0, 0), (0, 0)], mode='constant') test_label = test_label.reshape(num_test, 1) batch_size = 50 net = Net(batch_size) data = MemoryDataLayer(net, train_data[0].shape)
def AE(x_train, x_test): try: encoder, decoder, autoencoder = load_ae() except OSError: encoder, decoder, autoencoder = create_ae() train(autoencoder, x_train, x_test) save_ae(encoder, decoder, autoencoder) encoder.summary() decoder.summary() autoencoder.summary() return encoder, decoder, autoencoder x_train, x_test, y_train, y_test, _ = data_loader.load_mnist() # Load data x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:]))) x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:]))) print(x_train.shape) encoder, decoder, autoencoder = AE(x_train, x_test) # Load model if exist z = encoder.predict(x_test) # Prediction - latent space print(z.shape) x_synthetic = decoder.predict(z) # Prediction - synthetic data """ # Visualization n = 10 # how many digits we will display plt.figure(figsize=(20, 4)) for i in range(n): ax = plt.subplot(2, n, i + 1) plt.imshow(x_test[i].reshape(28, 28)) # display original plt.gray()
D = np.diag(np.sum(W, 1)) P = np.dot(inv(D), W) P_uu = P[l:, l:] P_ul = P[l:, :l] f_l = Y u = len(P_uu) I = np.identity(u) f_u = np.dot(np.dot(inv(I - P_uu), P_ul), f_l) predicted_labels = np.zeros((u, ), dtype=np.int64) predicted_labels[f_u > 0.5] = np.int64(1) return predicted_labels if __name__ == "__main__": # np.random.seed(0) xtrain, ytrain = load_mnist("../Data") n = len(ytrain) labeled_ind = np.array(range(n)) neg_ind = labeled_ind[ytrain == 0] pos_ind = labeled_ind[ytrain == 1] n = len(xtrain) for l_k in [3, 10, 50]: five = 5 acc = 0 for _ in range(five): labeled = np.zeros(n) neg = np.random.choice(len(neg_ind), l_k) pos = np.random.choice(len(pos_ind), l_k) labeled_ind_sample = np.concatenate((neg_ind[neg], pos_ind[pos]), axis=0) labeled[labeled_ind_sample] = 1
from __future__ import division from collections import OrderedDict import time import numpy as np from data_loader import load_mnist from tqdm import tqdm from le_net import LeNet, get_lenet_layers from optimizer import SGD, SGDMomentum if __name__ == "__main__": layers = get_lenet_layers() xtrain, ytrain, xtest, ytest = load_mnist("../Data") # Optimization parameters opt_params = OrderedDict() opt_params["mu"] = 0.9 # momentum opt_params["epsilon"] = 0.01 # initial learning rate opt_params["gamma"] = 0.0001 opt_params["power"] = 0.75 opt_params["weight_decay"] = 0.0005 # weight decay on w # display information test_interval = 500 display_interval = 1 snapshot = 5000 max_iter = 10000 # batcher parameters batch_size = 64
print_every = int(config['SAVE_PARAMETERS']['image_print_frequency']) t_sne_points = int(config['MODEL_VARIABLES']['t_sne_points']) # pathlib.Path('./experiments').mkdir(parents=True, exist_ok=True) exp_path = "./experiments/" + exp_name pathlib.Path(exp_path).mkdir(parents=True, exist_ok=True) image_save_directory = exp_path + "/training_images" checkpoint_dir = exp_path + '/training_checkpoints/' # transit_image_directory = exp_path + '/digit_transit/' t_sne_save_directory = exp_path + '/t_sne/' pathlib.Path(image_save_directory).mkdir(parents=True, exist_ok=True) pathlib.Path(checkpoint_dir).mkdir(parents=True, exist_ok=True) # pathlib.Path(transit_image_directory).mkdir(parents=True, exist_ok=True) pathlib.Path(t_sne_save_directory).mkdir(parents=True, exist_ok=True) trainloader = load.load_mnist(batch_size) # load training data # x, _ = next(iter(trainloader)) # plt.imshow(x[0].view(28, 28), cmap='gray') # plt.show(block=True) # def display_transit(): # model.eval() # model.to('cpu') # images1, labels1 = iter(trainloader).next() # y1 = labels1.detach().numpy() # x1 = images1[0] # i = 0 # while y1[i] == y1[0]: # i += 1 # x2 = images1[i]
import torch
import numpy as np
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch import nn, optim
import model as VAE
from scipy.stats import norm
import matplotlib
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
from torchvision.utils import save_image
from tqdm import tqdm
import data_loader as load

# One fixed 200-sample loader plus a TensorBoard writer, shared at module level.
dataloader2 = load.load_mnist(200)
writer = SummaryWriter()
data2 = iter(dataloader2)


def loss_function(out, target, mean, logvar, batch_size):
    """Return (scaled KLD, BCE, total) VAE loss terms for a batch of 28x28 images.

    :param out: reconstructed images, reshaped here to (-1, 784).
    :param target: original images, reshaped here to (-1, 784).
    :param mean: latent means produced by the encoder.
    :param logvar: latent log-variances produced by the encoder.
    :param batch_size: number of examples, used to scale the KLD term.
    """
    # Reconstruction term: maximizes the likelihood of the training data.
    bce = F.binary_cross_entropy(out.view(-1, 784), target.view(-1, 784))
    # KL divergence between the approximate posterior N(mean, exp(logvar))
    # and the standard-normal prior.
    kld = -0.5 * torch.sum(1 + logvar - mean.pow(2) - logvar.exp())
    # Scale KLD by (dimensions * number_of_examples) so the reconstruction
    # and prior terms carry comparable weight in the total loss.
    scaled_kld = kld / (784 * batch_size)
    return scaled_kld, bce, bce + scaled_kld
step_size=step_size_hypernet, num_iters=num_iters_hypernet, callback=callback, mass=0) def valid_objective_current(hyper, seed): """The objective for the hyperparameter, with a fixed hypernetwork. :param hyper: The hyperparameter (float) input to the hypernetwork. :param seed: The seed (integer) for sampling a hyperparameter. :return: The validation loss (float). """ return valid_objective(hypernet(init_hypernet_params, hyper), hyper, seed) hyper_cur, m_hyper, v_hyper, cur_iter_hyper = adam(grad(valid_objective_current), hyper_cur, step_size=step_size_hyper, num_iters=num_iters_hyper, callback=callback_outer, m=m_hyper, v=v_hyper, offset=cur_iter_hyper) print("The current hyperparameter is:", hyper_cur) if __name__ == '__main__': params = opt_params(graph_iters) _, train_images, train_labels, test_images, test_labels = load_mnist() n_data, n_data_val, n_data_test = params['n_data'], params['n_data_val'], params['n_data_test'] train_data = (train_images[:n_data], train_labels[:n_data]) valid_data = (train_images[n_data:n_data + n_data_val], train_labels[n_data:n_data + n_data_val]) test_data = (test_images[:n_data_test], test_labels[:n_data_test]) experiment(train_data, valid_data, test_data, params['init_scale'], params['batch_size'], params['num_iters_hypernet'], params['step_size_hypernet'], params['num_iters_hyper'], params['step_size_hyper'], params['num_iters'], params['graph_mod'], params['global_seed'])
def main():
    """Visualize the MNIST training set with an LDA projection plot."""
    mnist = load_mnist()
    lda_plot(mnist['X_train'], mnist['T_train'])
z2 = model_mnist.reparameterize(mean2, logvar2) grid_size1 = 15 z_transit = torch.zeros([grid_size1 * grid_size1, z_dim]) for i, _ in enumerate(z_transit): z_transit[i] = z1 + ((z2 - z1) / (grid_size1 * grid_size1)) * i img = model_mnist.decoder(z_transit) figure1 = torch.from_numpy( train_model.display_grid(grid_size=15, digit_size=28, images=img)).float() save_image(figure1, transit_image_directory + 'digit_transit.png') dataloader = load.load_mnist(50) data = iter(dataloader) parser = argparse.ArgumentParser() parser.add_argument("--dataset", help="the dataset to generate from (Only MNIST currently)", default='MNIST') parser.add_argument( "--model_path", help="the path to the pre-trained model", default='./experiments/mnist/training_checkpoints/checkpoint10.pth') parser.add_argument( "--grid_size", help= "the size of the grid to make (a grid_size*grid_size grid will be made )", default='8')
import os os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' import tensorflow as tf import numpy as np import time import argparse import csv from makeNN import define_graph from data_loader import load_mnist import globalV from utils import file_writer, sma_accuracy, parse_args, get_elapsed_time np.set_printoptions(precision=4, suppress=True) (x_train, y_train),(x_test, y_test)=load_mnist() start=time.time() dir_path='/nfs/ghome/live/yashm/Desktop/research/perturbations/results/learning_curves.csv' tf.compat.v1.set_random_seed(globalV.seed) np.random.seed(globalV.seed) parse_args() for hl in range(globalV.n_hl+1): globalV.w_norm[str(hl)]=[] globalV.b_sum[str(hl)]=[] graph=define_graph() print('built with CUDA: ', tf.test.is_built_with_cuda()) print('is using GPU: ', tf.test.is_gpu_available())
if self.pred_fn is None: X = T.tensor4() y_hat_pred = self.model(X, 0.0, 0.0, 0.0) y_pred = T.argmax(y_hat_pred, axis=1) self.pred_fn = theano.function(inputs=[X], outputs=y_pred) preds = np.asarray([]) num_batches = -(-x.shape[0] // self.batch_size) for bidx in range(num_batches): batch_x = x[bidx * self.batch_size: (bidx + 1) * self.batch_size] batch_y_pred = self.pred_fn(batch_x) preds = np.concatenate((preds, batch_y_pred)) return preds if __name__ == '__main__': dataset = load_mnist('../../data/mnist.pkl.gz') train_x, train_y = dataset[0] valid_x, valid_y = dataset[1] test_x, test_y = dataset[2] # reshape the input data to be compliant with the input shape expected by the CNN n_train_x = train_x.shape[0] n_valid_x = valid_x.shape[0] n_test_x = test_x.shape[0] train_x = train_x.reshape(n_train_x, 1, 28, 28) valid_x = valid_x.reshape(n_valid_x, 1, 28, 28) test_x = test_x.reshape(n_test_x, 1, 28, 28) model = ConvNet( input_shape=train_x.shape[1:], n_classes=10,
# DATASET LOADING if args.dataset == 'dyni': wavs_train, labels_train, wavs_valid, labels_valid, wavs_test, labels_test = data_loader.load_dyni( ) Y = labels_train.max() + 1 elif args.dataset == 'usc': wavs_train, labels_train, wavs_valid, labels_valid, wavs_test, labels_test = data_loader.load_usc( ) Y = labels_train.shape[1] elif args.dataset == 'esc': wavs_train, labels_train, wavs_valid, labels_valid, wavs_test, labels_test = data_loader.load_esc( ) Y = labels_train.max() + 1 elif args.dataset == 'mnist': wavs_train, labels_train, wavs_valid, labels_valid, wavs_test, labels_test = data_loader.load_mnist( ) Y = labels_train.max() + 1 elif args.dataset == 'gtzan': wavs_train, labels_train, wavs_valid, labels_valid, wavs_test, labels_test = data_loader.load_gtzan( ) Y = labels_train.max() + 1 elif args.dataset == 'irmas': wavs_train, labels_train, wavs_valid, labels_valid, wavs_test, labels_test = data_loader.load_irmas( ) Y = labels_train.max() + 1 elif args.dataset == 'bird': wavs_train, labels_train, wavs_valid, labels_valid, wavs_test, labels_test = data_loader.load_bird( ) Y = labels_train.max() + 1 elif args.dataset == 'tut': wavs_train, labels_train, wavs_valid, labels_valid, wavs_test, labels_test = data_loader.load_tut(
from collections import OrderedDict from mlp import MLP from data_loader import load_mnist import time import sys # Minimal code version for the random hyper-parameter optimization # Makes a fixed number of trials by picking a random combination of the hyper-parameters of the model # This version was written explicitly for the MLP class described in mpl.py # but it can be easily adapted to any kind of model having a set_params method dest_dir = 'opt_logs' if not os.path.exists(dest_dir): os.makedirs(dest_dir) mnist_path = sys.argv[1] dataset = load_mnist(mnist_path) train_x, train_y = dataset[0] valid_x, valid_y = dataset[1] test_x, test_y = dataset[2] params = { 'learning_rate': [0.1, 0.05, 0.01], 'momentum': [0.0, 0.5, 0.9, 0.95], 'dropout_p_input': [0.0, 0.5], 'dropout_p_hidden': [0.0, 0.5], } if not os.path.exists(dest_dir): os.makedirs(dest_dir) num_trials = 10 n_tried = 0
f.write(str(no_layers) + "\n") for l in layers: f.write(str(l) + " ") f.write("\n") for fun in functions: f.write(str(fun) + " ") if __name__ == "__main__": parser = ArgumentParser() parser.add_argument("--pop_size", type=int, default=20, help="Population size") parser.add_argument("--max_layers", type=int, default=1, help="Max number of layers") parser.add_argument( "--max_neuron_bits", type=int, default=7, help="Max number of bits for representing layer number") args = parser.parse_args() data = load_mnist() nn = genetic(args, data) write_to_file(nn, args)
def init(self): # TODO: define MLP weigths as Theano SharedVariables # define the shared parameters self.W, self.b = [], [] for i in range(len(self.layers)): # APPEND A NEW WEIGHT MATRIX OF HIDDEN LAYER # APPEND A NEW BIAS VECTOR OF THE HIDDEN LAYER self.Wy = # INITIALIZE THE HIDDEN-TO-OUTPUT WEIGHT MATRIX (like in LogReg) self.by = # INITIALIZE THE HIDDEN-TO-OUTPUT BIAS VECTOR (like in LogReg) params = self.W + self.b + [self.Wy, self.by] return params def model(self, X, dropout_p_input=0.0, dropout_p_hidden=0.0): # TODO: Define the MLP with the required number of hidden layers # dropout should be applyed at the input and after each hidden layer # use the self.activation_fn as activation def softmax(self, x): # numerically stable version of softmax (it avoids blowups due to high values of x) exp_x = T.exp(x - T.max(x, axis=1, keepdims=True)) return exp_x / T.sum(exp_x, axis=1, keepdims=True) # ACTIVATION FUNCTIONS # def tanh(self, x): return T.tanh(x) def sigmoid(self, x): return T.nnet.sigmoid(x) def relu(self, x): #return T.max(0., x) return x * (x > 0) # DROPOUT # def dropout(self, x, p=0): # p (float): dropout probability on x # TODO: APPLY DROPOUT ON x only if p > 0 return x # LOSS FUNCTION # def categorical_cross_entropy(self, y, y_hat): return T.mean(-T.log(y_hat[T.arange(y.shape[0]), y]), axis=0) def floatX(self, arr): return np.asarray(arr, dtype=theano.config.floatX) def init_weights(self, shape, sigma=0.01, name=''): if sigma == 0: W_bound = np.sqrt(6. / (shape[0] + shape[1])) return theano.shared(self.floatX(self.numpy_rng.uniform(low=-W_bound, high=W_bound, size=shape)), borrow=True, name=name) return theano.shared(self.floatX(self.numpy_rng.randn(*shape) * sigma), borrow=True, name=name) def sgd(self, cost, params, learning_rate=0.1): # compute the gradients of each parameter w.r.t. 
the loss pgrads = T.grad(cost, wrt=params) # define the sgd updates updates = OrderedDict([(p, p - learning_rate * g) for p, g in zip(params, pgrads)]) return updates def apply_momentum(self, updates, momentum=0.5): # updates (dict or OrderedDict): reports for each parameter its symbolic update expression # TODO: apply momentum on the updates return updates def fit(self, x, y, epochs=10, shuffle_training=True): if self.train_fn is None: print 'Compiling the training functions' # symbolic input and output variables x_sym, y_sym = T.matrix(), T.ivector() # build the model and get the output variable self.params = self.init() y_hat = self.model(x_sym, self.dropout_p_input, self.dropout_p_hidden) cost = self.categorical_cross_entropy(y_sym, y_hat) updates = self.optimization_fn(cost, self.params, self.learning_rate) if self.momentum > 0.: updates = self.apply_momentum(updates, self.momentum) self.train_fn = theano.function(inputs=[x_sym, y_sym], outputs=cost, updates=updates) if shuffle_training: shuffle_idx = self.numpy_rng.permutation(x.shape[0]) x = x[shuffle_idx] y = y[shuffle_idx] num_train_batches = -(-x.shape[0] // self.batch_size) train_cost_history = [] print 'Training started' for e in range(epochs): avg_cost = 0 for bidx in range(num_train_batches): batch_x = x[bidx * self.batch_size: (bidx + 1) * self.batch_size] batch_y = y[bidx * self.batch_size: (bidx + 1) * self.batch_size] batch_cost = self.train_fn(batch_x, batch_y) train_cost_history.append(batch_cost) if np.isnan(batch_cost): print 'NaN cost detected. 
Abort' return avg_cost += batch_cost avg_cost /= num_train_batches print 'Epoch: {} Loss: {:.8f}'.format(e + 1, avg_cost) return train_cost_history def predict(self, x): if self.pred_fn is None: # build the prediction function x_sym = T.matrix() # disable any dropout in prediction y_hat_pred = self.model(x_sym, 0.0, 0.0) # then compute the predicted output as the class with maximum probability y_pred = T.argmax(y_hat_pred, axis=1) self.pred_fn = theano.function(inputs=[x_sym], outputs=y_pred) preds = np.asarray([]) num_batches = -(-x.shape[0] // self.batch_size) for bidx in range(num_batches): batch_x = x[bidx * self.batch_size: (bidx + 1) * self.batch_size] batch_y_pred = self.pred_fn(batch_x) preds = np.concatenate((preds, batch_y_pred)) return preds def set_params(self, learning_rate=0.1, momentum=0.5, dropout_p_input=0.0, dropout_p_hidden=0.0): self.learning_rate = learning_rate self.momentum = momentum self.dropout_p_input = dropout_p_input self.dropout_p_hidden = dropout_p_hidden self.train_fn, self.pred_fn = None, None if __name__ == '__main__': dataset = load_mnist('../../data/mnist.pkl.gz') train_x, train_y = dataset[0] valid_x, valid_y = dataset[1] test_x, test_y = dataset[2] # Try with different combinations of the parameters model = MLP( n_classes=10, n_inputs=train_x.shape[1], optim='rmsprop', activation='relu', dropout_p_input=0.0, dropout_p_hidden=0.5, layers=[256, 128], learning_rate=0.001, momentum=0.0) t0 = time.time() model.fit(train_x, train_y, epochs=25) print 'Training completed in {:.2f} sec'.format(time.time() - t0) valid_y_pred = model.predict(valid_x) valid_accuracy = np.sum(valid_y_pred == valid_y, dtype=np.float32) / valid_y.shape[0] print 'Validation accuracy: {:.2f}'.format(valid_accuracy * 100) # you should get around 97.5% test_y_pred = model.predict(test_x) test_accuracy = np.sum(test_y_pred == test_y, dtype=np.float32) / test_y.shape[0] print 'Test accuracy: {:.2f}'.format(test_accuracy * 100) # you should get around 97.5%
def test_data_load_correctly(self):
    """The training split should yield 60000 flattened 784-pixel images
    with exactly one label per image."""
    images, labels = load_mnist('../mnist', kind='train')
    self.images = images
    self.labels = labels
    self.assertThat(images.shape, Equals((60000, 784)))
    self.assertThat(labels.shape, Equals((60000, 1)))
nn.update_parameters(args.learning_rate) # Evaluate the network if cnt % args.eval_every == 0: test_acc, test_cm = \ eval_nn(nn, data["test_imgs"], data["test_labels"]) train_acc, train_cm = \ eval_nn(nn, data["train_imgs"], data["train_labels"], 5000) print("Train acc: %2.6f ; Test acc: %2.6f" % (train_acc, test_acc)) pylab.imshow(test_cm) pylab.draw() matplotlib.pyplot.pause(0.001) if __name__ == "__main__": parser = ArgumentParser() parser.add_argument("--learning_rate", type = float, default = 0.001, help="Learning rate") parser.add_argument("--eval_every", type = int, default = 200, help="Learning rate") args = parser.parse_args() mnist = load_mnist() input_size = mnist["train_imgs"][0].size print input_size nn = FeedForward(input_size, [(300, logistic), (10, identity)]) print(nn.to_string()) train_nn(nn, mnist, args)
batch_x = x[bidx * self.batch_size: (bidx + 1) * self.batch_size] batch_y_pred = self.pred_fn(batch_x) preds = np.concatenate((preds, batch_y_pred)) return preds def set_params(self, learning_rate=0.1, momentum=0.5, dropout_p_input=0.0, dropout_p_hidden=0.0): self.learning_rate = learning_rate self.momentum = momentum self.dropout_p_input = dropout_p_input self.dropout_p_hidden = dropout_p_hidden self.train_fn, self.pred_fn = None, None if __name__ == '__main__': dataset = load_mnist('../../data/mnist.pkl.gz') train_x, train_y = dataset[0] valid_x, valid_y = dataset[1] test_x, test_y = dataset[2] # Try with different combinations of the parameters model = MLP( n_classes=10, n_inputs=train_x.shape[1], optim='rmsprop', activation='relu', dropout_p_input=0.0, dropout_p_hidden=0.5, layers=[256, 128], learning_rate=0.001, momentum=0.0)
from data_loader import load_mnist

if __name__ == '__main__':
    # Load the raw MNIST training split as (images, labels) arrays from the
    # local 'mnist' directory.
    X_train, y_train = load_mnist('mnist', kind='train')