def main():
    """Train a denoising autoencoder on MNIST.

    Gaussian noise is added to the network inputs while the clean images
    remain the reconstruction targets; the test reconstruction is plotted.
    """
    x_train, y_train, x_test, y_test = data.mnist()
    params = {
        "epochs": 70,
        "num_hid_nodes": int(x_train.shape[1] * 0.9),
        "weight_init": [0.0, 0.1],
        "activations": 'relu',  # relu is much better performance
        "lr": 0.15,
        "decay": 1e-6,
        "momentum": 0.1,
    }

    # Add zero-mean Gaussian noise with variance `level` to train/test inputs.
    # Fix: draw the whole noise matrix in one call instead of a per-row
    # Python loop — identical distribution, far less interpreter overhead.
    level = 0.2
    sigma = np.sqrt(level)
    x_train_noise = x_train.astype(float) + np.random.normal(0, sigma, x_train.shape)
    x_test_noise = x_test.astype(float) + np.random.normal(0, sigma, x_test.shape)

    auto_enc1 = Autoencoder(**params)
    # Train on noisy inputs against the clean targets.
    history = auto_enc1.train(x_train_noise, x_train, x_test)
    plot.plot_loss(history, loss_type='MSE')
    x_reconstr = auto_enc1.test(x_test_noise, binary=True)
    plot_traintest(x_test_noise, y_test, x_reconstr)
def main():
    """Entry point: load MNIST, train the model, then generate images."""
    # double check
    print(args)

    # data
    dataset = mnist(args.data_dir)

    # init trainer obj
    trainer = Trainer(args)

    # Train
    trainer.train(dataset)

    # Fine-tune (currently disabled)
    # trainer.restore()
    # trainer.train(dataset)

    # Generate images
    trainer.gen()
def load_data(data_dir, normalize=True):
    """Populate the module-level loaders for the experiment in *data_dir*.

    Reads the stored model details to pick the dataset and normalization,
    then sets ``directory``, ``d``, ``train_loader``, ``test_loader`` and
    (for MNIST-like sets) ``untransformed_test`` as globals.
    """
    global directory, d, train_loader, test_loader, untransformed_test
    directory = '../experiments/' + data_dir
    d = t.load(directory + "/model_details")

    # Saved details may override the normalize argument.
    if 'normalize' in d:
        normalize = d['normalize']
    elif d['dataset'] == 'translate':
        print('Assuming no normalizaion on translated data')
        normalize = False

    if d['dataset'] in data.data_dict:
        train_loader, test_loader = data.data_dict[d['dataset']](
            rotate=d['rotate'], normalize=normalize)
        if d['dataset'] in ['mnist', 'translate', 'scale']:
            _, untransformed_test = data.mnist(rotate=False,
                                               translate=False,
                                               normalize=False)
    else:
        # Unknown name: treat it as a path to precomputed data and try
        # both candidate locations.
        try:
            train_loader, test_loader = data.get_precomputed(
                '../' + d['dataset'], normalize=normalize)
        except FileNotFoundError:
            train_loader, test_loader = data.get_precomputed(
                '../data/' + d['dataset'], normalize=normalize)
def main():
    """Greedily pre-train a stack of auto-encoders, then fine-tune as an MLP
    on MNIST and persist the resulting model."""
    x_train, y_train, x_test, y_test = data.mnist(one_hot=True)

    # Deep Neural Network structure: (input_dim, num_of_nodes) per layer.
    layers = [[x_train.shape[1], 256], [256, 128], [128, 64]]
    dnn = DNN(MODEL_FOLDER, os_slash, layers, params)

    pre_epochs = 100
    train_epochs = 100

    # Create auto-encoders and train them one by one, stacking them in the
    # DNN; their weights seed the supervised MLP training.
    pre_trained_weights = dnn.pre_train(x_train, pre_epochs)
    history = dnn.train(x_train, y_train, train_epochs,
                        init_weights=pre_trained_weights)
    plot.plot_loss(history, loss_type='MSE')

    predicted, score = dnn.test(x_test, y_test)
    print("Test accuracy: ", score[1])

    dnn.model.save_weights(MODEL_FOLDER + os_slash + "final_weights.h5")
    dnn.model.save(MODEL_FOLDER + os_slash + "model.h5")
    save_results(score[1])
def post_training(modeldir, val_loader=None):
    """Run post-training visualizations for *modeldir*.

    If no validation loader is given, rebuild one from the run's saved
    ``params.json`` (sequential MNIST, honoring the stored permutation flag).
    """
    if val_loader is None:
        with open(os.path.join(modeldir, 'params.json'), 'r') as jsf:
            params = json.load(jsf)
        _, val_loader = data.mnist(1, sequential=True,
                                   permuted=params['permuted'])
    visualizations(modeldir, val_loader)
def local_mnist():
    """Laptop configuration: train the MNIST net on locally stored data."""
    args['data_dir'] = '../data/'
    args['loss_func'] = F.nll_loss
    train_loader, test_loader = data.mnist(args)
    model = net.MNIST_Net()
    trainer.main(model, train_loader, test_loader, args)
    return
def load_mnist(num, class_label, noise_scale):
    """Return *num* random images of *class_label*, flattened and
    row-normalized so each image's max is 1.

    NOTE(review): ``add_noise`` (and therefore ``noise_scale``) is defined
    but never applied — possibly an omission; behavior is kept as-is.
    """
    def partial_flatten(x):
        return np.reshape(x, (x.shape[0], -1))

    def normalize(x):
        return x / x.max(1, keepdims=True)

    def random_subset(x, n):
        return x[np.random.choice(x.shape[0], n)]

    def add_noise(x):  # unused — see docstring
        return x + noise_scale * npr.normal(size=x.shape)

    train_images, train_labels, test_images, test_labels = mnist()
    train_images = random_subset(train_images[train_labels == class_label], num)
    return normalize(partial_flatten(train_images))
def train(self):
    """CLI entry point: parse training arguments, build the model and data.

    Fix: ``--lr`` now declares ``type=float`` — argparse does not infer the
    type from the default, so a value passed on the command line previously
    arrived as a string.
    """
    print("Training day and night")
    parser = argparse.ArgumentParser(description='Training arguments')
    parser.add_argument('--lr', type=float, default=0.1)
    # add any additional argument that you want
    args = parser.parse_args(sys.argv[2:])
    print(args)

    # TODO: Implement training loop here
    model = MyAwesomeModel()
    train_set, _ = mnist()
def evaluate(self):
    """CLI entry point: load a saved model from a path argument and fetch
    the test split for evaluation."""
    print("Evaluating until hitting the ceiling")
    parser = argparse.ArgumentParser(description='Training arguments')
    parser.add_argument('load_model_from', default="")
    # add any additional argument that you want
    args = parser.parse_args(sys.argv[2:])
    print(args)

    # TODO: Implement evaluation logic here
    model = torch.load(args.load_model_from)
    _, test_set = mnist()
def load_mnist(num, class_label, noise_scale):
    """Sample *num* images of one class from MNIST, flattened and
    row-normalized to a per-image max of 1.

    NOTE(review): the noise helper is never applied, so *noise_scale* is
    effectively unused — confirm against callers.
    """
    flatten = lambda a: np.reshape(a, (a.shape[0], -1))
    scale_rows = lambda a: a / a.max(1, keepdims=True)
    pick = lambda a, n: a[np.random.choice(a.shape[0], n)]
    add_noise = lambda a: a + noise_scale * npr.normal(size=a.shape)  # unused

    train_images, train_labels, _, _ = mnist()
    subset = pick(train_images[train_labels == class_label], num)
    return scale_rows(flatten(subset))
def load_data(self, data_set, downsampling):
    """Load a dataset by registered name (falling back to MNIST) and
    optionally truncate the train/validation splits.

    Returns (X_train, X_val, X_test, y_test); only the first two splits
    are affected by *downsampling* (a falsy value disables truncation).
    """
    loader = self.load_data_functions.get(data_set)
    if loader is not None:
        X_train, X_val, X_test, y_test = loader()
    else:
        # Unknown name: default to the MNIST data set.
        X_train, X_val, X_test, y_test = mnist()
    if downsampling:
        X_train = X_train[:downsampling]
        X_val = X_val[:downsampling]
    return X_train, X_val, X_test, y_test
def evaluate(batch_size, log10_lr, momentum, log10_l1, log10_l2, dropout,
             improvement_thresh):
    """Cross-validated MNIST accuracy for one hyper-parameter setting.

    Learning rate and L1/L2 penalties are given on a log10 scale; returns
    the mean validation accuracy over ``num_folds`` folds.
    """
    # Load MNIST and turn the uint8 images into float vectors in [0, 1].
    train_images, train_labels, test_images, test_labels = data.mnist()
    train_images = np.reshape(
        train_images,
        (train_images.shape[0],
         train_images.shape[1] * train_images.shape[2])) / 255.0

    # One-hot coding for the labels.
    train_labels = kayak.util.onehot(train_labels)
    test_labels = kayak.util.onehot(test_labels)

    # Cross-validation object: creates the folds and lets us iterate them.
    CV = kayak.CrossValidator(num_folds, train_images, train_labels)

    valid_acc = 0.0
    for ii, fold in enumerate(CV):
        fold_images, fold_labels = fold.train()
        valid_images, valid_labels = fold.valid()

        # Train on this fold; get a prediction function back.
        pred_func = train(fold_images, fold_labels, batch_size,
                          10.0**log10_lr, momentum, 10.0**log10_l1,
                          10.0**log10_l2, dropout, improvement_thresh)

        valid_preds = np.argmax(pred_func(valid_images), axis=1)
        acc = np.mean(valid_preds == np.argmax(valid_labels, axis=1))
        print("Fold %02d: %0.6f" % (ii + 1, acc))
        valid_acc += acc

    print("Overall: %0.6f" % (valid_acc / num_folds))
    return valid_acc / num_folds
def main():
    """Compare sigmoid vs relu autoencoders across several hidden-layer
    sizes and plot the resulting losses."""
    x_train, y_train, x_test, y_test = data.mnist()
    layer_sizes = [10, 100, 400, 784, 900, 1000]
    sigmoid_histories, relu_histories = [], []

    for layer_size in layer_sizes:
        print("layer_size: ", layer_size)
        sigm_1run = train(layer_size, 'sigmoid', x_train, x_test)
        relu_1run = train(layer_size, 'relu', x_train, x_test)
        sigmoid_histories.append(sigm_1run)
        relu_histories.append(relu_1run)
        print("MSE sigmoid: ", sigm_1run)
        print("MSE relu: ", relu_1run)

    print("sigmoid histories: ", sigmoid_histories)
    print("relu histories: ", relu_histories)
    plot.plot_losses(layer_sizes, sigmoid_histories, relu_histories)
def main():
    """Train a plain (non-denoising) autoencoder on MNIST, plot its loss
    curve and the test reconstructions."""
    x_train, y_train, x_test, y_test = data.mnist()
    params = {
        "epochs": 50,
        "num_hid_nodes": int(x_train.shape[1] * 0.9),
        "weight_init": [0.0, 0.1],
        "activations": 'sigmoid',  # relu is much better performance
        "lr": 0.15,
        "decay": 1e-6,
        "momentum": 0.1,
    }

    auto_enc1 = Autoencoder(**params)
    history = auto_enc1.train(x_train, x_train, x_test)
    plot.plot_loss(history, loss_type='MSE')
    x_reconstr = auto_enc1.test(x_test, binary=True)
    plot_traintest(x_test, y_test, x_reconstr)
def train(kx=200, D=784, n_steps=501):
    """Learn a row-normalized projection W on ZCA-whitened MNIST by
    projected gradient descent, periodically visualizing the bases in both
    the original and whitened spaces."""
    mnist_data = data.mnist()
    X = mnist_data.train.images
    X, U, S = zca_whiten(X, kx)
    S = np.sqrt(S)
    X = X.T

    lr = tf.placeholder(tf.float32)
    W = tf.Variable(
        normr(tf.random_normal([D, kx], stddev=1, dtype=tf.float32)))
    X = tf.Variable(X, dtype=tf.float32)

    grad1, loss1 = grad_loss_logqz(W, X)
    grad2, loss2 = grad_loss_logdet(W)
    loss = loss1 + loss2
    grad = grad1 + grad2
    # Normalize the gradient to unit mean row norm before stepping.
    grad_norm = tf.reduce_mean(tf.reduce_sum(grad * grad, 1)**0.5)
    grad = grad / (grad_norm + 1e-5)
    # Single op: gradient step, re-normalize W's rows, report both losses.
    op = [W.assign(normr(W - lr * grad)), loss1, loss2]

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(n_steps):
            # Step-size schedule; later branches only matter for n_steps > 1000.
            if i < 1000:
                _lr = .1
            elif i < 2000:
                _lr = .1
            elif i < 5000:
                _lr = .01
            logs = sess.run(op, feed_dict={lr: _lr})
            if i % 10 == 0:
                print(i, _lr, logs[-2], logs[-1])
            if i % 500 == 0:
                # Visualize the learned bases.
                _W = sess.run(W)
                B = np.dot(U[:, :kx], np.diag(S[:kx]))
                B = np.dot(B, _W.T)
                vis(B.T, 'imgs/or_D=%d_k=%d.png' % (D, kx))
                B = U[:, :kx]
                B = np.dot(B, _W.T)
                vis(B.T, 'imgs/wh_D=%d_k=%d.png' % (D, kx))
def evaluate(batch_size, log10_lr, momentum, log10_l1, log10_l2, dropout,
             improvement_thresh):
    """Mean cross-validated accuracy on MNIST for the given training
    hyper-parameters (lr / l1 / l2 supplied as base-10 logs)."""
    # Load in the MNIST data and flatten uint8 images to floats in [0, 1].
    train_images, train_labels, test_images, test_labels = data.mnist()
    n, h, w = train_images.shape[0], train_images.shape[1], train_images.shape[2]
    train_images = np.reshape(train_images, (n, h * w)) / 255.0

    # One-hot coding for the labels.
    train_labels = kayak.util.onehot(train_labels)
    test_labels = kayak.util.onehot(test_labels)

    # Hand the training data to a cross-validation object (num_folds folds).
    CV = kayak.CrossValidator(num_folds, train_images, train_labels)

    valid_acc = 0.0
    for ii, fold in enumerate(CV):
        # Per-fold train / validation splits.
        train_images, train_labels = fold.train()
        valid_images, valid_labels = fold.valid()

        # Train; receive a prediction function for this fold's model.
        pred_func = train(train_images, train_labels, batch_size,
                          10.0**log10_lr, momentum, 10.0**log10_l1,
                          10.0**log10_l2, dropout, improvement_thresh)

        valid_preds = np.argmax(pred_func(valid_images), axis=1)
        acc = np.mean(valid_preds == np.argmax(valid_labels, axis=1))
        print("Fold %02d: %0.6f" % (ii + 1, acc))
        valid_acc += acc

    print("Overall: %0.6f" % (valid_acc / num_folds))
    return valid_acc / num_folds
def main():
    """Sweep learning rates (3 runs each on a 5000-sample subset) and plot
    mean MSE with standard-deviation bars."""
    x_train, y_train, x_test, y_test = data.mnist()
    x_train, y_train = x_train[:5000], y_train[:5000]
    x_test, y_test = x_test[:5000], y_test[:5000]

    lrs = [0.01, 0.1, 1, 10, 15, 20, 50]
    errors, stds = [], []
    for lr in lrs:
        print("learning rate: ", lr)
        single_errors = [train(lr, x_train, x_test, y_test)
                         for _ in range(3)]
        errors.append(np.mean(single_errors))
        stds.append(np.std(single_errors))
        print("MSE: ", np.mean(single_errors), " // std: ",
              np.std(single_errors))

    plot.plot_parameter('Learning rate', lrs, errors, stds)
def local_pretrained_mnist_lossvar():
    """Laptop configuration: fine-tune a pre-trained MNIST net on a small
    fixed subset of batches while tracking gradient-variance statistics."""
    args['data_dir'] = '../data/'
    args['loss_func'] = F.nll_loss
    # args['learning_func_name'] = 'loss_var'
    args['learning_func_name'] = 'grad_var'
    args['stats_samplesize'] = 3
    args['num_eigens_hessian_approx'] = 1
    args['lr'] = 1e-3
    args['log_interval'] = 1

    train_loader, test_loader = data.mnist(args)
    # Freeze a small sample of batches and train only on those.
    batches = list(lib.iter_sample_fast(train_loader,
                                        args['stats_samplesize']))
    batch_loader = dataloader.get_subset_batch_loader(batches, args)
    args['subset_batches'] = True
    print(f'\nTraining only on {args["stats_samplesize"]} batches of size '
          f'{args["batch_size"]}!\n')

    pt_fn = '../data/models/mnist_model_epoch10.pt'
    model = net.load_pretrained_model(net.MNIST_Net, pt_fn, args)
    trainer.main(model, batch_loader, test_loader, args)
    return
sys.path.append('..') import kayak batch_size = 256 learn_rate = 0.01 momentum = 0.9 layer1_sz = 500 layer2_sz = 500 layer1_dropout = 0.25 layer2_dropout = 0.25 npr.seed(1) # Load in the MNIST data. train_images, train_labels, test_images, test_labels = data.mnist() # Turn the uint8 images into floating-point vectors. train_images = np.reshape(train_images, (train_images.shape[0], train_images.shape[1] * train_images.shape[2])) / 255.0 # Use one-hot coding for the labels. train_labels = kayak.util.onehot(train_labels) test_labels = kayak.util.onehot(test_labels) # Hand the training data off to a cross-validation object. # This will create ten folds and allow us to easily iterate. CV = kayak.CrossValidator(10, train_images, train_labels)
p = 1.0 - p return math.exp(-p * p * 5.0) else: return 1.0 def rampdown(epoch): if epoch >= (args['n_epochs'] - args['rampdown_length']): ep = (epoch - (args['n_epochs'] - args['rampdown_length'])) * 0.5 return math.exp(-(ep * ep) / args['rampdown_length']) else: return 1.0 args['shape'] = (28, 28, 1) if args['dataset'] == 'mnist' else (32, 32, 3) BAE = model.BAE(args) mnist = data.mnist(args) unlabeled_idx = np.copy(mnist.unlabeled_idx) labeled_idx = np.copy(mnist.labeled_idx) sparse_label = np.copy(mnist.sparse_label) batch_size = args['batch_size'] new_label = np.copy(sparse_label) new_label = np.asarray(new_label) mask_AE = np.copy(mnist.train_mask) drops_label_sup=np.ones([batch_size,10])
def mnist():
    """Return MNIST with the training split reduced to 100 images per class;
    the test split is passed through unchanged."""
    nb_per_class = 100
    (X_train, Y_train), (X_test, Y_test) = data.mnist()
    X_train, Y_train = extract(X_train, Y_train, nb_per_class)
    return (X_train, Y_train), (X_test, Y_test)
#pylint: skip-file import time import numpy as np import theano import theano.tensor as T import utils_pg as Utils from mlp import * import data lr = 0.1 batch_size = 100 train_set, valid_set, test_set = data.mnist(batch_size) hidden_size = [500, 100] dim_x = train_set[0][0][0].shape[1] dim_y = train_set[0][1][0].shape[1] print dim_x, dim_y model = MLP(dim_x, dim_y, hidden_size) start = time.time() for i in xrange(100): acc = 0.0 in_start = time.time() for index, data_xy in train_set.items(): X = data_xy[0] Y = data_xy[1] model.train(X, Y, lr) in_time = time.time() - in_start
def test_transform(theta, length=17, im=None): if im is None: im = t.zeros((1, 1, length, length)) im[0, 0, 0, 0] = 1 im[0, 0, 0, length - 1] = 2 im[0, 0, length - 1, 0] = 3 im[0, 0, length - 1, length - 1] = 4 im[0, 0, (length - 1) // 2, (length - 1) // 2] = 5 grid = F.affine_grid(t.tensor(theta).view(-1, 2, 3), im.shape) transformed = F.grid_sample(im, grid) return transformed train, test = data.mnist(rotate=False, normalize=False, translate=False) test_im = train.dataset[0][0].view(1, 1, 28, 28) plt.imshow(test_im[0, 0]) plt.show() identity = [1., 0, 0, 0, 1, 0] theta = t.tensor([0.3, -1, -.5, 1.3, -0.2, 0.8]).view(-1, 2, 3) # distance = np.linalg.solve(theta[0,:,0:2], theta[0,:,2]) distance = theta[0, :, 2] print('distance', distance) trans = test_transform([1., 0, distance[0], 0, 1, distance[1]], im=test_im) plt.imshow(trans[0, 0]) plt.colorbar() plt.figure() plt.imshow(
from VAE import * import data import matplotlib.pyplot as plt use_gpu(0) lr = 0.001 drop_rate = 0. batch_size = 128 hidden_size = 500 latent_size = 2 # try: sgd, momentum, rmsprop, adagrad, adadelta, adam, nesterov_momentum optimizer = "adam" continuous = False train_set, valid_set, test_set = data.mnist() train_xy = data.batched_mnist(train_set, batch_size) dim_x = train_xy[0][0].shape[1] dim_y = train_xy[0][1].shape[1] print "#features = ", dim_x, "#labels = ", dim_y print "compiling..." model = VAE(dim_x, dim_x, hidden_size, latent_size, continuous, optimizer) print "training..." start = time.time() for i in xrange(50): error = 0.0 in_start = time.time() for batch_id, xy in train_xy.items():
def train(kx=200, D=784):
    """ZCA-whiten MNIST, learn a row-normalized projection W, then train a
    small softmax classifier on the mirrored-relu projected features and
    print the test accuracy."""
    mnist_data = data.mnist()
    X = mnist_data.train.images
    X, U, S, zca_mu, zca_W = zca_whiten5(X, kx)
    S = np.sqrt(S)
    X = X.T

    lr = tf.placeholder(tf.float32)
    W = tf.Variable(
        normr(tf.random_normal([D, kx], stddev=1, dtype=tf.float32)))
    _y = tf.placeholder(tf.int32, [None])
    _x = tf.placeholder(tf.float32, [kx, None])

    # Unsupervised objective on W; gradient normalized to unit mean row
    # norm, and W's rows re-normalized after every step.
    grad1, loss1 = grad_loss_logqz(W, _x, q='logistic', batch_size=60000)
    grad2, loss2 = grad_loss_logdet(W)
    loss = loss1 + loss2
    grad = grad1 + grad2
    grad_norm = tf.reduce_mean(tf.reduce_sum(grad * grad, 1)**0.5)
    grad = grad / (grad_norm + 1e-5)
    op = [W.assign(normr(W - lr * grad)), loss1, loss2]

    # Linear classifier on z = relu([Wx; -Wx]) with a small L2 penalty.
    with tf.variable_scope('clf') as scope:
        h0 = tf.matmul(W, _x)
        z = tf.nn.relu(tf.concat([h0, -h0], 0))
        decoder_W = tf.Variable(tf.random_normal(
            [10, D * 2], stddev=0.01))  #weight_variable([z_dim*2,10],1)
        decoder_b = tf.Variable(tf.random_normal([10, 1], stddev=0.00001))
        logits = tf.matmul(decoder_W, z) + decoder_b
        logits = tf.transpose(logits)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=_y, name='xentropy')
        cross_entropy_mean = tf.reduce_mean(
            cross_entropy, name='xentropy_mean') + \
            0.0001 * tf.nn.l2_loss(decoder_W)
        optimizer = tf.train.AdamOptimizer(0.001)
        grads = optimizer.compute_gradients(cross_entropy_mean,
                                            [decoder_W, decoder_b])
        train_op2 = optimizer.apply_gradients(grads)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Phase 1: fit W on the full whitened training set.
        n_steps = 501 * 1
        for i in range(n_steps):
            if i < 1000:
                _lr = .1
            elif i < 2000:
                _lr = .1
            elif i < 5000:
                _lr = .01
            logs = sess.run(op, feed_dict={lr: _lr, _x: X})
            if i % 10 == 0:
                print(i, _lr, logs[-2], logs[-1])
        # (basis visualization code disabled)

        # Phase 2: train the classifier on random 1000-sample batches.
        for i in range(500 * 2):
            indices = np.random.permutation(X.shape[1])
            batch = [X[:, indices[:1000]],
                     mnist_data.train.labels[indices[:1000]]]
            # batch=mnist_data.train.next_batch(1000)
            logs = sess.run([train_op2, cross_entropy_mean],
                            feed_dict={_y: batch[1], _x: batch[0]})
            if i % 10 == 0:
                print(i, logs[-1])

        # Evaluate: whiten the test set with the stored ZCA transform.
        tex = mnist_data.test.images
        test_data = np.dot(tex - zca_mu, zca_W)
        pred = sess.run(logits, feed_dict={_x: test_data.T})
        # dW,db,_W=sess.run([decoder_W,decoder_b,W])
        # test_data=np.dot(test_data, _W.T)
        # pred=np.dot(dW,test_data.T)+db
        print("accuracy",
              (pred.argmax(1) == mnist_data.test.labels).sum() * 1.0 /
              mnist_data.test.labels.size)
from AVAE import * import data import matplotlib.pyplot as plt use_gpu(0) lr = 0.001 drop_rate = 0. batch_size = 128 hidden_size = 400 latent_size = 2 iter_d = 1 # try: sgd, momentum, rmsprop, adagrad, adadelta, adam, nesterov_momentum optimizer = "adam" train_set, valid_set, test_set = data.mnist() train_xy = data.batched_mnist(train_set, batch_size) dim_x = train_xy[0][0].shape[1] dim_y = train_xy[0][1].shape[1] print "#features = ", dim_x, "#labels = ", dim_y print "compiling..." model = AVAE(dim_x, dim_x, hidden_size, latent_size, optimizer) print "training..." start = time.time() for i in xrange(200): error = 0.0 error_d = 0.0 error_g = 0.0
return parser.parse_args() if __name__ == "__main__": args = get_args() print("EPSILON", args.epsilon) # load pre-trained model model = Net() model.load_state_dict(torch.load(args.model, map_location="cpu")) # set model to evaluation mode model.eval() loader = data.mnist(batch_size=1, seed=args.seed) n_success = 0 success_rate = 0.0 t_count = 0 t_queries = [] images_saved = 0 if args.blackbox: attack = ES(n_iter=args.n_iter, epsilon=args.epsilon) else: attack = IFGSM(n_iter=args.n_iter, epsilon=args.epsilon) print("Creating %d adversarial example(s)..." % args.n_samples) print("Running %s for %d iterations w/ epsilon %.2f...\n" % (attack.__class__.__name__, args.n_iter, args.epsilon))
def transformation_statistics(model=0,
                              plot=True,
                              di=None,
                              transform='rotate',
                              normalize=None,
                              epochs=1,
                              save_path='',
                              title=''):
    """Measure how well the model's localization network recovers a known
    random transformation (rotation, translation, or scaling) applied to
    the untransformed MNIST test set.

    Returns per-label lists of the applied transformations and of the
    predicted angles, distances, scales, determinants and shears. Also
    prints the residual standard deviation and (optionally) plots the
    applied vs. predicted transformation.
    """
    global untransformed_test
    assert transform in ['rotate', 'translate', 'scale']

    # Accept either a model index (load it) or an already-built model;
    # make sure the untransformed test loader exists either way.
    if type(model) == int:
        model = get_model(model, di=di)
        _, untransformed_test = data.mnist(rotate=False,
                                           normalize=False,
                                           translate=False)
    elif 'untransformed_test' not in globals():
        _, untransformed_test = data.mnist(rotate=False,
                                           normalize=False,
                                           translate=False)

    device = t.device("cuda" if next(model.parameters()).is_cuda else "cpu")

    # Accumulators for the applied transformations and the predictions.
    transformation = np.zeros((0, 2) if transform == 'translate' else (0, 1))
    tran_by_label = []
    angles = np.array([])
    distances = np.zeros((0, 2))
    scales = np.zeros((0, 2))
    dets = np.array([])
    shears = np.array([])
    angle_by_label = []
    distance_by_label = []
    scale_by_label = []
    det_by_label = []
    shear_by_label = []
    labels = np.array([])

    with t.no_grad():
        model.eval()
        if transform == 'translate':
            noise = data.MNIST_noise(60)
        elif transform == 'scale':
            noise = data.MNIST_noise(112, scale=True)

        for epoch in range(epochs):
            for x, y in untransformed_test:
                if transform == 'rotate':
                    # Random rotation in [-90, 90] degrees per image.
                    angle = np.random.uniform(-90, 90, x.shape[0])
                    transformation = np.append(transformation, angle)
                    transformed = t.tensor(
                        [rotate(im[0], a) for im, a in zip(x, angle)],
                        dtype=t.float).reshape(-1, 1, 28, 28)
                    if normalize or normalize is None:
                        transformed = (transformed - 0.1307) / 0.3081
                elif transform == 'translate':
                    # Random integer shift in [-16, 16] on each axis,
                    # pasted into a 60x60 canvas plus background noise.
                    distance = np.random.randint(-16, 17, (x.shape[0], 2))
                    transformation = np.append(transformation, distance,
                                               axis=0)
                    transformed = t.zeros(x.shape[0], 1, 60, 60,
                                          dtype=t.float)
                    for i, (im, (xd, yd)) in enumerate(zip(x, distance)):
                        transformed[i, 0, 16 - yd:44 - yd,
                                    16 + xd:44 + xd] = im[0]
                        transformed[i] = noise(transformed[i])
                    if normalize is True or (normalize is None
                                             and d['normalize']):
                        transformed = (transformed - 0.0363) / 0.1870
                elif transform == 'scale':
                    # Random scale 2**u, u in [-1, 2], on a padded canvas.
                    logscale = np.random.uniform(-1, 2, x.shape[0])
                    transformation = np.append(transformation, logscale)
                    scale = np.power(2, logscale)
                    transformed = F.pad(x, (42, 42, 42, 42))
                    for i, s in enumerate(scale):
                        transformed[i] = tvF.to_tensor(
                            tvF.affine(tvF.to_pil_image(transformed[i]),
                                       angle=0,
                                       translate=(0, 0),
                                       shear=0,
                                       scale=s,
                                       resample=PIL.Image.BILINEAR,
                                       fillcolor=0))
                        transformed[i] = noise(transformed[i])
                    if normalize is True or (normalize is None
                                             and d['normalize']):
                        transformed = (transformed - 0.0414) / 0.1751

                # Predicted affine parameters from the localization net.
                theta = model.localization[0](model.pre_stn[0](
                    transformed.to(device))).cpu()
                angle, shear, sx, sy, det = angle_from_matrix(
                    theta, all_transformations=True)
                scale = np.stack((sx, sy), axis=1)
                distance = distance_from_matrix(theta)
                angles = np.append(angles, angle)
                distances = np.append(distances, distance, axis=0)
                scales = np.append(scales, scale, axis=0)
                dets = np.append(dets, det)
                shears = np.append(shears, shear)
                labels = np.append(labels, y)

    # Group by digit label and accumulate the residual variance of
    # (applied + predicted) transformation around its mean.
    variance = 0
    for i in range(10):
        indices = labels == i
        tran_by_label.append(transformation[indices])
        angle_by_label.append(angles[indices])
        distance_by_label.append(distances[indices])
        scale_by_label.append(scales[indices])
        det_by_label.append(dets[indices])
        shear_by_label.append(shears[indices])
        transformations = tran_by_label[i]
        if transform == 'rotate':
            predictions = angle_by_label[i]
        elif transform == 'translate':
            predictions = distance_by_label[i]
        elif transform == 'scale':
            predictions = np.log2(scale_by_label[i])
        s = (sum(transformations) + sum(predictions)) / len(transformations)
        variance += sum([
            np.linalg.norm(t + p - s)**2
            for t, p in zip(transformations, predictions)
        ])
    print('Standard deviation', np.sqrt(variance / (epochs * 10000)))

    if plot:
        if transform == 'rotate':
            plot_angles(rot=transformation, pred=angles,
                        save_path=save_path, title=title)
        elif transform == 'translate':
            plot_distance(tran=transformation, pred=distances,
                          save_path=save_path, title=title)
        else:
            plot_scale(logscale=transformation, pred=scales,
                       save_path=save_path, title=title)
    return tran_by_label, angle_by_label, distance_by_label, \
        scale_by_label, det_by_label, shear_by_label
def run(batch_size=128,
        n_features=64,
        n_layers=6,
        n_scales=1,
        n_bins=16,
        exp_name='pixelCNN',
        exp_dir='/home/jason/experiments/pytorch_pixelcnn/',
        optimizer='adam',
        learnrate=1e-4,
        dropout=0.5,
        cuda=True,
        resume=False):
    """Train a PixelCNN on binned MNIST, storing parameters and
    checkpoints under a per-experiment directory (resumable)."""
    exp_name += '_%ifeat_%iscales_%ilayers_%ibins' % (n_features, n_scales,
                                                      n_layers, n_bins)
    exp_dir = os.path.join(exp_dir, exp_name)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)

    if not resume:
        # Fresh run: record hyper-parameters and build a new model.
        params = {
            'batch_size': batch_size,
            'n_features': n_features,
            'n_layers': n_layers,
            'n_scales': n_scales,
            'n_bins': n_bins,
            'optimizer': optimizer,
            'learnrate': learnrate,
            'dropout': dropout,
            'cuda': cuda
        }
        with open(os.path.join(exp_dir, 'params.json'), 'w') as f:
            json.dump(params, f)
        net = model.PixelCNN(1, n_features, n_layers, n_scales, n_bins,
                             dropout)
    else:
        # Resuming requires the params, stats and checkpoint files.
        if not (os.path.isfile(os.path.join(exp_dir, 'params.json'))
                and os.path.isfile(os.path.join(exp_dir, 'stats.json'))
                and os.path.isfile(os.path.join(exp_dir, 'last_checkpoint'))):
            raise Exception(
                'Missing param, stats or checkpoint file on resume')
        net = torch.load(os.path.join(exp_dir, 'last_checkpoint'))

    # Data loaders
    train_loader, val_loader = data.mnist(batch_size)

    # Up-weight 1s (~8x rarer) to balance loss, interpolate intermediates.
    weight = torch.from_numpy(np.linspace(1, 8, n_bins, dtype='float32'))
    if cuda:
        weight = weight.cuda()

    def input2label(x):
        # Map inputs in [0, 1] to integer bin labels.
        return torch.squeeze(
            torch.round((n_bins - 1) * x).type(torch.LongTensor), 1)

    loss_fcn = torch.nn.NLLLoss2d(torch.autograd.Variable(weight))

    # Train
    train.fit(train_loader,
              val_loader,
              net,
              exp_dir,
              input2label,
              loss_fcn,
              optimizer,
              learnrate=learnrate,
              cuda=cuda,
              resume=resume)
def run(batch_size,
        permuted,
        modeltype='surprise_gru',
        n_hidden=64,
        zoneout=0.25,
        layer_norm=True,
        optimizer='adam',
        learnrate=1e-3,
        aux_weight=0.1,
        cuda=True,
        resume=False):
    """Train a recurrent model on (optionally permuted) sequential MNIST,
    then run the post-training visualization."""
    assert isinstance(batch_size, int)
    assert isinstance(permuted, bool)
    assert modeltype in MODELS_IMPLEMENTED
    assert isinstance(n_hidden, int)
    assert isinstance(zoneout, (int, float))
    assert isinstance(layer_norm, bool)
    assert isinstance(optimizer, str)
    assert isinstance(learnrate, (int, float))
    assert isinstance(cuda, bool)
    assert isinstance(resume, bool)

    # Experiment name encodes the key hyper-parameters for readability.
    exp_name = ('%s_perm%r_h%i_z%2f_norm%r_%s' %
                (modeltype, permuted, n_hidden, zoneout, layer_norm,
                 optimizer))
    exp_path = os.path.join('/home/jason/experiments/recurrent_pytorch/',
                            exp_name)
    if not os.path.isdir(exp_path):
        os.makedirs(exp_path)

    if not resume:
        # Fresh run: store hyper-parameters and build the requested model.
        params = {
            'batch_size': batch_size,
            'permuted': permuted,
            'modeltype': modeltype,
            'n_hidden': n_hidden,
            'zoneout': zoneout,
            'layer_norm': layer_norm,
            'optimizer': optimizer,
            'learnrate': learnrate,
            'aux_weight': aux_weight,
            'cuda': cuda
        }
        with open(os.path.join(exp_path, 'params.json'), 'w') as f:
            json.dump(params, f)
        if modeltype.lower() == 'rnn':
            net = model.RNN(1, n_hidden, 10, layer_norm)
        elif modeltype.lower() == 'gru':
            net = model.GRU(1, n_hidden, 10, layer_norm)
        elif modeltype.lower() == 'surprise_gru':
            net = model.SurpriseGRU(1, n_hidden, 10, layer_norm)
        else:
            raise ValueError
    else:
        # Resuming requires the params, stats and checkpoint files.
        if not (os.path.isfile(os.path.join(exp_path, 'params.json'))
                and os.path.isfile(os.path.join(exp_path, 'stats.json'))
                and os.path.isfile(os.path.join(exp_path, 'checkpoint'))):
            raise Exception(
                'Missing params, stats or checkpoint file (resume)')
        net = torch.load(os.path.join(exp_path, 'checkpoint'))

    # Data loaders
    train_loader, val_loader = data.mnist(batch_size,
                                          sequential=True,
                                          permuted=permuted)

    # Train
    train.fit_recurrent(train_loader,
                        val_loader,
                        net,
                        exp_path,
                        zoneout,
                        optimizer,
                        aux_weight=aux_weight,
                        cuda=cuda,
                        resume=resume)

    # Post-training visualization
    post_training(exp_path, val_loader)
pass if __name__=="__main__": # mnist image parameters image_width, image_height = 28, 28 vectorized_image_length = image_height * image_width # model size parameters num_neurons = vectorized_image_length num_memories = 3 # instantiate a network net = Hopfield(num_neurons) # generate some sample memories images, _, _, _ = mnist() images = images.reshape(-1, vectorized_image_length) memories = [binarize(images[i + 1], thresh=176).ravel() for i in range(num_memories)] # add the memories to the network net.add_memories(memories) # corrupt one of the memories memory = memories[0] noise_std = 0.1 noise_vec = npr.choice([-1., 1.], replace=True, size=num_neurons, p=[noise_std, 1. - noise_std]) corrupted_memory = np.multiply(memory, noise_vec) # decode one of the memories decoded = net.decode(corrupted_memory, bias=100.)
sys.path.append('..') import kayak batch_size = 256 learn_rate = 0.01 momentum = 0.9 layer1_sz = 500 layer2_sz = 500 layer1_dropout = 0.25 layer2_dropout = 0.25 npr.seed(1) # Load in the MNIST data. train_images, train_labels, test_images, test_labels = data.mnist() # Turn the uint8 images into floating-point vectors. train_images = np.reshape(train_images, (train_images.shape[0], train_images.shape[1]*train_images.shape[2]))/255.0 # Use one-hot coding for the labels. train_labels = kayak.util.onehot(train_labels) test_labels = kayak.util.onehot(test_labels) # Hand the training data off to a cross-validation object. # This will create ten folds and allow us to easily iterate. CV = kayak.CrossValidator(10, train_images, train_labels) # Here I define a nice little training function that takes inputs and targets.
from tensorflow.python.framework import ops hvd.init() # def train(): graph = tf.Graph() with graph.as_default(): batch_size = 32 learning_rate = 0.0002 patch_size = [20, 20] # logdir = './log/cifar10' # names_records, h, w, c, n_exp = cifar10(create_tfrecords=False, batch_size=batch_size) # # logdir = './log/mnist' names_records, h, w, c, n_exp = mnist(create_tfrecords=False, batch_size=batch_size) ''' sess = tf.Session() with sess.as_default(): coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord) ttl = 0 for i in range(10000): ttl += len(sess.run(names_records['X'])) print(ttl) ''' if hvd.rank() == 0: ts = tg.utils.ts() logdir = './log/brain/{}'.format(ts) names_records, h, w, c, n_exp = brain(create_tfrecords=False, batch_size=batch_size,