def train(self):
    log.infov("Training Starts!")
    output_save_step = 1000
    self.session.run(self.global_step.assign(0))  # reset global step

    if self.config.dataset == 'mnist':
        from load import load_mnist
        inputs, targets = load_mnist()
    else:
        raise NotImplementedError

    if self.config.method == 'kmeans':
        y_pred, _ = clustering(np.reshape(inputs, (len(inputs), -1)),
                               self.config.num_clusters)
        metrics(targets, y_pred)
        return

    ''' pre-training '''
    if not self.config.skip_pretrain:
        self.pre_train_enc_dec(inputs, targets,
                               batch_size=self.config.batch_size,
                               num_epochs=1000)
        # save model
        self.save_curr_model(os.path.join(self.res_pretrain_dir, 'model'))
    else:
        self.try_load_checkpoint(self.res_pretrain_dir)

    # plot
    latent_z, _ = self.get_latent_rep_and_pred(inputs, targets)
    y_pred, centroids = clustering(latent_z, self.config.num_clusters)
    plot_latent_z_space(latent_z, y_pred,
                        '%s/pre_train_z' % self.res_dir, with_legend=True)
    # sys.exit(0)

    if self.config.method == 'svgd':
        if not self.config.skip_svgd:
            self.session.run(self.model.mu.assign(centroids))
            # scale = np.zeros((self.config.num_clusters, self.config.z_dim*(self.config.z_dim+1)//2))
            scale = np.zeros((self.config.num_clusters, self.config.z_dim))
            for c in range(self.config.num_clusters):
                z_c = latent_z[np.where(y_pred == c)[0]]
                scale[c] = np.std(z_c, axis=0)
            self.session.run(self.model.scale_diag.assign(scale))

            self.train_svgd(inputs, targets, num_epochs=400,
                            batch_size=self.config.batch_size)
            self.save_curr_model(os.path.join(self.res_dir, 'model'))
        else:
            self.try_load_checkpoint(self.res_dir)

        # plot
        latent_z, y_pred = self.get_latent_rep_and_pred(inputs, targets)
        # y_pred, centroids = clustering(latent_z, self.config.num_clusters)
        plot_latent_z_space(latent_z, y_pred,
                            '%s/%s_z' % (self.res_dir, self.config.method),
                            with_legend=True)
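# The `clustering` and `metrics` helpers used in train() above are defined
# elsewhere in this project and are not shown here. As a rough, hypothetical
# sketch (not the project's actual implementation), they could be written
# with scikit-learn along these lines:
from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score


def clustering(z, num_clusters):
    """Run k-means on the latent codes; return cluster labels and centroids."""
    km = KMeans(n_clusters=num_clusters, n_init=10).fit(z)
    return km.labels_, km.cluster_centers_


def metrics(y_true, y_pred):
    """Report a simple clustering score against the ground-truth labels."""
    print('NMI: %.4f' % normalized_mutual_info_score(y_true, y_pred))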
import numpy as np

from passage.models import RNN
from passage.updates import NAG, Regularizer
from passage.layers import Generic, GatedRecurrent, Dense
from passage.utils import load, save

from load import load_mnist

trX, teX, trY, teY = load_mnist()

# Use a Generic layer - the RNN processes a size-28 vector at a time,
# scanning the image from left to right.
layers = [
    Generic(size=28),
    GatedRecurrent(size=512, p_drop=0.2),
    Dense(size=10, activation='softmax', p_drop=0.5),
]

# A bit of l2 helps with generalization; higher momentum helps convergence.
updater = NAG(momentum=0.95, regularizer=Regularizer(l2=1e-4))

# Linear iterator for real-valued data, cce cost for softmax output.
model = RNN(layers=layers, updater=updater, iterator='linear', cost='cce')
model.fit(trX, trY, n_epochs=20)

tr_preds = model.predict(trX[:len(teY)])
te_preds = model.predict(teX)

tr_acc = np.mean(trY[:len(teY)] == np.argmax(tr_preds, axis=1))
te_acc = np.mean(teY == np.argmax(te_preds, axis=1))
from __future__ import print_function, absolute_import

import time

import numpy as np

import cgt
from cgt import nn
from cgt.distributions import categorical

from load import load_mnist

epochs = 10
batch_size = 128

Xtrain, Xtest, ytrain, ytest = load_mnist(onehot=False)

# shuffle the data
np.random.seed(42)
sortinds = np.random.permutation(Xtrain.shape[0])
Xtrain = Xtrain[sortinds]
ytrain = ytrain[sortinds]

# reshape for the convnet
Xtrainimg = Xtrain.reshape(-1, 1, 28, 28)
Xtestimg = Xtest.reshape(-1, 1, 28, 28)

# Model:
# Make it VGG-like.
# VGG nets use 3x3 kernels with padding 1, and all max-pooling is 2x2 with stride 2.
# VGG is a large model, so here we'll just do a small part of it.
X = cgt.tensor4('X', fixed_shape=(None, 1, 28, 28))
y = cgt.vector('y', dtype='i8')
def train(nlayers, num_epochs, rnn_dim, bsz, lr, twin):
    # use hugo's binarized MNIST
    # (`seed` and `rng` are assumed to be defined at module level)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    log_interval = 100
    folder_id = 'condmnist_twin_logs'
    model_id = 'condmnist_twin{}'.format(twin)
    log_file_name = os.path.join(folder_id, model_id + '.txt')
    model_file_name = os.path.join(folder_id, model_id + '.pt')
    log_file = open(log_file_name, 'w')

    # "home-made" binarized MNIST version. Use with fixed binarization
    # during training.
    def binarize(rng, x):
        return (x > rng.rand(x.shape[0], x.shape[1])).astype('int32')

    train_x, valid_x, test_x, train_y, valid_y, test_y = \
        load.load_mnist('./mnist/data')
    train_x = binarize(rng, train_x)
    valid_x = binarize(rng, valid_x)
    test_x = binarize(rng, test_x)

    model = Model(rnn_dim, nlayers)
    model.cuda()
    hidden = model.init_hidden(bsz)
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    nbatches = train_x.shape[0] // bsz

    t = time.time()
    old_valid_loss = np.inf  # track the best validation loss across epochs
    for epoch in range(num_epochs):
        step = 0
        b_fwd_loss, b_bwd_loss, b_twin_loss, b_all_loss = (0., 0., 0., 0.)
        model.train()
        print('Epoch {}: ({})'.format(epoch, model_id.upper()))

        for x, y in get_epoch_iterator(bsz, train_x, train_y):
            opt.zero_grad()

            # x_      = (0, x1, x2, x3, x4)
            # fwd_inp = (0, x1, x2, x3)
            # fwd_trg = (x1, x2, x3, x4)
            x_ = np.concatenate([np.zeros((1, bsz)).astype('int32'), x], axis=0)
            fwd_x = torch.from_numpy(x_)
            fwd_inp = Variable(fwd_x[:-1]).long().cuda()
            fwd_trg = Variable(fwd_x[1:]).float().cuda()

            # reverse the contents
            # bwd_x   = (0, x4, x3, x2, x1)
            # bwd_inp = (0, x4, x3, x2)
            # bwd_trg = (x4, x3, x2, x1)
            bwd_x = np.flip(x, 0).copy()
            x_ = np.concatenate([np.zeros((1, bsz)).astype('int32'), bwd_x], axis=0)
            bwd_x = torch.from_numpy(x_)
            bwd_inp = Variable(bwd_x[:-1]).long().cuda()
            bwd_trg = Variable(bwd_x[1:]).float().cuda()

            y = Variable(torch.from_numpy(np.eye(10)[y])).float().cuda()

            # compute all the states for forward and backward
            fwd_out, fwd_vis = model(fwd_inp, y, hidden)
            bwd_out, bwd_vis = model(bwd_inp, y, hidden)
            assert fwd_out.size(0) == 784

            fwd_loss = binary_crossentropy(fwd_trg, fwd_out).mean()
            bwd_loss = binary_crossentropy(bwd_trg, bwd_out).mean()
            bwd_loss = bwd_loss * (twin > 0.)

            # reversing backstates:
            # fwd_vis     = (out_x1, out_x2, out_x3, out_x4)
            # bwd_vis_inv = (out_x1, out_x2, out_x3, out_x4)
            # therefore match: fwd_vis and bwd_vis_inv
            idx = np.arange(bwd_vis.size(0))[::-1].tolist()
            idx = torch.LongTensor(idx)
            idx = Variable(idx).cuda()
            bwd_vis_inv = bwd_vis.index_select(0, idx)
            bwd_vis_inv = bwd_vis_inv.detach()

            twin_loss = ((fwd_vis - bwd_vis_inv) ** 2).mean()
            twin_loss = twin_loss * twin

            all_loss = fwd_loss + bwd_loss + twin_loss
            all_loss.backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), 1.)
            opt.step()

            b_fwd_loss += fwd_loss.data[0]
            b_bwd_loss += bwd_loss.data[0]
            b_twin_loss += twin_loss.data[0]
            b_all_loss += all_loss.data[0]

            if (step + 1) % log_interval == 0:
                s = time.time()
                log_line = 'Epoch [%d/%d], Step [%d/%d], loss: %f, %.2fit/s' % (
                    epoch, num_epochs, step + 1, nbatches,
                    b_fwd_loss / log_interval,
                    log_interval / (s - t))
                b_all_loss = 0.
                b_fwd_loss = 0.
                b_bwd_loss = 0.
                b_twin_loss = 0.
                t = time.time()
                print(log_line)
                print(time.time(), time.clock())
                log_file.write(log_line + '\n')

            step += 1

        # evaluate per epoch
        print('--- Epoch finished ----')

        val_loss = evaluate(model, bsz, valid_x, valid_y)
        log_line = 'valid -- nll: %f' % (val_loss)
        print(log_line)
        log_file.write(log_line + '\n')

        test_loss = evaluate(model, bsz, test_x, test_y)
        log_line = 'test -- nll: %f' % (test_loss)
        print(log_line)
        log_file.write(log_line + '\n')

        if old_valid_loss > val_loss:
            old_valid_loss = val_loss
            torch.save(model.state_dict(), model_file_name)
        else:
            for param_group in opt.param_groups:
                lr = param_group['lr']
                if lr > 0.00005:
                    lr *= 0.5
                param_group['lr'] = lr
        # print(self.G.shape, self.Y.shape)
        h = dual(self.G, self.Y, self.a, self.b, i)
        if h < 0:
            return self.i
        else:
            return self.j


if __name__ == "__main__":
    # Argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', help='ECOC or VOTE')
    args = parser.parse_args(sys.argv[1:])

    # Load data using the specialized script
    train_dataset = load_mnist(path="../data/mnist/", dataset="training")
    test_dataset = load_mnist(path="../data/mnist/", dataset="testing")

    # Take a fraction of the data to speed up computation
    train_images, train_labels = sample(train_dataset, 5000)
    test_images, test_labels = sample(test_dataset, 100)

    # Get the bounds of the Haar rectangles
    bounds = genbounds(28, 28, 100)

    # Create features, using the same rectangles for training and testing
    train_data = genfeatures(train_images, bounds).astype(float)
    test_data = genfeatures(test_images, bounds).astype(float)

    # Normalize the data
    zmscaler = preprocessing.StandardScaler()
        self.trainer.scheduler_updates()

    def predict(self, X):
        """
        Currently clips the last few rows of X and requires a minimum of
        batch_size * n_batches examples.
        """
        predictions = []
        for batch in self.iter_data(X):
            predictions.append(self.fprop(batch))
        return np.vstack(predictions)


if __name__ == "__main__":
    data_dir = '/home/mmay/data/mnist'
    trX, _, teX, _ = load_mnist(data_dir)
    augmenter = SaltAndPepper(low=0., high=1., p_corrupt=0.5)
    bce = T.nnet.binary_crossentropy

    # Factor out trainer
    # Generalize to multiple layers
    n_vis = 784
    n_hidden = 2000
    batch_size = 128
    activation = T.nnet.sigmoid

    layers = [
        InputLayer(n_vis, batch_size=batch_size, augmenter=augmenter),
        HiddenLayer(n_hidden, activation),
        HiddenLayer(n_vis, activation),
    ]
import pickle

import numpy as np
from scipy.misc import imresize, imsave

import load

MIN = 2
MAX = 4
# size of frame
SIZE = 64
# border
BORDER = 2
# downsampled digit size
ISIZE = 20
# number of digits per image
ND = 2
MNIST_SAMPLES = 60000
DATA_PATH = "./mnist/data"

train_x, valid_x, test_x, train_y, valid_y, test_y = load.load_mnist(DATA_PATH)


def create_rand_multi_mnist(data, labels, samples=60000):
    """
    Create a dataset where multiple (MIN to MAX) random MNIST digits are
    randomly located in a larger image.
    """
    new_images = []
    new_labels = []
    img_by_labels = {}
    for i, (x, l) in enumerate(zip(data, labels)):
        img_by_labels[l] = img_by_labels.get(l, []) + [i]

    while len(new_images) != samples:
        if len(new_images) % 1000 == 0:
            print('done {}'.format(len(new_images)))
        pos = []
        mask = np.zeros((SIZE, SIZE))
        while len(pos) != ND:
import load

X_train, y_train = load.load_mnist('mnist/', kind='train')
print('Rows: %d, columns: %d' % (X_train.shape[0], X_train.shape[1]))

X_test, y_test = load.load_mnist('mnist/', kind='t10k')
print('Rows: %d, columns: %d' % (X_test.shape[0], X_test.shape[1]))
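# The `load.load_mnist` helper used above belongs to this project and is not
# shown here. A minimal, assumed sketch of an IDX-file reader with the same
# (path, kind) signature and (images, labels) return value might look like
# this (file names follow the standard MNIST naming scheme):
import os
import struct

import numpy as np


def load_mnist(path, kind='train'):
    """Load MNIST images and labels from the raw IDX files under `path`."""
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte' % kind)

    with open(labels_path, 'rb') as lbpath:
        # header: magic number and item count, big-endian uint32
        magic, n = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        # header: magic number, image count, rows, cols
        magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), rows * cols)

    return images, labels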
import numpy as np
import matplotlib.pyplot as plt
from numpy import log as ln
import random
from numpy import random
import math
from matplotlib.pyplot import plot, savefig
from PIL import Image

import load

# Import the MNIST dataset
mnist = np.array(load.load_mnist(one_hot=True))
train_data = mnist[0][0][0:10000].T
train_label = mnist[0][1][0:10000].T
test_data = mnist[1][0][0:10000].T
test_label = mnist[1][1][0:10000].T
print(np.shape(train_data))
print(np.shape(train_label))

# Import the CIFAR dataset
from PIL import Image
import os
import pickle


def load_CIFAR_batch(filename):
    with open(filename, 'rb') as f:
        datadict = pickle.load(f, encoding='latin1')
        X = datadict['data']
        Y = datadict['labels']
    X = X.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
    Y = np.array(Y)
    return X, Y


def load_CIFAR10(ROOT):
def train(params):
    # fix the random seed
    np.random.seed(params.random_seed)
    print('%s starting......' % params.cell)

    if params.dataset.startswith('mnist'):
        train_X, test_X, train_y, test_y = load.load_mnist(params)
    elif params.dataset.startswith('sine_synthetic') and \
            not params.dataset.startswith('sine_synthetic_out'):
        train_X, test_X, train_y, test_y = load.load_sine_synthetic(params)
    elif params.dataset.startswith('poly_synthetic'):
        train_X, test_X, train_y, test_y = load.load_poly_synthetic(params)
    else:
        assert 0, "unknown dataset %s" % (params.dataset)

    # params.freqs = np.logspace(np.log2(0.25), np.log2(params.time_steps/3), 120-1, base=2).tolist()
    # params.freqs.append(0.0)
    # params.freqs.sort()
    # params.freqs = np.linspace(0, params.time_steps/3, 10).tolist()

    print("parameters = ", params)

    model = rnn.RNNModel(params)

    # load model
    if params.load_model:
        model.load("%s.%s" % (params.model_dir, params.cell))

    # train model
    train_error, test_error = model.train(params, train_X, train_y, test_X, test_y)

    # save model
    if params.model_dir:
        if not os.path.isdir(os.path.dirname(params.model_dir)):
            os.makedirs(params.model_dir)
        model.save("%s.%s" % (params.model_dir, params.cell))

    # predict
    train_pred = model.predict(train_X, params.batch_size)
    test_pred = model.predict(test_X, params.batch_size)

    # the model must be closed when finished
    model.close()

    # write predictions to file
    if params.pred_dir:
        if not os.path.isdir(os.path.dirname(params.pred_dir)):
            os.makedirs(params.pred_dir)
        with open("%s.%s.%s.y" % (params.pred_dir, params.dataset, params.cell), "w") as f:
            content = ""
            for pred in [train_pred, test_pred]:
                for entry in pred:
                    for index, value in enumerate(entry):
                        if index:
                            content += ","
                        content += "%f" % (value)
                    content += "\n"
            f.write(content)
        with open("%s.%s.%s.X" % (params.pred_dir, params.dataset, params.cell), "w") as f:
            content = ""
            for X in [train_X, test_X]:
                for entry in X:
                    for index, value in enumerate(entry.ravel()):
                        if index:
                            content += ","
                        content += "%f" % (value)
                    content += "\n"
            f.write(content)

    return train_error, test_error
def train(params):
    print('%s starting......' % params.cell)
    sys.stdout.flush()

    if params.dataset.startswith('mnist'):
        train_X, test_X, train_y, test_y = load.load_mnist(params)
    elif params.dataset.startswith('add'):
        train_X, test_X, train_y, test_y = load.adding_task(params)
    else:
        assert 0, "unknown dataset %s" % (params.dataset)

    print("parameters = ", params)

    class List:
        def __init__(self):
            self.list = list()

        def append(self, item):
            self.list.append(item)

    model = rnn.RNNModel(params)

    # load model
    if params.load_model:
        model.load("%s" % (params.load_model_dir))

    # train model
    train_error, test_error, epochs = model.train(params, train_X, train_y, test_X, test_y)

    # save per-epoch errors to file (Egor)
    out_name = ('data_' + params.cell + '_dataset_' + params.dataset +
                '_L_' + str(params.num_layers) + '_rsize_' + str(params.r_size) +
                '_lr_decay_' + str(params.lr_decay) +
                '_batch_size_' + str(params.batch_size))
    with open(out_name, 'w') as f:
        for i in range(len(train_error)):
            f.write(str(epochs[i]) + ' ' + str(train_error[i]) + ' ' + str(test_error[i]) + '\n')

    # save model
    if params.model_dir:
        if not os.path.isdir(os.path.dirname(params.model_dir)):
            os.makedirs(params.model_dir)
        model.save("%s.%s" % (params.model_dir, params.cell))

    # predict
    train_pred = model.predict(train_X, params.batch_size)
    test_pred = model.predict(test_X, params.batch_size)

    # the model must be closed when finished
    model.close()

    # write predictions to file
    if params.pred_dir:
        if not os.path.isdir(os.path.dirname(params.pred_dir)):
            os.makedirs(params.pred_dir)
        with open("%s.%s.%s.y" % (params.pred_dir, params.dataset, params.cell), "w") as f:
            content = ""
            for pred in [train_pred, test_pred]:
                for entry in pred:
                    for index, value in enumerate(entry):
                        if index:
                            content += ","
                        content += "%f" % (value)
                    content += "\n"
            f.write(content)
        with open("%s.%s.%s.X" % (params.pred_dir, params.dataset, params.cell), "w") as f:
            content = ""
            for X in [train_X, test_X]:
                for entry in X:
                    for index, value in enumerate(entry.ravel()):
                        if index:
                            content += ","
                        content += "%f" % (value)
                    content += "\n"
            f.write(content)

    return train_error, test_error
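# How train() is invoked is not shown in this snippet. A minimal, hypothetical
# driver, assuming an argparse Namespace exposes the attributes referenced
# above (the exact flag names and defaults are assumptions, not the project's
# actual command-line interface):
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--cell', default='lstm')
    parser.add_argument('--dataset', default='mnist')
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--num_layers', type=int, default=1)
    parser.add_argument('--r_size', type=int, default=64)
    parser.add_argument('--lr_decay', type=float, default=0.9)
    parser.add_argument('--load_model', action='store_true')
    parser.add_argument('--load_model_dir', default='')
    parser.add_argument('--model_dir', default='')
    parser.add_argument('--pred_dir', default='')
    params = parser.parse_args()

    train_error, test_error = train(params)
    print('final train error: %f, test error: %f' % (train_error[-1], test_error[-1]))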