def build(self): # input and output variables x = T.matrix('x') y = T.matrix('y') index = T.lscalar() batch_count = T.lscalar() LR = T.scalar('LR', dtype=theano.config.floatX) M = T.scalar('M', dtype=theano.config.floatX) # before the build, you work with symbolic variables # after the build, you work with numeric variables self.train_batch = theano.function(inputs=[index,LR,M], updates=self.model.updates(x,y,LR,M),givens={ x: self.shared_x[index * self.batch_size:(index + 1) * self.batch_size], y: self.shared_y[index * self.batch_size:(index + 1) * self.batch_size]}, name = "train_batch", on_unused_input='warn') self.test_batch = theano.function(inputs=[index],outputs=self.model.errors(x,y),givens={ x: self.shared_x[index * self.batch_size:(index + 1) * self.batch_size], y: self.shared_y[index * self.batch_size:(index + 1) * self.batch_size]}, name = "test_batch") if self.format == "DFXP" : self.update_range = theano.function(inputs=[batch_count],updates=self.model.range_updates(batch_count), name = "update_range")
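# Usage sketch, not part of the original class: assumes `net` is a built
# instance exposing the compiled train_batch/test_batch above, and that
# n_train_batches/n_test_batches, lr and momentum come from the caller.
def run_epoch(net, n_train_batches, n_test_batches, lr=0.01, momentum=0.9):
    for i in range(n_train_batches):
        net.train_batch(i, lr, momentum)  # one update step on minibatch i
    errors = [net.test_batch(i) for i in range(n_test_batches)]
    return sum(errors) / float(n_test_batches)  # mean error over the test slices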
def __compileFunctions(self): self.__logger.info("Compiling computational graph:") index = T.lscalar('index') miniBatchSize = T.lscalar('miniBatchSize') self.__logger.info(" - Setting up and compiling outputs") self.__setUpOutputs(self.input) self.__logger.info(" - Setting up and compiling cost functions") self.__setUpCostFunctions(self.input, self.output, self.supCostWeight, self.unsupCostWeight) self.__logger.info(" - Setting up and compiling optimizers") self.__setUpOptimizers(index, miniBatchSize, self.input, self.output, self.epsilon, self.decay, self.momentum) self.__setUpHelpers(index,miniBatchSize)
def train_model(self, X_train, Y_train, X_valid, Y_valid, num_epochs=3000, learning_rate=0.001, batch_size=20, L1_reg=0., L2_reg=0.): logging.info('... training model (learning_rate: %f)' % learning_rate) cost = self.NLL + L1_reg*self.L1 + L2_reg*self.L2_sqr grads = T.grad(cost=cost, wrt=self.params) updates = [[param, param - learning_rate*grad] for param, grad in zip(self.params, grads)] start = T.lscalar() end = T.lscalar() train = theano.function( inputs=[start, end], outputs=cost, updates=updates, givens={ self.X: X_train[start:end], self.Y: Y_train[start:end] } ) validate = theano.function( inputs=[start, end], outputs=[cost, self.py_x], givens={ self.X: X_valid[start:end], self.Y: Y_valid[start:end] } ) m_train = X_train.get_value(borrow=True).shape[0] m_valid = X_valid.get_value(borrow=True).shape[0] stopping_criteria = StoppingCriteria() index = range(0, m_train+1, batch_size) y_valid = np.argmax(Y_valid.get_value(borrow=True), axis=1) for i in range(num_epochs): costs = [train(index[j], index[j+1]) for j in range(len(index)-1)] E_tr = np.mean(costs) E_va, py_x = validate(0, m_valid) y_pred = np.argmax(py_x, axis=1) A_valid = AccuracyTable(y_pred, y_valid) stopping_criteria.append(E_tr, E_va) logging.debug('epoch %3d/%d. Cost: %f Validation: Q3=%.2f%% C3=%f' '(%.2f %.2f %.2f)', i+1, num_epochs, E_tr, A_valid.Q3, A_valid.C3, A_valid.Ch, A_valid.Ce, A_valid.Cc) if stopping_criteria.PQ(1): logging.debug('Early Stopping!') break return stopping_criteria
def fiting_variables(self, batch_size, train_set_x, train_set_y, test_set_x=None): """Sets useful variables for locating batches""" self.index = T.lscalar('index') # index to a [mini]batch self.n_ex = T.lscalar('n_ex') # total number of examples assert type(batch_size) in (IntType, FloatType), "Batch size must be an integer or a float." if type(batch_size) is FloatType: warnings.warn('Provided batch_size is FloatType, value has been truncated') batch_size = int(batch_size) # Proper implementation of variable-batch size evaluation # Note that the last batch may be a smaller size # So we keep around the effective_batch_size (whose last element may # be smaller than the rest) # And weight the reported error by the batch_size when we average # Also, by keeping batch_start and batch_stop as symbolic variables, # we make the theano function easier to read self.batch_start = self.index * batch_size self.batch_stop = T.minimum(self.n_ex, (self.index + 1) * batch_size) self.effective_batch_size = self.batch_stop - self.batch_start self.get_batch_size = theano.function(inputs=[self.index, self.n_ex], outputs=self.effective_batch_size) # compute number of minibatches for training # note that cases are the second dimension, not the first self.n_train = train_set_x.get_value(borrow=True).shape[0] self.n_train_batches = int(np.ceil(1.0 * self.n_train / batch_size)) if test_set_x is not None: self.n_test = test_set_x.get_value(borrow=True).shape[0] self.n_test_batches = int(np.ceil(1.0 * self.n_test / batch_size))
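# Worked example of the effective-batch-size arithmetic above (plain Python,
# hypothetical helper): batch_stop = min(n_ex, (index + 1) * batch_size), so
# with n_ex=100 and batch_size=32 the batches have sizes 32, 32, 32, 4.
def effective_batch_sizes(n_ex, batch_size):
    n_batches = (n_ex + batch_size - 1) // batch_size  # ceiling division
    return [min(n_ex, (i + 1) * batch_size) - i * batch_size
            for i in range(n_batches)]
# effective_batch_sizes(100, 32) -> [32, 32, 32, 4]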
def compile(self, objective, optimizer, constraints=None): if not constraints: constraints = [lambda x: x for _ in self.params] # Dummy variables as placeholder for training data, # which need to be shared tensor variables self.X_train = shared_vals(np.zeros((2, 2)), name='X_train') self.Y_train = shared_vals(np.zeros((2, 2)), name='Y_train') batch_ix = T.lscalar('ix') batch_size = T.lscalar('size') y_sym = T.matrix('Y') loss = objective(y_sym, self.output) updates = optimizer.get_updates(self.params, constraints, loss) self.train = theano.function( inputs=[batch_ix, batch_size], outputs=loss, updates=updates, givens={ self.X: self.X_train[batch_ix * batch_size: (batch_ix + 1) * batch_size], y_sym : self.Y_train[batch_ix * batch_size: (batch_ix + 1) * batch_size] } ) self._predict = theano.function( inputs=[self.X], outputs=self.output )
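# Driver sketch (hypothetical, not part of the class): after compile(), the
# caller loads the shared variables once and steps through minibatch indices;
# `model` is assumed to be a compiled instance of the class above, and the
# arrays must match the dtype of the shared X_train/Y_train placeholders.
def fit_one_epoch(model, X, Y, batch_size=128):
    model.X_train.set_value(X)  # push numpy arrays into the shared storage
    model.Y_train.set_value(Y)
    n_batches = X.shape[0] // batch_size
    return [model.train(ix, batch_size) for ix in range(n_batches)]  # per-batch losses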
def pretraining_functions(self, train_set_x, train_set_y, batch_size): index = tensor.lscalar('index') corruption_level = tensor.scalar('corruption') learning_rate = tensor.scalar('lr') switch = tensor.iscalar('switch') n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size batch_begin = index * batch_size batch_end = batch_begin + batch_size pretrain_fns = [] for sugar in self.sugar_layers: cost, updates = sugar.get_cost_updates(corruption_level, learning_rate, switch) fn = function(inputs=[index, Param(corruption_level, default=0.2), Param(learning_rate, default=0.1), Param(switch, default=1)], outputs=[cost], updates=updates, givens={self.x: train_set_x[batch_begin:batch_end], self.y: train_set_y[batch_begin:batch_end]}, on_unused_input='ignore') pretrain_fns.append(fn) return pretrain_fns
def train_rnn(): rng = numpy.random.RandomState(1234) q = T.lvector("q") pos = T.lscalar("pos") neg = T.lscalar("neg") inputs = [q, pos, neg] embLayer = emb_layer(None, 100, 5) rnn = rnn_layer(input=inputs, emb_layer=embLayer, nh=5) cost = rnn.loss() gradient = T.grad(cost, rnn.params) lr = 0.001 updates = OrderedDict((p, p - lr * g) for p, g in zip(rnn.params, gradient)) train = theano.function(inputs=[q, pos, neg], outputs=cost, updates=updates) print rnn.emb.eval()[0] e0 = rnn.emb.eval() for i in range(0, 3): idq = rng.randint(size=10, low=0, high=100) idpos = rng.random_integers(100) idneg = rng.random_integers(100) train(idq, idpos, idneg) rnn.normalize() print rnn.emb.eval() - e0
def trainer(X,Y,alpha,lr,predictions,updates,data,labels): data = U.create_shared(data, dtype=np.int8) labels = U.create_shared(labels,dtype=np.int8) index_start = T.lscalar('start') index_end = T.lscalar('end') print "Compiling function..." train_model = theano.function( inputs = [index_start,index_end,alpha,lr], outputs = T.mean(T.neq(T.argmax(predictions, axis=1), Y)), updates = updates, givens = { X: data[index_start:index_end], Y: labels[index_start:index_end] } ) test_model = theano.function( inputs = [index_start,index_end], outputs = T.mean(T.neq(T.argmax(predictions, axis=1), Y)), givens = { X: data[index_start:index_end], Y: labels[index_start:index_end] } ) print "Done." return train_model,test_model
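# Usage sketch (hypothetical driver): trainer() returns compiled functions
# keyed by raw start/end indices rather than a batch index, so the caller
# walks the dataset in strides of batch_size.
def run_training(train_model, test_model, n_examples, batch_size, epochs, alpha_val, lr_val):
    for _ in range(epochs):
        for start in range(0, n_examples, batch_size):
            train_model(start, min(start + batch_size, n_examples), alpha_val, lr_val)
    return test_model(0, n_examples)  # error rate over the full set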
def test_doc(self): """Ensure the code given in pfunc.txt works as expected""" # Example #1. a = lscalar() b = shared(1) f1 = pfunc([a], (a + b)) f2 = pfunc([Param(a, default=44)], a + b, updates={b: b + 1}) self.assertTrue(b.get_value() == 1) self.assertTrue(f1(3) == 4) self.assertTrue(f2(3) == 4) self.assertTrue(b.get_value() == 2) self.assertTrue(f1(3) == 5) b.set_value(0) self.assertTrue(f1(3) == 3) # Example #2. a = tensor.lscalar() b = shared(7) f1 = pfunc([a], a + b) f2 = pfunc([a], a * b) self.assertTrue(f1(5) == 12) b.set_value(8) self.assertTrue(f1(5) == 13) self.assertTrue(f2(4) == 32)
def __init__( self, da, stop_val, corruption, rate, train_path, test_path ): self.fid = open( 'output.txt', 'r+' ) self.model = da self.stop_val = stop_val self.last_cost = 9999 self.train_path = train_path self.test_path = test_path self.train_set = numpy.load( train_path ) self.test_set = numpy.load( test_path ) self.shared_train = theano.shared( self.train_set ) self.shared_test = theano.shared( self.test_set ) self.print_set( self.shared_train, "train_set" ) self.print_set( self.shared_test, "test_set" ) self.learning_rate = rate self.corruption_level = corruption self.start_index = T.lscalar() self.end_index = T.lscalar() self.cost, self.updates = da.get_cost_updates( corruption, rate ) self.train = theano.function( [ self.start_index, self.end_index ], self.cost, updates = self.updates, givens = { da.x : self.shared_train [ self.start_index : self.end_index ] } ) # evaluation must not apply the training updates, so `updates` is omitted here self.test = theano.function( [ self.start_index, self.end_index ], self.cost, givens = { da.x : self.shared_test [ self.start_index : self.end_index ] } )
def getTrainModel(self, data_x, data_y, data_sm): self.ngram_start_index = T.lscalar() self.ngram_end_index = T.lscalar() self.sm_start_index = T.lscalar() self.sm_end_index = T.lscalar() self.learning_rate = T.scalar() # TRAIN_MODEL self.train_outputs = [self.cost, self.grad_norm] self.train_set_x, self.train_set_y, self.train_set_sm = io_read_ngram.shared_dataset([data_x, data_y, data_sm]) self.int_train_set_y = T.cast(self.train_set_y, "int32") self.train_model = theano.function( inputs=[ self.ngram_start_index, self.ngram_end_index, self.sm_start_index, self.sm_end_index, self.learning_rate, ], outputs=self.train_outputs, updates=self.updates, givens={ self.x: self.train_set_x[self.ngram_start_index : self.ngram_end_index], self.y: self.int_train_set_y[self.ngram_start_index : self.ngram_end_index], self.sm: self.train_set_sm[self.sm_start_index : self.sm_end_index], self.lr: self.learning_rate, }, ) return self.train_model
def test_argsort(): # Set up rng = np.random.RandomState(seed=utt.fetch_seed()) m_val = rng.rand(3, 2) v_val = rng.rand(4) # Example 1 a = tensor.dmatrix() w = argsort(a) f = theano.function([a], w) gv = f(m_val) gt = np.argsort(m_val) assert np.allclose(gv, gt) # Example 2 a = tensor.dmatrix() axis = tensor.lscalar() w = argsort(a, axis) f = theano.function([a, axis], w) for axis_val in 0, 1: gv = f(m_val, axis_val) gt = np.argsort(m_val, axis_val) assert np.allclose(gv, gt) # Example 3 a = tensor.dvector() w2 = argsort(a) f = theano.function([a], w2) gv = f(v_val) gt = np.argsort(v_val) assert np.allclose(gv, gt) # Example 4 a = tensor.dmatrix() axis = tensor.lscalar() l = argsort(a, axis, "mergesort") f = theano.function([a, axis], l) for axis_val in 0, 1: gv = f(m_val, axis_val) gt = np.argsort(m_val, axis_val) assert np.allclose(gv, gt) # Example 5 a = tensor.dmatrix() axis = tensor.lscalar() a1 = ArgSortOp("mergesort", []) a2 = ArgSortOp("quicksort", []) # All the below should give true assert a1 != a2 assert a1 == ArgSortOp("mergesort", []) assert a2 == ArgSortOp("quicksort", []) # Example 6: Testing axis=None a = tensor.dmatrix() w2 = argsort(a, None) f = theano.function([a], w2) gv = f(m_val) gt = np.argsort(m_val, None) assert np.allclose(gv, gt)
def predict(self, X): start = T.lscalar() end = T.lscalar() return theano.function( inputs=[start, end], outputs=self.py_x, givens={self.X: X[start:end]} )
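# Usage sketch (hypothetical names): the returned function slices the shared
# dataset inside `givens`, so prediction needs only row indices.
import numpy as np
def predict_labels(model, X_shared, n_rows):
    predict_fn = model.predict(X_shared)  # compile once
    probs = predict_fn(0, n_rows)         # class probabilities for rows [0, n_rows)
    return np.argmax(probs, axis=1)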
def pretraining_functions(self, train_set_x, train_set_y, alpha, batch_size): ''' Generates a list of functions, each of them implementing one component (sub-CNN) in training the iCNN. The function will require as input the minibatch index, and to train a sub-CNN you just need to iterate, calling the corresponding function on all minibatch indexes. :type train_set_x: theano.tensor.TensorType :param train_set_x: Shared variable that contains all datapoints used for training the sub-CNN : train_set_y: ... :type batch_size: int :param batch_size: size of a [mini]batch ''' index = T.lscalar('index') # index to a minibatch learning_rate = T.scalar('learning_rate') # learning rate to use # number of batches #n_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size)) # beginning of a batch, given `index` batch_begin = index * batch_size # ending of a batch given `index` batch_end = batch_begin + batch_size pretrain_fns = [] for subcnn in self.subcnns: # compute the gradients with respect to the pretraining parameters grads = T.grad(subcnn.cost, subcnn.params_pretrain) # add momentum # initialize the delta_i-1 (same parameter list as the gradients, same dtype as each parameter) delta_before=[] for param_i in subcnn.params_pretrain: delta_before_i=theano.shared(value=numpy.zeros_like(param_i.get_value())) delta_before.append(delta_before_i) updates = [] for param_i, grad_i, delta_before_i in zip(subcnn.params_pretrain, grads, delta_before): delta_i=-learning_rate * grad_i + alpha*delta_before_i updates.append((param_i, param_i + delta_i )) updates.append((delta_before_i,delta_i)) # compile the theano function fn = theano.function([index,theano.Param(learning_rate, default=0.1)], [subcnn.cost,subcnn.errors], updates=updates, givens={ self.x: train_set_x[index*batch_size:(index+1)*batch_size], self.y: train_set_y[index*batch_size:(index+1)*batch_size]}) # append `fn` to the list of functions pretrain_fns.append(fn) return pretrain_fns
def test(model): dim = 128 v_size = 7810 margin = 1.0 #load model f = open(model, 'rb') input_params = cPickle.load(f) emb, wx, wh, bh, wa = input_params f.close() embLayer = emb_layer(pre_train=emb, v = v_size, dim = dim) rnnLayer = rnn_layer(input=None, wx=wx, wh=wh, bh=bh, emb_layer = embLayer, nh = dim) att = attention_layer(input=None, rnn_layer=rnnLayer, margin = margin) q = T.lvector('q') a = T.lscalar('a') p = T.lvector('p') t = T.lscalar('t') inputs = [q,a,p,t] score = att.predict(inputs) pred = theano.function(inputs=inputs,outputs=score) pool = ThreadPool() f = open('./data/test-small.id','r') count = 1 print 'time_b:%s' %time.clock() to_pred = [] for line in f: if count % 10000 == 0: print count / 10000 count += 1 #print 'time_b:%s' %time.clock() line = line[:-1] tmp = line.split('\t') in_q = numpy.array(tmp[0].split(' ')).astype(numpy.int) - 1 in_a = int(tmp[1].split(' ')[2]) - 1 in_p = numpy.array(tmp[1].split(' ')).astype(numpy.int) - 1 in_t = int(tmp[2]) - 1 lis = (in_q, in_a, in_p, in_t) to_pred.append(lis) #print 'time_load:%s' %time.clock() #print 'time_score:%s' %time.clock() f.close() # the compiled theano function takes four positional inputs, so unpack each tuple results = pool.map(lambda args: pred(*args), to_pred) print 'time_e:%s' %time.clock() #print results pool.close() pool.join()
def classify_lenet5(learning_rate=0.005, n_epochs=8000, image_path='D:/dev/datasets/isbi/train-input/train-input_0000.tif', paramfile='lenet0_membrane_epoch_25100.pkl.gz', nkerns=[20, 50], batch_size=1): rng = numpy.random.RandomState(23455) # allocate symbolic variables for the data index_x = T.lscalar() # index to a [mini]batch index_y = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ishape = (28, 28) # this is the size of MNIST images ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # Reshape matrix of rasterized images of shape (batch_size,28*28) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 1, 28, 28)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1,28-5+1)=(24,24) # maxpooling reduces this further to (24/2,24/2) = (12,12) # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2)) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1,12-5+1)=(8,8) # maxpooling reduces this further to (8/2,8/2) = (4,4) # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) # the TanhLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (20,32*4*4) = (20,512) layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4, n_out=500, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=2) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y)
def test(model): dim = 128 v_size = 7810 margin = 1.0 #load model f = open(model, 'rb') input_params = cPickle.load(f) emb, wx, wh, bh, wa = input_params f.close() embLayer = emb_layer(pre_train=emb, v = v_size, dim = dim) rnnLayer = rnn_layer(input=None, wx=wx, wh=wh, bh=bh, emb_layer = embLayer, nh = dim) att = attention_layer(input=None, rnn_layer=rnnLayer, margin = margin) q = T.lvector('q') a = T.lscalar('a') p = T.lvector('p') t = T.lscalar('t') inputs = [q,a,p,t] #emb_num = T.lscalar('emb_num') #nh = T.scalar('nh') #dim = T.scalar('dim') score = att.predict(inputs) pred = theano.function(inputs=inputs,outputs=score) wf = open('./data/res','w') f = open('./data/test.id','r') count = 1 print 'time_b:%s' %time.clock() for line in f: if count % 10000 == 0: print count / 10000 print 'time_1w:%s' %time.clock() count += 1 #print 'time_b:%s' %time.clock() line = line[:-1] tmp = line.split('\t') in_q = numpy.array(tmp[0].split(' ')).astype(numpy.int) - 1 #x = emb[q].reshape((q.shape[0], emb.shape[1])) in_a = int(tmp[1].split(' ')[2]) - 1 in_p = numpy.array(tmp[1].split(' ')).astype(numpy.int) - 1 in_t = int(tmp[2]) - 1 #in_lis = [in_q, in_a, in_p, in_t] #print 'time_load:%s' %time.clock() s = pred(in_q, in_a, in_p, in_t) #print s wf.write(str(s) + '\n') #print 'time_score:%s' %time.clock() f.close() wf.close()
def apply_net(self, input_image, perform_downsample=False, perform_pad=False, perform_upsample=False, perform_blur=False, perform_offset=False): if perform_pad: input_image = np.pad(input_image, ((self.pad_by, self.pad_by), (self.pad_by, self.pad_by)), 'symmetric') if perform_downsample and self.downsample != 1: input_image = np.float32(mahotas.imresize(input_image, 1.0/self.downsample)) nx = input_image.shape[0] - self.pad_by*2 ny = input_image.shape[1] - self.pad_by*2 nbatches = nx * ny output = np.zeros((nx, ny), dtype=np.float32) t_input_image = theano.shared(np.asarray(input_image,dtype=theano.config.floatX),borrow=True) index_x = T.lscalar() index_y = T.lscalar() # eval_network_l0 = theano.function([index_x, index_y], self.all_layers[0].output, # givens={self.x: t_input_image[index_x:index_x + self.pad_by * 2 + 1, index_y:index_y + self.pad_by * 2 + 1]}) # eval_network_l1 = theano.function([index_x, index_y], self.all_layers[1].output, # givens={self.x: t_input_image[index_x:index_x + self.pad_by * 2 + 1, index_y:index_y + self.pad_by * 2 + 1]}) # eval_network_l2 = theano.function([index_x, index_y], self.all_layers[2].output, # givens={self.x: t_input_image[index_x:index_x + self.pad_by * 2 + 1, index_y:index_y + self.pad_by * 2 + 1]}) eval_network = theano.function([index_x, index_y], self.all_layers[-1].output, givens={self.x: t_input_image[index_x:index_x + self.pad_by * 2 + 1, index_y:index_y + self.pad_by * 2 + 1]}) for xi in range(nx): for yi in range(ny): # print eval_network_l0(xi, yi)[0,0,:,:] # print eval_network_l1(xi, yi)[0,0,:,:] # print eval_network_l2(xi, yi)[0,0,:,:] # print eval_network(xi, yi)[0,0] output[xi, yi] = eval_network(xi, yi)[0,0] print "up to x={0} of {1}".format(xi+1, nx) if perform_upsample: output = np.float32(mahotas.imresize(output, self.downsample)) if perform_blur and self.best_sigma != 0: output = scipy.ndimage.filters.gaussian_filter(output, self.best_sigma) if perform_offset: #Translate output = np.roll(output, self.best_offset[0], axis=0) output = np.roll(output, self.best_offset[1], axis=1) # Crop to valid size #output = output[self.pad_by:-self.pad_by,self.pad_by:-self.pad_by] return output
def cost_function(self,learning_rate,batch_size): index = T.lscalar() index1 = T.lscalar() """ cost function""" cost=self.negative_log_likelihood(self.y) """ Gradient of cost function""" g_W = T.grad(cost=cost, wrt=self.W) g_b = T.grad(cost=cost, wrt=self.b) """ Gradient update equations used by gradient descent algorithms""" updates = [(self.W, self.W - learning_rate * g_W),(self.b, self.b - learning_rate * g_b)] num_samples=self.train[0].get_value(borrow=True).shape[0] print '\n\n********************************' print 'num of training samples :' + `num_samples` print 'num of dimensions :' + `self.n_in` print 'num of classes :' + `self.n_classes` print 'Training batch size :' + `batch_size` self.n_train_batches = self.train[0].get_value(borrow=True).shape[0] / batch_size self.n_valid_batches= self.validate[0].get_value(borrow=True).shape[0] / batch_size self.n_test_batches= self.test[0].get_value(borrow=True).shape[0] / batch_size self.train[1]=T.cast(self.train[1],'int32'); self.test[1]=T.cast(self.test[1],'int32'); self.validate[1]=T.cast(self.validate[1],'int32'); """ Defining functions for training,testing and validation """ self.train_model = theano.function(inputs=[index], outputs=cost, updates=updates, givens={ self.x: self.train[0][index*batch_size:(index + 1)*batch_size], self.y: self.train[1][index*batch_size:(index + 1)*batch_size]}); self.test_model = theano.function(inputs=[index1], outputs=[self.errors(self.y),self.y_pred], givens={ self.x: self.test[0][index1*batch_size:(index1 + 1)*batch_size], self.y: self.test[1][index1*batch_size:(index1 + 1)*batch_size]}); self.validate_model = theano.function(inputs=[index], outputs=self.errors(self.y), givens={ self.x: self.validate[0][index * batch_size:(index + 1) * batch_size], self.y: self.validate[1][index * batch_size:(index + 1) * batch_size]})
def __init__(self, dnodex,inputdim, name=""): pos_p=T.lscalar() neg_poi=T.lscalar() user=T.lscalar() eta=T.scalar() pfp_loss=T.scalar() if dnodex.pmatrix is None: dnodex.umatrix=theano.shared(floatX(np.random.randn(*(dnodex.nuser, inputdim)))) dnodex.pmatrix=theano.shared(floatX(np.random.randn(*(dnodex.npoi,inputdim)))) n_updates=[(dnodex.pmatrix, T.set_subtensor(dnodex.pmatrix[neg_poi,:],dnodex.pmatrix[neg_poi,:]-eta*pfp_loss*dnodex.umatrix[user,:]-eta*eta*dnodex.pmatrix[neg_poi,:]))] p_updates=[(dnodex.pmatrix, T.set_subtensor(dnodex.pmatrix[pos_p,:],dnodex.pmatrix[pos_p,:]+eta*pfp_loss*dnodex.umatrix[user,:]-eta*eta*dnodex.pmatrix[pos_p,:])),(dnodex.umatrix, T.set_subtensor(dnodex.umatrix[user,:],dnodex.umatrix[user,:]+eta*pfp_loss*(dnodex.pmatrix[pos_p,:]-dnodex.pmatrix[neg_poi,:])-eta*eta*dnodex.umatrix[user,:]))] self.trainpos=theano.function([pos_p,neg_poi,user,eta,pfp_loss],updates=p_updates,allow_input_downcast=True) self.trainneg=theano.function([neg_poi,user,eta,pfp_loss],updates=n_updates,allow_input_downcast=True)
def _test_scan2(self): def step(a, b): return a + b, b # step receives the current sequence element and the previous value of the first (recurrent) output [cui, bui], _ = theano.scan(step, sequences=np.array([1,2,3]), outputs_info=[theano.shared(value=0, name='W_in'), None]) func = theano.function([], cui) print func()
def __init__(self, transition_model, observation_model, n_particles, observation_input=None, n_history=1): self.transition_model=transition_model self.observation_model=observation_model self.data_dims=observation_model.output_dims self.state_dims=transition_model.output_dims self.n_particles=n_particles self.n_history=n_history #this is used to keep track of what set of particles corresponds #to the previous point in time self.time_counter=theano.shared(0) self.theano_rng=RandomStreams() #init_particles=np.zeros((n_history+1, n_particles, self.state_dims)).astype(np.float32) init_particles=np.random.randn(n_history+1, n_particles, self.state_dims).astype(np.float32) init_weights=(np.ones((n_history+1, n_particles))/float(n_particles)).astype(np.float32) self.particles=theano.shared(init_particles) self.weights=theano.shared(init_weights) self.next_state=self.particles[(self.time_counter+1)%(self.n_history+1)] self.current_state=self.particles[self.time_counter%(self.n_history+1)] self.previous_state=self.particles[(self.time_counter-1)%(self.n_history+1)] self.next_weights=self.weights[(self.time_counter+1)%(self.n_history+1)] self.current_weights=self.weights[self.time_counter%(self.n_history+1)] self.previous_weights=self.weights[(self.time_counter-1)%(self.n_history+1)] self.proposal_distrib=None self.true_log_transition_probs=self.transition_model.rel_log_prob self.true_log_observation_probs=self.observation_model.rel_log_prob self.perform_inference=None self.resample=None self.sample_joint=None self.observation_input=observation_input ess=self.compute_ESS() self.get_ESS=theano.function([],ess) n_samps=T.lscalar() n_T=T.lscalar() data_samples, state_samples, init_state_samples, data_sample_updates=self.sample_future(n_samps,n_T) self.sample_from_future=theano.function([n_samps, n_T],[data_samples,state_samples,init_state_samples],updates=data_sample_updates) self.get_current_particles=theano.function([],self.current_state) self.get_current_weights=theano.function([],self.current_weights)
def test_dtype(self): random = RandomStreams(utt.fetch_seed()) low = tensor.lscalar() high = tensor.lscalar() out = random.random_integers(low=low, high=high, size=(20,), dtype='int8') assert out.dtype == 'int8' f = function([low, high], out) val0 = f(0, 9) assert val0.dtype == 'int8' val1 = f(255, 257) assert val1.dtype == 'int8' assert numpy.all(abs(val1) <= 1)
def build_set_function(self): index_new = T.lscalar('index_new') dataset_new = T.lscalar('dataset_new') updates = [(self.__index, index_new), (self.__dataset, dataset_new)] set_function = theano.function( inputs=[index_new, dataset_new], outputs=[], updates=updates ) return set_function
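# Usage sketch (hypothetical instance name `obj`): compile the setter once and
# reuse it; each call updates both shared variables together.
def demo_set_function(obj):
    set_fn = obj.build_set_function()  # compile once, reuse many times
    for i in range(3):
        set_fn(i, 0)  # step __index through dataset 0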
def build_norm_estimation_functions(self, data_sets): (corpus_feats, _) = data_sets.get_shared() start_idx = T.lscalar("start_idx") # index to a [mini]batch end_idx = T.lscalar("end_idx") # index to a [mini]batch # returns the zeroth-, first- and second-order statistics of the features in the requested slice norm_func = theano.function( inputs=[start_idx, end_idx], outputs=[self.zeroth_order_stats(self.x), self.first_order_stats(self.x), self.second_order_stats(self.x)], givens={self.x: corpus_feats[start_idx:end_idx]}, ) return norm_func
def test_dtype(self): rng_R = random_state_type() low = tensor.lscalar() high = tensor.lscalar() post_r, out = random_integers(rng_R, low=low, high=high, size=(20,), dtype="int8") assert out.dtype == "int8" f = compile.function([rng_R, low, high], [post_r, out]) rng = numpy.random.RandomState(utt.fetch_seed()) rng0, val0 = f(rng, 0, 9) assert val0.dtype == "int8" rng1, val1 = f(rng0, 255, 257) assert val1.dtype == "int8" assert numpy.all(abs(val1) <= 1)
def test_infer_shape(self): x = tensor.lscalar() self._compile_and_check([x], [self.op(x)], [numpy.random.random_integers(3, 50, size=())], self.op_class) self._compile_and_check([x], [self.op(x)], [0], self.op_class) self._compile_and_check([x], [self.op(x)], [1], self.op_class)
def pretraining_functions(self, train_set_x, batch_size): index = T.lscalar('index') corruption_level = T.scalar('corruption') learning_rate = T.scalar('lr') batch_begin = index * batch_size batch_end = batch_begin + batch_size pretrain_fns = [] for dA in self.dA_layers: cost, updates = dA.get_cost_updates(corruption_level, learning_rate) fn = theano.function( inputs=[ index, theano.In(corruption_level, value=0.1), theano.In(learning_rate, value=0.1) ], outputs=cost, updates=updates, givens={ self.x: train_set_x[batch_begin: batch_end] } ) pretrain_fns.append(fn) return pretrain_fns
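# Driver sketch for the functions returned above (hypothetical names): with
# theano.In(..., value=...) the extra inputs become optional and can be passed
# by the names of the underlying variables ('corruption', 'lr').
import numpy
def pretrain(sda, train_set_x, batch_size, epochs=15):
    n_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    for layer_fn in sda.pretraining_functions(train_set_x, batch_size):
        for epoch in range(epochs):
            costs = [layer_fn(i, corruption=0.3, lr=0.1) for i in range(n_batches)]
            print 'layer cost', numpy.mean(costs)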
def get_train_fn(self, dataX, batch_size=1, k=1): """ dataX: theano shared data dataY: theano shared label """ learning_rate = T.scalar('lr') Beta = T.scalar('beta') Gamma = T.scalar('gamma') Sparseness = T.scalar('sparseness') cost, updates = self._get_cost_update(lr=learning_rate, beta=Beta, gamma=Gamma, s_constrain=Sparseness, k=k) index = T.lscalar('index') fn = theano.function(inputs=[index, theano.Param(learning_rate, default=0.01), theano.Param(Beta, default=0.1), theano.Param(Gamma, default=0.0001), theano.Param(Sparseness, default=0.05)], outputs=cost, updates=updates, givens={self.x: dataX[index * batch_size:(index + 1) * batch_size]}, name='train_rbm_S_L2') return fn
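# Usage sketch (hypothetical driver): theano.Param gives every hyperparameter
# a default, so the compiled function runs with just a batch index, or with
# per-call overrides keyed by the variables' names ('lr', 'beta', ...).
def train_rbm_epoch(rbm, dataX, batch_size=1):
    fn = rbm.get_train_fn(dataX, batch_size=batch_size)
    n_batches = dataX.get_value(borrow=True).shape[0] // batch_size
    return [fn(i, lr=0.005, beta=0.2) for i in range(n_batches)]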
def fine_train(nn, datasets, learning_Rate, batch_sizes, epochs, hidden_sizes=(200,)): # `hidden_sizes` replaces an undefined name in the original; its default here is an assumption train_set_x, train_set_y = datasets[0] n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_sizes train_label = T.cast(train_set_y, 'float64') index = T.lscalar() x = T.matrix('x') y = T.matrix('y') min_batch_cost = [] if nn is None: n_in = train_set_x.get_value(borrow=True).shape[1] n_out = train_set_y.get_value(borrow=True).shape[1] mynn = ForwordNN(x, y, n_in, n_out, hidden_sizes) else: mynn = nn cost,update = mynn.get_cost_update(x,y,learning_Rate) train_nn = theano.function([index], cost, updates = update, givens = { x:train_set_x[index*batch_sizes:(index+1)*batch_sizes,:], y:train_label[index*batch_sizes:(index+1)*batch_sizes,:] } ) for num_epochs in range(epochs): t1=time.time() for num_batch in xrange(n_batches): min_batch_cost.append(train_nn(num_batch)) t2=time.time() print 'The %d/%dth training, takes %f seconds, cost is %f' %(num_epochs+1,epochs,(t2-t1),np.mean(min_batch_cost)) return mynn
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, dataset='mnist.pkl.gz', batch_size=600): # load the data datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute the number of minibatches n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' index = T.lscalar() # index to a [mini]batch # allocate the symbolic data matrices x and y x = T.matrix('x') # data, presented as rasterized images y = T.ivector('y') # labels, presented as 1D vector of [int] labels # train a logistic regression model: every example has 28 * 28 features, with 10 output classes classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10) # the cost function cost = classifier.negative_log_likelihood(y) # compute the misclassification rate test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) # validate the model validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # gradients of the cost g_W = T.grad(cost=cost, wrt=classifier.W) g_b = T.grad(cost=cost, wrt=classifier.b) # parameter updates updates = [(classifier.W, classifier.W - learning_rate * g_W), (classifier.b, classifier.b - learning_rate * g_b)] # train the model train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) ############### # TRAIN MODEL # ############### print '... training the model' # "early-stopping" parameters patience = 5000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) best_validation_loss = numpy.inf test_score = 0. 
start_time = timeit.default_timer() done_looping = False epoch = 0 while (epoch < n_epochs) and (not done_looping): # begin an epoch epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # compute the zero-one loss validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) if this_validation_loss < best_validation_loss: if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss test_losses = [ test_model(i) for i in xrange(n_test_batches) ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of' ' best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) # save the parameters of the best model with open('best_model.pkl', 'w') as f: cPickle.dump(classifier, f) if patience <= iter: done_looping = True break end_time = timeit.default_timer() print(('Optimization complete with best validation score of %f %%, ' 'with test performance %f %%') % (best_validation_loss * 100., test_score * 100.)) print 'The code ran for %d epochs, with %f epochs/sec' % ( epoch, 1. * epoch / (end_time - start_time)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time)))
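# Companion sketch (hedged): the loop above pickles the best classifier, so a
# later process can reload it and predict. This assumes the LogisticRegression
# instance keeps its symbolic input as `classifier.input` and that load_data
# is importable; neither is shown in this snippet.
def predict_with_best_model(dataset='mnist.pkl.gz'):
    classifier = cPickle.load(open('best_model.pkl'))
    predict_model = theano.function(inputs=[classifier.input], outputs=classifier.y_pred)
    test_set_x, test_set_y = load_data(dataset)[2]
    return predict_model(test_set_x.get_value()[:10])  # labels for the first 10 examples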
def test_dA(learning_rate=0.01, training_epochs=15, dataset="", modelfile="", batch_size=20, output_folder='dA_plots', n_visible=1346, n_hidden=100, beta=0, rho=0.5, noise=0.3, linear=False, lost_func='KL', loader=None): data = map(lambda x: x.partition(' ')[2], open(dataset)) train_set_x, n_visible = loader.load_training_data(data) print >> sys.stderr, "number of training example", len(train_set_x) print >> sys.stderr, "batch size", batch_size print >> sys.stderr, "number of visible nodes", n_visible print >> sys.stderr, "number of hidden nodes", n_hidden print >> sys.stderr, "corruption_level", noise print >> sys.stderr, "sparse rate", rho, "weight", beta print >> sys.stderr, "learning rate", learning_rate # compute number of minibatches for training, validation and testing n_train_batches = len(train_set_x) / batch_size #print(n_train_batches) # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images data_x = numpy.array([[0 for i in range(n_visible)] for j in range(batch_size)]) shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=True) ##################################### rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2**30)) da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=n_visible, n_hidden=n_hidden) cost, updates = da.get_cost_updates(corruption_level=noise, learning_rate=learning_rate, beta=beta, rho=rho, linear=linear, lost_func=lost_func) train_da = theano.function([], cost, updates=updates, givens={x: shared_x}) start_time = time.clock() # TRAINING # for epoch in xrange(training_epochs): # go through trainng set c = [] for batch_index in xrange(n_train_batches): sub = train_set_x[batch_index * batch_size:(1 + batch_index) * batch_size] sub = numpy.array(sub) shared_x.set_value(sub) c.append(train_da()) print 'Training epoch %d, cost ' % epoch, numpy.mean(c) end_time = time.clock() training_time = (end_time - start_time) print >> sys.stderr, (' ran for %.2fm' % (training_time / 60.)) modelfile = gzip.open(modelfile, "wb") cPickle.dump([n_visible, n_hidden], modelfile) cPickle.dump([da.W, da.b, da.b_prime], modelfile) modelfile.close()
def learning_feature( self, train_set, n_epochs, learning_rate, batch_size, corruption_level, balance_coef ): # perform `denoising` tilde_x = self.get_corrupted_input(self.x, corruption_level) # map the corrupted input to hidden layer y = T.nnet.sigmoid(T.dot(tilde_x, self.W) + self.b) # maps back hidden representation to unsupervised reconstruction z1 = T.nnet.sigmoid(T.dot(y, self.Wu) + self.bu) L1 = T.mean(-T.sum(self.x * T.log(z1) + (1 - self.x) * T.log(1 - z1), axis=1)) # perform one-sided regression to fit the cost ! z2 = T.dot(y, self.Ws) + self.bs # these two symbolic matrices are used in the loss and the givens below, so they must be declared cost_vector = T.matrix('cost_vector') Z_nk = T.matrix('Z_nk') # xi = T.maximum((Z_nk * (z2 - cost_vector)), 0.) # xi is a matrix # L2 = T.sum(xi) # TODO: smooth logistic loss function (upper bound) delta = T.log(1 + T.exp(Z_nk * (z2 - cost_vector))) L2 = T.sum(delta) # symbolic variable for balance_coef bc = T.scalar('bc') cost = L1 + bc * L2 gparams = T.grad(cost, self.params) updates = [ (param, param - learning_rate * gparam) for param, gparam in zip(self.params, gparams) ] batch_index = T.lscalar('batch_index') train_set_x, train_set_y, train_set_c = train_set train_set_z = np.zeros(train_set_c.shape) - 1 for i in xrange(train_set_z.shape[0]): train_set_z[i][train_set_y[i]] = 1 train_set_x = make_shared_data(train_set_x) train_set_c = make_shared_data(train_set_c) train_set_z = make_shared_data(train_set_z) pretrain_model = theano.function( inputs=[batch_index, bc], outputs=[cost, L1, L2], # TODO: debug updates=updates, givens={ self.x: train_set_x[batch_index * batch_size: (batch_index + 1) * batch_size], cost_vector: train_set_c[batch_index * batch_size: (batch_index + 1) * batch_size], Z_nk: train_set_z[batch_index * batch_size: (batch_index + 1) * batch_size] }, name='pretrain_model' ) n_batches = train_set_x.get_value().shape[0] / batch_size for epoch in xrange(n_epochs): epoch_cost = 0. L1_cost = 0. L2_cost = 0. for batch in xrange(n_batches): batch_cost = pretrain_model(batch, balance_coef) epoch_cost += batch_cost[0] L1_cost += batch_cost[1] L2_cost += batch_cost[2] epoch_cost /= n_batches L1_cost /= n_batches L2_cost /= n_batches print ' epoch #%d, loss = (%f, %f, %f)' % (epoch + 1, epoch_cost, L1_cost, L2_cost) y_new = T.nnet.sigmoid(T.dot(self.x, self.W) + self.b) transform_data = theano.function( inputs=[], outputs=y_new, givens={ self.x: train_set_x }, name='transform_data' ) return [transform_data(), train_set_y, train_set_c.get_value()]
def test_mlp_parity(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100, batch_size=64, n_hidden=500, n_hiddenLayers=1, verbose=False): reader = csv.reader(open("joint_knee.csv", "rb"), delimiter=',') x = list(reader) #print x result = numpy.array(x) #print result.shape def score_to_numeric(x, a): if (x == 'Hospice - Home'): return 11 if (x == 'Psychiatric Hospital or Unit of Hosp'): return 10 if (x == 'Hospice - Medical Facility'): return 9 if (x == 'Expired'): return 8 if (x == 'Facility w/ Custodial/Supportive Care'): return 7 if (x.lower() == 'left against medical advice'): return 6 if (x.lower() == 'short-term hospital'): return 5 if (x.lower() == 'multi-racial' or x.lower() == 'home or self care'): return 4 if (x.lower() == 'other race' or x.lower() == 'emergency' or x.lower() == 'skilled nursing home' or x.lower() == 'not available'): return 3 if (x.lower() == 'm' or x.lower() == 'black/african american' or x.lower() == 'urgent' or x.lower() == 'inpatient rehabilitation facility'): return 2 if (x.lower() == 'f' or x.lower() == 'white' or x.lower() == 'elective' or x.lower() == 'home w/ home health services'): return 1 if (a == 1): return int(x[:2]) if (a == 2): return float(x[1:]) else: return float(x) rownum = 0 for row in result: # Save header row. if rownum == 0: rownum += 1 header = row for i in range(0, len(header)): if header[i].lower() == 'gender': gender = i if header[i].lower() == 'race': race = i if header[i].lower() == 'type of admission': admi = i if header[i].lower() == 'patient disposition': disp = i if header[i].lower() == 'age group': age = i if header[i].lower() == 'total charges': price = i else: row[gender] = score_to_numeric(row[gender], 0) row[race] = score_to_numeric(row[race], 0) row[admi] = score_to_numeric(row[admi], 0) row[disp] = score_to_numeric(row[disp], 0) row[age] = score_to_numeric(row[age], 1) row[price] = score_to_numeric(row[price], 2) for i in range(0, len(row)): row[i] = float(row[i]) #y = row[i].astype(numpy.float) #row[i] = y #print type(row[i]) #print type(result) #result = numpy.array(result).astype('float') #print result[1:(len(result)),1:] res = result[1:(len(result)), 1:].astype(numpy.float) for i in range(len(res)): for j in range(len(res[0])): if (j == 9): res[i, j] = int(round(res[i, j] / 10000)) else: res[i, j] = int(round(res[i, j])) myset = set(res[:, 9]) nout = len(myset) y = res[:, 9] #print y x = res[:, 0:9] iris = load_iris() clf = ExtraTreesClassifier() clf = clf.fit(x, y) model = SelectFromModel(clf, prefit=True) X_new = model.transform(x) data = np.c_[X_new, y] totallen = len(data) numpy.random.shuffle(data) training, validation, testing = data[:totallen / 2, :], data[totallen / 2:( 3 * totallen / 4), :], data[(3 * totallen / 4):, :] l = len(data[0]) - 1 train_set = [training[:, 0:l], training[:, l]] valid_set = [validation[:, 0:l], validation[:, l]] test_set = [testing[:, 0:l], testing[:, l]] #print train_set #print valid_set #print test_set # Convert raw dataset to Theano shared variables. train_set_x, train_set_y = shared_dataset(train_set) valid_set_x, valid_set_y = shared_dataset(valid_set) test_set_x, test_set_y = shared_dataset(test_set) # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size ###################### # BUILD ACTUAL MODEL # ###################### print('... 
building the model') # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels rng = numpy.random.RandomState(1234) # construct the MLP class classifier = myMLP(rng=rng, input=x, n_in=l, n_hidden=n_hidden, n_out=len(myset), n_hiddenLayers=n_hiddenLayers) # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr) # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # compute the gradient of cost with respect to theta (sotred in params) # the resulting gradients will be stored in a list gparams gparams = [T.grad(cost, param) for param in classifier.params] # specify how to update the parameters of the model as a list of # (variable, update expression) pairs # given two lists of the same length, A = [a1, a2, a3, a4] and # B = [b1, b2, b3, b4], zip generates a list C of same size, where each # element is a pair formed from the two lists : # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)] updates = [(param, param - learning_rate * gparam) for param, gparam in zip(classifier.params, gparams)] # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) ############### # TRAIN MODEL # ############### print('... training') train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose) y_p_train = theano.function(inputs=[], outputs=[classifier.logRegressionLayer.y_pred], givens={x: train_set_x}) y_predict = theano.function(inputs=[], outputs=[classifier.logRegressionLayer.y_pred], givens={x: test_set_x}) y_pred1 = y_p_train() y_pred2 = y_predict() return y_pred1, y_pred2
def _construct_mlp(datasets, learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, batch_size=20, n_hidden=200): """ Demonstrate stochastic gradient descent optimization for a multilayer perceptron Note: Parameters need tuning. :type datasets: tuple :param datasets: (inputs, targets) :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient :type L1_reg: float :param L1_reg: L1-norm's weight when added to the cost (see regularization) :type L2_reg: float :param L2_reg: L2-norm's weight when added to the cost (see regularization) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type batch_size: int :param batch_size: number of examples in one batch :type n_hidden: int :param n_hidden: number of hidden units to be used in class HiddenLayer """ inputs, targets = datasets temp_train_set_x = [] temp_train_set_y = [] train_set_x = [] train_set_y = [] valid_set_x = [] valid_set_y = [] test_set_x = [] test_set_y = [] # stratified k-fold to split test and temporary train, which contains # validation and train skf = StratifiedShuffleSplit(targets, 1, 0.2) for temp_train_index, test_index in skf: # print("TEMP_TRAIN:", temp_train_index, "TEST:", test_index) temp_train_set_x.append(inputs[temp_train_index]) temp_train_set_y.append(targets[temp_train_index]) test_set_x.append(inputs[test_index]) test_set_y.append(targets[test_index]) # convert from list-wrapping array to array test_set_x = test_set_x[0] test_set_y = test_set_y[0] temp_train_set_x = temp_train_set_x[0] temp_train_set_y = temp_train_set_y[0] # stratified k-fold to split valid and train skf = StratifiedShuffleSplit(temp_train_set_y, 1, 0.25) for train_index, valid_index in skf: # print("TRAIN: ", train_index, ", VALID: ", valid_index) train_set_x.append(temp_train_set_x[train_index]) train_set_y.append(temp_train_set_y[train_index]) valid_set_x.append(temp_train_set_x[valid_index]) valid_set_y.append(temp_train_set_y[valid_index]) # convert from list-wrapping array to array train_set_x = train_set_x[0] train_set_y = train_set_y[0] valid_set_x = valid_set_x[0] valid_set_y = valid_set_y[0] # check shape # print("train_set_x shape: " + str(train_set_x.shape)) # print("train_set_y shape: " + str(train_set_y.shape)) # print("valid_set_x shape: " + str(valid_set_x.shape)) # print("valid_set_y shape: " + str(valid_set_y.shape)) # print("test_set_x shape: " + str(test_set_x.shape)) # print("test_set_y shape: " + str(test_set_y.shape)) # convert to theano.shared variable train_set_x = theano.shared(value=train_set_x, name='train_set_x') train_set_y = theano.shared(value=train_set_y, name='train_set_y') valid_set_x = theano.shared(value=valid_set_x, name='valid_set_x') valid_set_y = theano.shared(value=valid_set_y, name='valid_set_y') test_set_x = theano.shared(value=test_set_x, name='test_set_x') test_set_y = theano.shared(value=test_set_y, name='test_set_y') # compute number of minibatches for training, validation and testing n_train_batches = int(train_set_x.get_value().shape[0] / batch_size) n_valid_batches = int(valid_set_x.get_value().shape[0] / batch_size) n_test_batches = int(test_set_x.get_value().shape[0] / batch_size) # check batch # print("n_train_batches:" + str(n_train_batches)) # print("n_valid_batches:" + str(n_valid_batches)) # print("n_test_batches:" + str(n_test_batches)) print('... 
building the model') # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.lvector('y') # the labels are presented as 1D vector of [int] labels # set a random state that is related to the time # noinspection PyUnresolvedReferences rng = numpy.random.RandomState(int((time.time()))) # construct the MLP class classifier = MLP(rng=rng, input_=x, n_in=_std_height * _std_width, n_hidden=n_hidden, n_out=len(_captcha_provider.chars)) # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr) # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }, mode='FAST_RUN') validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }, mode='FAST_RUN') # compute the gradient of cost with respect to theta (sorted in params) # the resulting gradients will be stored in a list gparams gparams = [T.grad(cost, param) for param in classifier.params] # specify how to update the parameters of the model as a list of # (variable, update expression) pairs # given two lists of the same length, A = [a1, a2, a3, a4] and # B = [b1, b2, b3, b4], zip generates a list C of same size, where each # element is a pair formed from the two lists : # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)] updates = [(param, param - learning_rate * gparam) for param, gparam in zip(classifier.params, gparams)] # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }, mode='FAST_RUN') print('... training') # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant # the original branched on T.lt(...), a symbolic expression that cannot drive a Python if; use the plain built-in min validation_frequency = min(n_train_batches, patience // 2) # go through this many minibatches before checking the network # on the validation set; in this case we check every epoch best_validation_loss = numpy.inf best_iter = 0 test_score = 0. start_time = time.time() epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch += 1 for minibatch_index in range(n_train_batches): # noinspection PyUnusedLocal minibatch_avg_cost = train_model(minibatch_index) iteration = (epoch - 1) * n_train_batches + minibatch_index if (iteration + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in range(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) print('epoch {0}, minibatch {1}/{2}, validation error {3}'. 
format(epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: # improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iteration * patience_increase) best_validation_loss = this_validation_loss best_iter = iteration # test it on the test set test_losses = [ test_model(i) for i in range(n_test_batches) ] test_score = numpy.mean(test_losses) print( ' epoch {0}, minibatch {1}/{2}, test error of best ' 'model {3}'.format(epoch, minibatch_index + 1, n_train_batches, test_score * 100)) if patience <= iteration: done_looping = True break end_time = time.time() print('Optimization complete. Best validation score of {0} obtained at ' 'iteration {1}, with test performance {2}'.format( best_validation_loss * 100, best_iter + 1, test_score * 100)) print('Time used for testing the mlp is', end_time - start_time) return classifier
def validate(conf, net_weights): logger.info("... loading data") logger.debug("Theano.config.floatX is %s" % theano.config.floatX) path = conf['data']['location'] batch_size = 1 assert (type(batch_size) is int) logger.info('Batch size %d' % (batch_size)) try: x_train_allscales = try_pickle_load(path + 'x_' + conf['run-dataset'] + '.bin') x_train = x_train_allscales[0] # first scale y_train = try_pickle_load(path + 'y_' + conf['run-dataset'] + '.bin') except IOError: logger.error("Unable to load Theano dataset from %s", path) exit(1) y_valid = try_pickle_load(path + 'y_validation.bin') print path + 'y_validation.bin' n_classes = int(max(y_train.max(), y_valid.max()) + 1) logger.info("Dataset has %d classes", n_classes) image_shape = (x_train.shape[-2], x_train.shape[-1]) logger.info("Image shape is %s", image_shape) logger.info('Dataset has %d images' % x_train.shape[0]) logger.info('Input data has shape of %s ', x_train.shape) # compute number of minibatches n_train_batches = x_train.shape[0] // batch_size logger.info("Number of train batches %d" % n_train_batches) logger.info("... building network") # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # input is presented as (batch, channel, x, y) x0 = T.tensor4('x') x2 = T.tensor4('x') x4 = T.tensor4('x') # matrix row - batch index, column label of pixel # every column is a list of pixel labels (image matrix reshaped to list) y = T.imatrix('y') # create all layers builder_name = conf['network']['builder-name'] layers, out_shape, conv_out = get_net_builder(builder_name)( x0, x2, x4, y, batch_size, classes=n_classes, image_shape=image_shape, nkerns=conf['network']['layers'][:3], seed=conf['network']['seed'], activation=lReLU, bias=0.001, sparse=False) logger.info("Image out shape is %s", out_shape) y_train_shape = (y_train.shape[0], out_shape[0], out_shape[1]) # resize marked images to out_size of the network y_train_downscaled = np.empty(y_train_shape) # for i in xrange(y_train.shape[0]): # y_train_downscaled[i] = resize_marked_image(y_train[i], out_shape) x_train_shared, y_train_shared = \ shared_dataset((x_train, y_train_downscaled)) x2_train_shared = theano.shared(x_train_allscales[1], borrow=True) x4_train_shared = theano.shared(x_train_allscales[2], borrow=True) ############### # BUILD MODEL # ############### logger.info("... 
building model") layers, new_layers = extend_net_w1l_drop( conv_out, conf['network']['layers'][-2] * 3, layers, n_classes, nkerns=conf['network']['layers'][-1:], seed=conf['network']['seed'], activation=lReLU, bias=0.001) test_model = theano.function( [index], [layers[0].y_pred], givens={ x0: x_train_shared[index * batch_size:(index + 1) * batch_size], x2: x2_train_shared[index * batch_size:(index + 1) * batch_size], x4: x4_train_shared[index * batch_size:(index + 1) * batch_size] }) # try to load weights try: if net_weights is not None: for net_weight, layer in zip(net_weights, layers): layer.set_weights(net_weight) logger.info("Loaded net weights from file.") net_weights = None except: logger.error("Uncompatible network to load weights in") exit(1) set_layers_training_mode(layers, 0) logger.info("---> Results - no postprocessing") start_time = time.clock() validation = [ test_model(i)[0].reshape(NET_OUT_SHAPE) for i in xrange(n_train_batches) ] end_time = time.clock() logfiles_path = conf['data']['location'] +\ 'samples_' + conf['run-dataset'] + '.log' logger.info("Validated %d images in %.2f seconds", n_train_batches, end_time - start_time) get_stats(validation, y_train, layers[0].n_classes, conf['data']['dont-care-classes'], logfiles_path, conf['run-dataset']) logger.info("---> Results - superpixels") stats_func = lambda p: get_stats(validation, y_train, layers[0].n_classes, conf['data']['dont-care-classes'], logfiles_path, conf['run-dataset'], postproc=oversegment, postproc_params=p, show=False, log=False) start_time = time.clock() best_params = find_best_superpixel_params(stats_func) end_time = time.clock() logger.info("Done in %.2f seconds", end_time - start_time) logger.info("Best params are %s", best_params) # run one more time with params, log output this time get_stats(validation, y_train, layers[0].n_classes, conf['data']['dont-care-classes'], logfiles_path, conf['run-dataset'], postproc=oversegment, postproc_params=best_params, show=False)
def SGD(self, training_data, epochs, mini_batch_size, eta, validation_data, test_data, lmbda=0.0): """Train the network using mini-batch stochastic gradient descent.""" training_x, training_y = training_data validation_x, validation_y = validation_data test_x, test_y = test_data # compute number of minibatches for training, validation and testing num_training_batches = size(training_data)/mini_batch_size num_validation_batches = size(validation_data)/mini_batch_size num_test_batches = size(test_data)/mini_batch_size # define the (regularized) cost function, symbolic gradients, and updates l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers]) cost = self.layers[-1].cost(self)+\ 0.5*lmbda*l2_norm_squared/num_training_batches grads = T.grad(cost, self.params) updates = [(param, param-eta*grad) for param, grad in zip(self.params, grads)] # define functions to train a mini-batch, and to compute the # accuracy in validation and test mini-batches. i = T.lscalar() # mini-batch index train_mb = theano.function( [i], cost, updates=updates, givens={ self.x: training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size], self.y: training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size] }) validate_mb_accuracy = theano.function( [i], self.layers[-1].accuracy(self.y), givens={ self.x: validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size], self.y: validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size] }) test_mb_accuracy = theano.function( [i], self.layers[-1].accuracy(self.y), givens={ self.x: test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size], self.y: test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size] }) self.test_mb_predictions = theano.function( [i], self.layers[-1].y_out, givens={ self.x: test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size] }) # Do the actual training best_validation_accuracy = 0.0 for epoch in xrange(epochs): for minibatch_index in xrange(num_training_batches): iteration = num_training_batches*epoch+minibatch_index if iteration % 1000 == 0: print("Training mini-batch number {0}".format(iteration)) cost_ij = train_mb(minibatch_index) if (iteration+1) % num_training_batches == 0: validation_accuracy = np.mean( [validate_mb_accuracy(j) for j in xrange(num_validation_batches)]) print("Epoch {0}: validation accuracy {1:.2%}".format( epoch, validation_accuracy)) if validation_accuracy >= best_validation_accuracy: print("This is the best validation accuracy to date.") best_validation_accuracy = validation_accuracy best_iteration = iteration if test_data: test_accuracy = np.mean( [test_mb_accuracy(j) for j in xrange(num_test_batches)]) print('The corresponding test accuracy is {0:.2%}'.format( test_accuracy)) print("Finished training network.") print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format( best_validation_accuracy, best_iteration)) print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))
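# Hedged sketch (not part of the original corpus): the T.grad / `updates`
# pairing used by the SGD method above, reduced to a one-parameter
# least-squares toy. All names here are illustrative.
import numpy as np
import theano
import theano.tensor as T

w = theano.shared(np.float64(0.0), name='w')
x = T.dvector('x')
y = T.dvector('y')
cost = T.mean((w * x - y) ** 2)
grad_w = T.grad(cost, wrt=w)
# each call moves w one SGD step downhill and returns the current cost
sgd_step = theano.function([x, y], cost, updates=[(w, w - 0.1 * grad_w)])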
def test_multilayer_perceptron(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, dataset='mnist.pkl.gz', batch_size=20, n_hidden=500): """ Demonstrate stochastic gradient descent optimization for a multilayer perceptron This is demonstrated on MNIST. :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type L1_reg: float :param L1_reg: L1-norm's weight when added to the cost (see regularization) :type L2_reg: float :param L2_reg: L2-norm's weight when added to the cost (see regularization) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: the path of the MNIST dataset file from http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz """ datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels rng = numpy.random.RandomState(1234) # construct the MultiLayerPerceptron class classifier = MultiLayerPerceptron(rng=rng, input=x, n_in=28 * 28, n_hidden=n_hidden, n_out=10) # start-snippet-4 # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr) # end-snippet-4 # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # start-snippet-5 # compute the gradient of cost with respect to theta (stored in params) # the resulting gradients will be stored in a list gparams gparams = [T.grad(cost, param) for param in classifier.params] # specify how to update the parameters of the model as a list of # (variable, update expression) pairs # given two lists of the same length, A = [a1, a2, a3, a4] and # B = [b1, b2, b3, b4], zip generates a list C of same size, where each # element is a pair formed from the two lists : # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)] updates = [(param, param - learning_rate * gparam) for param, gparam in zip(classifier.params, gparams)] # compiling a Theano function `train_model` that returns the cost, but # at the same time updates the parameters of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) # end-snippet-5 ############### # TRAIN MODEL # ############### print('... training') # early-stopping parameters patience = 10000 # look at this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience // 2) # go through this many # minibatches before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf best_iter = 0 test_score = 0. start_time = timeit.default_timer() epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in range(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in range(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = [ test_model(i) for i in range(n_test_batches) ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = timeit.default_timer() print(('Optimization complete. Best validation score of %f %% ' 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print( ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
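# Hedged sketch: the patience-based early stopping driving the loop above,
# stripped of Theano so the control flow is visible on its own. All names
# are illustrative; `validate` stands for any callable returning the
# validation loss at a given iteration.
def early_stopping(validate, max_iters, patience=10000,
                   patience_increase=2, improvement_threshold=0.995):
    best_loss, best_iter = float('inf'), 0
    for it in range(max_iters):
        loss = validate(it)
        if loss < best_loss:
            if loss < best_loss * improvement_threshold:
                # significant improvement: allow the run to continue longer
                patience = max(patience, it * patience_increase)
            best_loss, best_iter = loss, it
        if patience <= it:
            break                      # ran out of patience
    return best_loss, best_iter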
def evaluate_lenet5(learning_rate=0.1, n_epochs=200, nkerns=[20, 50], batch_size=500): """ Demonstrates lenet on MNIST dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer """ rng = numpy.random.RandomState(23455) datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size index = T.lscalar() x = T.matrix('x') y = T.ivector('y') # Reshape matrix of rasterized images of shape (batch_size, 28 * 28) # to a 4D tensor, compatible with our LeNetConvPoolLayer # (28, 28) is the size of MNIST images. layer0_input = x.reshape((batch_size, 1, 28, 28)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24) # maxpooling reduces this further to (24/2, 24/2) = (12, 12) # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12) layer0 = LeNetConvPoolLayer(rng=rng, input=layer0_input, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2)) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8) # maxpooling reduces this further to (8/2, 8/2) = (4, 4) # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4) layer1 = LeNetConvPoolLayer(rng=rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e. matrix of rasterized images). # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4), # or (500, 50 * 4 * 4) = (500, 800) with the default values. layer2_input = layer1.output.flatten(2) layer2 = HiddenLayer(rng=rng, input=layer2_input, n_in=nkerns[1] * 4 * 4, n_out=300, activation=T.tanh) layer3 = LogisticRegression(input=layer2.output, n_in=300, n_out=10) cost = layer3.negative_log_likelihood(y) validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) params = layer3.params + layer2.params + layer1.params + layer0.params grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD. Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. 
updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) patience = 10000 patience_increase = 2 improvement_threshold = 0.995 validation_frequency = min(n_train_batches, patience // 2) best_validation_loss = numpy.inf best_iter = 0 epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in range(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print('training @ iter = ', iter) cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: validation_losses = [ validate_model(i) for i in range(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) if this_validation_loss < best_validation_loss: if this_validation_loss < best_validation_loss * improvement_threshold: patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss best_iter = iter fo = open('best_cnn_model.pkl', 'wb') pickle.dump([[layer0.W, layer0.b], [layer1.W, layer1.b], [layer2.W, layer2.b], [layer3.W, layer3.b]], fo) fo.close() if patience <= iter: done_looping = True break print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i, ' % (best_validation_loss * 100., best_iter + 1))
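# Hedged helper (not in the original): the output-size arithmetic from the
# layer comments above -- a 'valid' convolution followed by max-pooling --
# written as a small checkable function.
def conv_pool_out(size, filter_size, pool_size):
    # 'valid' convolution shrinks the image, pooling then downsamples it
    return (size - filter_size + 1) // pool_size

assert conv_pool_out(28, 5, 2) == 12   # layer0: (28-5+1)/2
assert conv_pool_out(12, 5, 2) == 4    # layer1: (12-5+1)/2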
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, dataset='mnist.pkl.gz', batch_size=600): """ Demonstrate stochastic gradient descent optimization of a log-linear model This is demonstrated on MNIST. :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: the path of the MNIST dataset file from http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz """ datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels # construct the logistic regression class # Each MNIST image has size 28*28 classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10) # the cost we minimize during training is the negative log likelihood of # the model in symbolic format cost = classifier.negative_log_likelihood(y) # compiling a Theano function that computes the mistakes that are made by # the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # compute the gradient of cost with respect to theta = (W,b) g_W = T.grad(cost=cost, wrt=classifier.W) g_b = T.grad(cost=cost, wrt=classifier.b) # specify how to update the parameters of the model as a list of # (variable, update expression) pairs. updates = [(classifier.W, classifier.W - learning_rate * g_W), (classifier.b, classifier.b - learning_rate * g_b)] # compiling a Theano function `train_model` that returns the cost, but at # the same time updates the parameters of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) ############### # TRAIN MODEL # ############### print '... training the model' # early-stopping parameters patience = 5000 # look at this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatches before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf test_score = 0. 
start_time = time.clock() done_looping = False epoch = 0 while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % \ (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss # test it on the test set test_losses = [ test_model(i) for i in xrange(n_test_batches) ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of best' ' model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f %%, ' 'with test performance %f %%') % (best_validation_loss * 100., test_score * 100.)) print 'The code ran for %d epochs, with %f epochs/sec' % ( epoch, 1. * epoch / (end_time - start_time)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time)))
def test_rbm(learning_rate=0.1, training_epochs=15, dataset='mnist.pkl.gz', batch_size=20, n_chains=20, n_samples=10, output_folder='rbm_plots', n_hidden=500): """ Demonstrate how to train an RBM and afterwards sample from it using Theano. This is demonstrated on MNIST. :param learning_rate: learning rate used for training the RBM :param training_epochs: number of epochs used for training :param dataset: path to the pickled dataset :param batch_size: size of a batch used to train the RBM :param n_chains: number of parallel Gibbs chains to be used for sampling :param n_samples: number of samples to plot for each chain """ datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2 ** 30)) # initialize storage for the persistent chain (state = hidden # layer of chain) persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden), dtype=theano.config.floatX), borrow=True) # construct the RBM class rbm = RBM(input=x, n_visible=28 * 28, n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng) # get the cost and the gradient corresponding to one step of CD-15 cost, updates = rbm.get_cost_updates(lr=learning_rate, persistent=persistent_chain, k=15) ################################# # Training the RBM # ################################# if not os.path.isdir(output_folder): os.makedirs(output_folder) os.chdir(output_folder) # it is ok for a theano function to have no output # the purpose of train_rbm is solely to update the RBM parameters train_rbm = theano.function([index], cost, updates=updates, givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]}, name='train_rbm') plotting_time = 0. start_time = time.clock() # go through training epochs for epoch in xrange(training_epochs): print 'starting epoch %d... ' % epoch # go through the training set mean_cost = [] for batch_index in xrange(n_train_batches): print 'batch: %d' % batch_index mean_cost += [train_rbm(batch_index)] print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost) # Plot filters after each training epoch plotting_start = time.clock() # Construct image from the weight matrix image = PIL.Image.fromarray(tile_raster_images( X=rbm.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))) image.save('filters_at_epoch_%i.png' % epoch) plotting_stop = time.clock() plotting_time += (plotting_stop - plotting_start) end_time = time.clock() pretraining_time = (end_time - start_time) - plotting_time print ('Training took %f minutes' % (pretraining_time / 60.)) ################################# # Sampling from the RBM # ################################# # find out the number of test samples number_of_test_samples = test_set_x.get_value(borrow=True).shape[0] # pick random test examples, with which to initialize the persistent chain test_idx = rng.randint(number_of_test_samples - n_chains) persistent_vis_chain = theano.shared(numpy.asarray( test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains], dtype=theano.config.floatX)) plot_every = 1000 # define one step of Gibbs sampling (mf = mean-field); define a # function that does `plot_every` steps before returning the # sample for plotting [presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs, vis_samples], updates = \ theano.scan(rbm.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], n_steps=plot_every) # add to updates the shared variable that takes care of our persistent # chain. updates.update({persistent_vis_chain: vis_samples[-1]}) # construct the function that implements our persistent chain. # we generate the "mean field" activations for plotting and the actual # samples for reinitializing the state of our persistent chain sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]], updates=updates, name='sample_fn') # create a space to store the image for plotting ( we need to leave # room for the tile_spacing as well) image_data = numpy.zeros((29 * n_samples + 1, 29 * n_chains - 1), dtype='uint8') for idx in xrange(n_samples): # generate `plot_every` intermediate samples that we discard, # because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() print ' ... plotting sample ', idx image_data[29 * idx:29 * idx + 28, :] = tile_raster_images( X=vis_mf, img_shape=(28, 28), tile_shape=(1, n_chains), tile_spacing=(1, 1)) # construct image image = PIL.Image.fromarray(image_data) image.save('samples.png') os.chdir('../')
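# Hedged sketch: the theano.scan pattern behind the persistent chain above,
# reduced to a toy recurrence. Names are illustrative.
import numpy
import theano

state = theano.shared(numpy.float64(1.0))
values, updates = theano.scan(lambda prev: prev * 2,
                              outputs_info=state, n_steps=10)
# persist the final value of the chain across calls, as done for
# persistent_vis_chain above
updates.update({state: values[-1]})
advance_chain = theano.function([], values[-1], updates=updates)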
def evaluate_lenet5(learning_rate=0.10, n_epochs=200, dataset='mnist.pkl.gz', nkerns=[16, 16, 16, 12, 12, 12], batch_size=500): rng = numpy.random.RandomState(32324) datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size index = T.lscalar() # index for each mini batch train_epoch = T.lscalar('train_epoch') x = T.matrix('x') y = T.ivector('y') # ------------------------------- Building Model ---------------------------------- print "...Building the model" layer_0_input = x.reshape((batch_size, 1, 28, 28)) # output image size = (28-5+1)/1 = 24 layer_0 = LeNetConvPoolLayer(rng, input=layer_0_input, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(1, 1)) #output image size = (24-3+1) = 22 layer_1 = LeNetConvPoolLayer(rng, input=layer_0.output, image_shape=(batch_size, nkerns[0], 24, 24), filter_shape=(nkerns[1], nkerns[0], 3, 3), poolsize=(1, 1)) #output image size = (22-3+1)/2 = 10 layer_2 = LeNetConvPoolLayer(rng, input=layer_1.output, image_shape=(batch_size, nkerns[1], 22, 22), filter_shape=(nkerns[2], nkerns[1], 3, 3), poolsize=(2, 2)) #output image size = (10-3+1)/2 = 4 layer_3 = LeNetConvPoolLayer(rng, input=layer_2.output, image_shape=(batch_size, nkerns[2], 10, 10), filter_shape=(nkerns[3], nkerns[2], 3, 3), poolsize=(2, 2)) #output image size = (4-3+2+1) = 4 layer_4 = LeNetConvPoolLayer(rng, input=layer_3.output, image_shape=(batch_size, nkerns[3], 4, 4), filter_shape=(nkerns[4], nkerns[3], 3, 3), poolsize=(1, 1), border_mode=1) #output image size = (4-3+2+1)/2 = 2 layer_5 = LeNetConvPoolLayer(rng, input=layer_4.output, image_shape=(batch_size, nkerns[4], 4, 4), filter_shape=(nkerns[5], nkerns[4], 3, 3), poolsize=(2, 2), border_mode=1) # make the input to hidden layer 2 dimensional layer_6_input = layer_5.output.flatten(2) layer_6 = HiddenLayer(rng, input=layer_6_input, n_in=nkerns[5] * 2 * 2, n_out=200, activation=T.tanh) layer_7 = LogReg(input=layer_6.output, n_in=200, n_out=10) teacher_p_y_given_x = theano.shared(numpy.asarray( pickle.load(open('prob_best_model.pkl', 'rb')), dtype=theano.config.floatX), borrow=True) p_y_given_x = T.matrix('p_y_given_x') e = theano.shared(value=0, name='e', borrow=True) cost = layer_7.neg_log_likelihood( y) + 2.0 / (e) * T.mean(-T.log(layer_7.p_y_given_x) * p_y_given_x - layer_7.p_y_given_x * T.log(p_y_given_x)) tg = theano.shared(numpy.asarray(pickle.load( open('modified_guided_data.pkl', 'rb')), dtype=theano.config.floatX), borrow=True) guiding_weights = T.tensor4('guiding_weights') #guide_cost = T.mean(-T.log(layer_3.output)*guiding_weights - layer_3.output*T.log(guiding_weights)) guide_cost = T.mean((layer_3.output - guiding_weights)**2) test_model = theano.function( [index], layer_7.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( [index], layer_7.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # list of parameters params = layer_7.params + layer_6.params + layer_5.params + layer_4.params + layer_3.params + layer_2.params + layer_1.params + layer_0.params params_gl = layer_3.params + layer_2.params + layer_1.params + layer_0.params # import pdb # pdb.set_trace() grads_gl = T.grad(guide_cost, params_gl) updates_gl = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params_gl, grads_gl)] grads = T.grad(cost, params) updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)] train_model = theano.function( [index, train_epoch], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size], p_y_given_x: teacher_p_y_given_x[index], e: train_epoch }) train_till_guided_layer = theano.function( [index], guide_cost, updates=updates_gl, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size], guiding_weights: tg[index] }, on_unused_input='ignore') # -----------------------------------------Starting Training ------------------------------ print('..... Training ') # for early stopping patience = 10000 patience_increase = 2 improvement_threshold = 0.95 validation_frequency = min(n_train_batches, patience // 2) best_validation_loss = numpy.inf # initialising loss to be infinite best_itr = 0 test_score = 0 start_time = timeit.default_timer() epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in range(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print('training @ iter = ', iter) if epoch < n_epochs / 5: cost_ij_guided = train_till_guided_layer(minibatch_index) cost_ij = train_model(minibatch_index, epoch) if (iter + 1) % validation_frequency == 0: # compute loss on validation set validation_losses = [ validate_model(i) for i in range(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) # import pdb # pdb.set_trace() with open('Student_6_terminal_out_2', 'a+') as f_: f_.write( 'epoch %i, minibatch %i/%i, validation error %f %% \n' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # check with best validation score till now if this_validation_loss < best_validation_loss: # improve if this_validation_loss < best_validation_loss * improvement_threshold: patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss best_itr = iter test_losses = [ test_model(i) for i in range(n_test_batches) ] test_score = numpy.mean(test_losses) with open('Student_6_terminal_out_2', 'a+') as f_: f_.write( 'epoch %i, minibatch %i/%i, testing error %f %%\n' % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) with open('best_model_7layer_2.pkl', 'wb') as f: pickle.dump(params, f) with open('Results_student_6_2.txt', 'wb') as f1: f1.write(str(test_score * 100) + '\n') #if patience <= iter: # done_looping = True # break end_time = timeit.default_timer() with open('Student_6_terminal_out_2', 'a+') as f_: f_.write('Optimization complete\n') f_.write( 'Best validation score of %f %% obtained at iteration %i with test performance %f %% \n' % (best_validation_loss * 100., best_itr, test_score * 100)) f_.write('The code ran for %.2fm\n' % ((end_time - start_time) / 60.))
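# Hedged sketch (illustrative names): the teacher-guided term in the cost
# above, as a standalone symmetric cross-entropy between the student's and
# the stored teacher's softmax outputs.
import theano
import theano.tensor as T

p_student = T.matrix('p_student')   # rows: student softmax outputs
p_teacher = T.matrix('p_teacher')   # rows: teacher softmax outputs
sym_xent = T.mean(-T.log(p_student) * p_teacher
                  - p_student * T.log(p_teacher))
distill_term = theano.function([p_student, p_teacher], sym_xent)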
n_train_batches = n_batches print 'Number of songs for training in a single chunk file: ' + str( n_train_batches) ########################################################### ########################################################### ############ CONSTRUCTING MODEL ARCHITECTURE ############## ########################################################### print 'Building model...' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix( 'x' ) # the data is presented as a vector of inputs with many exchangeable examples of this vector rng = numpy.random.RandomState(1234) # Reshape matrix of rasterized images of shape (minibatch_size, 1000 * 60) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((minibatch_size, 1, 1000, 60)) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(minibatch_size, 1, 1000, 60), filter_shape=(layer0_filters, 1, 5, 5), poolsize=(5, 1), dim2=1)
def test_Highway_Momentum_output(datasets, learning_rate=0.1, lr_decay=0.95, momentum=0.9, n_epochs=200, n_hidden=10, n_hiddenLayers=1, n_highwayLayers = 5, activation_hidden = T.nnet.nnet.relu, activation_highway = T.nnet.nnet.sigmoid, b_T = -5, L1_reg = 0, L2_reg = 0, batch_size=500,verbose=False, early_stopping=True): rng = numpy.random.RandomState(23455) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_in = train_set_x.get_value(borrow=True).shape[1] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch itr = T.fscalar() # index to an iteration # start-snippet-1 x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### highway_net = HighwayNetwork( rng=rng, input=x, n_in=n_in, n_hidden=n_hidden, n_out=10, n_hiddenLayers=n_hiddenLayers, n_highwayLayers = n_highwayLayers, activation_hidden = activation_hidden, activation_highway = activation_highway, b_T = b_T ) print('... building the model') # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically cost = ( highway_net.logRegressionLayer.negative_log_likelihood(y) #+ L1_reg * L1 #+ L2_reg * L2_sqr ) # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch test_model = theano.function( inputs=[index], outputs=highway_net.logRegressionLayer.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] } ) validate_model = theano.function( inputs=[index], outputs=highway_net.logRegressionLayer.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] } ) updates = MomentumG(cost, highway_net.params, itr, lr_base=learning_rate, lr_decay=lr_decay, momentum=momentum) # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index,itr], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] }, on_unused_input='ignore' ) gate_output = theano.function(inputs=[index], outputs=highway_net.gate_outputs, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], }) block_output = theano.function(inputs=[index], outputs=highway_net.block_outputs, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], }) result = train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, gate_output, block_output, verbose, early_stopping) res = pd.DataFrame([result.RunningTime, result.BestXEntropy, result.TestPerformance, result.BestValidationScore, n_epochs, result.N_Epochs, activation_hidden, activation_highway, L2_reg, L1_reg, batch_size, 
result.N_Iterations, n_hidden, n_hiddenLayers, n_highwayLayers, learning_rate, lr_decay, momentum, result.Patience], index=['Running time','XEntropy','Test performance','Best Validation score', 'Max epochs','N epochs','Activation function - hidden', 'Activation function - highway','L2_reg parameter', 'L1_reg parameter','Batch size','Iterations', 'Hidden neurons per layer', 'Hidden Layers', 'Highway Layers', 'Learning rate', 'lr_decay', 'momentum', 'Patience']).transpose() res.to_csv('Results.csv',mode='a',index=None,header=False) idx = pd.read_csv('Results.csv').index.values[-1] pickle.dump(result.XEntropy,open("cross_entropy"+str(idx)+".p","wb")) print('Cross entropy is stored in cross_entropy'+str(idx)+'.p') return highway_net.params, result.Gate_outputs, result.Block_outputs
W = csp(X_train, Y_train) V = np.ones((301, 1)) sc = classify_csp(W, V, X_train, Y_train, X_test, Y_test) # Fine tune CSP pipeline # Note input data dim: [batches, time, channel] # Filter dim: [channel_in, channel_out] X_train_T = theano.shared(X_train.transpose(2, 0, 1)) X_test_T = theano.shared(X_test.transpose(2, 0, 1)) Y_train_T = T.cast(theano.shared(Y_train[0, :]), 'int32') Y_test_T = T.cast(theano.shared(Y_test[0, :]), 'int32') lr = .01 # learning rate batch_size = 28 epochs = 1700 index = T.lscalar('index') y = T.ivector('y') X = T.tensor3('X') csp_w = theano.shared(W) avg_v = theano.shared(V) proj_csp = T.tensordot(X, csp_w, axes=[2, 0]) layer0_out = T.pow(proj_csp, 2) variance = T.tensordot(layer0_out, avg_v, axes=[1, 0]) layer1_out = T.log((variance))[:, :, 0] layer2 = LogisticRegression(input=layer1_out, n_in=26, n_out=2) loss = layer2.negative_log_likelihood(y) + .01 * T.sum(T.pow(avg_v, 2)) f = open('params_dnn_al.pkl') params_model = cPickle.load(f) csp_w.set_value(params_model[0].get_value()) avg_v.set_value(params_model[1].get_value())
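# Hedged sketch: checking the tensordot contraction used above on plain
# numpy arrays. Sizes are illustrative; X is [batches, time, channel] and
# W is [channel_in, channel_out], as noted in the comments.
import numpy as np

X = np.random.randn(28, 301, 118)
W = np.random.randn(118, 26)
proj = np.tensordot(X, W, axes=[2, 0])   # contracts the channel axis
assert proj.shape == (28, 301, 26)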
def build_finetune_functions(self, datasets, batch_size, learning_rate): '''Generates a function `train` that implements one step of finetuning, a function `validate` that computes the error on a batch from the validation set, and a function `test` that computes the error on a batch from the testing set :type datasets: list of pairs of theano.tensor.TensorType :param datasets: It is a list that contains all the datasets; it has to contain three pairs, `train`, `valid`, `test` in this order, where each pair is formed of two Theano variables, one for the datapoints, the other for the labels :type batch_size: int :param batch_size: size of a minibatch :type learning_rate: float :param learning_rate: learning rate used during finetune stage ''' (train_set_x, train_set_y) = datasets[0] (valid_set_x, valid_set_y) = datasets[1] (test_set_x, test_set_y) = datasets[2] # compute number of minibatches for training, validation and testing n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_valid_batches //= batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_test_batches //= batch_size index = T.lscalar('index') # index to a [mini]batch # compute the gradients with respect to the model parameters gparams = T.grad(self.finetune_cost, self.params) # compute list of fine-tuning updates updates = [(param, param - gparam * learning_rate) for param, gparam in zip(self.params, gparams)] train_fn = theano.function( inputs=[index], outputs=self.finetune_cost, updates=updates, givens={ self.x: train_set_x[index * batch_size:(index + 1) * batch_size], self.y: train_set_y[index * batch_size:(index + 1) * batch_size] }, name='train') test_score_i = theano.function( [index], self.errors, givens={ self.x: test_set_x[index * batch_size:(index + 1) * batch_size], self.y: test_set_y[index * batch_size:(index + 1) * batch_size] }, name='test') valid_score_i = theano.function( [index], self.errors, givens={ self.x: valid_set_x[index * batch_size:(index + 1) * batch_size], self.y: valid_set_y[index * batch_size:(index + 1) * batch_size] }, name='valid') # Create a function that scans the entire validation set def valid_score(): return [valid_score_i(i) for i in range(n_valid_batches)] # Create a function that scans the entire test set def test_score(): return [test_score_i(i) for i in range(n_test_batches)] return train_fn, valid_score, test_score
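# Hedged usage sketch (assumed names): how the three callables returned
# above are typically driven; `sda`, `datasets` and `n_train_batches` are
# assumptions taken from the surrounding tutorial code.
import numpy

train_fn, valid_score, test_score = sda.build_finetune_functions(
    datasets, batch_size=20, learning_rate=0.1)
for epoch in range(10):
    for i in range(n_train_batches):
        train_fn(i)                    # one SGD step per minibatch
    print('epoch %d, mean validation error %f'
          % (epoch, numpy.mean(valid_score())))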
def test_dA(learning_rate=0.1, training_epochs=15, dataset='mnist.pkl.gz', batch_size=20, output_folder='dA_plots'): """ This demo is tested on MNIST :type learning_rate: float :param learning_rate: learning rate used for training the Denoising AutoEncoder :type training_epochs: int :param training_epochs: number of epochs used for training :type dataset: string :param dataset: path to the pickled dataset """ datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images if not os.path.isdir(output_folder): os.makedirs(output_folder) os.chdir(output_folder) #################################### # BUILDING THE MODEL NO CORRUPTION # #################################### rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2 ** 30)) da = dA( numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=28 * 28, n_hidden=500 ) cost, updates = da.get_cost_updates( corruption_level=0., learning_rate=learning_rate ) train_da = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size] } ) start_time = time.clock() ############ # TRAINING # ############ # go through training epochs for epoch in xrange(training_epochs): # go through training set c = [] for batch_index in xrange(n_train_batches): c.append(train_da(batch_index)) print 'Training epoch %d, cost ' % epoch, numpy.mean(c) end_time = time.clock() training_time = (end_time - start_time) print >> sys.stderr, ('The no corruption code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((training_time) / 60.)) image = Image.fromarray( tile_raster_images(X=da.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))) image.save('filters_corruption_0.png') ##################################### # BUILDING THE MODEL CORRUPTION 30% # ##################################### rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2 ** 30)) da = dA( numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=28 * 28, n_hidden=500 ) cost, updates = da.get_cost_updates( corruption_level=0.3, learning_rate=learning_rate ) train_da = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size] } ) start_time = time.clock() ############ # TRAINING # ############ # go through training epochs for epoch in xrange(training_epochs): # go through training set c = [] for batch_index in xrange(n_train_batches): c.append(train_da(batch_index)) print 'Training epoch %d, cost ' % epoch, numpy.mean(c) end_time = time.clock() training_time = (end_time - start_time) print >> sys.stderr, ('The 30% corruption code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % (training_time / 60.)) image = Image.fromarray(tile_raster_images( X=da.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))) image.save('filters_corruption_30.png') os.chdir('../')
def test_mlp_dropout( p,learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=500, batch_size=20, n_hidden=500, verbose=True, acttest=T.tanh,): # load the dataset; download the dataset if it is not present f = open("dropout.txt",'w') datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model',file=f) print('... building the model') # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels training_enabled = T.iscalar('training_enabled') # pseudo boolean for switching between training and prediction rng = numpy.random.RandomState(1234) #TODO: create an object of DropoutHiddenLayer class #hiddenlayer_one = ...... hiddenlayer_one=DropoutHiddenLayer( rng=rng, is_train=training_enabled, input=x, n_in=32*32*3, n_out=n_hidden, p=p ) #TODO: create an object of DropoutHiddenLayer class #hiddenlayer_two = ...... hiddenlayer_two=DropoutHiddenLayer( rng=rng, is_train=training_enabled, input=hiddenlayer_one.output, n_in=n_hidden, n_out=n_hidden, p=p ) # The logistic regression layer gets as input the hidden units # of the hidden layer #TODO: create an object of LogisticRegression class #logRegressionLayer = ...... logRegressionLayer = LogisticRegression( input=hiddenlayer_two.output, n_in=n_hidden, n_out=10 ) # L1 norm ; one regularization option is to enforce L1 norm to # be small #TODO: Define the expression for L1 #L1 = ...... L1 = ( abs(hiddenlayer_one.W).sum() + abs(hiddenlayer_two.W).sum() + abs(logRegressionLayer.W).sum() ) # square of L2 norm ; one regularization option is to enforce # square of L2 norm to be small #TODO: Define the expression for L2_sqr #L2_sqr = ...... L2_sqr = ( (hiddenlayer_one.W ** 2).sum() + (hiddenlayer_two.W ** 2).sum() + (logRegressionLayer.W ** 2).sum() ) # negative log likelihood of the MLP is given by the negative # log likelihood of the output of the model, computed in the # logistic regression layer #TODO: Define the expression for negative_log_likelihood #negative_log_likelihood = ...... negative_log_likelihood = ( logRegressionLayer.negative_log_likelihood ) # same holds for the function computing the number of errors #TODO: Define the expression for errors #errors = ...... errors=logRegressionLayer.errors # the parameters of the model are the parameters of the two layer it is # made out of #TODO: Define the expression for params #params = ...... params = hiddenlayer_one.params + hiddenlayer_two.params + logRegressionLayer.params # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically #TODO: Define the expression for cost #cost = ...... 
cost = ( negative_log_likelihood(y) + L1_reg * L1 + L2_reg * L2_sqr ) # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch test_model = theano.function( inputs=[index], outputs=errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size], training_enabled: numpy.cast['int32'](0) }, ) validate_model = theano.function( inputs=[index], outputs=errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size], training_enabled: numpy.cast['int32'](0) }, ) # compute the gradient of cost with respect to theta (stored in params) # the resulting gradients will be stored in a list gparams gparams = [T.grad(cost, param) for param in params] # specify how to update the parameters of the model as a list of # (variable, update expression) pairs momentum = theano.shared(numpy.cast[theano.config.floatX](0.5), name='momentum') updates = [] for param in params: param_update = theano.shared(param.get_value()*numpy.cast[theano.config.floatX](0.)) updates.append((param, param - learning_rate*param_update)) updates.append((param_update, momentum*param_update + (numpy.cast[theano.config.floatX](1.) - momentum)*T.grad(cost, param))) # compiling a Theano function `train_model` that returns the cost, but # at the same time updates the parameters of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size], training_enabled: numpy.cast['int32'](1) }, ) ############### # TRAIN MODEL # ############### print('... training') print('... training',file=f) print('p=%f'%p) print('p=%f'%p,file=f) # early-stopping parameters patience = 20000 # look at this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience // 2) # go through this many # minibatches before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf best_iter = 0 test_score = 0. start_time = timeit.default_timer() epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in range(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) if verbose: print( 'epoch %i, minibatch %i/%i, validation error %f %%' % ( epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100. ) ) print( 'epoch %i, minibatch %i/%i, validation error %f %%' % ( epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100. 
),file=f ) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if ( this_validation_loss < best_validation_loss * improvement_threshold ): patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = [test_model(i) for i in range(n_test_batches)] test_score = numpy.mean(test_losses) if verbose: print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.),file=f) if patience <= iter: done_looping = True break end_time = timeit.default_timer() print(('Optimization complete. Best validation score of %f %% ' 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print(('Optimization complete. Best validation score of %f %% ' 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.),file=f) print(('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))) print(('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)),file=f)
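# Hedged sketch: the momentum update pair from the training code above,
# isolated for a single scalar parameter. Names are illustrative. Both
# updates read the old value of `velocity`, since Theano applies all
# updates simultaneously.
import numpy
import theano
import theano.tensor as T

param = theano.shared(numpy.float64(0.0))
velocity = theano.shared(numpy.float64(0.0))   # smoothed gradient estimate
grad = T.dscalar('grad')
momentum, learning_rate = 0.5, 0.01
momentum_step = theano.function([grad], [], updates=[
    (param, param - learning_rate * velocity),
    (velocity, momentum * velocity + (1. - momentum) * grad)])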
train_loss_nonorm = l6.error(normalisation=False) train_loss = l6.error() # but compute and print this! valid_loss = l6.error(dropout_active=False) all_parameters = layers.all_parameters(l6) all_bias_parameters = layers.all_bias_parameters(l6) xs_shared = [ theano.shared(np.zeros((1, 1, 1, 1), dtype=theano.config.floatX)) for _ in xrange(num_input_representations) ] y_shared = theano.shared(np.zeros((1, 1), dtype=theano.config.floatX)) learning_rate = theano.shared( np.array(LEARNING_RATE_SCHEDULE[0], dtype=theano.config.floatX)) idx = T.lscalar('idx') givens = { l0.input_var: xs_shared[0][idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE], l0_45.input_var: xs_shared[1][idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE], l6.target_var: y_shared[idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE], } # updates = layers.gen_updates(train_loss, all_parameters, learning_rate=LEARNING_RATE, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY) updates_nonorm = layers.gen_updates_nesterov_momentum_no_bias_decay( train_loss_nonorm, all_parameters, all_bias_parameters, learning_rate=learning_rate, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, dataset='mnist.pkl.gz', batch_size=600): datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size print '... building the model' index = T.lscalar() x = T.matrix('x') y = T.ivector('y') classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10) cost = classifier.negative_log_likelihood(y) test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) g_W = T.grad(cost=cost, wrt=classifier.W) g_b = T.grad(cost=cost, wrt=classifier.b) updates = [(classifier.W, classifier.W - learning_rate * g_W), (classifier.b, classifier.b - learning_rate * g_b)] train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) print '... training the model' patience = 5000 patience_increase = 2 improvement_threshold = 0.995 validation_frequency = min(n_train_batches, patience / 2) best_validation_loss = np.inf test_score = 0. start_time = timeit.default_timer() copy_reg.pickle(types.MethodType, _pickle_method) done_looping = False epoch = 0 while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] this_validation_loss = np.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) if this_validation_loss < best_validation_loss: if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss test_losses = [ test_model(i) for i in xrange(n_test_batches) ] test_score = np.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of' ' best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) with open('best_logReg_model.pkl', 'wb') as f: cPickle.dump(classifier, f, protocol=cPickle.HIGHEST_PROTOCOL) if patience <= iter: done_looping = True break end_time = timeit.default_timer() print(('Optimization complete with best validation score of %f %%, ' 'with test performance %f %%') % (best_validation_loss * 100., test_score * 100.)) print 'The code ran for %d epochs, with %f epochs/sec' % ( epoch, 1. * epoch / (end_time - start_time)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time)))
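# Hedged sketch: reloading the classifier pickled above to label new digits.
# Assumes, as in the standard tutorial version of LogisticRegression, that
# the class keeps its symbolic `input` and `y_pred` attributes; if the
# local class does not, rebuild the graph instead of unpickling.
import cPickle
import theano

classifier = cPickle.load(open('best_logReg_model.pkl', 'rb'))
predict_model = theano.function(inputs=[classifier.input],
                                outputs=classifier.y_pred)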
def test_dA_joint(learning_rate=0.01, training_epochs=15000, dataset='mnist.pkl.gz', batch_size=5, output_folder='dA_plots'): """ This demo is tested on MNIST :type learning_rate: float :param learning_rate: learning rate used for training the Denoising AutoEncoder :type training_epochs: int :param training_epochs: number of epochs used for training :type dataset: string :param dataset: path to the pickled dataset """ ##datasets = load_data(dataset) #from SdA_mapping import load_data_half #datasets = load_data_half(dataset) print 'loading data' datasets, x_mean, y_mean, x_std, y_std = load_vc() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] print 'loaded data' # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x1 = T.matrix('x1') # the data is presented as rasterized images x2 = T.matrix('x2') # the data is presented as rasterized images cor_reg = T.scalar('cor_reg') if not os.path.isdir(output_folder): os.makedirs(output_folder) os.chdir(output_folder) #################################### # BUILDING THE MODEL NO CORRUPTION # #################################### rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2**30)) #da = dA_joint( #numpy_rng=rng, #theano_rng=theano_rng, #input1=x1, #input2=x2, #n_visible1=28 * 28/2, #n_visible2=28 * 28/2, #n_hidden=500 #) print 'initialize functions' da = dA_joint( numpy_rng=rng, theano_rng=theano_rng, input1=x1, input2=x2, cor_reg=cor_reg, #n_visible1=28 * 28/2, #n_visible2=28 * 28/2, n_visible1=24, n_visible2=24, n_hidden=50) cost, updates = da.get_cost_updates(corruption_level=0.3, learning_rate=learning_rate) cor_reg_val = numpy.float32(5.0) train_da = theano.function( [index], cost, updates=updates, givens={ x1: train_set_x[index * batch_size:(index + 1) * batch_size], x2: train_set_y[index * batch_size:(index + 1) * batch_size] }) fprop_x1 = theano.function([], outputs=da.output1, givens={x1: test_set_x}, name='fprop_x1') fprop_x2 = theano.function([], outputs=da.output2, givens={x2: test_set_y}, name='fprop_x2') fprop_x1t = theano.function([], outputs=da.output1, givens={x1: train_set_x}, name='fprop_x1') fprop_x2t = theano.function([], outputs=da.output2, givens={x2: train_set_y}, name='fprop_x2') rec_x1 = theano.function([], outputs=da.rec1, givens={x1: test_set_x}, name='rec_x1') rec_x2 = theano.function([], outputs=da.rec2, givens={x2: test_set_y}, name='rec_x2') fprop_x1_to_x2 = theano.function([], outputs=da.reg, givens={x1: test_set_x}, name='fprop_x12x2') updates_reg = [(da.cor_reg, da.cor_reg + theano.shared(numpy.float32(0.1))) ] update_reg = theano.function([], updates=updates_reg) print 'initialize functions ended' start_time = time.clock() ############ # TRAINING # ############ print 'training started' X1 = test_set_x.eval() X1 *= x_std X1 += x_mean X2 = test_set_y.eval() X2 *= y_std X2 += y_mean from dcca_numpy import cor_cost # go through training epochs for epoch in xrange(training_epochs): # go through training set c = [] for batch_index in xrange(n_train_batches): c.append(train_da(batch_index)) #cor_reg_val += 1 #da.cor_reg = theano.shared(cor_reg_val) update_reg() X1H = rec_x1() X2H = rec_x2() X1H *= x_std X1H += x_mean X2H *= y_std X2H += y_mean H1 = fprop_x1() H2 = fprop_x2() print 'Training epoch %d' % epoch print 'Reconstruction ', 
numpy.mean(numpy.mean((X1H-X1)**2,1)),\ numpy.mean(numpy.mean((X2H-X2)**2,1)) if epoch % 5 == 2: # pretrain middle layer print '... pre-training MIDDLE layer' H1t = fprop_x1t() H2t = fprop_x2t() h1 = T.matrix('x') # the data is presented as rasterized images h2 = T.matrix('y') # the labels are presented as 1D vector of from mlp import HiddenLayer numpy_rng = numpy.random.RandomState(89677) log_reg = HiddenLayer(numpy_rng, h1, 50, 50, activation=T.tanh) if 1: # for middle layer learning_rate = 0.1 #H1=theano.shared(H1) #H2=theano.shared(H2) # compute the gradients with respect to the model parameters logreg_cost = log_reg.mse(h2) gparams = T.grad(logreg_cost, log_reg.params) # compute list of fine-tuning updates updates = [(param, param - gparam * learning_rate) for param, gparam in zip(log_reg.params, gparams)] train_fn_middle = theano.function(inputs=[], outputs=logreg_cost, updates=updates, givens={ h1: theano.shared(H1t), h2: theano.shared(H2t) }, name='train_middle') epoch = 0 while epoch < 100: print epoch, train_fn_middle() epoch += 1 ##X2H=fprop_x1_to_x2() X2H = numpy.tanh(H1.dot(log_reg.W.eval()) + log_reg.b.eval()) X2H = numpy.tanh(X2H.dot(da.W2_prime.eval()) + da.b2_prime.eval()) X2H *= y_std X2H += y_mean print 'Regression ', numpy.mean(numpy.mean((X2H - X2)**2, 1)) print 'Correlation ', cor_cost(H1, H2) end_time = time.clock() training_time = (end_time - start_time) print >> sys.stderr, ('The no corruption code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((training_time) / 60.)) image = Image.fromarray( tile_raster_images(X=da.W1.get_value(borrow=True).T, img_shape=(28, 14), tile_shape=(10, 10), tile_spacing=(1, 1))) image.save('filters_corruption_0.png') from matplotlib import pyplot as pp pp.plot(H1[:10, :2], 'b') pp.plot(H2[:10, :2], 'r') pp.show() print cor
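# Hedged sketch (assumption, not the original dcca_numpy code): a plausible
# implementation of the `cor_cost` diagnostic used above, reporting the mean
# per-dimension Pearson correlation between the two hidden codes H1 and H2.
import numpy


def cor_cost(H1, H2):
    total = 0.0
    for j in range(H1.shape[1]):
        # numpy.corrcoef returns the 2x2 correlation matrix of the pair;
        # the off-diagonal entry is the cross-correlation
        total += numpy.corrcoef(H1[:, j], H2[:, j])[0, 1]
    return total / H1.shape[1]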
def run_mnl():
    """ Discrete choice model estimation with Theano

    Setup
    -----
    step 1: load variables from csv file
    step 2: define hyperparameters used in the computation
    step 3: define symbolic Theano tensors
    step 4: build model and define cost function
    step 5: define gradient calculation algorithm
    step 6: define Theano symbolic functions
    step 7: run main estimation loop for n iterations
    step 8: perform analytics and model statistics
    """
    # compile and import dataset from csv
    d_x_ng, d_x_g, d_y, avail, d_ind = extractdata(csvString)
    data_x_ng = shared(np.asarray(d_x_ng, dtype=floatX), borrow=True)
    data_x_g = shared(np.asarray(d_x_g, dtype=floatX), borrow=True)
    data_y = T.cast(shared(np.asarray(d_y - 1, dtype=floatX), borrow=True),
                    'int32')
    data_av = shared(np.asarray(avail, dtype=floatX), borrow=True)

    sz_n = d_x_g.shape[0]   # number of samples
    sz_k = d_x_g.shape[1]   # number of generic variables
    sz_m = d_x_ng.shape[2]  # number of non-generic variables
    sz_i = d_x_ng.shape[1]  # number of alternatives
    sz_minibatch = sz_n     # full-batch estimation

    # model hyperparameters
    learning_rate = 0.3
    momentum = 0.9

    # symbolic Theano tensors
    x_ng = T.tensor3('data_x_ng')
    x_g = T.matrix('data_x_g')
    y = T.ivector('data_y')
    av = T.matrix('data_av')
    index = T.lscalar('index')

    # construct model
    model = Logistic(sz_i, av, input=[x_ng, x_g],
                     n_in=[(sz_m,), (sz_k, sz_i)])
    cost = -model.loglikelihood(y)

    # calculate the gradients wrt the loss function
    grads = T.grad(cost=cost, wrt=model.params)
    opt = optimizers.adadelta(model.params, model.masks, momentum)
    updates = opt.updates(model.params, grads, learning_rate)

    # hessian function
    fn_hessian = function(
        inputs=[],
        outputs=T.hessian(cost=cost, wrt=model.params),
        givens={x_ng: data_x_ng, x_g: data_x_g, y: data_y, av: data_av},
        on_unused_input='ignore')

    # null loglikelihood function
    fn_null = function(
        inputs=[],
        outputs=model.loglikelihood(y),
        givens={x_ng: data_x_ng, x_g: data_x_g, y: data_y, av: data_av},
        on_unused_input='ignore')

    # compile the theano estimation function
    fn_estimate = function(
        name='estimate',
        inputs=[index],
        outputs=[model.loglikelihood(y), model.errors(y)],
        updates=updates,
        givens={
            x_ng: data_x_ng[index * sz_minibatch:
                            T.min(((index + 1) * sz_minibatch, sz_n))],
            x_g: data_x_g[index * sz_minibatch:
                          T.min(((index + 1) * sz_minibatch, sz_n))],
            y: data_y[index * sz_minibatch:
                      T.min(((index + 1) * sz_minibatch, sz_n))],
            av: data_av[index * sz_minibatch:
                        T.min(((index + 1) * sz_minibatch, sz_n))]
        },
        allow_input_downcast=True,
        on_unused_input='ignore',
    )

    """ Main estimation process loop """
    print('Begin estimation...')
    epoch = 0
    # process loop parameters
    sz_epoches = 9999
    sz_batches = np.ceil(sz_n / sz_minibatch).astype(np.int32)
    done_looping = False
    patience = 300
    patience_inc = 10
    best_loglikelihood = -np.inf
    null_Loglikelihood = fn_null()
    start_time = timeit.default_timer()
    while epoch < sz_epoches and not done_looping:
        epoch_error = []
        epoch_loglikelihood = []
        for i in range(sz_batches):
            batch_loglikelihood, batch_error = fn_estimate(i)
            epoch_error.append(batch_error)
            epoch_loglikelihood.append(batch_loglikelihood)
        this_loglikelihood = np.sum(epoch_loglikelihood)
        print('@ iteration %d loglikelihood: %.3f' %
              (epoch, this_loglikelihood))
        if this_loglikelihood > best_loglikelihood:
            if this_loglikelihood > 0.997 * best_loglikelihood:
                patience += patience_inc
            best_loglikelihood = this_loglikelihood
            with open('best_model.pkl', 'wb') as f:
                pickle.dump(model, f)
        if epoch > patience:
            done_looping = True
        epoch += 1

    final_Loglikelihood = best_loglikelihood
    rho_square = 1. - (final_Loglikelihood / null_Loglikelihood)
    end_time = timeit.default_timer()

    """ Analytics and model statistics """
    print('... solving Hessians')
    h = np.hstack([np.diagonal(mat) for mat in fn_hessian()])
    n_est_params = np.count_nonzero(h)
    aic = 2 * n_est_params - 2 * final_Loglikelihood
    bic = np.log(sz_n) * n_est_params - 2 * final_Loglikelihood
    print('@iteration %d, run time %.3f ' % (epoch, end_time - start_time))
    print('Null Loglikelihood: %.3f' % null_Loglikelihood)
    print('Final Loglikelihood: %.3f' % final_Loglikelihood)
    print('rho square %.3f' % rho_square)
    print('AIC %.3f' % aic)
    print('BIC %.3f' % bic)
    with open('best_model.pkl', 'rb') as f:
        best_model = pickle.load(f)
    run_analytics(best_model, h)
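# Hedged sketch (assumption): given the Hessian diagonal `h` collected above,
# classical maximum-likelihood statistics follow from the Cramer-Rao bound:
# the variance of each estimate is approximated by the inverse of the
# corresponding second derivative of the negative loglikelihood. This ignores
# off-diagonal curvature; whether run_analytics does the same is not shown.
import numpy as np


def diagonal_standard_errors(h, params):
    # zero curvature marks a fixed (masked) parameter; report NaN for it
    se = np.where(h != 0, 1.0 / np.sqrt(np.abs(h)), np.nan)
    t_stat = params / se  # t-statistics for parameter significance
    return se, t_stat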
batch_size = 100

datasets = ds.load_mnist("../data/mnist.pkl.gz")
train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]

n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

print "[MESSAGE] The data is loaded"

X = T.matrix("data")
y = T.ivector("label")
idx = T.lscalar()
dropout_rate = T.fscalar()

layer_0 = ReLULayer(in_dim=784, out_dim=500)
layer_1 = ReLULayer(in_dim=500, out_dim=200)
layer_2 = SoftmaxLayer(in_dim=200, out_dim=10)
dropout = multi_dropout([(batch_size, 784), (batch_size, 500),
                         (batch_size, 200)], dropout_rate)

# the train-time network applies dropout; the test-time network shares the
# same layers but runs deterministically
model = FeedForward(layers=[layer_0, layer_1, layer_2], dropout=dropout)
model_test = FeedForward(layers=[layer_0, layer_1, layer_2])

out = model.fprop(X)
out_test = model_test.fprop(X)
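# Hedged continuation sketch (assumption, not the original file): with `out`
# and `out_test` defined, train and test functions can be compiled in the
# same style as the other snippets here. The cross-entropy cost, the
# `layer_*.params` attributes, and the SGD step of 0.01 are all placeholders;
# the real script may use a different cost or optimizer.
cost = T.nnet.categorical_crossentropy(out, y).mean()
params = layer_0.params + layer_1.params + layer_2.params
gparams = T.grad(cost, params)
updates = [(p, p - 0.01 * g) for p, g in zip(params, gparams)]

train = theano.function(
    inputs=[idx, dropout_rate],
    outputs=cost,
    updates=updates,
    givens={X: train_set_x[idx * batch_size:(idx + 1) * batch_size],
            y: train_set_y[idx * batch_size:(idx + 1) * batch_size]})

# misclassification rate of the deterministic network on a test minibatch
test = theano.function(
    inputs=[idx],
    outputs=T.mean(T.neq(T.argmax(out_test, axis=1), y)),
    givens={X: test_set_x[idx * batch_size:(idx + 1) * batch_size],
            y: test_set_y[idx * batch_size:(idx + 1) * batch_size]})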
def make_node(self, *args):
    # here `args` must be Theano variables
    return gof.Apply(op=self, inputs=args, outputs=[tensor.lscalar()])
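# Hedged sketch (assumption): `make_node` above is one piece of a custom
# Theano Op. A minimal complete Op pairs it with `perform`, which does the
# numeric work at runtime; this toy example returns the number of elements
# of its input, matching the lscalar output type declared in make_node.
import numpy

import theano
from theano import gof, tensor


class SizeOp(gof.Op):
    __props__ = ()

    def make_node(self, x):
        x = tensor.as_tensor_variable(x)
        return gof.Apply(op=self, inputs=[x], outputs=[tensor.lscalar()])

    def perform(self, node, inputs, output_storage):
        # output_storage holds one single-element list per output
        output_storage[0][0] = numpy.int64(inputs[0].size)


# usage sketch:
#     m = tensor.dmatrix('m')
#     f = theano.function([m], SizeOp()(m))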
def test_mlp(datasets, learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
             n_epochs=1000, batch_size=20, n_hidden=500):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
                   regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
                   regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer
    """
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    # compute number of minibatches for training and validation; the +1
    # assumes the set sizes are not exact multiples of batch_size (otherwise
    # the last slice is empty)
    n_train_batches = \
        train_set_x.get_value(borrow=True).shape[0] // batch_size + 1
    n_valid_batches = \
        valid_set_x.get_value(borrow=True).shape[0] // batch_size + 1

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of
                         # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    dim = train_set_x.get_value(borrow=True).shape[1]
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=dim,
        n_hidden=n_hidden,
        n_out=2
    )

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )
    # end-snippet-4

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.y_pred,
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size]
        }
    )

    # start-snippet-5
    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of the same size, where
    # each element is a pair formed from the two lists:
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost and
    # at the same time updates the parameters of the model based on the
    # rules defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    best_fscore = 0
    start_time = time.clock()
    epoch = 0
    while epoch < n_epochs:
        epoch += 1
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
        # compute f-score on validation set
        y_preds = [validate_model(i) for i in range(n_valid_batches)]
        y_pred = [pij for pi in y_preds for pij in pi]
        y_real = valid_set_y.get_value(borrow=True)
        fscore = f_score(y_real, y_pred)
        print('epoch {0:d}, fscore {1:f} %'.format(epoch, fscore * 100.))
        # if we got the best validation score until now
        if fscore > best_fscore:
            best_fscore = fscore
            print('-----Best score: {0:f}-----'.format(best_fscore))
    end_time = time.clock()
    print('Optimization complete with best validation score of {0:.1f} %.'
          .format(best_fscore * 100.))
    print('The code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.))
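# Hedged sketch (assumption): `f_score` is not defined in this snippet. A
# conventional binary F1 implementation compatible with the call above, with
# class 1 taken as the positive class; the slice on y_pred absorbs any extra
# predictions produced by the padded final minibatch.
import numpy


def f_score(y_real, y_pred, positive=1):
    y_real = numpy.asarray(y_real)
    y_pred = numpy.asarray(y_pred)[:len(y_real)]
    tp = numpy.sum((y_pred == positive) & (y_real == positive))
    fp = numpy.sum((y_pred == positive) & (y_real != positive))
    fn = numpy.sum((y_pred != positive) & (y_real == positive))
    if tp == 0:
        return 0.0
    precision = float(tp) / (tp + fp)
    recall = float(tp) / (tp + fn)
    return 2 * precision * recall / (precision + recall)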
# predictions path
predictions_dir = utils.get_dir_path('model-predictions',
                                     pathfinder.METADATA_PATH)
outputs_path = predictions_dir + '/%s' % config_name
utils.auto_make_dir(outputs_path)

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % config_name)
sys.stderr = sys.stdout

# builds model and sets its parameters
model = config().build_model()

x_shared = nn.utils.shared_empty(dim=len(model.l_in.shape))
idx_z = T.lscalar('idx_z')
idx_y = T.lscalar('idx_y')
idx_x = T.lscalar('idx_x')

window_size = config().window_size
stride = config().stride
n_windows = config().n_windows

givens = {}
givens[model.l_in.input_var] = x_shared

get_predictions_patch = theano.function(
    [],
    nn.layers.get_output(model.l_out, deterministic=True),
    givens=givens,
    on_unused_input='ignore')
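# Hedged usage sketch (assumption): the idx_* scalars, window_size, stride
# and n_windows above suggest patch-wise prediction over a 3D volume. One
# plausible way to drive `get_predictions_patch` is to copy each window into
# `x_shared` and collect the outputs; the real pipeline's slicing and shapes
# may well differ.
def predict_windows(volume):
    # volume: float32 array laid out like model.l_in minus the batch axis
    preds = []
    for iz in xrange(n_windows):
        for iy in xrange(n_windows):
            for ix in xrange(n_windows):
                z, y, x = iz * stride, iy * stride, ix * stride
                patch = volume[z:z + window_size,
                               y:y + window_size,
                               x:x + window_size]
                # add batch and channel axes to match l_in's shape
                x_shared.set_value(patch[None, None, ...].astype('float32'))
                preds.append(get_predictions_patch())
    return preds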
# building on top of logistic regression
classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

# cost function with L2 regularization of lambda = 0.01
cost = (classifier.negative_log_likelihood(y) + L2_reg * classifier.L2_sqr)

# taking the cost function to evaluate the gradient
g_W = T.grad(cost=cost, wrt=classifier.W)
g_b = T.grad(cost=cost, wrt=classifier.b)

# updating the weights and bias vector
updates = [(classifier.W, classifier.W - learning_rate * g_W),
           (classifier.b, classifier.b - learning_rate * g_b)]

# creating a training function that computes the cost and updates the
# parameters of the model based on the rules
index = T.lscalar()
train_model = theano.function(
    inputs=[index],
    outputs=cost,
    updates=updates,
    givens={
        x: train_set_x[index * batch_size:(index + 1) * batch_size],
        y: train_set_y[index * batch_size:(index + 1) * batch_size]
    })
validate_model = theano.function(
    inputs=[index],
    outputs=classifier.errors(y),
    givens={
        x: valid_set_x[index * batch_size:(index + 1) * batch_size],
        y: valid_set_y[index * batch_size:(index + 1) * batch_size]
    })
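# Hedged aside (assumption): `classifier.L2_sqr` is not defined in this
# fragment. In the usual formulation it is the squared L2 norm of the weight
# matrix, built once in LogisticRegression.__init__ alongside W and b:
#
#     self.L2_sqr = (self.W ** 2).sum()
#
# so that the regularized cost above reads NLL + L2_reg * ||W||^2.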
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute the number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    #########
    # MODEL #
    #########
    print '... building the model'
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    rng = numpy.random.RandomState(1234)

    # MLP instance
    classifier = MLP(rng=rng, input=x, n_in=28 * 28,
                     n_hidden=n_hidden, n_out=10)

    # loss function
    cost = classifier.negative_log_likelihood(y) \
        + L1_reg * classifier.L1 \
        + L2_reg * classifier.L2_sqr

    # functions computing the errors the model makes on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # gradient of the loss with respect to theta and parameter updates
    gparams = []
    for param in classifier.params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)
    updates = []
    for param, gparam in zip(classifier.params, gparams):
        updates.append((param, param - learning_rate * gparam))

    # train_model returns the loss AND updates the parameters
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000
    patience_increase = 2
    # only an improvement greater than this threshold is considered
    # significant
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience / 2)
    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()
    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on the validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                # if this is the best score so far
                if this_validation_loss < best_validation_loss:
                    # increase patience if the loss improvement is
                    # significant
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    # evaluate on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
            if patience <= iter:
                done_looping = True
                break
    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
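# Hedged aside (assumption): the `shape[0] / batch_size` counts used by
# several snippets above silently drop the last partial minibatch whenever
# the dataset size is not a multiple of batch_size. A ceil-based count keeps
# every example; the compiled functions then simply see a smaller final
# slice.
import numpy as np


def n_batches(n_examples, batch_size):
    # e.g. n_batches(50000, 600) -> 84, versus 50000 / 600 -> 83 in Python 2
    return int(np.ceil(float(n_examples) / batch_size))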
def train_conv_net(use_test, perf_or_predict, datasets, U, img_w=300,
                   filter_hs=[3, 4, 5], hidden_units=[100, 3],
                   dropout_rate=[0.5], shuffle_batch=True, n_epochs=25,
                   batch_size=50, lr_decay=0.95, conv_non_linear="relu",
                   activations=[Iden], sqr_norm_lim=9, non_static=True):
    """
    Train a simple conv net
    img_h = sentence length (padded where necessary)
    img_w = word vector length (300 for word2vec)
    filter_hs = filter window sizes
    hidden_units = [x, y] where x is the number of feature maps (per filter
        window) and y is the penultimate layer
    sqr_norm_lim = s^2 in the paper
    lr_decay = adadelta decay parameter
    """
    rng = np.random.RandomState(3435)
    img_h = len(datasets[0][0]) - 1
    filter_w = img_w
    feature_maps = hidden_units[0]
    filter_shapes = []
    pool_sizes = []
    test_real_size = datasets[1].shape[0]
    test_vote_array = np.zeros((datasets[1].shape[0], 10))
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))
    parameters = [("image shape", img_h, img_w),
                  ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units),
                  ("dropout", dropout_rate),
                  ("batch_size", batch_size),
                  ("non_static", non_static),
                  ("learn_decay", lr_decay),
                  ("conv_non_linear", conv_non_linear),
                  ("sqr_norm_lim", sqr_norm_lim),
                  ("shuffle_batch", shuffle_batch)]
    print parameters

    # define model architecture
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    Words = theano.shared(value=U, name="Words")
    zero_vec_tensor = T.vector()
    zero_vec = np.zeros(img_w)
    # keep the padding word's vector at zero after every update
    set_zero = theano.function(
        [zero_vec_tensor],
        updates=[(Words, T.set_subtensor(Words[0, :], zero_vec_tensor))])
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))
    conv_layers = []
    layer1_inputs = []
    for i in xrange(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(
            rng,
            input=layer0_input,
            image_shape=(batch_size, 1, img_h, img_w),
            filter_shape=filter_shape,
            poolsize=pool_size,
            non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_hs)
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)

    # define parameters of the model and update functions using adadelta
    params = classifier.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        # if word vectors are allowed to change, add them as model parameters
        params += [Words]
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay,
                                        1e-6, sqr_norm_lim)

    # shuffle dataset and assign to mini batches; if the dataset size is not
    # a multiple of the batch size, replicate extra data (at random)
    np.random.seed()
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        train_set = np.random.permutation(datasets[0])
        extra_data = train_set[:extra_data_num]
        new_data = np.append(datasets[0], extra_data, axis=0)
    else:
        new_data = datasets[0]
    if use_test == 1 and datasets[1].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[1].shape[0] % batch_size
        extra_data = datasets[1][:extra_data_num]
        datasets[1] = np.append(datasets[1], extra_data, axis=0)
    new_data = np.random.permutation(new_data)
    n_batches = new_data.shape[0] / batch_size
    n_train_batches = int(np.round(n_batches * 0.9))
    if use_test == 1:
        n_test_batches = int(np.round(datasets[1].shape[0] / batch_size))

    # divide train set into train/val sets
    test_set_x_4check = datasets[1][:, :img_h]
    test_set_y_4check = np.asarray(datasets[1][:, -1], "int32")
    train_set = new_data[:n_train_batches * batch_size, :]
    val_set = new_data[n_train_batches * batch_size:, :]
    train_set_x, train_set_y = shared_dataset(
        (train_set[:, :img_h], train_set[:, -1]))
    val_set_x, val_set_y = shared_dataset(
        (val_set[:, :img_h], val_set[:, -1]))
    test_set_x, test_set_y = shared_dataset(
        (datasets[1][:, :img_h], datasets[1][:, -1]))
    n_val_batches = n_batches - n_train_batches

    # compile theano functions to get train/val/test errors
    val_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: val_set_x[index * batch_size:(index + 1) * batch_size],
            y: val_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # despite its name, this reports error on the training set
    test_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    train_model = theano.function(
        [index],
        cost,
        updates=grad_updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    get_test_label = theano.function(
        [index],
        classifier.testlabel(),
        givens={x: test_set_x[index * batch_size:(index + 1) * batch_size]})

    test_pred_layers = []
    if use_test == 1:
        test_size = batch_size
    else:
        test_size = datasets[1].shape[0]
    test_layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (test_size, 1, img_h, Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_y_pred = classifier.predict(test_layer1_input)
    test_error = T.mean(T.neq(test_y_pred, y))
    test_model_all = theano.function([x, y], test_error)
    get_test_result = theano.function([x], test_y_pred)

    # start training over mini-batches
    print '... training'
    epoch = 0
    best_test_perf = 0
    final_test_perf = 0
    predict_vector = []
    val_perf = 0
    test_perf = 0
    cost_epoch = 0
    while epoch < n_epochs:
        epoch = epoch + 1
        if shuffle_batch:
            for minibatch_index in np.random.permutation(
                    range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        else:
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        train_losses = [test_model(i) for i in xrange(n_train_batches)]
        train_perf = 1 - np.mean(train_losses)
        val_losses = [val_model(i) for i in xrange(n_val_batches)]
        val_perf = 1 - np.mean(val_losses)
        print('epoch %i, train perf %f %%, val perf %f %%' %
              (epoch, train_perf * 100., val_perf * 100.))
        if epoch >= 6 and epoch % 2 == 0:
            if use_test == 1:
                test_result = []
                for minibatch_index in xrange(n_test_batches):
                    test_result_tmp = get_test_label(minibatch_index)
                    test_result_tmp = np.array(test_result_tmp)
                    test_result.append(test_result_tmp)
                test_result = np.array(test_result)
                test_result = test_result.reshape(
                    (n_test_batches * batch_size, 1))
                for i in range(test_real_size):
                    test_vote_array[i][test_result[i]] += 1
                sum_4_test = 0
                for i in range(test_real_size):
                    if test_result[i] == test_set_y_4check[i]:
                        sum_4_test += 1
                test_perf = float(sum_4_test) / test_real_size
                if test_perf > best_test_perf:
                    best_test_perf = test_perf
                print("test_perf: " + str(test_perf))
            if use_test == 0:
                test_result = get_test_result(test_set_x_4check)
                test_result = np.array(test_result)
                for i in range(test_real_size):
                    test_vote_array[i][test_result[i]] += 1
                sum_4_test = 0
                for i in range(test_real_size):
                    if test_result[i] == test_set_y_4check[i]:
                        sum_4_test += 1
                test_perf = float(sum_4_test) / test_real_size
                if test_perf > best_test_perf:
                    best_test_perf = test_perf
                print("test_perf: " + str(test_perf))
        if epoch == n_epochs:
            if perf_or_predict == 0:
                final_test_perf = vote_for_answer(
                    test_vote_array, test_set_y_4check, perf_or_predict)
                return final_test_perf
            if perf_or_predict == 1:
                predict_vector = vote_for_answer(
                    test_vote_array, test_set_y_4check, perf_or_predict)
                return predict_vector
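# Hedged sketch (assumption): `vote_for_answer` is not defined in this
# snippet. From its call sites, it plausibly turns the accumulated per-epoch
# votes into either an ensemble accuracy (perf_or_predict == 0) or the
# majority-vote label vector (perf_or_predict == 1):
import numpy as np


def vote_for_answer(vote_array, y_true, perf_or_predict):
    # majority-voted label per example across the recorded epochs
    y_vote = np.argmax(vote_array, axis=1)
    if perf_or_predict == 0:
        return float(np.sum(y_vote == y_true)) / len(y_true)
    return y_vote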
def train_FSRCNN(train_set_x, train_set_y, valid_set_x, valid_set_y,
                 test_set_x, test_set_y, n_train_batches, n_valid_batches,
                 n_test_batches, n_epochs, batch_size, lr,
                 upsampling_factor=4):
    # assume x to be shape (batch_size, 3, 33, 33)
    x = T.matrix('x')
    y = T.matrix('y')
    theano.config.optimizer = 'fast_compile'
    rng = np.random.RandomState(11111)
    index = T.lscalar()
    reshaped_input = x.reshape((batch_size, 3, 8, 8))
    reshaped_gt = y.reshape((batch_size, 3, 33, 33))
    learning_rate = theano.shared(np.cast[theano.config.floatX](lr))

    # the bilinear upsampling layer is now done in preprocessing to save
    # compute

    # filter params
    f1 = 9
    f2 = 5
    f3 = 10
    input_image_size = 8
    output_len = input_image_size + f3 - 1

    # conv for patch extraction
    conv1 = Conv_Layer_ReLU(
        rng, reshaped_input,
        image_shape=(batch_size, 3, input_image_size, input_image_size),
        filter_shape=(64, 3, f1, f1))
    conv1_len = input_image_size
    # conv for non-linear mapping
    conv2 = Conv_Layer_ReLU(
        rng, conv1.output,
        image_shape=(batch_size, 64, conv1_len, conv1_len),
        filter_shape=(32, 64, f2, f2))
    conv2_len = conv1_len
    # deconv for reconstruction
    conv3 = De_Conv_Layer_ReLU(
        rng, conv2.output,
        image_shape=(batch_size, 32, conv2_len, conv2_len),
        filter_shape=(3, 32, f3, f3))
    model_output = conv3.output

    # grab the center pixels of the ground truth to match the output size
    center_start = (33 - output_len) / 2
    center_end = 33 - center_start
    sub_y = reshaped_gt[:, :, center_start:center_end,
                        center_start:center_end]

    # MSE between center pixels of prediction and ground truth
    cost = T.mean((sub_y - model_output) ** 2)
    cost2 = 1.0 / batch_size * T.sum((sub_y - model_output) ** 2)
    # PSNR of a patch is based on color-space MSE
    MSE_per_pixel = cost2 / (output_len * output_len * 3)
    psnr = 20 * T.log10(255) - 10 * T.log10(MSE_per_pixel)
    reconstructed_imgs = model_output

    params = conv3.params + conv2.params + conv1.params

    # ADAM optimizer
    beta1 = theano.shared(np.cast[theano.config.floatX](0.9), name='beta1')
    beta2 = theano.shared(np.cast[theano.config.floatX](0.999), name='beta2')
    eps = theano.shared(np.cast[theano.config.floatX](1e-8), name='eps')
    updates = []
    for param in params:
        m = theano.shared(param.get_value() *
                          np.cast[theano.config.floatX](0.))
        v = theano.shared(param.get_value() *
                          np.cast[theano.config.floatX](0.))
        grad = T.grad(cost, param)
        new_m = beta1 * m + \
            (np.cast[theano.config.floatX](1.) - beta1) * grad
        new_v = beta2 * v + \
            (np.cast[theano.config.floatX](1.) - beta2) * T.sqr(grad)
        updates.append((m, new_m))
        updates.append((v, new_v))
        updates.append((param,
                        param - learning_rate * new_m /
                        (T.sqrt(new_v) + eps)))

    # theano function compilation
    test_model = theano.function(
        [index],
        [cost, MSE_per_pixel, psnr, reconstructed_imgs],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        })
    validate_model = theano.function(
        [index],
        [cost, MSE_per_pixel, psnr, reconstructed_imgs],
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        })
    train_model = theano.function(
        [index],
        [cost, MSE_per_pixel, psnr],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        })
    decay_learning_rate_function = theano.function(
        [], learning_rate,
        updates=[(learning_rate, learning_rate * .995)])

    train_nn(train_model, validate_model, test_model, n_train_batches,
             n_valid_batches, n_test_batches, n_epochs, output_len,
             decay_learning_rate_function, verbose=True)
    return validate_model, test_model
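# Hedged aside (assumption): a numpy mirror of the symbolic PSNR above,
# useful for sanity-checking reported values outside the graph. It assumes
# pixel values in [0, 255], matching the 20*log10(255) term.
import numpy as np


def psnr_from_patches(pred, gt):
    # mean squared error per pixel across the whole batch of patches
    mse_per_pixel = np.mean((gt - pred) ** 2)
    return 20 * np.log10(255.) - 10 * np.log10(mse_per_pixel)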