def load_cls_for_simclr(cfg):
    simCLR, _, _ = load_simclr(cfg)
    logit = LogisticRegression(simCLR, cfg.cls.dataset.n_classes)
    if cfg.cls.load:
        model_fp = os.path.join(
            cfg.cls.model_path, "checkpoint_{}.tar".format(cfg.cls.epoch_num)
        )
        logit.load_state_dict(torch.load(model_fp, map_location=cfg.cls.device.type))
    cfg_adam = cfg.cls.optim.adam
    # optimize the classifier head itself (`model` was undefined in this scope)
    optimizer = torch.optim.Adam(logit.parameters(), lr=cfg_adam.lr)  # TODO: LARS
    scheduler = None
    return logit, optimizer, scheduler
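# The LogisticRegression head used in load_cls_for_simclr is defined elsewhere in
# that codebase and is not shown here. As a rough sketch only (assuming the head is
# a single linear probe mapping frozen SimCLR features to class logits; the name
# LinearProbe and the n_features argument are hypothetical):
import torch.nn as nn

class LinearProbe(nn.Module):
    def __init__(self, n_features, n_classes):
        super(LinearProbe, self).__init__()
        self.fc = nn.Linear(n_features, n_classes)

    def forward(self, features):
        # features: (batch, n_features) produced by the frozen encoder
        return self.fc(features)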
def __init__(self, K, node_num, nfeat, nhid, nclass, sampleSize, dropout, trainAttention):
    super(GAT, self).__init__()
    self.gc1 = GraphConvolution(K, node_num, nfeat, nhid, sampleSize[1], 'False', 'True', trainAttention)
    self.gc2 = GraphConvolution(1, node_num, K * nhid, 14 * nclass, sampleSize[0], 'False', 'False', trainAttention)
    #self.gc3 = GraphConvolution(1, node_num, 4*7*nclass, 7*nclass, 'False', 'False')
    self.gc6 = LogisticRegression(14 * nclass, 1)
    self.dropout = dropout
def __init__(self, numpy_rng, theano_rng=None, y=None, alpha=0.9, sample_rate=0.1,
             n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10,
             corruption_levels=[0.1, 0.1], allX=None, allY=None, srng=None):
    self.sigmoid_layers = []
    self.sugar_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)
    self.allXs = []
    if y is None:
        self.y = tensor.ivector(name='y')
    else:
        self.y = y
    assert self.n_layers > 0
    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    # symbolic input; note: the original re-declared self.x and self.y here, which
    # silently clobbered a user-supplied y, so the duplicates are dropped
    self.x = tensor.matrix('x')
    for i in xrange(self.n_layers):
        if i == 0:
            input_size = n_ins
            layer_input = self.x
            self.allXs.append(allX)
        else:
            input_size = hidden_layers_sizes[i - 1]
            layer_input = self.sigmoid_layers[-1].output
            self.allXs.append(tensor.dot(self.allXs[i - 1], self.sigmoid_layers[-1].W) + self.sigmoid_layers[-1].b)
        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=tensor.nnet.sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)
        sugar_layer = sugar(numpy_rng=numpy_rng,
                            alpha=alpha,
                            sample_rate=sample_rate,
                            x=layer_input,
                            y=self.y,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            bhid=sigmoid_layer.b,
                            allX=self.allXs[i],
                            allY=allY,
                            srng=srng)
        self.sugar_layers.append(sugar_layer)
    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs)
    self.params.extend(self.logLayer.params)
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)
def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
    lr = params["lr"]
    batch_size = params["batch_size"]
    n_output = params['n_output']
    corruption_level = params["corruption_level"]

    X = T.matrix(name="input", dtype=dtype)   # batch of sequences of vectors
    Y = T.matrix(name="output", dtype=dtype)  # batch of sequences of vectors
    is_train = T.iscalar('is_train')          # pseudo boolean for switching between training and prediction

    # input corruption: Gaussian noise applied to a random subset of 3-element groups of the input
    bin_noise = rng.binomial(size=(batch_size, n_output / 3, 1), n=1,
                             p=1 - corruption_level, dtype=theano.config.floatX)
    #bin_noise_3d = T.reshape(T.concatenate((bin_noise, bin_noise, bin_noise), axis=1), (batch_size, n_output/3, 3))
    bin_noise_3d = T.concatenate((bin_noise, bin_noise, bin_noise), axis=2)
    noise = rng.normal(size=(batch_size, n_output), std=0.03, avg=0.0, dtype=theano.config.floatX)
    noise_bin = T.reshape(noise, (batch_size, n_output / 3, 3)) * bin_noise_3d
    X_train = T.reshape(noise_bin, (batch_size, n_output)) + X
    X_tilde = T.switch(T.neq(is_train, 0), X_train, X)

    # tied-weight encoder / decoder
    W_1_e = u.init_weight(shape=(n_output, 1024), rng=rng, name="w_hid", sample="glorot")
    b_1_e = u.init_bias(1024, rng)
    W_2_e = u.init_weight(shape=(1024, 2048), rng=rng, name="w_hid", sample="glorot")
    b_2_e = u.init_bias(2048, rng)
    W_2_d = W_2_e.T
    b_2_d = u.init_bias(1024, rng)
    W_1_d = W_1_e.T
    b_1_d = u.init_bias(n_output, rng)

    h_1_e = HiddenLayer(rng, X_tilde, 0, 0, W=W_1_e, b=b_1_e, activation=nn.relu)
    h_2_e = HiddenLayer(rng, h_1_e.output, 0, 0, W=W_2_e, b=b_2_e, activation=nn.relu)
    h_2_d = HiddenLayer(rng, h_2_e.output, 0, 0, W=W_2_d, b=b_2_d, activation=u.do_nothing)
    h_1_d = LogisticRegression(rng, h_2_d.output, 0, 0, W=W_1_d, b=b_1_d)

    self.output = h_1_d.y_pred
    self.params = h_1_e.params + h_2_e.params
    self.params.append(b_2_d)
    self.params.append(b_1_d)

    cost = get_err_fn(self, cost_function, Y)
    L2_reg = 0.0001
    L2_sqr = theano.shared(0.)
    for param in self.params:
        L2_sqr += (T.sum(param[0] ** 2) + T.sum(param[1] ** 2))
    cost += L2_reg * L2_sqr

    _optimizer = optimizer(cost, self.params, lr=lr)
    self.train = theano.function(inputs=[X, Y, is_train], outputs=cost,
                                 updates=_optimizer.getUpdates(), allow_input_downcast=True)
    self.predictions = theano.function(inputs=[X, is_train], outputs=self.output,
                                       allow_input_downcast=True)
    self.mid_layer = theano.function(inputs=[X, is_train], outputs=h_2_e.output,
                                     allow_input_downcast=True)
    self.n_param = count_params(self.params)
def __init__(self, nkerns=[48], miniBatchSize=200):
    rng = numpy.random.RandomState(23455)
    nClasses = 2
    nMaxPool = 2
    nHidden = 200
    self.p = 65
    #self.x = T.tensor3('x')  # membrane data set
    self.x = T.tensor4('x')   # membrane mini-batch
    self.y = T.ivector('y')   # 1D vector of [int] labels
    self.miniBatchSize = miniBatchSize

    # Reshape matrix of rasterized images to a 4D tensor,
    # compatible with the LeNetConvPoolLayer
    #layer0_input = self.x.reshape((self.miniBatchSize, 1, self.p, self.p))
    layer0_input = self.x

    #--------------------------------------------------
    # LAYER 0
    # layer0 convolution + max pool reduces image dimensions by:
    # 65 -> 62 -> 31
    #--------------------------------------------------
    fs0 = 4                              # filter size, layer 0
    os0 = (self.p - fs0 + 1) / nMaxPool  # image out size 0
    assert (os0 == 31)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(self.miniBatchSize, 1, self.p, self.p),
                                filter_shape=(nkerns[0], 1, fs0, fs0),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 1
    # Fully connected sigmoidal layer, goes from X -> 200
    #--------------------------------------------------
    layer1_input = layer0.output.flatten(2)
    layer1 = HiddenLayer(rng, input=layer1_input,
                         n_in=nkerns[0] * os0 * os0,
                         n_out=nHidden,
                         activation=T.tanh)

    #--------------------------------------------------
    # LAYER 2
    # Classification via a logistic regression layer: 200 -> 2
    #--------------------------------------------------
    # classify the values of the fully-connected sigmoidal layer
    layer2 = LogisticRegression(input=layer1.output, n_in=nHidden, n_out=nClasses)

    self.layers = (layer0, layer1, layer2)
def fit_logistic(image_size=(28, 28),
                 datasets='../data/mnist.pkl.gz',
                 outpath='../output/mnist_logistic_regression.params',
                 learning_rate=0.13, n_epochs=1000, batch_size=600,
                 patience=5000, patience_increase=2,
                 improvement_threshold=0.995):
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    classifier = LogisticRegression(
        input=x,
        n_in=reduce(np.multiply, image_size),
        n_out=10
    )
    cost = classifier.negative_log_likelihood(y)
    learner = SupervisedMSGD(
        index,
        x,
        y,
        batch_size,
        learning_rate,
        load_data(datasets),
        outpath,
        classifier,
        cost
    )
    best_validation_loss, best_iter, epoch, elapsed_time = learner.fit(
        n_epochs=n_epochs,
        patience=patience,
        patience_increase=patience_increase,
        improvement_threshold=improvement_threshold
    )
    display_results(best_validation_loss, elapsed_time, epoch)
    return learner
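# The negative_log_likelihood(y) cost used above is, in the usual Theano-tutorial
# formulation, the mean negative log-probability assigned to the correct class.
# A standalone numpy sketch of that quantity (illustration only, not the actual
# class method):
import numpy as np

def negative_log_likelihood(p_y_given_x, y):
    """Mean -log P(y | x) over a minibatch.

    p_y_given_x: (n_samples, n_classes) softmax outputs
    y:           (n_samples,) integer labels
    """
    n = y.shape[0]
    return -np.mean(np.log(p_y_given_x[np.arange(n), y]))

# Example: two samples, three classes
p = np.array([[0.7, 0.2, 0.1],
              [0.1, 0.8, 0.1]])
print(negative_log_likelihood(p, np.array([0, 1])))  # ~0.29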
def cifar_fast_net(batch_size=128,n_epochs=300,test_frequency=13, learning_rate=0.001): rng1 = numpy.random.RandomState(23455) rng2 = numpy.random.RandomState(12423) rng3 = numpy.random.RandomState(23245) rng4 = numpy.random.RandomState(12123) rng5 = numpy.random.RandomState(25365) rng6 = numpy.random.RandomState(15323) train_set_x, train_set_y = load_cifar_data(['data_batch_1','data_batch_2','data_batch_3','data_batch_4']) valid_set_x, valid_set_y = load_cifar_data(['data_batch_5'],WHICHSET='valid') test_set_x, test_set_y = load_cifar_data(['test_batch'],WHICHSET='test') n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size n_test_batches /= batch_size index = T.lscalar() x = T.matrix('x') y = T.ivector('y') img_input = x.reshape((batch_size,3,32,32)) img_input = img_input.dimshuffle(1,2,3,0) ####define the layers: conv_pool1 = LeNetConvPoolLayer(rng=rng1,input=img_input, filter_shape=(3,5,5,32), image_shape=(3,32,32,batch_size), activation='vshape', poolsize=(3,3),poolstride=2,pad=2, convstride=1,initW=0.0001,initB=0,partial_sum=4, pooling='max', epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.004 ) conv_pool2 = LeNetConvPoolLayer(rng=rng2,input=conv_pool1.output, filter_shape=(32,5,5,32), image_shape=(32,16,16,batch_size), activation='vshape', poolsize=(3,3),poolstride=2,pad=2, convstride=1,initW=0.01,initB=0,partial_sum=4, pooling='average', epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.004) conv_pool3 = LeNetConvPoolLayer(rng=rng3,input=conv_pool2.output, filter_shape=(32,5,5,64), image_shape=(32,8,8,batch_size), activation='vshape', poolsize=(3,3),poolstride=2,pad=2, convstride=1,initW=0.01,initB=0,partial_sum=4, pooling='average', epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.004) layer4_input = conv_pool3.output.dimshuffle(3,0,1,2).flatten(2) #fc_64 = HiddenLayer(rng=rng4,input=layer4_input,n_in=64*4*4,n_out=64,initW=0.1,initB=0) fc_64 = HiddenLayer(rng=rng4,input=layer4_input,n_in=64*4*4,n_out=64,initW=0.1,initB=0, epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.03) fc_10 = LogisticRegression(input=fc_64.output,rng=rng5,n_in=64,n_out=10,initW=0.1, epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.03) ####build the models: cost = fc_10.negative_log_likelihood(y) test_model = theano.function([index], fc_10.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size]}) validate_model = theano.function([index], fc_10.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size]}) Ws = [conv_pool1.W, conv_pool2.W, conv_pool3.W, fc_64.W, fc_10.W] pgradWs = [conv_pool1.grad_W, conv_pool2.grad_W, conv_pool3.grad_W, fc_64.grad_W, fc_10.grad_W] bs = [conv_pool1.b, conv_pool2.b, conv_pool3.b, fc_64.b, fc_10.b] pgradbs = [conv_pool1.grad_b, conv_pool2.grad_b, conv_pool3.grad_b, fc_64.grad_b, fc_10.grad_b] momWs = [conv_pool1.momW, conv_pool2.momW, conv_pool3.momW, fc_64.momW, fc_10.momW] momBs = [conv_pool1.momB, conv_pool2.momB, conv_pool3.momB, fc_64.momB, fc_10.momB] wcs = [conv_pool1.wc, conv_pool2.wc, conv_pool3.wc, fc_64.wc, fc_10.wc] epsWs = [conv_pool1.epsW, conv_pool2.epsW, conv_pool3.epsW, fc_64.epsW, fc_10.epsW] epsBs = [conv_pool1.epsB, conv_pool2.epsB, conv_pool3.epsB, fc_64.epsB, fc_10.epsB] gradWs = T.grad(cost, Ws) gradbs = 
T.grad(cost, bs) updates = [] for W_i, gradW_i, momW_i, wc_i, epsW_i, pgW_i in zip(Ws,gradWs,momWs,wcs, epsWs,pgradWs): grad_i = - epsW_i*gradW_i - wc_i*epsW_i*W_i + momW_i*pgW_i updates.append((W_i, W_i+grad_i)) updates.append((pgW_i,grad_i)) for b_i, gradb_i, momB_i, epsB_i, pgB_i in zip(bs,gradbs,momBs, epsBs,pgradbs): grad_i = - epsB_i*gradb_i + momB_i*pgB_i updates.append((b_i, b_i+grad_i)) updates.append((pgB_i,grad_i)) train_model = theano.function([index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size]}) ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf best_iter = 0 test_score = 0. start_time = time.clock() epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 #below is the code for reduce learning_rate ########################################### if epoch == 50: epsWs = [k/10.0 for k in epsWs] epsBs = [k/10.0 for k in epsBs] print 'reduce eps by a factor of 10' updates = [] for W_i, gradW_i, momW_i, wc_i, epsW_i, pgW_i in zip(Ws,gradWs,momWs,wcs, epsWs,pgradWs): grad_i = - epsW_i*gradW_i - wc_i*epsW_i*W_i + momW_i*pgW_i updates.append((W_i, W_i+grad_i)) updates.append((pgW_i,grad_i)) for b_i, gradb_i, momB_i, epsB_i, pgB_i in zip(bs,gradbs,momBs, epsBs,pgradbs): grad_i = - epsB_i*gradb_i + momB_i*pgB_i updates.append((b_i, b_i+grad_i)) updates.append((pgB_i,grad_i)) train_model = theano.function([index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size]}) ############################################## for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print 'training @ iter = ', iter cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % \ (epoch, minibatch_index + 1, n_train_batches, \ this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter conv_pool1.bestW = conv_pool1.W.get_value().copy() conv_pool1.bestB = conv_pool1.b.get_value().copy() conv_pool2.bestW = conv_pool2.W.get_value().copy() conv_pool2.bestB = conv_pool2.b.get_value().copy() conv_pool3.bestW = conv_pool3.W.get_value().copy() conv_pool3.bestB = conv_pool3.b.get_value().copy() fc_64.bestW = fc_64.W.get_value().copy() fc_64.bestB = fc_64.b.get_value().copy() fc_10.bestW = 
fc_10.W.get_value().copy() fc_10.bestB = fc_10.b.get_value().copy() ##saving current best print 'saving current best params..' current_params = (conv_pool1.bestW,conv_pool1.bestB,conv_pool2.bestW, conv_pool2.bestB,conv_pool3.bestW,conv_pool3.bestB,fc_64.bestW,fc_64.bestB, fc_10.bestW,fc_10.bestB,momWs,momBs,epsWs,epsBs,wcs) outfile = file('current_best_params.pkl','wb') cPickle.dump(current_params,outfile) outfile.close() # test it on the test set test_losses = [test_model(i) for i in xrange(n_test_batches)] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of best ' 'model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = time.clock() print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i,'\ 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
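# The parameter updates assembled in the training code above follow a
# cuda-convnet-style rule: a step of size epsW down the gradient, plus weight
# decay wc and momentum momW applied to the previous step. A standalone numpy
# sketch of one such step (illustration only; the names mirror the variables
# used above):
import numpy as np

def momentum_weight_decay_step(W, gradW, prev_step, epsW=0.001, momW=0.9, wc=0.004):
    # grad_i = -epsW * dcost/dW - wc * epsW * W + momW * previous step
    step = -epsW * gradW - wc * epsW * W + momW * prev_step
    return W + step, step  # updated weights and the step kept for momentum

W = np.zeros((3, 3))
prev = np.zeros_like(W)
for _ in range(5):
    gradW = 2.0 * W - 1.0  # toy gradient of a quadratic cost
    W, prev = momentum_weight_decay_step(W, gradW, prev)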
                               poolsize=(1, kmaxEntities))
layers.append(cnnEntities)
hidden_in = 2 * (2 * nkernsContext * kmaxContext + nkernsEntities * kmaxEntities)
hiddenLayer = HiddenLayer(rng=rng, n_in=hidden_in, n_out=hiddenUnits)
layers.append(hiddenLayer)
hiddenLayerET = HiddenLayer(rng=rng,
                            n_in=2 * nkernsContext * kmaxContext + nkernsEntities * kmaxEntities,
                            n_out=hiddenUnitsET)
layers.append(hiddenLayerET)
randomInit = False
if doCRF:
    randomInit = True
outputLayer = LogisticRegression(n_in=hiddenUnits, n_out=numClasses, rng=rng, randomInit=randomInit)
layers.append(outputLayer)
outputLayerET = LogisticRegression(n_in=hiddenUnitsET, n_out=numClassesET, rng=rng, randomInit=randomInit)
layers.append(outputLayerET)
if doCRF:
    crfLayer = CRF(numClasses=numClasses + numClassesET, rng=rng,
                   batchsizeVar=batchsizeVar, sequenceLength=3)
    layers.append(crfLayer)
x1_resh = x1.reshape((batchsizeVar * numPerBag, contextsize))
def build_model(self, flag_preserve_params=False):
    logging.info('... building the model')

    # allocate symbolic variables for the data
    self.index = T.lscalar()  # index to a [mini]batch
    self.x = T.matrix('x')    # the data is presented as rasterized images
    self.y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    # Since we are dealing with a one-hidden-layer MLP, this will translate
    # into a HiddenLayer with a tanh activation function connected to the
    # LogisticRegression layer; the activation function can be replaced by
    # sigmoid or any other nonlinear function
    self.hiddenLayer = HiddenLayer(rng=self.rng, input=self.x,
                                   n_in=self.n_in, n_out=self.n_hidden,
                                   activation=self.hidden_activation)

    # The logistic regression layer gets as input the hidden units of the hidden layer
    self.logRegressionLayer = LogisticRegression(
        input=self.hiddenLayer.output,
        n_in=self.n_hidden,
        n_out=self.n_out,
        activation=self.logreg_activation)

    # L1 norm; one regularization option is to enforce the L1 norm to be small
    self.L1 = abs(self.hiddenLayer.W).sum() \
        + abs(self.logRegressionLayer.W).sum()

    # square of the L2 norm; one regularization option is to enforce
    # the squared L2 norm to be small
    self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
        + (self.logRegressionLayer.W ** 2).sum()

    # the negative log likelihood of the MLP is given by the negative log
    # likelihood of the output of the model, computed in the logistic regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors

    # the parameters of the model are the parameters of the two layers it is made of
    self.params = self.hiddenLayer.params + self.logRegressionLayer.params

    self.cost = self.negative_log_likelihood(self.y) \
        + self.alpha_l1 * self.L1 \
        + self.alpha_l2 * self.L2_sqr

    self.grads = T.grad(self.cost, self.params)

    # fixed-batch-size prediction functions
    self.predict_proba_batch = theano.function(
        [self.x], self.logRegressionLayer.p_y_given_x)
    self.predict_batch = theano.function(
        [self.x], T.argmax(self.logRegressionLayer.p_y_given_x, axis=1))
    self.predict_cost_batch = theano.function(
        [self.x, self.y], self.cost, allow_input_downcast=True)
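# The regularizers added to the cost above are plain norms of the weight matrices:
# L1 is the sum of absolute values, L2_sqr the sum of squares, each scaled by its
# own coefficient. A standalone numpy sketch (illustration only, with made-up
# shapes and coefficients):
import numpy as np

rng = np.random.RandomState(0)
W_hidden = rng.randn(784, 500)
W_logreg = rng.randn(500, 10)

L1 = np.abs(W_hidden).sum() + np.abs(W_logreg).sum()
L2_sqr = (W_hidden ** 2).sum() + (W_logreg ** 2).sum()

alpha_l1, alpha_l2 = 0.0, 1e-4
penalty = alpha_l1 * L1 + alpha_l2 * L2_sqr  # added to the data cost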
def train_cifar(learning_rate_base=1.0,batch_size=128,n_epochs=200,test_frequency=1300, check_point_frequency=5000,show_progress_frequency=100): check_point_path = '/home/chensi/mylocal/sichen/data/check_points/' parser = optparse.OptionParser() parser.add_option("-f",dest="filename", default='None') (options, args) = parser.parse_args() #defining the rngs rng1 = numpy.random.RandomState(23455) rng2 = numpy.random.RandomState(12423) rng3 = numpy.random.RandomState(23245) rng4 = numpy.random.RandomState(12123) rng5 = numpy.random.RandomState(25365) train_set_x, train_set_y = load_cifar_data(['data_batch_1','data_batch_2','data_batch_3','data_batch_4','data_batch_5']) test_set_x, test_set_y = load_cifar_data(['test_batch'],WHICHSET='test') n_training_batches = train_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_training_batches /= batch_size n_test_batches /= batch_size index = T.lscalar() x = T.matrix('x') y = T.ivector('y') img_input = x.reshape((batch_size,3,32,32)) #bc01 img_input = img_input.dimshuffle(1,2,3,0) #c01b ##################### #defining the layers# ##################### if options.filename == 'None': print 'start new training...' print 'building model...' conv1_input = img_input conv_pool1 = LeNetConvPoolLayer(rng=rng1,input=conv1_input, filter_shape=(3,5,5,32), image_shape=(3,32,32,batch_size), activation='relu', poolsize=(3,3),poolstride=2,pad=2, convstride=1,initW=0.0001,initB=0,partial_sum=4, pooling='max', epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.004, name='conv1' ) conv_pool2_input = drop_out_layer(rng2,conv_pool1.output,0.5) conv_pool2 = LeNetConvPoolLayer(rng=rng2,input=conv_pool2_input, filter_shape=(32,5,5,32), image_shape=(32,16,16,batch_size), activation='relu', poolsize=(3,3),poolstride=2,pad=2, convstride=1,initW=0.01,initB=0,partial_sum=4, pooling='average', epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.004, name='conv2') conv_pool3_input = drop_out_layer(rng3,conv_pool2.output,0.5) conv_pool3 = LeNetConvPoolLayer(rng=rng3,input=conv_pool3_input, filter_shape=(32,5,5,64), image_shape=(32,8,8,batch_size), activation='relu', poolsize=(3,3),poolstride=2,pad=2, convstride=1,initW=0.01,initB=0,partial_sum=4, pooling='average', epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.004, name='conv3') layer4_input = conv_pool3.output.dimshuffle(3,0,1,2).flatten(2) #fc_64 = HiddenLayer(rng=rng4,input=layer4_input,n_in=64*4*4,n_out=64,initW=0.1,initB=0) fc_2_input = drop_out_layer(rng1,input=layer4_input,p=0.5) fc_2 = LogisticRegression(input=fc_2_input,rng=rng5,n_in=64*4*4,n_out=10,initW=0.01, epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=1.0, name='fc2') else: print 'resume training %s...' % options.filename params_file = open(check_point_path+options.filename,'rb') params = cPickle.load(params_file) params_file.close() layer1_W = theano.shared(params[0],borrow=True) layer1_b = theano.shared(params[1],borrow=True) layer2_W = theano.shared(params[2],borrow=True) layer2_b = theano.shared(params[3],borrow=True) layer3_W = theano.shared(params[4],borrow=True) layer3_b = theano.shared(params[5],borrow=True) fc10_W = theano.shared(params[6],borrow=True) fc10_b = theano.shared(params[7],borrow=True) print 'building model...' 
conv_pool1 = LeNetConvPoolLayer(rng=rng1,input=img_input, filter_shape=(3,5,5,32), image_shape=(3,32,32,batch_size), poolsize=(3,3),poolstride=2,pad=2, convstride=1,initW=0.0001,initB=0,partial_sum=4, activation='relu', pooling='max', epsW=0.001, epsB=0.001, momW=0.9, momB=0.9, wc=0.004, name='conv1', W1=layer1_W, b1=layer1_b ) conv_pool2_input = drop_out_layer(rng2,conv_pool1.output,0.5) conv_pool2 = LeNetConvPoolLayer(rng=rng2,input=conv_pool2_input, filter_shape=(32,5,5,32), image_shape=(32,16,16,batch_size), poolsize=(3,3),poolstride=2,pad=2, convstride=1,initW=0.01,initB=0,partial_sum=4, pooling='average', activation='relu', epsW=0.001, epsB=0.001, momW=0.9, momB=0.9, wc=0.004, name='conv2', W1=layer2_W, b1=layer2_b ) conv_pool3_input = drop_out_layer(rng3,conv_pool2.output,0.5) conv_pool3 = LeNetConvPoolLayer(rng=rng3,input=conv_pool3_input, filter_shape=(32,5,5,64), image_shape=(32,8,8,batch_size), poolsize=(3,3),poolstride=2,pad=2, convstride=1,initW=0.01,initB=0,partial_sum=4, pooling='average', activation='relu', epsW=0.001, epsB=0.001, momW=0.9, momB=0.9, wc=0.004, name='conv3', W1=layer3_W, b1=layer3_b ) layer4_input = conv_pool3.output.dimshuffle(3,0,1,2).flatten(2) #fc_64 = HiddenLayer(rng=rng4,input=layer4_input,n_in=64*4*4,n_out=64,initW=0.1,initB=0) fc_2_input = drop_out_layer(rng1,input=layer4_input,p=0.5) fc_2 = LogisticRegression(input=fc_2_input,rng=rng5,n_in=64*4*4,n_out=10,initW=0.01, epsW=0.001, epsB=0.001, momW=0.9, momB=0.9, wc=1.0, W=fc10_W, b=fc10_b, name='fc2' ) all_layers = [conv_pool1,conv_pool2,conv_pool3,fc_2] ############################################# ############### test model################### ############################################# print 'building test model...' conv1_input_test = img_input conv_pool1_test = LeNetConvPoolLayer(rng=rng1,input=conv1_input_test, filter_shape=(3,5,5,32), image_shape=(3,32,32,batch_size), activation='relu', poolsize=(3,3),poolstride=2,pad=2, convstride=1,initW=0.0001,initB=0,partial_sum=4, pooling='max', W1=conv_pool1.W*0.5, b1=conv_pool1.b, name='conv1' ) conv_pool2_test = LeNetConvPoolLayer(rng=rng2,input=conv_pool1_test.output, filter_shape=(32,5,5,32), image_shape=(32,16,16,batch_size), activation='relu', poolsize=(3,3),poolstride=2,pad=2, convstride=1,initW=0.01,initB=0,partial_sum=4, pooling='average', W1=conv_pool2.W*0.5, b1=conv_pool2.b, name='conv2') conv_pool3_test = LeNetConvPoolLayer(rng=rng3,input=conv_pool2_test.output, filter_shape=(32,5,5,64), image_shape=(32,8,8,batch_size), activation='relu', poolsize=(3,3),poolstride=2,pad=2, convstride=1,initW=0.01,initB=0,partial_sum=4, pooling='average', W1=conv_pool3.W*0.5, b1=conv_pool3.b, name='conv3') layer4_input_test = conv_pool3_test.output.dimshuffle(3,0,1,2).flatten(2) fc_2_test = LogisticRegression(input=layer4_input_test,rng=rng5,n_in=64*4*4,n_out=10,initW=0.01, W=fc_2.W, b=fc_2.b, name='fc2') #cost_test = fc_2_test.negative_log_likelihood(y) test_model = theano.function(inputs=[index], outputs=fc_2_test.errors(y), givens={ x:test_set_x[index*batch_size: (index+1)*batch_size], y:test_set_y[index*batch_size: (index+1)*batch_size] }) ########train model cost = fc_2.negative_log_likelihood(y) Ws = [] pgradWs = [] bs = [] pgradbs = [] momWs = [] mombs = [] epsWs = [] epsbs = [] wcs = [] for i in range(len(all_layers)): Ws.append(all_layers[i].W) pgradWs.append(all_layers[i].grad_W) bs.append(all_layers[i].b) pgradbs.append(all_layers[i].grad_b) momWs.append(all_layers[i].momW) mombs.append(all_layers[i].momB) epsWs.append(all_layers[i].epsW) 
epsbs.append(all_layers[i].epsB) wcs.append(all_layers[i].wc) gradWs = T.grad(cost, Ws) gradbs = T.grad(cost, bs) updates = [] for W_i, gradW_i, momW_i, wc_i, epsW_i, pgW_i in zip(Ws, gradWs, momWs, wcs, epsWs, pgradWs): epsW_i *= learning_rate_base grad_i = - epsW_i*gradW_i - wc_i*epsW_i*W_i + momW_i*pgW_i updates.append((W_i, W_i+grad_i)) updates.append((pgW_i, grad_i)) for b_i, gradb_i, momb_i, epsb_i, pgb_i in zip(bs, gradbs, mombs, epsbs, pgradbs): grad_i = - epsb_i*gradb_i + momb_i*pgb_i updates.append((b_i, b_i+grad_i)) updates.append((pgb_i,grad_i)) train_model = theano.function(inputs=[index],outputs=[cost,fc_2.errors(y)],updates=updates, givens={ x: train_set_x[index*batch_size:(index+1)*batch_size], y: train_set_y[index*batch_size:(index+1)*batch_size] }) ############# #train model# ############# print 'training...' best_validation_loss = numpy.inf best_epoch = 0 epoch = 0 pweights = [] pbias = [] for i in range(len(all_layers)): pweights.append(numpy.mean(numpy.abs(all_layers[i].W.get_value()[0,:]))) pbias.append(numpy.mean(numpy.abs(all_layers[i].b.get_value()))) time_start = time.time() start_time = time.time() while(epoch<n_epochs): epoch = epoch + 1 for minibatch_index in range(n_training_batches): iter = (epoch-1)*n_training_batches + minibatch_index train_out = train_model(minibatch_index) if iter % show_progress_frequency == 0: time_end = time.time() print 'epoch: %d, batch_num: %d, cost: %f, training_error: %f, (%f seconds)' % (epoch, minibatch_index, train_out[0], train_out[1], time_end-time_start) time_start = time.time() if (iter+1) % test_frequency == 0: time1 = time.time() test_losses = [test_model(i) for i in range(n_test_batches)] this_test_loss = numpy.mean(test_losses) print '=====================testing output===========================' print 'epoch: %d, batch_num: %d, test_error: %f ' % (epoch, minibatch_index, this_test_loss*100.) 
for i in range(len(all_layers)): weights = numpy.mean(numpy.abs(all_layers[i].W.get_value()[0,:])) bias = numpy.mean(numpy.abs(all_layers[i].b.get_value())) print 'Layer: %s, weights[0]:%e [%e]' % (all_layers[i].name, weights*1.00, weights-pweights[i]) print 'Layer: %s,bias: %e[%e]' % (all_layers[i].name, bias*1.00, bias-pbias[i]) pweights[i] = weights pbias[i] = bias if this_test_loss < best_validation_loss: best_epoch = epoch best_validation_loss = this_test_loss best_params = [] for i in range(len(all_layers)): best_params.append(all_layers[i].W.get_value().copy()) best_params.append(all_layers[i].b.get_value().copy()) outfile_name = check_point_path+'current_best_params.pkl' outfile = open(outfile_name,'wb') cPickle.dump(best_params,outfile) outfile.close() print 'saved best params to %s' % outfile_name time2 = time.time() print '==================================================(%f seconds)' % (time2-time1) if (iter+1) % check_point_frequency == 0: print '~~~~~~~~~~~~~~~~~~saving check_point~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' time1 = time.time() current_params = [] for i in range(len(all_layers)): current_params.append(all_layers[i].W.get_value().copy()) current_params.append(all_layers[i].b.get_value().copy()) outfile_name = check_point_path + 'current_params_' + str(time.localtime().tm_mon) + '_' + str(time.localtime().tm_mday) \ + '_' + str(time.localtime().tm_hour) + '_' + str(time.localtime().tm_min) + '_' + str(time.localtime().tm_sec)+'.pkl' outfile = open(outfile_name,'wb') cPickle.dump(current_params,outfile) outfile.close() print 'saved check_point to %s' % outfile_name time2 = time.time() print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~(%f seconds)' % (time2-time1) end_time = time.time() print 'Best test score is %f at epoch %d. Total time:%f hour' % (best_validation_loss * 100., best_epoch, (end_time-start_time)/3600.)
class DBN(object): def __init__(self, input, output, n_in, hidden_layers_sizes, n_out, dropout=None, optimizer=SGD, is_train=0): self.dense_layers = [] self.rbm_layers = [] self.params = [] self.consider_constants = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 self.rng = np.random.RandomState(888) self.theano_rng = RandomStreams(self.rng.randint(2**30)) for i in range(self.n_layers): if i == 0: input_size = n_in layer_input = input else: input_size = hidden_layers_sizes[i - 1] layer_input = self.dense_layers[-1].output dense_layer = DenseLayer(rng=self.rng, theano_rng=self.theano_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.softplus, dropout=dropout, is_train=is_train) rbm_layer = RBM(input=layer_input, rng=self.rng, theano_rng=self.theano_rng, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=dense_layer.W, hbias=dense_layer.b, dropout=dropout, h_activation=T.nnet.softplus, optimizer=optimizer, is_train=is_train) self.dense_layers.append(dense_layer) self.rbm_layers.append(rbm_layer) self.params.extend(dense_layer.params) if dense_layer.consider_constant is not None: self.consider_constants.extend(dense_layer.consider_constant) # end-for self.logistic_layer = LogisticRegression( input=self.dense_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_out) self.params.extend(self.logistic_layer.params) self.finetune_cost = self.logistic_layer.negative_loglikelihood(output) self.finetune_errors = self.logistic_layer.errors(output) self.input = input self.output = output self.is_train = is_train # model updates self.finetune_opt = optimizer(self.params) def _finetune_updates(self, learning_rate): return self.finetune_opt.update(self.finetune_cost, self.params, learning_rate, self.consider_constants) def build_pretraining_functions(self, datasets, batch_size, k=1): train_set_x = datasets[0][0] valid_set_x = datasets[1][0] index = T.lscalar('index') # index to a [mini]batch learning_rate = T.scalar('learning_rate') self.rbm_pretraining_fns = [] self.rbm_pretraining_errors = [] batch_begin = index * batch_size batch_end = batch_begin + batch_size for n, rbm_layer in enumerate(self.rbm_layers): persistent_chain = theano.shared(value=np.zeros( shape=(batch_size, rbm_layer.n_hidden), dtype=theano.config.floatX), borrow=True) rbm_cost, rbm_updates = rbm_layer.get_cost_updates( learning_rate, persistent_chain, k) train_rbm = theano.function(inputs=[index, learning_rate], outputs=rbm_cost, updates=rbm_updates, givens={ self.input: train_set_x[batch_begin:batch_end], rbm_layer.is_train: T.cast(1, 'int32') }, name='train_rbm' + '_' + str(n)) self.rbm_pretraining_fns.append(train_rbm) validate_rbm = theano.function( inputs=[index], outputs=rbm_layer.get_valid_error(), givens={ self.input: valid_set_x[batch_begin:batch_end], rbm_layer.is_train: T.cast(0, 'int32') }, name='valid_rbm' + '_' + str(n)) self.rbm_pretraining_errors.append(validate_rbm) # end-for return self.rbm_pretraining_fns, self.rbm_pretraining_errors def build_finetune_functions(self, datasets, batch_size): (train_set_x, train_set_y) = datasets[0] (valid_set_x, valid_set_y) = datasets[1] (test_set_x, test_set_y) = datasets[2] index = T.lscalar('index') # index to a [mini]batch learning_rate = T.scalar('learning_rate') batch_begin = index * batch_size batch_end = batch_begin + batch_size test_model = theano.function(inputs=[index], outputs=self.finetune_errors, givens={ self.input: test_set_x[batch_begin:batch_end], self.output: test_set_y[batch_begin:batch_end], 
self.is_train: T.cast(0, 'int32') }) validate_model = theano.function( inputs=[index], outputs=self.finetune_errors, givens={ self.input: valid_set_x[batch_begin:batch_end], self.output: valid_set_y[batch_begin:batch_end], self.is_train: T.cast(0, 'int32') }) train_model = theano.function( inputs=[index, learning_rate], outputs=self.finetune_cost, updates=self._finetune_updates(learning_rate), givens={ self.input: train_set_x[batch_begin:batch_end], self.output: train_set_y[batch_begin:batch_end], self.is_train: T.cast(1, 'int32') }) return train_model, validate_model, test_model
def __init__(self, configfile, train=False): self.slotList = [ "N", "per:age", "per:alternate_names", "per:children", "per:cause_of_death", "per:date_of_birth", "per:date_of_death", "per:employee_or_member_of", "per:location_of_birth", "per:location_of_death", "per:locations_of_residence", "per:origin", "per:schools_attended", "per:siblings", "per:spouse", "per:title", "org:alternate_names", "org:date_founded", "org:founded_by", "org:location_of_headquarters", "org:members", "org:parents", "org:top_members_employees" ] typeList = [ "O", "PERSON", "LOCATION", "ORGANIZATION", "DATE", "NUMBER" ] self.config = readConfig(configfile) self.addInputSize = 1 logger.info("additional mlp input") wordvectorfile = self.config["wordvectors"] logger.info("wordvectorfile " + wordvectorfile) networkfile = self.config["net"] logger.info("networkfile " + networkfile) hiddenunits = int(self.config["hidden"]) logger.info("hidden units " + str(hiddenunits)) hiddenunitsNer = hiddenunits if "hiddenunitsNER" in self.config: hiddenunitsNer = int(self.config["hiddenunitsNER"]) representationsizeNER = 50 if "representationsizeNER" in self.config: representationsizeNER = int(self.config["representationsizeNER"]) learning_rate = float(self.config["lrate"]) logger.info("learning rate " + str(learning_rate)) if train: self.batch_size = int(self.config["batchsize"]) else: self.batch_size = 1 logger.info("batch size " + str(self.batch_size)) self.filtersize = [1, int(self.config["filtersize"])] nkerns = [int(self.config["nkerns"])] logger.info("nkerns " + str(nkerns)) pool = [1, int(self.config["kmax"])] self.contextsize = int(self.config["contextsize"]) logger.info("contextsize " + str(self.contextsize)) if self.contextsize < self.filtersize[1]: logger.info("setting filtersize to " + str(self.contextsize)) self.filtersize[1] = self.contextsize logger.info("filtersize " + str(self.filtersize)) sizeAfterConv = self.contextsize - self.filtersize[1] + 1 sizeAfterPooling = -1 if sizeAfterConv < pool[1]: logger.info("setting poolsize to " + str(sizeAfterConv)) pool[1] = sizeAfterConv sizeAfterPooling = pool[1] logger.info("kmax pooling: k = " + str(pool[1])) # reading word vectors self.wordvectors, self.vectorsize = readWordvectors(wordvectorfile) self.representationsize = self.vectorsize + 1 rng = numpy.random.RandomState( 23455 ) # not relevant, parameters will be overwritten by stored model anyways if train: seed = rng.get_state()[1][0] logger.info("seed: " + str(seed)) numSFclasses = 23 numNERclasses = 6 # allocate symbolic variables for the data self.index = T.lscalar() # index to a [mini]batch self.xa = T.matrix('xa') # left context self.xb = T.matrix('xb') # middle context self.xc = T.matrix('xc') # right context self.y = T.imatrix('y') # label (only present in training) self.yNER1 = T.imatrix( 'yNER1') # label for first entity (only present in training) self.yNER2 = T.imatrix( 'yNER2') # label for second entity (only present in training) ishape = [self.representationsize, self.contextsize] # this is the size of context matrizes ###################### # BUILD ACTUAL MODEL # ###################### logger.info('... 
building the model') # Reshape input matrix to be compatible with LeNetConvPoolLayer layer0a_input = self.xa.reshape( (self.batch_size, 1, ishape[0], ishape[1])) layer0b_input = self.xb.reshape( (self.batch_size, 1, ishape[0], ishape[1])) layer0c_input = self.xc.reshape( (self.batch_size, 1, ishape[0], ishape[1])) y_reshaped = self.y.reshape((self.batch_size, 1)) yNER1reshaped = self.yNER1.reshape((self.batch_size, 1)) yNER2reshaped = self.yNER2.reshape((self.batch_size, 1)) # Construct convolutional pooling layer: filter_shape = (nkerns[0], 1, self.representationsize, self.filtersize[1]) poolsize = (pool[0], pool[1]) fan_in = numpy.prod(filter_shape[1:]) fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)) W_bound = numpy.sqrt(6. / (fan_in + fan_out)) # the convolution weight matrix convW = theano.shared(numpy.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=theano.config.floatX), borrow=True) # the bias is a 1D tensor -- one bias per output feature map b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX) convB = theano.shared(value=b_values, borrow=True) self.layer0a = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0a_input, image_shape=(self.batch_size, 1, ishape[0], ishape[1]), filter_shape=filter_shape, poolsize=poolsize) self.layer0b = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0b_input, image_shape=(self.batch_size, 1, ishape[0], ishape[1]), filter_shape=filter_shape, poolsize=poolsize) self.layer0c = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0c_input, image_shape=(self.batch_size, 1, ishape[0], ishape[1]), filter_shape=filter_shape, poolsize=poolsize) layer0aflattened = self.layer0a.output.flatten(2).reshape( (self.batch_size, nkerns[0] * sizeAfterPooling)) layer0bflattened = self.layer0b.output.flatten(2).reshape( (self.batch_size, nkerns[0] * sizeAfterPooling)) layer0cflattened = self.layer0c.output.flatten(2).reshape( (self.batch_size, nkerns[0] * sizeAfterPooling)) layer0outputSF = T.concatenate( [layer0aflattened, layer0bflattened, layer0cflattened], axis=1) layer0outputSFsize = 3 * (nkerns[0] * sizeAfterPooling) layer0outputNER1 = T.concatenate([layer0aflattened, layer0bflattened], axis=1) layer0outputNER2 = T.concatenate([layer0bflattened, layer0cflattened], axis=1) layer0outputNERsize = 2 * (nkerns[0] * sizeAfterPooling) layer2ner1 = HiddenLayer(rng, input=layer0outputNER1, n_in=layer0outputNERsize, n_out=hiddenunitsNer, activation=T.tanh) layer2ner2 = HiddenLayer(rng, input=layer0outputNER2, n_in=layer0outputNERsize, n_out=hiddenunitsNer, activation=T.tanh, W=layer2ner1.W, b=layer2ner1.b) # concatenate additional features to sentence representation self.additionalFeatures = T.matrix('additionalFeatures') self.additionalFeatsShaped = self.additionalFeatures.reshape( (self.batch_size, 1)) layer2SFinput = T.concatenate( [layer0outputSF, self.additionalFeatsShaped], axis=1) layer2SFinputSize = layer0outputSFsize + self.addInputSize layer2SF = HiddenLayer(rng, input=layer2SFinput, n_in=layer2SFinputSize, n_out=hiddenunits, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer layer3rel = LogisticRegression(input=layer2SF.output, n_in=hiddenunits, n_out=numSFclasses) layer3et = LogisticRegression(input=layer2ner1.output, n_in=hiddenunitsNer, n_out=numNERclasses) scoresForR1 = layer3rel.getScores(layer2SF.output) scoresForE1 = layer3et.getScores(layer2ner1.output) scoresForE2 = layer3et.getScores(layer2ner2.output) self.crfLayer = 
CRF(numClasses=numSFclasses + numNERclasses, rng=rng, batchsizeVar=self.batch_size, sequenceLength=3) scores = T.zeros((self.batch_size, 3, numSFclasses + numNERclasses)) scores = T.set_subtensor(scores[:, 0, numSFclasses:], scoresForE1) scores = T.set_subtensor(scores[:, 1, :numSFclasses], scoresForR1) scores = T.set_subtensor(scores[:, 2, numSFclasses:], scoresForE2) self.scores = scores self.y_conc = T.concatenate([ yNER1reshaped + numSFclasses, y_reshaped, yNER2reshaped + numSFclasses ], axis=1) # create a list of all model parameters self.paramList = [ self.crfLayer.params, layer3rel.params, layer3et.params, layer2SF.params, layer2ner1.params, self.layer0a.params ] self.params = [] for p in self.paramList: self.params += p logger.info(p) if not train: self.gotNetwork = 1 # load parameters if not os.path.isfile(networkfile): logger.error("network file does not exist") self.gotNetwork = 0 else: save_file = open(networkfile, 'rb') for p in self.params: p.set_value(cPickle.load(save_file), borrow=False) save_file.close() self.relation_scores_global = self.crfLayer.getProbForClass( self.scores, numSFclasses) self.predictions_global = self.crfLayer.getPrediction(self.scores)
class StackedAutoEncoder(object): """Stacked auto-encoder class (SAE) Adopted from: https://github.com/lisa-lab/DeepLearningTutorials/blob/master/code/SdA.py A stacked autoencoder (SAE) model is obtained by stacking several AEs. The hidden layer of the AE at layer `i` becomes the input of the AE at layer `i+1`. The first layer AE gets as input the input of the SAE, and the hidden layer of the last AE represents the output. Note that after pretraining, the SAE is dealt with as a normal MLP, the AEs are only used to initialize the weights. """ def __init__(self, numpy_rng, train_set_x, train_set_y, hidden_layers_sizes, n_ins=784, n_outs=10): """ This class is made to support a variable number of layers. :type numpy_rng: np.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type train_set_x: theano.shared float32 :param: train_set_x: Training data set, shape (n_samples, n_pixels) :type train_set_y: theano.shared, int32 :param: train_set_x: GT for training data, shape (n_samples) :type n_ins: int :param n_ins: dimension of the input to the SAE :type hidden_layers_sizes: list of ints :param hidden_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network """ self.sigmoid_layers = [] self.AE_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) self.train_set_x = train_set_x self.train_set_y = train_set_y assert self.n_layers > 0 self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of for i in xrange(self.n_layers): # used to be n layers # construct the sigmoid layer = encoder stack if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer( rng=numpy_rng, input=layer_input, n_in=(n_ins if i == 0 else hidden_layers_sizes[i - 1]), n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) self.params.extend(sigmoid_layer.params) # init the DA_layer, takes weights from sigmoid layer AE_layer = AutoEncoder( numpy_rng=numpy_rng, input=layer_input, n_visible=(n_ins if i == 0 else hidden_layers_sizes[i - 1]), n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, bhid=sigmoid_layer.b) self.AE_layers.append(AE_layer) # on top of the layers # log layer for fine-tuning self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs) self.params.extend(self.logLayer.params) self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) self.errors = self.logLayer.errors(self.y) def pretraining_functions(self, batch_size): """ Generates a list of functions to time each AE training. 
:type batch_size: int :param batch_size: size of a [mini]batch """ index = T.lscalar('index') # index to a minibatch # beginning of a batch, given `index` batch_begin = index * batch_size # ending of a batch given `index` batch_end = batch_begin + batch_size forward_backward_step = [] forward_step_fns = [] i = 0 for AE in self.AE_layers: # get the cost and the updates list cost = AE.get_cost_updates() params = AE.params shared_cost = theano.shared(np.float32(0.0)) forward_step_fns.append( theano.function([index], [], updates=[(shared_cost, cost)], givens={ self.x: self.train_set_x[batch_begin:batch_end], })) grads_temp = T.grad(cost, params) # This is both forward and backward forward_backward_step.append( theano.function([index], grads_temp, givens={ self.x: self.train_set_x[batch_begin:batch_end], })) i += 1 return forward_backward_step, forward_step_fns def build_finetune_functions(self, batch_size): index = T.lscalar('index') # index to a [mini]batch # beginning of a batch, given `index` batch_begin = index * batch_size # ending of a batch given `index` batch_end = batch_begin + batch_size cost = self.finetune_cost shared_cost = theano.shared(np.float32(0.0)) forward_mlp = theano.function( [index], [], updates=[(shared_cost, cost)], givens={ self.x: self.train_set_x[batch_begin:batch_end], self.y: self.train_set_y[batch_begin:batch_end], }) grads_temp = T.grad(cost, self.params) # This is both forward and backward forward_backward_mlp = theano.function( [index], grads_temp, givens={ self.x: self.train_set_x[batch_begin:batch_end], self.y: self.train_set_y[batch_begin:batch_end], }) return forward_mlp, forward_backward_mlp
layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                            image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
                            filter_shape=(60, flt_channels, 3, 3),
                            poolsize=(2, 2))
layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                            image_shape=(batch_size, 60, layer2_w, layer2_h),
                            filter_shape=(90, 60, 3, 3),
                            poolsize=(2, 2))
layer3_input = layer2.output.flatten(2)
layer3 = HiddenLayer(rng, input=layer3_input,
                     n_in=90 * layer3_w * layer3_h,
                     n_out=500,
                     activation=T.tanh)
layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)  # change the number of output labels

cost = layer4.negative_log_likelihood(y)

classify = theano.function([index], outputs=layer4.get_output_labels(y),
                           givens={
                               x: test_set_x[index * batch_size: (index + 1) * batch_size],
                               y: test_set_y[index * batch_size: (index + 1) * batch_size]})

# load weights
print 'loading weights state'
f = file('weights.save', 'rb')
loaded_objects = []
for i in range(5):
    loaded_objects.append(cPickle.load(f))
def train_nnet(learning_rate=0.1, n_epochs=2, dataset='mnist.pkl.gz', nkerns=[20, 50], batch_size=500): rng = numpy.random.RandomState(23455) datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size n_test_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch data = T.matrix('x') result = T.matrix('y') # the labels are presented as 1D vector [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') init_input = data.reshape((batch_size, 1, 16, 16)) # Check for pkl file holding old weights old_weights = [[None, None]] * 4; try: old_weights = pickle.load(open(sys.argv[1], "rb")) except FileNotFoundError as e: print(e) except IndexError: pass # Reshape matrix of rasterized images of shape (batch_size, 28 * 28) # to a 4D tensor, compatible with our LeNetConvPoolLayer # (28, 28) is the size of MNIST images. # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24) # maxpooling reduces this further to (24/2, 24/2) = (12, 12) # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12) layer0 = LeNetConvPoolLayer( rng, input=init_input, image_shape=(batch_size, 1, 16, 16), filter_shape=(nkerns[0], 1, 5, 5), oldWeights=old_weights[0][0], oldBias=old_weights[0][1], poolsize=(2, 2) ) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8) # maxpooling reduces this further to (8/2, 8/2) = (4, 4) # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4) layer1 = LeNetConvPoolLayer( rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 6, 6), filter_shape=(nkerns[1], nkerns[0], 3, 3), oldWeights=old_weights[1][0], oldBias=old_weights[1][1], poolsize=(2, 2) ) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4), # or (500, 50 * 4 * 4) = (500, 800) with the default values. 
layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer( rng, input=layer2_input, n_in=nkerns[1] * 2 * 2, n_out=64, oldWeights=old_weights[2][0], oldBias=old_weights[2][1], activation=T.tanh ) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression( input=layer2.output, n_in=64, n_out=256, oldWeights=old_weights[3][0], oldBias=old_weights[3][1], ) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(result) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer3.errors(result), givens={ data: test_set_x[index * batch_size: (index + 1) * batch_size], result: test_set_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( [index], layer3.errors(result), givens={ data: valid_set_x[index * batch_size: (index + 1) * batch_size], result: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [ (param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads) ] train_model = theano.function( [index], cost, updates=updates, givens={ data: train_set_x[index * batch_size: (index + 1) * batch_size], result: train_set_y[index * batch_size: (index + 1) * batch_size] } ) ############### # TRAIN MODEL # ############### print('... training') # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = timeit.default_timer() done = False for epoch in range(1, n_epochs + 1): for minibatch_index in range(int(n_train_batches)): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print('training @ iter = ', iter) cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in range(int(n_valid_batches))] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = [ test_model(i) for i in range(int(n_test_batches)) ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done = True break if done: break end_time = timeit.default_timer() print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) weights = [] weights.append([layer0.W.get_value(), layer0.b.get_value()]) weights.append([layer1.W.get_value(), layer1.b.get_value()]) weights.append([layer2.W.get_value(), layer2.b.get_value()]) weights.append([layer3.W.get_value(), layer3.b.get_value()]) pickle.dump(weights, open("mlp.pkl", "wb"))
def train_rep( learning_rate=0.002, L1_reg=0.0002, L2_reg=0.005, n_epochs=200, nkerns=[20, 50], batch_size=25, ): rng = numpy.random.RandomState(23455) train_dir = "../out/h5/" valid_dir = "../out/h5/" weights_dir = "./weights/" print("... load input data") filename = train_dir + "rep_train_data_1.gzip.h5" datasets = load_initial_data(filename) train_set_x, train_set_y, shared_train_set_y = datasets filename = valid_dir + "rep_valid_data_1.gzip.h5" datasets = load_initial_data(filename) valid_set_x, valid_set_y, shared_valid_set_y = datasets mydatasets = load_initial_test_data() test_set_x, test_set_y, shared_test_set_y, valid_ds = mydatasets # compute number of minibatches for training, validation and testing n_all_train_batches = 30000 n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_all_train_batches /= batch_size n_train_batches /= batch_size n_valid_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix("x") # the data is presented as rasterized images y = T.ivector("y") # the labels are presented as 1D vector of # [int] labels # image size layer0_w = 50 layer0_h = 50 layer1_w = (layer0_w - 4) / 2 layer1_h = (layer0_h - 4) / 2 layer2_w = (layer1_w - 2) / 2 layer2_h = (layer1_h - 2) / 2 layer3_w = (layer2_w - 2) / 2 layer3_h = (layer2_h - 2) / 2 ###################### # BUILD ACTUAL MODEL # ###################### print("... building the model") # image sizes batchsize = batch_size in_channels = 20 in_width = 50 in_height = 50 # filter sizes flt_channels = 40 flt_time = 20 flt_width = 5 flt_height = 5 signals_shape = (batchsize, in_channels, in_height, in_width) filters_shape = (flt_channels, in_channels, flt_height, flt_width) layer0_input = x.reshape(signals_shape) layer0 = LeNetConvPoolLayer( rng, input=layer0_input, image_shape=signals_shape, filter_shape=filters_shape, poolsize=(2, 2), ) # TODO: incase of flt_time < in_time the output dimension will be different layer1 = LeNetConvPoolLayer( rng, input=layer0.output, image_shape=(batch_size, flt_channels, layer1_w, layer1_h), filter_shape=(60, flt_channels, 3, 3), poolsize=(2, 2), ) layer2 = LeNetConvPoolLayer( rng, input=layer1.output, image_shape=(batch_size, 60, layer2_w, layer2_h), filter_shape=(90, 60, 3, 3), poolsize=(2, 2), ) layer3_input = layer2.output.flatten(2) layer3 = HiddenLayer( rng, input=layer3_input, n_in=90 * layer3_w * layer3_h, n_out=500, activation=T.tanh, ) layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8) classify = theano.function( [index], outputs=layer4.get_output_labels(y), givens={ x: test_set_x[index * batch_size : (index + 1) * batch_size], y: test_set_y[index * batch_size : (index + 1) * batch_size], }, ) validate_model = theano.function( [index], layer4.errors(y), givens={ x: valid_set_x[index * batch_size : (index + 1) * batch_size], y: valid_set_y[index * batch_size : (index + 1) * batch_size], }, ) # create a list of all model parameters to be fit by gradient descent params = ( layer4.params + layer3.params + layer2.params + layer1.params + layer0.params ) # symbolic Theano variable that represents the L1 regularization term L1 = ( T.sum(abs(layer4.params[0])) + T.sum(abs(layer3.params[0])) + T.sum(abs(layer2.params[0])) + T.sum(abs(layer1.params[0])) + T.sum(abs(layer0.params[0])) ) # symbolic Theano variable that represents the squared L2 term L2_sqr = ( T.sum(layer4.params[0] ** 2) + T.sum(layer3.params[0] ** 2) + 
T.sum(layer2.params[0] ** 2) + T.sum(layer1.params[0] ** 2) + T.sum(layer0.params[0] ** 2) ) # the loss cost = layer4.negative_log_likelihood(y) + L1_reg * L1 + L2_reg * L2_sqr # create a list of gradients for all model parameters grads = T.grad(cost, params) updates = [] for param_i, grad_i in zip(params, grads): updates.append((param_i, param_i - learning_rate * grad_i)) train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size : (index + 1) * batch_size], y: train_set_y[index * batch_size : (index + 1) * batch_size], }, ) ############### # TRAIN MODEL # ############### print("... training") start_time = time.clock() epoch = 0 done_looping = False cost_ij = 0 train_files_num = 600 val_files_num = 100 startc = time.clock() while (epoch < n_epochs) and (not done_looping): endc = time.clock() print(("epoch %i, took %.2f minutes" % (epoch, (endc - startc) / 60.0))) startc = time.clock() epoch = epoch + 1 for nTrainSet in range(1, train_files_num + 1): # load next train data if nTrainSet % 50 == 0: print("training @ nTrainSet = ", nTrainSet, ", cost = ", cost_ij) filename = train_dir + "rep_train_data_" + str(nTrainSet) + ".gzip.h5" datasets = load_next_data(filename) ns_train_set_x, ns_train_set_y = datasets train_set_x.set_value(ns_train_set_x, borrow=True) shared_train_set_y.set_value( numpy.asarray(ns_train_set_y, dtype=theano.config.floatX), borrow=True ) n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size # train for minibatch_index in range(n_train_batches): # training itself # -------------------------------------- cost_ij = train_model(minibatch_index) # ------------------------- # at the end of each epoch run validation this_validation_loss = 0 for nValSet in range(1, val_files_num + 1): filename = valid_dir + "rep_valid_data_" + str(nValSet) + ".gzip.h5" datasets = load_next_data(filename) ns_valid_set_x, ns_valid_set_y = datasets valid_set_x.set_value(ns_valid_set_x, borrow=True) shared_valid_set_y.set_value( numpy.asarray(ns_valid_set_y, dtype=theano.config.floatX), borrow=True ) n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_valid_batches /= batch_size # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in range(n_valid_batches)] this_validation_loss += numpy.mean(validation_losses) this_validation_loss /= val_files_num print(( "epoch %i, minibatch %i/%i, validation error %f %%" % ( epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.0, ) )) # save snapshots print("saving weights state, epoch = ", epoch) f = file(weights_dir + "weights_epoch" + str(epoch) + ".save", "wb") state_L0 = layer0.__getstate__() pickle.dump(state_L0, f, protocol=pickle.HIGHEST_PROTOCOL) state_L1 = layer1.__getstate__() pickle.dump(state_L1, f, protocol=pickle.HIGHEST_PROTOCOL) state_L2 = layer2.__getstate__() pickle.dump(state_L2, f, protocol=pickle.HIGHEST_PROTOCOL) state_L3 = layer3.__getstate__() pickle.dump(state_L3, f, protocol=pickle.HIGHEST_PROTOCOL) state_L4 = layer4.__getstate__() pickle.dump(state_L4, f, protocol=pickle.HIGHEST_PROTOCOL) f.close() end_time = time.clock() print ("Optimization complete.") print(( "The code for file " + os.path.split(__file__)[1] + " ran for %.2fm" % ((end_time - start_time) / 60.0) ), file=sys.stderr)
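The layer1_w/layer2_w/layer3_w arithmetic above is the usual valid-convolution-plus-pooling rule, out = (in - filter + 1) // pool. A small helper reproducing the 50 -> 23 -> 10 -> 4 chain used by the three conv/pool stages here (assuming valid convolution and non-overlapping 2x2 pooling):

def conv_pool_out(size, filter_size, pool_size):
    """Output width of a 'valid' convolution followed by max-pooling."""
    return (size - filter_size + 1) // pool_size

size = 50                              # input width/height used above
for f, p in [(5, 2), (3, 2), (3, 2)]:  # the three conv/pool stages
    size = conv_pool_out(size, f, p)
    print(size)                        # 23, 10, 4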
def __init__(self, configfile, train = False): self.config = readConfig(configfile) self.addInputSize = 1 logger.info("additional mlp input") wordvectorfile = self.config["wordvectors"] logger.info("wordvectorfile " + str(wordvectorfile)) networkfile = self.config["net"] logger.info("networkfile " + str(networkfile)) hiddenunits = int(self.config["hidden"]) logger.info("hidden units " + str(hiddenunits)) hiddenunitsNER = 50 if "hiddenunitsNER" in self.config: hiddenunitsNER = int(self.config["hiddenunitsNER"]) logger.info("hidden units NER " + str(hiddenunitsNER)) learning_rate = float(self.config["lrate"]) logger.info("learning rate " + str(learning_rate)) if train: self.batch_size = int(self.config["batchsize"]) else: self.batch_size = 1 logger.info("batch size " + str(self.batch_size)) self.filtersize = [1,int(self.config["filtersize"])] nkerns = [int(self.config["nkerns"])] logger.info("nkerns " + str(nkerns)) pool = [1, int(self.config["kmax"])] self.contextsize = int(self.config["contextsize"]) logger.info("contextsize " + str(self.contextsize)) if self.contextsize < self.filtersize[1]: logger.info("setting filtersize to " + str(self.contextsize)) self.filtersize[1] = self.contextsize logger.info("filtersize " + str(self.filtersize)) sizeAfterConv = self.contextsize - self.filtersize[1] + 1 sizeAfterPooling = -1 if sizeAfterConv < pool[1]: logger.info("setting poolsize to " + str(sizeAfterConv)) pool[1] = sizeAfterConv sizeAfterPooling = pool[1] logger.info("kmax pooling: k = " + str(pool[1])) # reading word vectors self.wordvectors, self.vectorsize = readWordvectors(wordvectorfile) self.representationsize = self.vectorsize + 1 rng = numpy.random.RandomState(23455) if train: seed = rng.get_state()[1][0] logger.info("seed: " + str(seed)) # allocate symbolic variables for the data self.index = T.lscalar() # index to a [mini]batch self.xa = T.matrix('xa') # left context self.xb = T.matrix('xb') # middle context self.xc = T.matrix('xc') # right context self.y = T.imatrix('y') # label (only present in training) self.yNER1 = T.imatrix('yNER1') # label for first entity self.yNER2 = T.imatrix('yNER2') # label for second entity ishape = [self.representationsize, self.contextsize] # this is the size of context matrizes ###################### # BUILD ACTUAL MODEL # ###################### logger.info('... building the model') # Reshape input matrix to be compatible with our LeNetConvPoolLayer layer0a_input = self.xa.reshape((self.batch_size, 1, ishape[0], ishape[1])) layer0b_input = self.xb.reshape((self.batch_size, 1, ishape[0], ishape[1])) layer0c_input = self.xc.reshape((self.batch_size, 1, ishape[0], ishape[1])) self.y_reshaped = self.y.reshape((self.batch_size, 1)) yNER1reshaped = self.yNER1.reshape((self.batch_size, 1)) yNER2reshaped = self.yNER2.reshape((self.batch_size, 1)) # Construct convolutional pooling layer: filter_shape = (nkerns[0], 1, self.representationsize, self.filtersize[1]) poolsize=(pool[0], pool[1]) fan_in = numpy.prod(filter_shape[1:]) fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)) W_bound = numpy.sqrt(6. 
/ (fan_in + fan_out)) # the convolution weight matrix convW = theano.shared(numpy.asarray( rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=theano.config.floatX), borrow=True) # the bias is a 1D tensor -- one bias per output feature map b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) convB = theano.shared(value=b_values, borrow=True) self.layer0a = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0a_input, image_shape=(self.batch_size, 1, ishape[0], ishape[1]), filter_shape=filter_shape, poolsize=poolsize) self.layer0b = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0b_input, image_shape=(self.batch_size, 1, ishape[0], ishape[1]), filter_shape=filter_shape, poolsize=poolsize) self.layer0c = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0c_input, image_shape=(self.batch_size, 1, ishape[0], ishape[1]), filter_shape=filter_shape, poolsize=poolsize) #layer0_output = T.concatenate([self.layer0a.output, self.layer0b.output, self.layer0c.output], axis = 3) layer0aflattened = self.layer0a.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling)) layer0bflattened = self.layer0b.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling)) layer0cflattened = self.layer0c.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling)) layer0_output = T.concatenate([layer0aflattened, layer0bflattened, layer0cflattened], axis = 1) self.layer1a = HiddenLayer(rng = rng, input = self.yNER1, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh) self.layer1b = HiddenLayer(rng = rng, input = self.yNER2, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh, W = self.layer1a.W, b = self.layer1a.b) layer2_input = T.concatenate([layer0_output, self.layer1a.output, self.layer1b.output], axis = 1) layer2_inputSize = 3 * nkerns[0] * sizeAfterPooling + 2 * hiddenunitsNER self.additionalFeatures = T.matrix('additionalFeatures') additionalFeatsShaped = self.additionalFeatures.reshape((self.batch_size, 1)) layer2_input = T.concatenate([layer2_input, additionalFeatsShaped], axis = 1) layer2_inputSize += self.addInputSize self.layer2 = HiddenLayer(rng, input=layer2_input, n_in=layer2_inputSize, n_out=hiddenunits, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer self.layer3 = LogisticRegression(input=self.layer2.output, n_in=hiddenunits, n_out=23) # create a list of all model parameters self.paramList = [self.layer3.params, self.layer2.params, self.layer1a.params, self.layer0a.params] self.params = [] for p in self.paramList: self.params += p logger.info(p) if not train: self.gotNetwork = 1 # load parameters if not os.path.isfile(networkfile): logger.error("network file does not exist") self.gotNetwork = 0 else: save_file = open(networkfile, 'rb') for p in self.params: p.set_value(cPickle.load(save_file), borrow=False) save_file.close()
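W_bound above is the Glorot/Xavier-style uniform bound sqrt(6 / (fan_in + fan_out)), with fan_out scaled down by the pooling area. A NumPy sketch of that initializer (the filter shape and the `pool_area` argument below are illustrative, not the exact layer configuration above):

import numpy as np

def glorot_uniform(rng, filter_shape, pool_area=1):
    """Sample conv weights uniformly from [-W_bound, W_bound]."""
    fan_in = np.prod(filter_shape[1:])
    fan_out = filter_shape[0] * np.prod(filter_shape[2:]) / float(pool_area)
    w_bound = np.sqrt(6.0 / (fan_in + fan_out))
    return rng.uniform(low=-w_bound, high=w_bound, size=filter_shape)

rng = np.random.RandomState(23455)
W = glorot_uniform(rng, filter_shape=(100, 1, 51, 3), pool_area=3)
print(W.shape, W.min(), W.max())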
def build_lenet(config): rng = np.random.RandomState(23455) x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector image_width = config.image_width batch_size = config.batch_size image_size = image_width**2 x_shared = T.cast(theano.shared(np.random.rand(batch_size, image_size), borrow=True), theano.config.floatX) y_shared = T.cast(theano.shared(np.random.randint(config.ydim, size=batch_size), borrow=True), 'int32') layer0_input = x.reshape((batch_size, 1, image_width, image_width)) # construct the first convolutional pooling layer layer0 = LeNetConvPoolLayer( rng, input=layer0_input, image_shape=(batch_size, 1, image_width, image_width), filter_shape=(config.num_kerns[0], 1, 5, 5), poolsize=(2, 2) ) # construct the second convolutional pooling layer layer1 = LeNetConvPoolLayer( rng, input=layer0.output, image_shape=(batch_size, config.num_kerns[0], 12, 12), filter_shape=(config.num_kerns[1], config.num_kerns[0], 5, 5), poolsize=(2, 2) ) layer2_input = layer1.output.flatten(2) # construct a fully-connected layer layer2 = HiddenLayer( rng, input=layer2_input, n_in=config.num_kerns[1] * 4 * 4, n_out=500, activation=relu ) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=config.ydim) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a list of all model parameters to be fit by gradient descent params_W = [layer3.W, layer2.W, layer1.W, layer0.W] params_b = [layer3.b, layer2.b, layer1.b, layer0.b] params = params_W + params_b shared_cost = theano.shared(np.float32(0.0)) grads_temp = T.grad(cost, params) start_compilation = time.time() forward_step = theano.function([], [], updates=[(shared_cost, cost)], givens={x: x_shared, y: y_shared}) forward_backward_step = theano.function([], grads_temp, givens={x: x_shared, y: y_shared}) print 'compilation time: %.4f s' % (time.time() - start_compilation) return forward_step, forward_backward_step
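build_lenet returns two zero-argument Theano functions so the forward pass and the forward+backward pass can be timed separately. A generic timing sketch for such callables (standalone, with a stand-in function rather than the compiled ones returned above):

import time

def time_step(step_fn, n_calls=100):
    """Average wall-clock seconds per call of a zero-argument function."""
    start = time.time()
    for _ in range(n_calls):
        step_fn()
    return (time.time() - start) / n_calls

# stand-in for forward_step / forward_backward_step
dummy_step = lambda: sum(i * i for i in range(10000))
print('%.6f s per call' % time_step(dummy_step))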
class StackedAutoEncoder(object): """Stacked auto-encoder class (SAE) Adopted from: https://github.com/lisa-lab/DeepLearningTutorials/blob/master/code/SdA.py A stacked autoencoder (SAE) model is obtained by stacking several AEs. The hidden layer of the AE at layer `i` becomes the input of the AE at layer `i+1`. The first layer AE gets as input the input of the SAE, and the hidden layer of the last AE represents the output. Note that after pretraining, the SAE is dealt with as a normal MLP, the AEs are only used to initialize the weights. """ def __init__( self, numpy_rng, train_set_x, train_set_y, hidden_layers_sizes, n_ins=784, n_outs=10 ): """ This class is made to support a variable number of layers. :type numpy_rng: np.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type train_set_x: theano.shared float32 :param: train_set_x: Training data set, shape (n_samples, n_pixels) :type train_set_y: theano.shared, int32 :param: train_set_x: GT for training data, shape (n_samples) :type n_ins: int :param n_ins: dimension of the input to the SAE :type hidden_layers_sizes: list of ints :param hidden_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network """ self.sigmoid_layers = [] self.AE_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) self.train_set_x = train_set_x self.train_set_y = train_set_y assert self.n_layers > 0 self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of for i in xrange(self.n_layers): # used to be n layers # construct the sigmoid layer = encoder stack if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=(n_ins if i == 0 else hidden_layers_sizes[i-1]), n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) self.params.extend(sigmoid_layer.params) # init the DA_layer, takes weights from sigmoid layer AE_layer = AutoEncoder( numpy_rng=numpy_rng, input=layer_input, n_visible=(n_ins if i == 0 else hidden_layers_sizes[i-1]), n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, bhid=sigmoid_layer.b) self.AE_layers.append(AE_layer) # on top of the layers # log layer for fine-tuning self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs ) self.params.extend(self.logLayer.params) self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) self.errors = self.logLayer.errors(self.y) def pretraining_functions(self, batch_size): """ Generates a list of functions to time each AE training. 
:type batch_size: int :param batch_size: size of a [mini]batch """ index = T.lscalar('index') # index to a minibatch # beginning of a batch, given `index` batch_begin = index * batch_size # ending of a batch given `index` batch_end = batch_begin + batch_size forward_backward_step = [] forward_step_fns = [] i = 0 for AE in self.AE_layers: # get the cost and the updates list cost = AE.get_cost_updates() params = AE.params shared_cost = theano.shared(np.float32(0.0)) forward_step_fns.append( theano.function( [index], [], updates=[(shared_cost, cost)], givens={ self.x: self.train_set_x[batch_begin: batch_end], })) grads_temp = T.grad(cost, params) # This is both forward and backward forward_backward_step.append( theano.function( [index], grads_temp, givens={ self.x: self.train_set_x[batch_begin: batch_end], })) i += 1 return forward_backward_step, forward_step_fns def build_finetune_functions(self, batch_size): index = T.lscalar('index') # index to a [mini]batch # beginning of a batch, given `index` batch_begin = index * batch_size # ending of a batch given `index` batch_end = batch_begin + batch_size cost = self.finetune_cost shared_cost = theano.shared(np.float32(0.0)) forward_mlp = theano.function( [index], [], updates=[(shared_cost, cost)], givens={ self.x: self.train_set_x[batch_begin: batch_end], self.y: self.train_set_y[batch_begin: batch_end], }) grads_temp = T.grad(cost, self.params) # This is both forward and backward forward_backward_mlp = theano.function( [index], grads_temp, givens={ self.x: self.train_set_x[batch_begin: batch_end], self.y: self.train_set_y[batch_begin: batch_end], }) return forward_mlp, forward_backward_mlp
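Both function builders slice the shared dataset with batch_begin = index * batch_size and batch_end = batch_begin + batch_size. The same indexing in plain NumPy, for reference (note how the last slice can be shorter than batch_size):

import numpy as np

data = np.arange(10 * 3).reshape(10, 3)  # 10 samples, 3 features
batch_size = 4

def minibatch(index):
    batch_begin = index * batch_size
    batch_end = batch_begin + batch_size
    return data[batch_begin:batch_end]

for index in range(3):
    print(minibatch(index).shape)        # (4, 3), (4, 3), (2, 3)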
def __init__( self, numpy_rng, train_set_x, train_set_y, hidden_layers_sizes, n_ins=784, n_outs=10 ): """ This class is made to support a variable number of layers. :type numpy_rng: np.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type train_set_x: theano.shared float32 :param: train_set_x: Training data set, shape (n_samples, n_pixels) :type train_set_y: theano.shared, int32 :param: train_set_x: GT for training data, shape (n_samples) :type n_ins: int :param n_ins: dimension of the input to the SAE :type hidden_layers_sizes: list of ints :param hidden_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network """ self.sigmoid_layers = [] self.AE_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) self.train_set_x = train_set_x self.train_set_y = train_set_y assert self.n_layers > 0 self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of for i in xrange(self.n_layers): # used to be n layers # construct the sigmoid layer = encoder stack if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=(n_ins if i == 0 else hidden_layers_sizes[i-1]), n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) self.params.extend(sigmoid_layer.params) # init the DA_layer, takes weights from sigmoid layer AE_layer = AutoEncoder( numpy_rng=numpy_rng, input=layer_input, n_visible=(n_ins if i == 0 else hidden_layers_sizes[i-1]), n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, bhid=sigmoid_layer.b) self.AE_layers.append(AE_layer) # on top of the layers # log layer for fine-tuning self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs ) self.params.extend(self.logLayer.params) self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) self.errors = self.logLayer.errors(self.y)
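Each hidden layer takes the previous layer's output as its input, so the encoder stack amounts to repeated sigmoid(x W + b). A NumPy sketch of that chaining with arbitrary layer sizes (random weights for illustration, not the trained ones):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.RandomState(0)
sizes = [784, 500, 500]            # n_ins followed by hidden_layers_sizes
x = rng.rand(2, sizes[0])          # a mini-batch of 2 samples

out = x
for n_in, n_out in zip(sizes[:-1], sizes[1:]):
    W = rng.randn(n_in, n_out) * 0.01
    b = np.zeros(n_out)
    out = sigmoid(out.dot(W) + b)  # becomes the next layer's input
print(out.shape)                   # (2, 500)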
def prepare_network(): rng = numpy.random.RandomState(23455) print('Preparing Theano model...') mydatasets = load_initial_test_data() test_set_x, test_set_y, shared_test_set_y, valid_ds = mydatasets n_test_batches = test_set_x.get_value(borrow=True).shape[0] # allocate symbolic variables for the data index = T.lscalar() x = T.matrix('x') y = T.ivector('y') # image size layer0_w = 50 layer0_h = 50 layer1_w = (layer0_w - 4) // 2 layer1_h = (layer0_h - 4) // 2 layer2_w = (layer1_w - 2) // 2 layer2_h = (layer1_h - 2) // 2 layer3_w = (layer2_w - 2) // 2 layer3_h = (layer2_h - 2) // 2 ###################### # BUILD NETWORK # ###################### # image sizes batchsize = 1 in_channels = 20 in_width = 50 in_height = 50 #filter sizes flt_channels = 40 flt_time = 20 flt_width = 5 flt_height = 5 signals_shape = (batchsize, in_channels, in_height, in_width) filters_shape = (flt_channels, in_channels, flt_height, flt_width) layer0_input = x.reshape(signals_shape) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=signals_shape, filter_shape=filters_shape, poolsize=(2, 2)) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batchsize, flt_channels, layer1_w, layer1_h), filter_shape=(60, flt_channels, 3, 3), poolsize=(2, 2)) layer2 = LeNetConvPoolLayer(rng, input=layer1.output, image_shape=(batchsize, 60, layer2_w, layer2_h), filter_shape=(90, 60, 3, 3), poolsize=(2, 2)) layer3_input = layer2.output.flatten(2) layer3 = HiddenLayer(rng, input=layer3_input, n_in=90 * layer3_w * layer3_h, n_out=500, activation=T.tanh) layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8) cost = layer4.negative_log_likelihood(y) classify = theano.function( [index], outputs=layer4.get_output_labels(y), givens={ x: test_set_x[index * batchsize:(index + 1) * batchsize], y: test_set_y[index * batchsize:(index + 1) * batchsize] }) print('Loading network weights...') weightFile = '../live_count/weights.save' f = open(weightFile, 'rb') loaded_objects = [] for i in range(5): loaded_objects.append(pickle.load(f)) f.close() layer0.__setstate__(loaded_objects[0]) layer1.__setstate__(loaded_objects[1]) layer2.__setstate__(loaded_objects[2]) layer3.__setstate__(loaded_objects[3]) layer4.__setstate__(loaded_objects[4]) return test_set_x, test_set_y, shared_test_set_y, valid_ds, classify, batchsize
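The weight file read here holds one pickled state per layer, so loading is just consecutive pickle.load calls on the same handle. A self-contained round-trip sketch of that pattern (toy layer states and a hypothetical file name):

import pickle

states = [{'layer': i, 'W': [i] * 3} for i in range(5)]  # toy layer states

with open('toy_weights.save', 'wb') as f:
    for state in states:
        pickle.dump(state, f, protocol=pickle.HIGHEST_PROTOCOL)

with open('toy_weights.save', 'rb') as f:
    loaded = [pickle.load(f) for _ in range(len(states))]

print(loaded[0], loaded[-1])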
class lenet5(object): def __init__(self, learning_rate=0.1, n_epochs=200, nkerns=[20, 50], batch_size=500, img_size=28, img_dim=1, filtersize=(5, 5), poolsize=(2, 2), num_hidden=500, num_class=10, shuffle=True, cost_type ='nll_softmax', alpha_l1 = 0, alpha_l2 = 0, alpha_entropy=0, rng = np.random.RandomState(23455), logreg_activation=T.nnet.softmax, hidden_activation=relu, conv_activation=relu): """ Demonstrates lenet on MNIST dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: path to the dataset used for training /testing (MNIST here) :type nkerns: list of ints :param nkerns: number of kernels on each layer """ ##################### # assign parameters # self.learning_rate = learning_rate self.n_epochs = n_epochs self.nkerns = nkerns self.batch_size = batch_size self.img_size = img_size self.img_dim = img_dim self.filtersize = filtersize self.poolsize = poolsize self.num_hidden = num_hidden self.num_class = num_class self.shuffle = shuffle self.cost_type = cost_type self.alpha_l1 = alpha_l1 self.alpha_l2 = alpha_l2 self.alpha_entropy = alpha_entropy self.rng = rng self.logreg_activation = logreg_activation self.conv_activation = conv_activation self.hidden_activation = hidden_activation # assign parameters # ##################### # call build model to build theano and other expressions self.build_model() self.build_functions() # end def __init__ def build_model(self, flag_preserve_params=False): ################### # build the model # logging.info('... building the model') # allocate symbolic variables for the data self.index = T.lscalar() # index to a [mini]batch self.x = T.matrix('x') # the data is presented as rasterized images # self.y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels, used to represent labels given by # data # the y as features, used for taking in intermediate layer "y" values self.y = T.matrix('y') # Reshape matrix of rasterized images of shape (batch_size,28*28) # to a 4D tensor, compatible with our LeNetConvPoolLayer self.layer0_input = self.x.reshape((self.batch_size, self.img_dim, self.img_size, self.img_size)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1,28-5+1)=(24,24) # maxpooling reduces this further to (24/2,24/2) = (12,12) # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12) self.layer0 = LeNetConvPoolLayer(self.rng, input=self.layer0_input, image_shape=(self.batch_size, self.img_dim, self.img_size, self.img_size), filter_shape=(self.nkerns[0], self.img_dim, self.filtersize[0], self.filtersize[0]), poolsize=(self.poolsize[0], self.poolsize[0]), activation=self.conv_activation) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1,12-5+1)=(8,8) # maxpooling reduces this further to (8/2,8/2) = (4,4) # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4) self.img_size1 = (self.img_size - self.filtersize[0] + 1) / self.poolsize[0] self.layer1 = LeNetConvPoolLayer(self.rng, input=self.layer0.output, image_shape=(self.batch_size, self.nkerns[0], self.img_size1, self.img_size1), filter_shape=(self.nkerns[1], self.nkerns[0], self.filtersize[1], self.filtersize[1]), poolsize=(self.poolsize[1], self.poolsize[1]), activation=self.conv_activation) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) 
(i.e matrix of rasterized images). # This will generate a matrix of shape (20,32*4*4) = (20,512) self.layer2_input = self.layer1.output.flatten(2) self.img_size2 = (self.img_size1 - self.filtersize[1] + 1) / self.poolsize[1] # construct a fully-connected sigmoidal layer self.layer2 = HiddenLayer(self.rng, input=self.layer2_input, n_in=self.nkerns[1] * self.img_size2 * self.img_size2, n_out=self.num_hidden, activation=self.hidden_activation) # classify the values of the fully-connected sigmoidal layer self.layer3 = LogisticRegression(input=self.layer2.output, n_in=self.num_hidden, n_out=self.num_class, activation=self.logreg_activation) # regularization term self.decay_hidden = self.alpha_l1 * abs(self.layer2.W).sum() + \ self.alpha_l2 * (self.layer2.W ** 2).sum() self.decay_softmax = self.alpha_l1 * abs(self.layer3.W).sum() + \ self.alpha_l2 * (self.layer3.W ** 2).sum() # there's different choices of cost models if self.cost_type == 'nll_softmax': # the cost we minimize during training is the NLL of the model self.y = T.ivector('y') # index involved so has to use integer self.cost = self.layer3.negative_log_likelihood(self.y) + \ self.decay_hidden + self.decay_softmax + \ self.alpha_entropy * self.layer3.p_y_entropy elif self.cost_type == 'ssd_softmax': self.cost = T.mean((self.layer3.p_y_given_x - self.y) ** 2) + \ self.decay_hidden + self.decay_softmax elif self.cost_type == 'ssd_hidden': self.cost = T.mean((self.layer2.output - self.y) ** 2) + \ self.decay_hidden elif self.cost_type == 'ssd_conv': self.cost = T.mean((self.layer2_input - self.y) ** 2) # create a list of all model parameters to be fit by gradient descent # preserve parameters if the exist, used for keep parameter while # changing # some of the theano functions # but the user need to be aware that if the parameters should be kept # only if the network structure doesn't change if flag_preserve_params and hasattr(self, 'params'): pass params_temp = copy.deepcopy(self.params) else: params_temp = None self.params = self.layer3.params + self.layer2.params + self.layer1.params + self.layer0.params # if needed, assign old parameters if flag_preserve_params and (params_temp is not None): for ind in range(len(params_temp)): self.params[ind].set_value(params_temp[ind].get_value(), borrow=True) # create a list of gradients for all model parameters self.grads = T.grad(self.cost, self.params, disconnected_inputs='warn') # error function from the last layer logistic regression self.errors = self.layer3.errors # the above line will cause the crash of cPickle, need to use # __getstate__ and __setstate__ to deal with it # build the model # ################### # end def build_model def build_functions(self): # prediction methods self.fcns = {} self.fcns['predict_proba_batch'] = theano.function([self.x], self.layer3.p_y_given_x) self.fcns['predict_batch'] = theano.function([self.x], T.argmax(self.layer3.p_y_given_x, axis=1)) self.fcns['predict_hidden_batch'] = theano.function([self.x], self.layer2.output) self.fcns['predict_convout_batch'] = theano.function([self.x], self.layer2_input) # self.predict_proba_batch = theano.function([self.x], self.layer3.p_y_given_x) # self.predict_batch = theano.function([self.x], T.argmax(self.layer3.p_y_given_x, axis=1)) # self.predict_hidden_batch = theano.function([self.x], self.layer2.output) # self.predict_convout_batch = theano.function([self.x], self.layer2_input) # cost function for a single batch # suitable for negative_log_likelihood input y self.fcns['predict_cost_batch'] = theano.function([self.x, 
self.y], self.cost, allow_input_downcast=True) # predict entropy # this function is for debugging purpose self.fcns['predict_entropy_batch'] = theano.function([self.x], self.layer3.p_y_entropy) def predict_cost(self, X, y): return cost_batch_to_any_size(self.batch_size, self.fcns['predict_cost_batch'], X, y) # end def predict_cost def predict_proba(self, X): return batch_to_anysize(self.batch_size, self.fcns['predict_proba_batch'], X) # end def predict_proba def predict(self, X): return batch_to_anysize(self.batch_size, self.fcns['predict_batch'], X) # end def predict def predict_hidden(self, X): return batch_to_anysize(self.batch_size, self.fcns['predict_hidden_batch'], X) # end def predict_hidden def predict_convout(self, X): return batch_to_anysize(self.batch_size, self.fcns['predict_convout_batch'], X) # end def predict_convout # copy weight parameters from another lenet5 def copy_weights(self, clf): # check whether the weights should be copied if type(clf) is lenet5 and self.nkerns == clf.nkerns and self.img_size == clf.img_size and self.filtersize == clf.filtersize and self.poolsize == clf.poolsize and self.num_hidden == clf.num_hidden and self.num_class == clf.num_class: self.set_weights(clf.params) else: print "Weights not copied, the input classifier doesn't match the original classifier" # end def copy_weights def set_weights(self, params_other): ''' set weights from other trained network or recorded early stopping. Use this function with caution, because it doesn't check whether the weights are safe to copy ''' for ind in range(len(params_other)): self.params[ind].set_value(params_other[ind].get_value(), borrow=True) # end def set_weights ################################# # dealing with cPickle problems # def __getstate__(self): print '__getstate__ executed' saved_weights = [] for param in self.params: saved_weights.append(param.get_value()) list_to_del = ["index", "x", "y", "layer0_input", "layer0", "img_size1", "layer1", "layer2_input", "img_size2", "layer2", "layer3", "decay_hidden", "decay_softmax", "cost", "params", "grads", "errors", "fcns", ] state = self.__dict__.copy() state['saved_weights'] = saved_weights for key in state.keys(): if key in list_to_del: del state[key] # del state['errors'] # del state['fcns'] return state # end def __getstate__ def __setstate__(self, state): print '__setstate__ executed' self.__dict__ = state # self.errors = self.layer3.errors self.build_model() self.build_functions() for ind in range(len(state['saved_weights'])): self.params[ind].set_value(state['saved_weights'][ind])
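The __getstate__/__setstate__ pair above keeps only plain weight arrays and rebuilds the unpicklable Theano graph and compiled functions on load. The same pattern in miniature, with a toy class instead of the network (standalone, no Theano):

import pickle

class TinyModel(object):
    def __init__(self, weights=None):
        self.weights = weights if weights is not None else [0.0, 0.0]
        self.build_functions()

    def build_functions(self):
        # stands in for compiled Theano functions, which cannot be pickled
        self.predict = lambda x: sum(w * x for w in self.weights)

    def __getstate__(self):
        return {'weights': list(self.weights)}  # keep only the raw weights

    def __setstate__(self, state):
        self.weights = state['weights']
        self.build_functions()                  # recreate the function objects

m = TinyModel([1.5, -2.0])
m2 = pickle.loads(pickle.dumps(m))
print(m2.predict(3.0))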
input=layer1.output, image_shape=(batch_size, 60, layer2_w, layer2_h), filter_shape=(90, 60, 3, 3), poolsize=(2, 2), ) layer3_input = layer2.output.flatten(2) layer3 = HiddenLayer( rng, input=layer3_input, n_in=90 * layer3_w * layer3_h, n_out=500, activation=T.tanh, ) layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8) cost = layer4.negative_log_likelihood(y) classify = theano.function( [index], outputs=layer4.get_output_labels(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size], }, ) # load weights print("loading weights state") f = open("weights.save", "rb")
def build_model(self, flag_preserve_params=False): ################### # build the model # logging.info('... building the model') # allocate symbolic variables for the data self.index = T.lscalar() # index to a [mini]batch self.x = T.matrix('x') # the data is presented as rasterized images # self.y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels, used to represent labels given by # data # the y as features, used for taking in intermediate layer "y" values self.y = T.matrix('y') # Reshape matrix of rasterized images of shape (batch_size,28*28) # to a 4D tensor, compatible with our LeNetConvPoolLayer self.layer0_input = self.x.reshape((self.batch_size, self.img_dim, self.img_size, self.img_size)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1,28-5+1)=(24,24) # maxpooling reduces this further to (24/2,24/2) = (12,12) # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12) self.layer0 = LeNetConvPoolLayer(self.rng, input=self.layer0_input, image_shape=(self.batch_size, self.img_dim, self.img_size, self.img_size), filter_shape=(self.nkerns[0], self.img_dim, self.filtersize[0], self.filtersize[0]), poolsize=(self.poolsize[0], self.poolsize[0]), activation=self.conv_activation) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1,12-5+1)=(8,8) # maxpooling reduces this further to (8/2,8/2) = (4,4) # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4) self.img_size1 = (self.img_size - self.filtersize[0] + 1) / self.poolsize[0] self.layer1 = LeNetConvPoolLayer(self.rng, input=self.layer0.output, image_shape=(self.batch_size, self.nkerns[0], self.img_size1, self.img_size1), filter_shape=(self.nkerns[1], self.nkerns[0], self.filtersize[1], self.filtersize[1]), poolsize=(self.poolsize[1], self.poolsize[1]), activation=self.conv_activation) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images). 
# This will generate a matrix of shape (20,32*4*4) = (20,512) self.layer2_input = self.layer1.output.flatten(2) self.img_size2 = (self.img_size1 - self.filtersize[1] + 1) / self.poolsize[1] # construct a fully-connected sigmoidal layer self.layer2 = HiddenLayer(self.rng, input=self.layer2_input, n_in=self.nkerns[1] * self.img_size2 * self.img_size2, n_out=self.num_hidden, activation=self.hidden_activation) # classify the values of the fully-connected sigmoidal layer self.layer3 = LogisticRegression(input=self.layer2.output, n_in=self.num_hidden, n_out=self.num_class, activation=self.logreg_activation) # regularization term self.decay_hidden = self.alpha_l1 * abs(self.layer2.W).sum() + \ self.alpha_l2 * (self.layer2.W ** 2).sum() self.decay_softmax = self.alpha_l1 * abs(self.layer3.W).sum() + \ self.alpha_l2 * (self.layer3.W ** 2).sum() # there's different choices of cost models if self.cost_type == 'nll_softmax': # the cost we minimize during training is the NLL of the model self.y = T.ivector('y') # index involved so has to use integer self.cost = self.layer3.negative_log_likelihood(self.y) + \ self.decay_hidden + self.decay_softmax + \ self.alpha_entropy * self.layer3.p_y_entropy elif self.cost_type == 'ssd_softmax': self.cost = T.mean((self.layer3.p_y_given_x - self.y) ** 2) + \ self.decay_hidden + self.decay_softmax elif self.cost_type == 'ssd_hidden': self.cost = T.mean((self.layer2.output - self.y) ** 2) + \ self.decay_hidden elif self.cost_type == 'ssd_conv': self.cost = T.mean((self.layer2_input - self.y) ** 2) # create a list of all model parameters to be fit by gradient descent # preserve parameters if the exist, used for keep parameter while # changing # some of the theano functions # but the user need to be aware that if the parameters should be kept # only if the network structure doesn't change if flag_preserve_params and hasattr(self, 'params'): pass params_temp = copy.deepcopy(self.params) else: params_temp = None self.params = self.layer3.params + self.layer2.params + self.layer1.params + self.layer0.params # if needed, assign old parameters if flag_preserve_params and (params_temp is not None): for ind in range(len(params_temp)): self.params[ind].set_value(params_temp[ind].get_value(), borrow=True) # create a list of gradients for all model parameters self.grads = T.grad(self.cost, self.params, disconnected_inputs='warn') # error function from the last layer logistic regression self.errors = self.layer3.errors
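decay_hidden and decay_softmax add an L1 term (sum of absolute weights) and a squared-L2 term per weight matrix, each with its own coefficient. The per-matrix penalty in NumPy terms, for reference:

import numpy as np

def weight_decay(W, alpha_l1=0.0, alpha_l2=0.0001):
    """alpha_l1 * sum|W| + alpha_l2 * sum(W^2), as applied to each layer's W."""
    return alpha_l1 * np.abs(W).sum() + alpha_l2 * (W ** 2).sum()

rng = np.random.RandomState(0)
W_hidden = rng.randn(512, 500)
print(weight_decay(W_hidden, alpha_l1=0.0, alpha_l2=0.0001))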
class deep_sugar(object): def __init__(self, numpy_rng, theano_rng=None, y=None, alpha=0.9, sample_rate=0.1, n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10, corruption_levels=[0.1, 0.1], allX=None,allY=None,srng=None): self.sigmoid_layers = [] self.sugar_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) self.allXs = [] if y == None: self.y = tensor.ivector(name='y') else: self.y = y assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) self.x = tensor.matrix('x') self.x = tensor.matrix('x') self.y = tensor.ivector('y') self.y = tensor.ivector('y') for i in xrange(self.n_layers): if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output if i == 0: self.allXs.append(allX) else: self.allXs.append(tensor.dot(self.allXs[i-1], self.sigmoid_layers[-1].W) + self.sigmoid_layers[-1].b) sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=tensor.nnet.sigmoid) self.sigmoid_layers.append(sigmoid_layer) self.params.extend(sigmoid_layer.params) sugar_layer = sugar(numpy_rng=numpy_rng, alpha=alpha, sample_rate=sample_rate, x=layer_input, y=self.y, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, bhid=sigmoid_layer.b, allX=self.allXs[i], allY=allY, srng=srng) self.sugar_layers.append(sugar_layer) self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs) self.params.extend(self.logLayer.params) self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) self.errors = self.logLayer.errors(self.y) def pretraining_functions(self, train_set_x, train_set_y, batch_size): index = tensor.lscalar('index') index = tensor.lscalar('index') corruption_level = tensor.scalar('corruption') corruption_level = tensor.scalar('corruption') learning_rate = tensor.scalar('lr') learning_rate = tensor.scalar('lr') switch = tensor.iscalar('switch') n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size batch_begin = index * batch_size batch_end = batch_begin + batch_size pretrain_fns = [] for sugar in self.sugar_layers: cost, updates = sugar.get_cost_updates(corruption_level, learning_rate, switch) fn = function(inputs=[index, Param(corruption_level, default=0.2), Param(learning_rate, default=0.1), Param(switch, default=1)], outputs=[cost], updates=updates, givens={self.x: train_set_x[batch_begin:batch_end], self.y: train_set_y[batch_begin:batch_end]}, on_unused_input='ignore') pretrain_fns.append(fn) return pretrain_fns def build_finetune_functions(self, datasets, batch_size, learning_rate): (train_set_x, train_set_y) = datasets[0] (valid_set_x, valid_set_y) = datasets[1] (test_set_x, test_set_y) = datasets[2] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_valid_batches /= batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_test_batches /= batch_size index = tensor.lscalar('index') gparams = tensor.grad(self.finetune_cost, self.params) updates = [] for param, gparam in zip(self.params, gparams): updates.append((param, param - gparam * learning_rate)) train_fn = function(inputs=[index], outputs=self.finetune_cost, updates=updates, givens={ self.x: train_set_x[index * batch_size: (index + 1) * batch_size], self.y: train_set_y[index * batch_size: (index + 1) * batch_size]}) test_score_i = function([index], self.errors, givens={ self.x: test_set_x[index * 
batch_size: (index + 1) * batch_size], self.y: test_set_y[index * batch_size: (index + 1) * batch_size]}) valid_score_i = function([index], self.errors, givens={ self.x: valid_set_x[index * batch_size: (index + 1) * batch_size], self.y: valid_set_y[index * batch_size: (index + 1) * batch_size]}) def valid_score(): return [valid_score_i(i) for i in xrange(n_valid_batches)] def test_score(): return [test_score_i(i) for i in xrange(n_test_batches)] return train_fn, valid_score, test_score
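valid_score and test_score return one error value per minibatch; the caller is expected to average them. A small sketch of that aggregation (the per-batch scores below are made up):

import numpy as np

def mean_score(score_fn):
    """Average the per-minibatch errors returned by valid_score/test_score."""
    return float(np.mean(score_fn()))

# stand-in for valid_score(): zero-one error on each of 5 minibatches
fake_valid_score = lambda: [0.12, 0.10, 0.15, 0.11, 0.09]
print(mean_score(fake_valid_score))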
def __init__(self, numpy_rng, theano_rng=None, y=None, alpha=0.9, sample_rate=0.1, n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10, corruption_levels=[0.1, 0.1], allX=None, allY=None, srng=None): self.sigmoid_layers = [] self.sugar_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) self.allXs = [] if y == None: self.y = tensor.ivector(name='y') else: self.y = y assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2**30)) self.x = tensor.matrix('x') self.x = tensor.matrix('x') self.y = tensor.ivector('y') self.y = tensor.ivector('y') for i in xrange(self.n_layers): if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output if i == 0: self.allXs.append(allX) else: self.allXs.append( tensor.dot(self.allXs[i - 1], self.sigmoid_layers[-1].W) + self.sigmoid_layers[-1].b) sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=tensor.nnet.sigmoid) self.sigmoid_layers.append(sigmoid_layer) self.params.extend(sigmoid_layer.params) sugar_layer = sugar(numpy_rng=numpy_rng, alpha=alpha, sample_rate=sample_rate, x=layer_input, y=self.y, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, bhid=sigmoid_layer.b, allX=self.allXs[i], allY=allY, srng=srng) self.sugar_layers.append(sugar_layer) self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs) self.params.extend(self.logLayer.params) self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) self.errors = self.logLayer.errors(self.y)
def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop): lr = params["lr"] batch_size = params["batch_size"] sequence_length = params["seq_length"] # minibatch) X = T.matrix(name="input", dtype=dtype) # batch of sequence of vector Y = T.matrix(name="output", dtype=dtype) # batch of sequence of vector is_train = T.iscalar( 'is_train' ) # pseudo boolean for switching between training and prediction #CNN global parameters. subsample = (1, 1) p_1 = 0.5 border_mode = "same" cnn_batch_size = batch_size pool_size = (2, 2) #Layer1: conv2+pool+drop filter_shape = (128, 1, 10, 10) input_shape = (cnn_batch_size, 1, 144, 176 ) #input_shape= (samples, channels, rows, cols) input = X.reshape(input_shape) c1 = ConvLayer(rng, input, filter_shape, input_shape, border_mode, subsample, activation=nn.relu) p1 = PoolLayer(c1.output, pool_size=pool_size, input_shape=c1.output_shape) dl1 = DropoutLayer(rng, input=p1.output, prob=p_1, is_train=is_train) #Layer2: conv2+pool subsample = (1, 1) filter_shape = (256, p1.output_shape[1], 3, 3) c2 = ConvLayer(rng, dl1.output, filter_shape, p1.output_shape, border_mode, subsample, activation=nn.relu) p2 = PoolLayer(c2.output, pool_size=pool_size, input_shape=c2.output_shape) #Layer3: conv2+pool filter_shape = (256, p2.output_shape[1], 3, 3) c3 = ConvLayer(rng, p2.output, filter_shape, p2.output_shape, border_mode, subsample, activation=nn.relu) p3 = PoolLayer(c3.output, pool_size=pool_size, input_shape=c3.output_shape) #Layer4: conv2+pool filter_shape = (128, p3.output_shape[1], 3, 3) c4 = ConvLayer(rng, p3.output, filter_shape, p3.output_shape, border_mode, subsample, activation=nn.relu) p4 = PoolLayer(c4.output, pool_size=pool_size, input_shape=c4.output_shape) #Layer5: hidden n_in = reduce(lambda x, y: x * y, p4.output_shape[1:]) x_flat = p4.output.flatten(2) h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu) #Layer6: hidden lreg = LogisticRegression(rng, h1.output, 1024, params['n_output']) self.output = lreg.y_pred self.params = c1.params + c2.params + c3.params + c4.params + h1.params + lreg.params cost = get_err_fn(self, cost_function, Y) L2_reg = 0.0001 L2_sqr = theano.shared(0.) for param in self.params: L2_sqr += (T.sum(param[0]**2) + T.sum(param[1]**2)) cost += L2_reg * L2_sqr _optimizer = optimizer(cost, self.params, lr=lr) self.train = theano.function(inputs=[X, Y, is_train], outputs=cost, updates=_optimizer.getUpdates(), allow_input_downcast=True) self.predictions = theano.function(inputs=[X, is_train], outputs=self.output, allow_input_downcast=True) self.n_param = count_params(self.params)
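count_params presumably sums the number of elements over every shared parameter; the equivalent over plain arrays is a one-liner. A sketch with NumPy arrays standing in for the shared variables (the shapes below are illustrative):

import numpy as np

def count_params(params):
    """Total number of scalar weights across a list of arrays."""
    return sum(int(np.prod(p.shape)) for p in params)

params = [np.zeros((128, 1, 10, 10)), np.zeros(128),  # conv W, b
          np.zeros((1024, 64)), np.zeros(64)]         # dense W, b
print(count_params(params))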
class deep_sugar(object): def __init__(self, numpy_rng, theano_rng=None, y=None, alpha=0.9, sample_rate=0.1, n_ins=784, hidden_layers_sizes=[500, 500], n_outs=10, corruption_levels=[0.1, 0.1], allX=None, allY=None, srng=None): self.sigmoid_layers = [] self.sugar_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) self.allXs = [] if y == None: self.y = tensor.ivector(name='y') else: self.y = y assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2**30)) self.x = tensor.matrix('x') self.x = tensor.matrix('x') self.y = tensor.ivector('y') self.y = tensor.ivector('y') for i in xrange(self.n_layers): if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output if i == 0: self.allXs.append(allX) else: self.allXs.append( tensor.dot(self.allXs[i - 1], self.sigmoid_layers[-1].W) + self.sigmoid_layers[-1].b) sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=tensor.nnet.sigmoid) self.sigmoid_layers.append(sigmoid_layer) self.params.extend(sigmoid_layer.params) sugar_layer = sugar(numpy_rng=numpy_rng, alpha=alpha, sample_rate=sample_rate, x=layer_input, y=self.y, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, bhid=sigmoid_layer.b, allX=self.allXs[i], allY=allY, srng=srng) self.sugar_layers.append(sugar_layer) self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs) self.params.extend(self.logLayer.params) self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) self.errors = self.logLayer.errors(self.y) def pretraining_functions(self, train_set_x, train_set_y, batch_size): index = tensor.lscalar('index') index = tensor.lscalar('index') corruption_level = tensor.scalar('corruption') corruption_level = tensor.scalar('corruption') learning_rate = tensor.scalar('lr') learning_rate = tensor.scalar('lr') switch = tensor.iscalar('switch') n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size batch_begin = index * batch_size batch_end = batch_begin + batch_size pretrain_fns = [] for sugar in self.sugar_layers: cost, updates = sugar.get_cost_updates(corruption_level, learning_rate, switch) fn = function(inputs=[ index, Param(corruption_level, default=0.2), Param(learning_rate, default=0.1), Param(switch, default=1) ], outputs=[cost], updates=updates, givens={ self.x: train_set_x[batch_begin:batch_end], self.y: train_set_y[batch_begin:batch_end] }, on_unused_input='ignore') pretrain_fns.append(fn) return pretrain_fns def build_finetune_functions(self, datasets, batch_size, learning_rate): (train_set_x, train_set_y) = datasets[0] (valid_set_x, valid_set_y) = datasets[1] (test_set_x, test_set_y) = datasets[2] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_valid_batches /= batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_test_batches /= batch_size index = tensor.lscalar('index') gparams = tensor.grad(self.finetune_cost, self.params) updates = [] for param, gparam in zip(self.params, gparams): updates.append((param, param - gparam * learning_rate)) train_fn = function( inputs=[index], outputs=self.finetune_cost, updates=updates, givens={ self.x: train_set_x[index * batch_size:(index + 1) * batch_size], self.y: train_set_y[index * batch_size:(index + 1) * batch_size] }) test_score_i = function( [index], self.errors, givens={ self.x: 
test_set_x[index * batch_size:(index + 1) * batch_size], self.y: test_set_y[index * batch_size:(index + 1) * batch_size] }) valid_score_i = function( [index], self.errors, givens={ self.x: valid_set_x[index * batch_size:(index + 1) * batch_size], self.y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) def valid_score(): return [valid_score_i(i) for i in xrange(n_valid_batches)] def test_score(): return [test_score_i(i) for i in xrange(n_test_batches)] return train_fn, valid_score, test_score
def __init__(self, nkerns=[48, 48, 48, 48], miniBatchSize=200): rng = numpy.random.RandomState(23455) nClasses = 2 nMaxPool = 2 nHidden = 200 self.p = 95 #self.x = T.tensor3('x') # membrane data set self.x = T.tensor4('x') # membrane mini-batch self.y = T.ivector('y') # labels := 1D vector of [int] labels self.miniBatchSize = miniBatchSize # Reshape matrix of rasterized images # to a 4D tensor, # compatible with the LeNetConvPoolLayer #layer0_input = self.x.reshape((self.miniBatchSize, 1, self.p, self.p)) layer0_input = self.x #-------------------------------------------------- # LAYER 0 # layer0 convolution+max pool reduces image dimensions by: # 95 -> 92 -> 46 #-------------------------------------------------- fs0 = 4 # filter size, layer 0 os0 = (self.p - fs0 + 1) / nMaxPool # image out size 0 assert (os0 == 46) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(self.miniBatchSize, 1, self.p, self.p), filter_shape=(nkerns[0], 1, fs0, fs0), poolsize=(nMaxPool, nMaxPool)) #-------------------------------------------------- # LAYER 1 # layer1 convolution+max pool reduces image dimensions by: # 46 -> 42 -> 21 #-------------------------------------------------- fs1 = 5 # filter size, layer 1 os1 = (os0 - fs1 + 1) / nMaxPool # image out size 1 assert (os1 == 21) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(self.miniBatchSize, nkerns[0], os0, os0), filter_shape=(nkerns[1], nkerns[0], fs1, fs1), poolsize=(nMaxPool, nMaxPool)) #-------------------------------------------------- # LAYER 2 # layer2 convolution+max pool reduces image dimensions by: # 21 -> 18 -> 9 #-------------------------------------------------- fs2 = 4 os2 = (os1 - fs2 + 1) / nMaxPool assert (os2 == 9) layer2 = LeNetConvPoolLayer(rng, input=layer1.output, image_shape=(self.miniBatchSize, nkerns[0], os1, os1), filter_shape=(nkerns[2], nkerns[1], fs2, fs2), poolsize=(nMaxPool, nMaxPool)) #-------------------------------------------------- # LAYER 3 # layer3 convolution+max pool reduces image dimensions by: # 9 -> 6 -> 3 #-------------------------------------------------- fs3 = 4 os3 = (os2 - fs3 + 1) / nMaxPool assert (os3 == 3) layer3 = LeNetConvPoolLayer(rng, input=layer2.output, image_shape=(self.miniBatchSize, nkerns[0], os2, os2), filter_shape=(nkerns[3], nkerns[2], fs3, fs3), poolsize=(nMaxPool, nMaxPool)) #-------------------------------------------------- # LAYER 4 # Fully connected sigmoidal layer, goes from # 3*3*48 ~ 450 -> 200 #-------------------------------------------------- layer4_input = layer3.output.flatten(2) layer4 = HiddenLayer(rng, input=layer4_input, n_in=nkerns[3] * os3 * os3, n_out=nHidden, activation=T.tanh) #-------------------------------------------------- # LAYER 5 # Classification via a logistic regression layer # 200 -> 2 #-------------------------------------------------- # classify the values of the fully-connected sigmoidal layer layer5 = LogisticRegression(input=layer4.output, n_in=nHidden, n_out=nClasses) self.layers = (layer0, layer1, layer2, layer3, layer4, layer5)
def test_conv(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512], kern_shape=[9, 7],
              batch_size=200, verbose=False, loadmodel=False):
    """
    learning_rate: term for the gradient
    n_epochs: maximal number of epochs before exiting
    nkerns: number of kernels on each layer
    kern_shape: list of numbers with the dimensions of the kernels
    batch_size: number of examples in a minibatch
    verbose: whether to print out an epoch summary
    loadmodel: load parameters from a saved .npy file
    """
    # Folder for saving and loading parameters
    folder = 'results'

    # Seed the random generator
    rng = numpy.random.RandomState(1990)

    # Load the dataset
    datasets = load_faceScrub(theano_shared=True)

    # Functions for saving and loading parameters
    def save(folder):
        for param in params:
            print(str(param.name))
            numpy.save(os.path.join(folder, param.name + '.npy'), param.get_value())

    def load(folder):
        for param in params:
            param.set_value(numpy.load(os.path.join(folder, param.name + '.npy')))

    # Accessing the train, validation and test sets
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    ###############
    # BUILD MODEL #
    ###############
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 1 * 100 * 100)
    # to a 4D tensor, which is expected by theano
    layer0_input = x.reshape((batch_size, 1, 100, 100))

    # First convolutional pooling layer
    layer0 = ConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 100, 100),
        filter_shape=(nkerns[0], 1, kern_shape[0], kern_shape[0]),
        poolsize=(2, 2),
        idx=0
    )

    # Second layer
    layer1 = ConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 46, 46),
        filter_shape=(nkerns[1], nkerns[0], kern_shape[1], kern_shape[1]),
        poolsize=(2, 2),
        idx=1
    )

    # Flatten input for the fully connected layer
    layer2_input = layer1.output.flatten(2)

    # Fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 20 * 20,
        n_out=500,
        activation=T.tanh
    )

    # Output layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=530)

    # Cost function
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # Calculate validation error
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # Parameter list which needs updating
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # Load the parameters if requested
    if loadmodel:
        load(folder)

    # Gradient of the cost function w.r.t. the parameters
    grads = T.grad(cost, params)

    # Gradient descent update for every parameter
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    # Theano function for calculating the cost and updating the model
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    print('... training')
    train_net(train_model, validate_model, test_model, n_train_batches,
              n_valid_batches, n_test_batches, n_epochs, verbose)

    # Save parameters after training
    save(folder)
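# The updates list above is plain stochastic gradient descent: each parameter
# moves against its gradient, scaled by the learning rate. A numpy-only sketch
# of the same rule on hypothetical toy values (not the Theano graph above):

import numpy as np

def sgd_step(params, grads, learning_rate=0.1):
    """Return updated copies of params: p <- p - lr * g, as in the updates list."""
    return [p - learning_rate * g for p, g in zip(params, grads)]

w, b = np.array([1.0, -2.0]), np.array([0.5])
gw, gb = np.array([0.2, 0.4]), np.array([-0.1])
w, b = sgd_step([w, b], [gw, gb])
print(w)   # [ 0.98 -2.04]
print(b)   # [ 0.51]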
def __init__(self, nkerns=[48, 48, 48], miniBatchSize=200, nHidden=200, nClasses=2, nMaxPool=2, nChannels=1): """ nClasses : the number of target classes (e.g. 2 for binary classification) nMaxPool : number of pixels to max pool nChannels : number of input channels (e.g. 1 for single grayscale channel) """ rng = numpy.random.RandomState(23455) self.p = 65 self.miniBatchSize = miniBatchSize # Note: self.x and self.y will be re-bound to a subset of the # training/validation/test data dynamically by the update # stage of the appropriate function. self.x = T.tensor4('x') # membrane mini-batch self.y = T.ivector('y') # 1D vector of [int] labels # We now assume the input will already be reshaped to the # proper size (i.e. we don't need a theano resize op here). layer0_input = self.x #-------------------------------------------------- # LAYER 0 # layer0 convolution+max pool reduces image dimensions by: # 65 -> 62 -> 31 #-------------------------------------------------- fs0 = 4 # conv. filter size, layer 0 os0 = (self.p - fs0 + 1) / nMaxPool # image out size 0 assert (os0 == 31) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(self.miniBatchSize, nChannels, self.p, self.p), filter_shape=(nkerns[0], nChannels, fs0, fs0), poolsize=(nMaxPool, nMaxPool)) #-------------------------------------------------- # LAYER 1 # layer1 convolution+max pool reduces image dimensions by: # 31 -> 28 -> 14 #-------------------------------------------------- fs1 = 4 # filter size, layer 1 os1 = (os0 - fs1 + 1) / nMaxPool # image out size 1 assert (os1 == 14) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(self.miniBatchSize, nkerns[0], os0, os0), filter_shape=(nkerns[1], nkerns[0], fs1, fs1), poolsize=(nMaxPool, nMaxPool)) #-------------------------------------------------- # LAYER 2 # layer2 convolution+max pool reduces image dimensions by: # 14 -> 10 -> 5 #-------------------------------------------------- fs2 = 5 os2 = (os1 - fs2 + 1) / nMaxPool assert (os2 == 5) layer2 = LeNetConvPoolLayer(rng, input=layer1.output, image_shape=(self.miniBatchSize, nkerns[1], os1, os1), filter_shape=(nkerns[2], nkerns[1], fs2, fs2), poolsize=(nMaxPool, nMaxPool)) #-------------------------------------------------- # LAYER 3 # Fully connected sigmoidal layer, goes from # 5*5*48 -> 200 #-------------------------------------------------- layer3_input = layer2.output.flatten(2) layer3 = HiddenLayer(rng, input=layer3_input, n_in=nkerns[2] * os2 * os2, n_out=nHidden, activation=T.tanh) #-------------------------------------------------- # LAYER 4 # Classification via a logistic regression layer # 200 -> 2 #-------------------------------------------------- # classify the values of the fully-connected sigmoidal layer layer4 = LogisticRegression(input=layer3.output, n_in=nHidden, n_out=nClasses) self.layers = (layer0, layer1, layer2, layer3, layer4)
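# The constructor above only wires the layers together; training code elsewhere
# is expected to pull the parameters out of self.layers. A sketch of how a
# caller might do that, assuming each layer exposes a .params list as the other
# snippets in this file do (hypothetical usage, not part of the class):

def collect_params(net):
    params = []
    for layer in net.layers:
        params += layer.params
    return params

# e.g. for SGD updates:
#   params  = collect_params(net)
#   grads   = T.grad(cost, params)
#   updates = [(p, p - learning_rate * g) for p, g in zip(params, grads)]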
class SentConv(object): def __init__(self, learning_rate=0.1, L1_reg=0.00, L2_reg=0.0001, filter_hs=[3, 4, 5], filter_num=100, n_hidden=100, n_out=2, word_idx_map=None, wordvec=None, k=300, adjust_input=False): """ :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient :type L1_reg: float :param L1_reg: L1-norm's weight when added to the cost (see regularization) :type L2_reg: float :param L2_reg: L2-norm's weight when added to the cost (see regularization) """ self.learning_rate = learning_rate self.L1_reg = L1_reg self.L2_reg = L2_reg self.word_idx_map = word_idx_map rng = np.random.RandomState(3435) self.rng = rng self.k = k self.filter_num = filter_num self.filter_hs = filter_hs # Can be assigned at the fit step. self.batch_size = None self.epoch = 0 self.Words = theano.shared(value=wordvec, name="Words") X = T.matrix('X') Y = T.ivector('Y') self.X = X self.Y = Y layer0_input = self.Words[T.cast(X.flatten(), dtype='int32')].reshape((X.shape[0], X.shape[1], self.Words.shape[1])) self.layer0_input = layer0_input c_max_list = [] self.conv_layer_s = [] test_case = [] for filter_h in filter_hs: conv_layer = ConvLayer(rng, layer0_input, filter_h=filter_h, filter_num=filter_num, k=k) self.conv_layer_s.append(conv_layer) c_max_list.append(conv_layer.c_max) max_pooling_out = T.concatenate(c_max_list, axis=1) max_pooling_out_size = filter_num * len(filter_hs) self.hidden_layer = HiddenLayer(rng, max_pooling_out, max_pooling_out_size, n_hidden) self.lr_layer = LogisticRegression( input=self.hidden_layer.output, n_in=n_hidden, n_out=n_out, ) # L1 norm ; one regularization option is to enforce L1 norm to # be small self.L1 = ( sum([abs(conv_layer.W).sum() for conv_layer in self.conv_layer_s]) + abs(self.hidden_layer.W).sum() + abs(self.lr_layer.W).sum() ) # square of L2 norm ; one regularization option is to enforce # square of L2 norm to be small self.L2_sqr = ( sum([(conv_layer.W ** 2).sum() for conv_layer in self.conv_layer_s]) + (self.hidden_layer.W ** 2).sum() + (self.lr_layer.W ** 2).sum() ) # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically self.cost = ( self.negative_log_likelihood(Y) + self.L1_reg * self.L1 + self.L2_reg * self.L2_sqr ) # the parameters of the model are the parameters of the two layer it is # made out of self.params = [] # also adjust the input word vectors if adjust_input: self.params.append(self.Words) for conv_layer in self.conv_layer_s: self.params += conv_layer.params self.params += self.hidden_layer.params self.params += self.lr_layer.params # negative log likelihood of the MLP is given by the negative # log likelihood of the output of the model, computed in the # logistic regression layer def negative_log_likelihood(self, Y): return self.lr_layer.negative_log_likelihood(Y) # same holds for the function computing the number of errors def errors(self, Y): return self.lr_layer.errors(Y) def fit(self, datasets, batch_size=50, n_epochs=400): train_x, train_y, valid_x, valid_y = datasets self.batch_size = batch_size # compute number of minibatches for training, validation and testing train_len = train_x.get_value(borrow=True).shape[0] valid_len = valid_x.get_value(borrow=True).shape[0] n_train_batches = train_len / batch_size if train_len % batch_size != 0: n_train_batches += 1 n_valid_batches = valid_len / batch_size if valid_len % batch_size != 0: n_valid_batches += 1 print 'number of train mini batch: %s' % 
n_train_batches ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch X = self.X Y = self.Y learn_rate = T.scalar('Learning Rate') # compute the gradient of cost with respect to theta (sotred in params) # the resulting gradients will be stored in a list gparams gparams = [T.grad(self.cost, param) for param in self.params] # specify how to update the parameters of the model as a list of # (variable, update expression) pairs # given two lists of the same length, A = [a1, a2, a3, a4] and # B = [b1, b2, b3, b4], zip generates a list C of same size, where each # element is a pair formed from the two lists : # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)] updates = [ (param, param - learn_rate * gparam) for param, gparam in zip(self.params, gparams) ] # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index, learn_rate], outputs=self.cost, updates=updates, givens={ X: train_x[index * batch_size: (index + 1) * batch_size], Y: train_y[index * batch_size: (index + 1) * batch_size] } ) test_train_model = theano.function( inputs=[index], outputs=self.errors(Y), givens={ X: train_x[index * batch_size: (index + 1) * batch_size], Y: train_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( inputs=[index], outputs=self.errors(Y), givens={ X: valid_x[index * batch_size:(index + 1) * batch_size], Y: valid_y[index * batch_size:(index + 1) * batch_size] } ) ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 1000000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.9999 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = np.inf best_iter = 0 test_score = 0. 
start_time = timeit.default_timer() done_looping = False last_cost = np.inf sys.stdout.flush() logger.info('already traned number of epochs: %s' % self.epoch) epoch = self.epoch while (epoch < n_epochs) and (not done_looping): epoch += 1 avg_cost_list = [] for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_model(minibatch_index, self.learning_rate) avg_cost_list.append(minibatch_avg_cost) # print self.lr_layer.W.get_value() # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # print self.lr_layer.W.get_value() # print self.lr_layer.b.get_value() # train_losses = [test_train_model(i) for i in xrange(n_train_batches)] # this_train_loss = np.mean(train_losses) # # compute zero-one loss on validation set # validation_losses = [validate_model(i) for i # in xrange(n_valid_batches)] # this_validation_loss = np.mean(validation_losses) # train_all_precison, train_label_precision, train_label_recall = \ # self.test(train_x, train_y.eval()) # this_train_loss = 1 - train_all_precison valid_all_precison, valid_label_precision, valid_label_recall = \ self.test(valid_x, valid_y.eval()) this_validation_loss = 1 - valid_all_precison avg_cost = np.mean(avg_cost_list) if avg_cost >= last_cost: self.learning_rate *= 0.95 last_cost = avg_cost logger.info( 'epoch %i, learning rate: %f, avg_cost: %f, valid P: %f %%, valid_1_P: %s, valid_1_R: %s' % ( epoch, self.learning_rate, avg_cost, # (1 - this_train_loss) * 100, (1 - this_validation_loss) * 100., # train_label_precision[1], # train_label_recall[1], valid_label_precision[1], valid_label_recall[1] ) ) sys.stdout.flush() # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if ( this_validation_loss < best_validation_loss * improvement_threshold ): # Increase patience_increase times based on the current iteration. patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss best_iter = iter if patience <= iter: done_looping = True break self.epoch = epoch end_time = timeit.default_timer() logger.info(('Optimization complete. 
Best validation score of %f %% ' 'obtained at iteration %i') % (( 1 - best_validation_loss) * 100., best_iter + 1)) logger.info('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) def save(self, path): with open(path, 'wb') as f: pickle.dump(self, f, -1) logger.info('save model to path %s' % path) return None @classmethod def load(self, path): with open(path, 'rb') as f: return pickle.load(f) def predict(self, shared_x, batch_size=None): if not batch_size: batch_size = self.batch_size shared_x_len = shared_x.get_value(borrow=True).shape[0] n_batches = shared_x_len / batch_size if shared_x_len % batch_size != 0: n_batches += 1 index = T.lscalar() # index to a [mini]batch X = self.X predict_model = theano.function( inputs=[index], outputs=self.lr_layer.y_pred, givens={ X: shared_x[index * batch_size:(index + 1) * batch_size] } ) pred_y = np.concatenate([predict_model(i) for i in range(n_batches)]) return pred_y def test(self, shared_x, data_y, out_path=None): pred_y = self.predict(shared_x) if out_path: with codecs.open(out_path, 'wb') as f: f.writelines(['%s\t%s\n' % (x, y) for x, y in zip(data_y, pred_y)]) return evaluate(data_y, pred_y) def test_from_file(self, path, out_path=None, encoding='utf-8'): data_x = [] data_y = [] with codecs.open(path, 'rb', encoding=encoding) as f: for i, line in enumerate(f): tokens = line.strip('\n').split('\t') if len(tokens) != 2: raise ValueError('invalid line %s' % (i+1)) label = int(tokens[0]) sent = tokens[1] s = get_idx_from_sent(sent, self.word_idx_map) data_x.append(s) data_y.append(label) shared_x = theano.shared( value=np.asarray(data_x, dtype=theano.config.floatX), borrow='True' ) return self.test(shared_x, data_y, out_path=out_path)
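# SentConv.fit() decays the learning rate whenever the average minibatch cost
# stops improving (if avg_cost >= last_cost: self.learning_rate *= 0.95).
# A self-contained sketch of that plateau schedule on made-up cost values
# (illustrative only):

def decay_on_plateau(costs, lr=0.1, factor=0.95):
    """Apply SentConv's plateau rule over a cost sequence and return the final lr."""
    last_cost = float('inf')
    for cost in costs:
        if cost >= last_cost:
            lr *= factor
        last_cost = cost
    return lr

print(decay_on_plateau([1.0, 0.8, 0.85, 0.7, 0.7]))   # decayed twice, ~= 0.09025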
class CNN: def __init__(self, configfile, train = False): self.config = readConfig(configfile) self.addInputSize = 1 logger.info("additional mlp input") wordvectorfile = self.config["wordvectors"] logger.info("wordvectorfile " + str(wordvectorfile)) networkfile = self.config["net"] logger.info("networkfile " + str(networkfile)) hiddenunits = int(self.config["hidden"]) logger.info("hidden units " + str(hiddenunits)) hiddenunitsNER = 50 if "hiddenunitsNER" in self.config: hiddenunitsNER = int(self.config["hiddenunitsNER"]) logger.info("hidden units NER " + str(hiddenunitsNER)) learning_rate = float(self.config["lrate"]) logger.info("learning rate " + str(learning_rate)) if train: self.batch_size = int(self.config["batchsize"]) else: self.batch_size = 1 logger.info("batch size " + str(self.batch_size)) self.filtersize = [1,int(self.config["filtersize"])] nkerns = [int(self.config["nkerns"])] logger.info("nkerns " + str(nkerns)) pool = [1, int(self.config["kmax"])] self.contextsize = int(self.config["contextsize"]) logger.info("contextsize " + str(self.contextsize)) if self.contextsize < self.filtersize[1]: logger.info("setting filtersize to " + str(self.contextsize)) self.filtersize[1] = self.contextsize logger.info("filtersize " + str(self.filtersize)) sizeAfterConv = self.contextsize - self.filtersize[1] + 1 sizeAfterPooling = -1 if sizeAfterConv < pool[1]: logger.info("setting poolsize to " + str(sizeAfterConv)) pool[1] = sizeAfterConv sizeAfterPooling = pool[1] logger.info("kmax pooling: k = " + str(pool[1])) # reading word vectors self.wordvectors, self.vectorsize = readWordvectors(wordvectorfile) self.representationsize = self.vectorsize + 1 rng = numpy.random.RandomState(23455) if train: seed = rng.get_state()[1][0] logger.info("seed: " + str(seed)) # allocate symbolic variables for the data self.index = T.lscalar() # index to a [mini]batch self.xa = T.matrix('xa') # left context self.xb = T.matrix('xb') # middle context self.xc = T.matrix('xc') # right context self.y = T.imatrix('y') # label (only present in training) self.yNER1 = T.imatrix('yNER1') # label for first entity self.yNER2 = T.imatrix('yNER2') # label for second entity ishape = [self.representationsize, self.contextsize] # this is the size of context matrizes ###################### # BUILD ACTUAL MODEL # ###################### logger.info('... building the model') # Reshape input matrix to be compatible with our LeNetConvPoolLayer layer0a_input = self.xa.reshape((self.batch_size, 1, ishape[0], ishape[1])) layer0b_input = self.xb.reshape((self.batch_size, 1, ishape[0], ishape[1])) layer0c_input = self.xc.reshape((self.batch_size, 1, ishape[0], ishape[1])) self.y_reshaped = self.y.reshape((self.batch_size, 1)) yNER1reshaped = self.yNER1.reshape((self.batch_size, 1)) yNER2reshaped = self.yNER2.reshape((self.batch_size, 1)) # Construct convolutional pooling layer: filter_shape = (nkerns[0], 1, self.representationsize, self.filtersize[1]) poolsize=(pool[0], pool[1]) fan_in = numpy.prod(filter_shape[1:]) fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)) W_bound = numpy.sqrt(6. 
/ (fan_in + fan_out)) # the convolution weight matrix convW = theano.shared(numpy.asarray( rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=theano.config.floatX), borrow=True) # the bias is a 1D tensor -- one bias per output feature map b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) convB = theano.shared(value=b_values, borrow=True) self.layer0a = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0a_input, image_shape=(self.batch_size, 1, ishape[0], ishape[1]), filter_shape=filter_shape, poolsize=poolsize) self.layer0b = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0b_input, image_shape=(self.batch_size, 1, ishape[0], ishape[1]), filter_shape=filter_shape, poolsize=poolsize) self.layer0c = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0c_input, image_shape=(self.batch_size, 1, ishape[0], ishape[1]), filter_shape=filter_shape, poolsize=poolsize) #layer0_output = T.concatenate([self.layer0a.output, self.layer0b.output, self.layer0c.output], axis = 3) layer0aflattened = self.layer0a.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling)) layer0bflattened = self.layer0b.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling)) layer0cflattened = self.layer0c.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling)) layer0_output = T.concatenate([layer0aflattened, layer0bflattened, layer0cflattened], axis = 1) self.layer1a = HiddenLayer(rng = rng, input = self.yNER1, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh) self.layer1b = HiddenLayer(rng = rng, input = self.yNER2, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh, W = self.layer1a.W, b = self.layer1a.b) layer2_input = T.concatenate([layer0_output, self.layer1a.output, self.layer1b.output], axis = 1) layer2_inputSize = 3 * nkerns[0] * sizeAfterPooling + 2 * hiddenunitsNER self.additionalFeatures = T.matrix('additionalFeatures') additionalFeatsShaped = self.additionalFeatures.reshape((self.batch_size, 1)) layer2_input = T.concatenate([layer2_input, additionalFeatsShaped], axis = 1) layer2_inputSize += self.addInputSize self.layer2 = HiddenLayer(rng, input=layer2_input, n_in=layer2_inputSize, n_out=hiddenunits, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer self.layer3 = LogisticRegression(input=self.layer2.output, n_in=hiddenunits, n_out=23) # create a list of all model parameters self.paramList = [self.layer3.params, self.layer2.params, self.layer1a.params, self.layer0a.params] self.params = [] for p in self.paramList: self.params += p logger.info(p) if not train: self.gotNetwork = 1 # load parameters if not os.path.isfile(networkfile): logger.error("network file does not exist") self.gotNetwork = 0 else: save_file = open(networkfile, 'rb') for p in self.params: p.set_value(cPickle.load(save_file), borrow=False) save_file.close() def classify(self, candidateAndFillerAndOffsetList, slot): ############## # TEST MODEL # ############## logger.info('... 
testing') index = T.lscalar() # index to a [mini]batch if self.gotNetwork == 0: return [] inputMatrixDev_a, inputMatrixDev_b, inputMatrixDev_c, lengthListDev_a, lengthListDev_b, lengthListDev_c, inputFeaturesDev, _ = getInput(candidateAndFillerAndOffsetList, self.representationsize, self.contextsize, self.filtersize, self.wordvectors, self.vectorsize) # create input matrix and save them in valid_set slot2types, binarizer, numTypes = getSlot2Types() yner1Dev = [] yner2Dev = [] type1bin = binarizer.transform([slot2types[slot][0]]) type2bin = binarizer.transform([slot2types[slot][1]]) dt = theano.config.floatX for item in range(len(inputMatrixDev_a)): yner1Dev.append(type1bin) yner2Dev.append(type2bin) yner1DevNumpy = numpy.array(yner1Dev, dtype = numpy.int32) yner2DevNumpy = numpy.array(yner2Dev, dtype = numpy.int32) valid_set_xa = theano.shared(numpy.matrix(inputMatrixDev_a, dtype = dt)) valid_set_xb = theano.shared(numpy.matrix(inputMatrixDev_b, dtype = dt)) valid_set_xc = theano.shared(numpy.matrix(inputMatrixDev_c, dtype = dt)) valid_mlp = theano.shared(numpy.matrix(inputFeaturesDev, dtype = dt)) valid_yner1 = theano.shared(yner1DevNumpy.reshape(yner1DevNumpy.shape[0], yner1DevNumpy.shape[2])) valid_yner2 = theano.shared(yner2DevNumpy.reshape(yner2DevNumpy.shape[0], yner2DevNumpy.shape[2])) # compute number of minibatches for testing n_valid_batches = valid_set_xa.get_value(borrow=True).shape[0] n_valid_batches /= self.batch_size input_dict = {} input_dict[self.xa] = valid_set_xa[index * self.batch_size: (index + 1) * self.batch_size] input_dict[self.xb] = valid_set_xb[index * self.batch_size: (index + 1) * self.batch_size] input_dict[self.xc] = valid_set_xc[index * self.batch_size: (index + 1) * self.batch_size] input_dict[self.yNER1] = valid_yner1[index * self.batch_size: (index + 1) * self.batch_size] input_dict[self.yNER2] = valid_yner2[index * self.batch_size: (index + 1) * self.batch_size] input_dict[self.additionalFeatures] = valid_mlp[index * self.batch_size: (index + 1) * self.batch_size] test_model_confidence = theano.function([index], self.layer3.results(), givens = input_dict) resultList = [test_model_confidence(i) for i in xrange(n_valid_batches)] return resultList
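# The constructor above clips the convolution filter width to the context size
# and the k of k-max pooling to the post-convolution length. A stand-alone
# restatement of that size bookkeeping (plain Python; here k is always set,
# whereas __init__ only assigns sizeAfterPooling when the clip triggers):

def kmax_pool_geometry(contextsize, filtersize, kmax):
    """Return (effective_filtersize, size_after_conv, effective_k)."""
    filtersize = min(filtersize, contextsize)
    size_after_conv = contextsize - filtersize + 1
    k = min(kmax, size_after_conv)
    return filtersize, size_after_conv, k

print(kmax_pool_geometry(contextsize=20, filtersize=3, kmax=5))   # (3, 18, 5)
print(kmax_pool_geometry(contextsize=2, filtersize=3, kmax=5))    # (2, 1, 1)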
def start(inputfile): global in_time, out_time, cooldown_in_time, cooldown_out_time, classify global global_counter, winner_stride, cur_state, in_frame_num, actions_counter global test_set_x, test_set_y, shared_test_set_y rng = numpy.random.RandomState(23455) # ####################### build start ######################## # create an empty shared variables to be filled later data_x = numpy.zeros([1, 20 * 50 * 50]) data_y = numpy.zeros(20) train_set = (data_x, data_y) (test_set_x, test_set_y, shared_test_set_y) = \ shared_dataset(train_set) print 'building ... ' batch_size = 1 # allocate symbolic variables for the data index = T.lscalar() x = T.matrix('x') y = T.ivector('y') # image size layer0_w = 50 layer0_h = 50 layer1_w = (layer0_w - 4) / 2 layer1_h = (layer0_h - 4) / 2 layer2_w = (layer1_w - 2) / 2 layer2_h = (layer1_h - 2) / 2 layer3_w = (layer2_w - 2) / 2 layer3_h = (layer2_h - 2) / 2 # ##################### # BUILD ACTUAL MODEL # # ##################### # image sizes batchsize = batch_size in_channels = 20 in_width = 50 in_height = 50 # filter sizes flt_channels = 40 flt_time = 20 flt_width = 5 flt_height = 5 signals_shape = (batchsize, in_channels, in_height, in_width) filters_shape = (flt_channels, in_channels, flt_height, flt_width) layer0_input = x.reshape(signals_shape) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=signals_shape, filter_shape=filters_shape, poolsize=(2, 2)) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, flt_channels, layer1_w, layer1_h), filter_shape=(60, flt_channels, 3, 3), poolsize=(2, 2)) layer2 = LeNetConvPoolLayer(rng, input=layer1.output, image_shape=(batch_size, 60, layer2_w, layer2_h), filter_shape=(90, 60, 3, 3), poolsize=(2, 2)) layer3_input = layer2.output.flatten(2) layer3 = HiddenLayer(rng, input=layer3_input, n_in=90 * layer3_w * layer3_h, n_out=500, activation=T.tanh) layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8) cost = layer4.negative_log_likelihood(y) classify = theano.function( [index], outputs=layer4.get_output_labels(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) # load weights print 'loading weights state' f = file('weights.save', 'rb') loaded_objects = [] for i in range(5): loaded_objects.append(cPickle.load(f)) f.close() layer0.__setstate__(loaded_objects[0]) layer1.__setstate__(loaded_objects[1]) layer2.__setstate__(loaded_objects[2]) layer3.__setstate__(loaded_objects[3]) layer4.__setstate__(loaded_objects[4]) # ####################### build done ######################## fromCam = False if fromCam: print 'using camera input' cap = cv2.VideoCapture(0) else: print 'using input file: ', inputfile cap = cv2.VideoCapture(inputfile) # my timing frame_rate = 5 frame_interval_ms = 1000 / frame_rate fourcc = cv2.VideoWriter_fourcc(*'XVID') video_writer = cv2.VideoWriter('../out/live_out.avi', fourcc, frame_rate, (640, 480)) frame_counter = 0 (ret, frame) = cap.read() proFrame = process_single_frame(frame) # init detectors st_a_det = RepDetector(proFrame, detector_strides[0]) st_b_det = RepDetector(proFrame, detector_strides[1]) st_c_det = RepDetector(proFrame, detector_strides[2]) frame_wise_counts = [] while True: in_frame_num += 1 if in_frame_num % 2 == 1: continue (ret, frame) = cap.read() if ret == 0: print 'unable to read frame' break proFrame = process_single_frame(frame) # handle stride A.... 
if frame_counter % st_a_det.stride_number == 0: st_a_det.count(proFrame) # handle stride B if frame_counter % st_b_det.stride_number == 0: st_b_det.count(proFrame) # handle stride C if frame_counter % st_c_det.stride_number == 0: st_c_det.count(proFrame) # display result on video................ blue_color = (130, 0, 0) green_color = (0, 130, 0) red_color = (0, 0, 130) orange_color = (0, 140, 0xFF) out_time = in_frame_num / 60 if cur_state == state.IN_REP and (out_time - in_time < 4 or global_counter < 5): draw_str(frame, (20, 120), ' new hypothesis (%d) ' % global_counter, orange_color, 1.5) if cur_state == state.IN_REP and out_time - in_time >= 4 \ and global_counter >= 5: draw_str( frame, (20, 120), 'action %d: counting... %d' % (actions_counter, global_counter), green_color, 2) if cur_state == state.COOLDOWN and global_counter >= 5: draw_str( frame, (20, 120), 'action %d: done. final counting: %d' % (actions_counter, global_counter), blue_color, 2) # print "pls", global_counter frame_wise_counts.append(global_counter) # print 'action %d: done. final counting: %d' % (actions_counter, global_counter) print "Dhruv", frame_wise_counts, global_counter return frame_wise_counts
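# start() restores the five layers from 'weights.save' by unpickling one state
# object per layer and calling __setstate__. The complementary save step might
# look like the sketch below; it assumes each layer class also implements
# __getstate__ (not shown in this file), so treat it as a hypothetical mirror
# of the loading loop rather than the training-side code itself.

import cPickle

def save_weights(layers, path='weights.save'):
    f = file(path, 'wb')
    for layer in layers:
        cPickle.dump(layer.__getstate__(), f, cPickle.HIGHEST_PROTOCOL)
    f.close()

# save_weights([layer0, layer1, layer2, layer3, layer4])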
    layer2_inputSize = layer1outputsize
    layer2_input = layer1flattened
elif combinationMethod == "noAtt":
    layer2_inputSize = layer0outputsize
    layer2_input = layer0flattened
else:
    # concatenation
    layer2_inputSize = layer0outputsize + layer1outputsize
    layer2_input = T.concatenate([layer0flattened, layer1flattened], axis = 1)

if useHiddenLayer:
    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=layer2_inputSize,
                         n_out=hiddenunits, activation=T.tanh)
    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=hiddenunits, n_out=2)
else:
    # classify the concatenated input features directly
    layer3 = LogisticRegression(input=layer2_input, n_in=layer2_inputSize, n_out=2)

# create a list of all model non-bricks parameters
paramList = [layer3.params]
if useHiddenLayer:
    paramList.append(layer2.params)
if combinationMethod != "noAtt":
    paramList.append(layer1.params)
# params from layer0 already have the blocks role
params = []
for p in paramList:
    for i, p_part in enumerate(p):
        if i == 0: