def build(self):
        
        # input and output variables
        x = T.matrix('x')
        y = T.matrix('y')
        index = T.lscalar() 
        batch_count = T.lscalar() 
        LR = T.scalar('LR', dtype=theano.config.floatX)
        M = T.scalar('M', dtype=theano.config.floatX)

        # before the build, x, y, index, LR and M are symbolic variables;
        # after the build, the compiled functions below take numeric values
        # (see the sketch after this example)
        
        self.train_batch = theano.function(inputs=[index,LR,M], updates=self.model.updates(x,y,LR,M),givens={ 
                x: self.shared_x[index * self.batch_size:(index + 1) * self.batch_size], 
                y: self.shared_y[index * self.batch_size:(index + 1) * self.batch_size]},
                name = "train_batch", on_unused_input='warn')
        
        self.test_batch = theano.function(inputs=[index],outputs=self.model.errors(x,y),givens={
                x: self.shared_x[index * self.batch_size:(index + 1) * self.batch_size], 
                y: self.shared_y[index * self.batch_size:(index + 1) * self.batch_size]},
                name = "test_batch")
                
        if self.format == "DFXP" :  
            self.update_range = theano.function(inputs=[batch_count],updates=self.model.range_updates(batch_count), name = "update_range")
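A minimal, self-contained sketch of that symbolic-to-numeric idea (the variable names and data here are illustrative, not part of the example above): T.lscalar creates a symbolic 64-bit integer, and theano.function compiles the slicing graph into a callable that takes plain numbers.

import numpy as np
import theano
import theano.tensor as T

index = T.lscalar('index')                    # symbolic minibatch index
data = theano.shared(np.arange(10.0))         # shared storage the graph can slice
take_batch = theano.function([index], data[index * 2:(index + 1) * 2])
print(take_batch(1))                          # numeric call after compilation -> [2. 3.]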
Example #2
    def __compileFunctions(self):

        self.__logger.info("Compiling computational graph:")

        index = T.lscalar('index')

        miniBatchSize = T.lscalar('miniBatchSize')


        self.__logger.info(" - Setting up and compiling outputs")
        self.__setUpOutputs(self.input)

        self.__logger.info(" - Setting up and compiling cost functions")
        self.__setUpCostFunctions(self.input,
                                  self.output,
                                  self.supCostWeight,
                                  self.unsupCostWeight)

        self.__logger.info(" - Setting up and compiling optimizers")
        self.__setUpOptimizers(index,
                               miniBatchSize,
                               self.input,
                               self.output,
                               self.epsilon,
                               self.decay,
                               self.momentum)

        self.__setUpHelpers(index,miniBatchSize)
Example #3
    def train_model(self, X_train, Y_train, X_valid, Y_valid,
                    num_epochs=3000, learning_rate=0.001, batch_size=20,
                    L1_reg=0., L2_reg=0.):

        logging.info('... training model (learning_rate: %f)' % learning_rate)

        cost = self.NLL + L1_reg*self.L1 + L2_reg*self.L2_sqr

        grads = T.grad(cost=cost, wrt=self.params)
        updates = [[param, param - learning_rate*grad]
                   for param, grad in zip(self.params, grads)]

        start = T.lscalar()
        end = T.lscalar()

        train = theano.function(
            inputs=[start, end],
            outputs=cost,
            updates=updates,
            givens={
                self.X: X_train[start:end],
                self.Y: Y_train[start:end]
            }
        )

        validate = theano.function(
            inputs=[start, end],
            outputs=[cost, self.py_x],
            givens={
                self.X: X_valid[start:end],
                self.Y: Y_valid[start:end]
            }
        )

        m_train = X_train.get_value(borrow=True).shape[0]
        m_valid = X_valid.get_value(borrow=True).shape[0]

        stopping_criteria = StoppingCriteria()
        index = range(0, m_train+1, batch_size)

        y_valid = np.argmax(Y_valid.get_value(borrow=True), axis=1)
        for i in range(num_epochs):
            costs = [train(index[j], index[j+1]) for j in range(len(index)-1)]
            E_tr = np.mean(costs)

            E_va, py_x = validate(0, m_valid)
            y_pred = np.argmax(py_x, axis=1)
            A_valid = AccuracyTable(y_pred, y_valid)

            stopping_criteria.append(E_tr, E_va)
            logging.debug('epoch %3d/%d. Cost: %f  Validation: Q3=%.2f%% C3=%f'
                          '(%.2f %.2f %.2f)',
                          i+1, num_epochs, E_tr, A_valid.Q3, A_valid.C3,
                          A_valid.Ch, A_valid.Ce, A_valid.Cc)

            if stopping_criteria.PQ(1):
                logging.debug('Early Stopping!')
                break

        return stopping_criteria
Example #4
    def fiting_variables(self, batch_size, train_set_x, train_set_y, test_set_x=None):
        """Sets useful variables for locating batches"""    
        self.index = T.lscalar('index')    # index to a [mini]batch
        self.n_ex = T.lscalar('n_ex')      # total number of examples

        assert type(batch_size) in (IntType, FloatType), "Batch size must be an integer or a float."
        if type(batch_size) is FloatType:
            warnings.warn('Provided batch_size is FloatType, value has been truncated')
            batch_size = int(batch_size)
        # Proper implementation of variable-batch size evaluation
        # Note that the last batch may be a smaller size
        # So we keep around the effective_batch_size (whose last element may
        # be smaller than the rest)
        # And weight the reported error by the batch_size when we average
        # Also, by keeping batch_start and batch_stop as symbolic variables,
        # we make the theano function easier to read
        # (see the usage sketch after this example)
        self.batch_start = self.index * batch_size
        self.batch_stop = T.minimum(self.n_ex, (self.index + 1) * batch_size)
        self.effective_batch_size = self.batch_stop - self.batch_start

        self.get_batch_size = theano.function(inputs=[self.index, self.n_ex],
                                          outputs=self.effective_batch_size)

        # compute number of minibatches for training
        # note that cases are the second dimension, not the first
        self.n_train = train_set_x.get_value(borrow=True).shape[0]
        self.n_train_batches = int(np.ceil(1.0 * self.n_train / batch_size))
        if test_set_x is not None:
            self.n_test = test_set_x.get_value(borrow=True).shape[0]
            self.n_test_batches = int(np.ceil(1.0 * self.n_test / batch_size))
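A hedged usage sketch of the effective-batch-size logic above, written standalone with made-up sizes rather than the class attributes: the last minibatch may be smaller, and weighting per-batch errors by these sizes gives the correct dataset-level average.

import numpy as np
import theano
import theano.tensor as T

index = T.lscalar('index')
n_ex = T.lscalar('n_ex')
batch_size = 32
batch_stop = T.minimum(n_ex, (index + 1) * batch_size)
effective_batch_size = batch_stop - index * batch_size
get_batch_size = theano.function([index, n_ex], effective_batch_size)

n_train = 100                                    # not a multiple of 32
n_batches = int(np.ceil(1.0 * n_train / batch_size))
sizes = [int(get_batch_size(i, n_train)) for i in range(n_batches)]
print(sizes)                                     # [32, 32, 32, 4]
# e.g. np.average(per_batch_errors, weights=sizes) gives the dataset-level error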
Example #5
    def compile(self, objective, optimizer, constraints=None):
        if not constraints:
            constraints = [lambda x: x for _ in self.params]

        # Dummy variables as placeholders for the training data, which
        # need to be shared tensor variables; real data is swapped in
        # later with set_value (see the sketch after this example)
        self.X_train = shared_vals(np.zeros((2, 2)), name='X_train')
        self.Y_train = shared_vals(np.zeros((2, 2)), name='Y_train')

        batch_ix = T.lscalar('ix')
        batch_size = T.lscalar('size')
        y_sym = T.matrix('Y')
        loss = objective(y_sym, self.output)
        updates = optimizer.get_updates(self.params, constraints, loss)
        self.train = theano.function(
            inputs=[batch_ix, batch_size],
            outputs=loss,
            updates=updates,
            givens={
                self.X: self.X_train[batch_ix * batch_size: (batch_ix + 1) * batch_size],
                y_sym : self.Y_train[batch_ix * batch_size: (batch_ix + 1) * batch_size]
            }
        )

        self._predict = theano.function(
            inputs=[self.X],
            outputs=self.output
        )
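A hedged sketch of how the dummy shared placeholders above are meant to be used (the model instance and array shapes are hypothetical): real data is pushed in later with set_value, and the already-compiled train function sees it through the givens slices without recompiling.

import numpy as np

X = np.random.randn(500, 10)      # hypothetical training inputs
Y = np.random.randn(500, 3)       # hypothetical training targets
model.X_train.set_value(X)        # `model` is an instance of the class above (assumed)
model.Y_train.set_value(Y)

batch_size = 50
for ix in range(len(X) // batch_size):
    loss = model.train(ix, batch_size)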
Example #6
 def pretraining_functions(self, train_set_x, train_set_y, batch_size):
     index = tensor.lscalar('index')
     corruption_level = tensor.scalar('corruption')
     learning_rate = tensor.scalar('lr')
     switch = tensor.iscalar('switch')
     n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
     batch_begin = index * batch_size
     batch_end = batch_begin + batch_size
     pretrain_fns = []
     for sugar in self.sugar_layers:
         cost, updates = sugar.get_cost_updates(corruption_level,
                                             learning_rate,
                                             switch)
         fn = function(inputs=[index,
                                      Param(corruption_level, default=0.2),
                                      Param(learning_rate, default=0.1),
                                      Param(switch, default=1)],
                              outputs=[cost],
                              updates=updates,
                              givens={self.x: train_set_x[batch_begin:batch_end],
                                      self.y: train_set_y[batch_begin:batch_end]}, on_unused_input='ignore')
         pretrain_fns.append(fn)
     return pretrain_fns
Example #7
def train_rnn():
    rng = numpy.random.RandomState(1234)

    q = T.lvector("q")
    pos = T.lscalar("pos")
    neg = T.lscalar("neg")
    inputs = [q, pos, neg]

    embLayer = emb_layer(None, 100, 5)
    rnn = rnn_layer(input=inputs, emb_layer=embLayer, nh=5)

    cost = rnn.loss()
    gradient = T.grad(cost, rnn.params)
    lr = 0.001
    updates = OrderedDict((p, p - lr * g) for p, g in zip(rnn.params, gradient))
    train = theano.function(inputs=[q, pos, neg], outputs=cost, updates=updates)

    print rnn.emb.eval()[0]
    e0 = rnn.emb.eval()

    for i in range(0, 3):
        idq = rng.randint(size=10, low=0, high=100)
        idpos = rng.random_integers(100)
        idneg = rng.random_integers(100)

        train(idq, idpos, idneg)
        rnn.normalize()

        print rnn.emb.eval() - e0
Example #8
def trainer(X,Y,alpha,lr,predictions,updates,data,labels):
	data   = U.create_shared(data,  dtype=np.int8)
	labels = U.create_shared(labels,dtype=np.int8)
	index_start = T.lscalar('start')
	index_end   = T.lscalar('end')
	print "Compiling function..."
	train_model = theano.function(
			inputs  = [index_start,index_end,alpha,lr],
			outputs = T.mean(T.neq(T.argmax(predictions, axis=1), Y)),
			updates = updates,
			givens  = {
				X:   data[index_start:index_end],
				Y: labels[index_start:index_end]
			}
		)
	test_model = theano.function(
			inputs  = [index_start,index_end],
			outputs = T.mean(T.neq(T.argmax(predictions, axis=1), Y)),
			givens  = {
				X:   data[index_start:index_end],
				Y: labels[index_start:index_end]
			}
		)
	print "Done."
	return train_model,test_model
Example #9
    def test_doc(self):
        """Ensure the code given in pfunc.txt works as expected"""

        # Example #1.
        a = lscalar()
        b = shared(1)
        f1 = pfunc([a], (a + b))
        f2 = pfunc([Param(a, default=44)], a + b, updates={b: b + 1})
        self.assertTrue(b.get_value() == 1)
        self.assertTrue(f1(3) == 4)
        self.assertTrue(f2(3) == 4)
        self.assertTrue(b.get_value() == 2)
        self.assertTrue(f1(3) == 5)
        b.set_value(0)
        self.assertTrue(f1(3) == 3)

        # Example #2.
        a = tensor.lscalar()
        b = shared(7)
        f1 = pfunc([a], a + b)
        f2 = pfunc([a], a * b)
        self.assertTrue(f1(5) == 12)
        b.set_value(8)
        self.assertTrue(f1(5) == 13)
        self.assertTrue(f2(4) == 32)
Example #10
 def __init__( self, da, stop_val, corruption, rate, train_path, test_path ):
     self.fid = open( 'output.txt', 'r+' )
     self.model = da
     self.stop_val = stop_val
     self.last_cost = 9999
     self.train_path = train_path
     self.test_path = test_path
     
     self.train_set = numpy.load( train_path )
     self.test_set = numpy.load( test_path )
     
     self.shared_train = theano.shared( self.train_set )
     self.shared_test = theano.shared( self.test_set )
     self.print_set( self.shared_train, "train_set" )
     self.print_set( self.shared_test, "test_set" )
     
     self.learning_rate = rate
     self.corruption_level = corruption
     
     self.start_index = T.lscalar()
     self.end_index = T.lscalar()
     self.cost, self.updates = da.get_cost_updates( corruption, rate )
     
     self.train = theano.function( [ self.start_index, self.end_index ], self.cost, updates = self.updates,
         givens = { da.x : self.shared_train [ self.start_index : self.end_index ] } )
     self.test = theano.function( [ self.start_index, self.end_index ], self.cost, updates = self.updates,
         givens = { da.x : self.shared_test [ self.start_index : self.end_index ] } )
Example #11
    def getTrainModel(self, data_x, data_y, data_sm):
        self.ngram_start_index = T.lscalar()
        self.ngram_end_index = T.lscalar()
        self.sm_start_index = T.lscalar()
        self.sm_end_index = T.lscalar()
        self.learning_rate = T.scalar()

        # TRAIN_MODEL
        self.train_outputs = [self.cost, self.grad_norm]
        self.train_set_x, self.train_set_y, self.train_set_sm = io_read_ngram.shared_dataset([data_x, data_y, data_sm])
        self.int_train_set_y = T.cast(self.train_set_y, "int32")
        self.train_model = theano.function(
            inputs=[
                self.ngram_start_index,
                self.ngram_end_index,
                self.sm_start_index,
                self.sm_end_index,
                self.learning_rate,
            ],
            outputs=self.train_outputs,
            updates=self.updates,
            givens={
                self.x: self.train_set_x[self.ngram_start_index : self.ngram_end_index],
                self.y: self.int_train_set_y[self.ngram_start_index : self.ngram_end_index],
                self.sm: self.train_set_sm[self.sm_start_index : self.sm_end_index],
                self.lr: self.learning_rate,
            },
        )

        return self.train_model
Example #12
def test_argsort():
    # Set up
    rng = np.random.RandomState(seed=utt.fetch_seed())
    m_val = rng.rand(3, 2)
    v_val = rng.rand(4)

    # Example 1
    a = tensor.dmatrix()
    w = argsort(a)
    f = theano.function([a], w)
    gv = f(m_val)
    gt = np.argsort(m_val)
    assert np.allclose(gv, gt)

    # Example 2
    a = tensor.dmatrix()
    axis = tensor.lscalar()
    w = argsort(a, axis)
    f = theano.function([a, axis], w)
    for axis_val in 0, 1:
        gv = f(m_val, axis_val)
        gt = np.argsort(m_val, axis_val)
        assert np.allclose(gv, gt)

    # Example 3
    a = tensor.dvector()
    w2 = argsort(a)
    f = theano.function([a], w2)
    gv = f(v_val)
    gt = np.argsort(v_val)
    assert np.allclose(gv, gt)

    # Example 4
    a = tensor.dmatrix()
    axis = tensor.lscalar()
    l = argsort(a, axis, "mergesort")
    f = theano.function([a, axis], l)
    for axis_val in 0, 1:
        gv = f(m_val, axis_val)
        gt = np.argsort(m_val, axis_val)
        assert np.allclose(gv, gt)

    # Example 5
    a = tensor.dmatrix()
    axis = tensor.lscalar()
    a1 = ArgSortOp("mergesort", [])
    a2 = ArgSortOp("quicksort", [])
    # All the below should give true
    assert a1 != a2
    assert a1 == ArgSortOp("mergesort", [])
    assert a2 == ArgSortOp("quicksort", [])

    # Example 6: Testing axis=None
    a = tensor.dmatrix()
    w2 = argsort(a, None)
    f = theano.function([a], w2)
    gv = f(m_val)
    gt = np.argsort(m_val, None)
    assert np.allclose(gv, gt)
Example #13
 def predict(self, X):
     start = T.lscalar()
     end = T.lscalar()
     return theano.function(
         inputs=[start, end],
         outputs=self.py_x,
         givens={self.X: X[start:end]}
     )
Example #14
    def pretraining_functions(self, train_set_x, train_set_y, alpha, batch_size):
        ''' Generates a list of functions, each of them implementing one
        component (sub-CNN) in training the iCNN.
        Each function requires the minibatch index as input; to train a
        sub-CNN, iterate over all minibatch indexes, calling the corresponding
        function on each (see the usage sketch after this example).

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared variable that contains all datapoints used
                            for training the sub-CNN

        :param train_set_y: ...

        :type batch_size: int
        :param batch_size: size of a [mini]batch

        '''

        index = T.lscalar('index')  # index to a minibatch
        learning_rate = T.scalar('learning_rate')  # learning rate to use
        # number of batches
        #n_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size))
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for subcnn in self.subcnns:
            # create a function to compute the mistakes that are made by the model
            index = T.lscalar('index')  # index to a [mini]batch
            #batch_size_var = T.lscalar('batch_size_var')  # batch_size
            # compute the gradients with respect to the model parameters
            grads = T.grad(subcnn.cost, subcnn.params_pretrain)
        
            # add momentum
            # initialize the delta_i-1
            delta_before=[]
            for param_i in subcnn.params:
                delta_before_i=theano.shared(value=numpy.zeros(param_i.get_value().shape))
                delta_before.append(delta_before_i)
        
            updates = []
            for param_i, grad_i, delta_before_i in zip(subcnn.params, grads, delta_before):
                delta_i=-learning_rate * grad_i + alpha*delta_before_i
                updates.append((param_i, param_i + delta_i ))
                updates.append((delta_before_i,delta_i))
            # compile the theano function
            fn = theano.function([index,theano.Param(learning_rate, default=0.1)], [subcnn.cost,subcnn.errors], updates=updates,
                                      givens={
                                      self.x: train_set_x[index*batch_size:(index+1)*batch_size],
                                      self.y: train_set_y[index*batch_size:(index+1)*batch_size]})
            
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns
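A hedged sketch of the loop the docstring above describes (the icnn object, dataset variables, epoch count and batch size are illustrative): each compiled pretraining function is driven over every minibatch index, once per epoch, one sub-CNN at a time.

import numpy as np

batch_size = 20
pretrain_fns = icnn.pretraining_functions(train_set_x, train_set_y,
                                          alpha=0.9, batch_size=batch_size)
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

for layer_fn in pretrain_fns:                     # one function per sub-CNN
    for epoch in range(15):
        costs = [layer_fn(i)[0] for i in range(n_train_batches)]
        print('mean pretraining cost %.4f' % np.mean(costs))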
Example #15
def test(model):
    dim = 128
    v_size = 7810
    margin = 1.0
    
    #load model
    f = open(model, 'rb')
    input_params = cPickle.load(f)
    emb, wx, wh, bh, wa = input_params
    f.close()
    
    embLayer = emb_layer(pre_train=emb, v = v_size, dim = dim) 
    rnnLayer = rnn_layer(input=None, wx=wx, wh=wh, bh=bh, emb_layer = embLayer, nh = dim) 
    att = attention_layer(input=None, rnn_layer=rnnLayer, margin = margin)

    q = T.lvector('q')
    a = T.lscalar('a')
    p = T.lvector('p')
    t = T.lscalar('t')
    inputs = [q,a,p,t]
    score = att.predict(inputs)
    pred = theano.function(inputs=inputs,outputs=score)

    pool = ThreadPool()

    f = open('./data/test-small.id','r')
    count = 1
    print 'time_b:%s' %time.clock()  
    to_pred = []
    for line in f:
        if count % 10000 == 0:
            print count / 10000
        count += 1
        #print 'time_b:%s' %time.clock()  
        line = line[:-1]
        tmp = line.split('\t')
        in_q = numpy.array(tmp[0].split(' ')).astype(numpy.int) - 1
        in_a = int(tmp[1].split(' ')[2]) - 1
        in_p = numpy.array(tmp[1].split(' ')).astype(numpy.int) - 1
        in_t = int(tmp[2]) - 1
        lis = (in_q, in_a, in_p, in_t)
        to_pred.append(lis)
        #print 'time_load:%s' %time.clock()  
        #print 'time_score:%s' %time.clock()  
    f.close()

    ay = numpy.asarray(to_pred)
    #results = map(pred, list(ay[:,0]), list(ay[:,1]),list(ay[:,2]),list(ay[:,3]))
    results = pool.map(pred, to_pred)
    #results = []
    #for p in to_pred:
    #    results.append(att.predict(p,params))
    print 'time_e:%s' %time.clock()
    #print results
    pool.close()
    pool.join()
Example #16
def classify_lenet5(learning_rate=0.005, n_epochs=8000,
                    image_path='D:/dev/datasets/isbi/train-input/train-input_0000.tif',
                    paramfile='lenet0_membrane_epoch_25100.pkl.gz',
                    nkerns=[20, 50], batch_size=1):

    rng = numpy.random.RandomState(23455)

    # allocate symbolic variables for the data
    index_x = T.lscalar()  # index to a [mini]batch
    index_y = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ishape = (28, 28)  # this is the size of MNIST images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size,28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
    # maxpooling reduces this further to (24/2,24/2) = (12,12)
    # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
            image_shape=(batch_size, 1, 28, 28),
            filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
    # maxpooling reduces this further to (8/2,8/2) = (4,4)
    # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, nkerns[0], 12, 12),
            filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2))

    # the TanhLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (20,32*4*4) = (20,512)
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4,
                         n_out=500, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=2)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)
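The layer-shape comments in the excerpt above follow from 'valid' convolution (output = input - filter + 1) and non-overlapping 2x2 max-pooling (output halved); a quick check of that arithmetic in plain Python:

def conv_pool_size(n, filter_size, pool_size):
    # 'valid' convolution followed by non-overlapping pooling
    return (n - filter_size + 1) // pool_size

s0 = conv_pool_size(28, 5, 2)     # layer0: (28-5+1)/2 = 12
s1 = conv_pool_size(s0, 5, 2)     # layer1: (12-5+1)/2 = 4
print('%d %d' % (s0, s1))         # 12 4 -> flattened input to layer2 is nkerns[1]*4*4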
Example #17
def test(model):
    dim = 128
    v_size = 7810
    margin = 1.0
    
    #load model
    f = open(model, 'rb')
    input_params = cPickle.load(f)
    emb, wx, wh, bh, wa = input_params
    f.close()
    
    embLayer = emb_layer(pre_train=emb, v = v_size, dim = dim) 
    rnnLayer = rnn_layer(input=None, wx=wx, wh=wh, bh=bh, emb_layer = embLayer, nh = dim) 
    att = attention_layer(input=None, rnn_layer=rnnLayer, margin = margin)

    q = T.lvector('q')
    a = T.lscalar('a')
    p = T.lvector('p')
    t = T.lscalar('t')
    inputs = [q,a,p,t]

    #emb_num = T.lscalar('emb_num')
    #nh = T.scalar('nh')
    #dim = T.scalar('dim')
    score = att.predict(inputs)
    pred = theano.function(inputs=inputs,outputs=score)

    

    wf = open('./data/res','w')
    f = open('./data/test.id','r')
    count = 1
    print 'time_b:%s' %time.clock()  
    for line in f:
        if count % 10000 == 0:
            print count / 10000
            print 'time_1w:%s' % time.clock()
        count += 1
        #print 'time_b:%s' %time.clock()  
        line = line[:-1]
        tmp = line.split('\t')
        in_q = numpy.array(tmp[0].split(' ')).astype(numpy.int) - 1
        #x = emb[q].reshape((q.shape[0], emb.shape[1]))
        in_a = int(tmp[1].split(' ')[2]) - 1
        in_p = numpy.array(tmp[1].split(' ')).astype(numpy.int) - 1
        in_t = int(tmp[2]) - 1
        #in_lis =  [in_q, in_a, in_p, in_t]
        #print 'time_load:%s' %time.clock()  
        s = pred(in_q, in_a, in_p, in_t)
        #print s
        wf.write(str(s) + '\n')
        #print 'time_score:%s' %time.clock()  
    f.close()
    wf.close()
Example #18
    def apply_net(self, input_image, perform_downsample=False, perform_pad=False, perform_upsample=False, perform_blur=False, perform_offset=False):

        if perform_pad:
            input_image = np.pad(input_image, ((self.pad_by, self.pad_by), (self.pad_by, self.pad_by)), 'symmetric')

        if perform_downsample and self.downsample != 1:
            input_image = np.float32(mahotas.imresize(input_image, 1.0/self.downsample))

        nx = input_image.shape[0] - self.pad_by*2
        ny = input_image.shape[1] - self.pad_by*2
        nbatches = nx * ny

        output = np.zeros((nx, ny), dtype=np.float32)

        t_input_image = theano.shared(np.asarray(input_image,dtype=theano.config.floatX),borrow=True)

        index_x = T.lscalar()
        index_y = T.lscalar()

        # eval_network_l0 = theano.function([index_x, index_y], self.all_layers[0].output,
        #     givens={self.x: t_input_image[index_x:index_x + self.pad_by * 2 + 1, index_y:index_y + self.pad_by * 2 + 1]})
        # eval_network_l1 = theano.function([index_x, index_y], self.all_layers[1].output,
        #     givens={self.x: t_input_image[index_x:index_x + self.pad_by * 2 + 1, index_y:index_y + self.pad_by * 2 + 1]})
        # eval_network_l2 = theano.function([index_x, index_y], self.all_layers[2].output,
        #     givens={self.x: t_input_image[index_x:index_x + self.pad_by * 2 + 1, index_y:index_y + self.pad_by * 2 + 1]})
        eval_network = theano.function([index_x, index_y], self.all_layers[-1].output,
            givens={self.x: t_input_image[index_x:index_x + self.pad_by * 2 + 1, index_y:index_y + self.pad_by * 2 + 1]})

        for xi in range(nx):
            for yi in range(ny):
                # print eval_network_l0(xi, yi)[0,0,:,:]
                # print eval_network_l1(xi, yi)[0,0,:,:]
                # print eval_network_l2(xi, yi)[0,0,:,:]
                # print eval_network(xi, yi)[0,0]
                output[xi, yi] = eval_network(xi, yi)[0,0]
            print "up to x={0} of {1}".format(xi+1, nx)


        if perform_upsample:
            output = np.float32(mahotas.imresize(output, self.downsample))

        if perform_blur and self.best_sigma != 0:
            output = scipy.ndimage.filters.gaussian_filter(output, self.best_sigma)

        if perform_offset:
            #Translate
            output = np.roll(output, self.best_offset[0], axis=0)
            output = np.roll(output, self.best_offset[1], axis=1)

        # Crop to valid size
        #output = output[self.pad_by:-self.pad_by,self.pad_by:-self.pad_by]

        return output
Example #19
     def cost_function(self,learning_rate,batch_size):
         index = T.lscalar()
         index1 = T.lscalar()
         
         """ cost function"""
         cost=self.negative_log_likelihood(self.y)
         """ Gradient of cost function"""
         g_W = T.grad(cost=cost, wrt=self.W)
         g_b = T.grad(cost=cost, wrt=self.b)    

         """ Gradient update equations used by gradient descent algorithms"""
         updates = [(self.W, self.W - learning_rate * g_W),(self.b, self.b - learning_rate * g_b)]
    
    
         num_samples = self.train[0].get_value(borrow=True).shape[0]
         print '\n\n********************************'
         #tbatch_size=batch_size;#feature.get_value(borrow=True).shape[0];
         print 'num of training samples :' + `num_samples`
         print 'num of dimensions :' + `self.n_in`
         print 'num of classes :' + `self.n_classes`    
         print 'Training batch size :' + `batch_size`
         
         #print 'Test batch size :' + `tbatch_size`
         self.n_train_batches = self.train[0].get_value(borrow=True).shape[0] / batch_size    
         self.n_valid_batches= self.validate[0].get_value(borrow=True).shape[0] / batch_size    
         self.n_test_batches= self.test[0].get_value(borrow=True).shape[0] / batch_size    
         #print 'num of training batches :'+`self.n_train_batches`
         
         self.train[1]=T.cast(self.train[1],'int32');
         self.test[1]=T.cast(self.test[1],'int32');
         self.validate[1]=T.cast(self.validate[1],'int32');

         """ Defining functions for training,testing and validation """
         self.train_model = theano.function(inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                self.x: self.train[0][index*batch_size:(index + 1)*batch_size],
                self.y: self.train[1][index*batch_size:(index + 1)*batch_size]});
    
         self.test_model = theano.function(inputs=[index1],
            outputs=[self.errors(self.y),self.y_pred],
             givens={
                 self.x: self.test[0][index1*batch_size:(index1 + 1)*batch_size],
                 self.y: self.test[1][index1*batch_size:(index1 + 1)*batch_size]});

    
         self.validate_model = theano.function(inputs=[index],
            outputs=self.errors(self.y),
            givens={
                self.x: self.validate[0][index * batch_size:(index + 1) * batch_size],
                self.y: self.validate[1][index * batch_size:(index + 1) * batch_size]})
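A hedged, NumPy-only illustration of the gradient-descent update pairs built above, using a toy quadratic loss instead of the negative log-likelihood (values are arbitrary):

import numpy as np

W = np.array([2.0, -1.0])
learning_rate = 0.1
for _ in range(50):
    g_W = 2.0 * W                       # gradient of the toy loss ||W||^2
    W = W - learning_rate * g_W         # same form as (self.W, self.W - learning_rate * g_W)
print(W)                                # converges toward [0. 0.]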
Example #20
    def __init__(self, dnodex,inputdim, name=""):
        pos_p=T.lscalar()
        neg_poi=T.lscalar()
        user=T.lscalar()
        eta=T.scalar()
        pfp_loss=T.scalar()
        if dnodex.pmatrix is None:
            dnodex.umatrix=theano.shared(floatX(np.random.randn(*(dnodex.nuser, inputdim))))
            dnodex.pmatrix=theano.shared(floatX(np.random.randn(*(dnodex.npoi,inputdim))))
        n_updates=[(dnodex.pmatrix, T.set_subtensor(dnodex.pmatrix[neg_poi,:],dnodex.pmatrix[neg_poi,:]-eta*pfp_loss*dnodex.umatrix[user,:]-eta*eta*dnodex.pmatrix[neg_poi,:]))]
        p_updates=[(dnodex.pmatrix, T.set_subtensor(dnodex.pmatrix[pos_p,:],dnodex.pmatrix[pos_p,:]+eta*pfp_loss*dnodex.umatrix[user,:]-eta*eta*dnodex.pmatrix[pos_p,:])),(dnodex.umatrix, T.set_subtensor(dnodex.umatrix[user,:],dnodex.umatrix[user,:]+eta*pfp_loss*(dnodex.pmatrix[pos_p,:]-dnodex.pmatrix[neg_poi,:])-eta*eta*dnodex.umatrix[user,:]))]
        self.trainpos=theano.function([pos_p,neg_poi,user,eta,pfp_loss],updates=p_updates,allow_input_downcast=True)
        self.trainneg=theano.function([neg_poi,user,eta,pfp_loss],updates=n_updates,allow_input_downcast=True)
Example #21
    def _test_scan2(self):

        def step(a, b):
            return a + b, b

        h = T.lscalar("h")
        x = T.lscalar("x")


        [cui, bui], _ = theano.scan(step, sequences=np.array([1,2,3]), outputs_info=[theano.shared(value=0, name='W_in'), None])

        func = theano.function([], cui)
        print func()
Example #22
	def __init__(self, transition_model, observation_model, n_particles, observation_input=None, n_history=1):
		
		self.transition_model=transition_model
		self.observation_model=observation_model
		self.data_dims=observation_model.output_dims
		self.state_dims=transition_model.output_dims
		self.n_particles=n_particles
		self.n_history=n_history
		
		#this is used to keep track of what set of particles corresponds
		#to the previous point in time
		self.time_counter=theano.shared(0)
		
		self.theano_rng=RandomStreams()
		
		#init_particles=np.zeros((n_history+1, n_particles, self.state_dims)).astype(np.float32)
		init_particles=np.random.randn(n_history+1, n_particles, self.state_dims).astype(np.float32)
		init_weights=(np.ones((n_history+1, n_particles))/float(n_particles)).astype(np.float32)
		
		self.particles=theano.shared(init_particles)
		self.weights=theano.shared(init_weights)
		
		self.next_state=self.particles[(self.time_counter+1)%(self.n_history+1)]
		self.current_state=self.particles[self.time_counter%(self.n_history+1)]
		self.previous_state=self.particles[(self.time_counter-1)%(self.n_history+1)]
		
		self.next_weights=self.weights[(self.time_counter+1)%(self.n_history+1)]
		self.current_weights=self.weights[self.time_counter%(self.n_history+1)]
		self.previous_weights=self.weights[(self.time_counter-1)%(self.n_history+1)]
		
		self.proposal_distrib=None
		
		self.true_log_transition_probs=self.transition_model.rel_log_prob
		self.true_log_observation_probs=self.observation_model.rel_log_prob
		
		self.perform_inference=None
		self.resample=None
		self.sample_joint=None
		
		self.observation_input=observation_input
		
		ess=self.compute_ESS()
		self.get_ESS=theano.function([],ess)
		
		n_samps=T.lscalar()
		n_T=T.lscalar()
		data_samples, state_samples, init_state_samples, data_sample_updates=self.sample_future(n_samps,n_T)
		self.sample_from_future=theano.function([n_samps, n_T],[data_samples,state_samples,init_state_samples],updates=data_sample_updates)
		
		self.get_current_particles=theano.function([],self.current_state)
		self.get_current_weights=theano.function([],self.current_weights)
Example #23
    def test_dtype(self):
        random = RandomStreams(utt.fetch_seed())
        low = tensor.lscalar()
        high = tensor.lscalar()
        out = random.random_integers(low=low, high=high, size=(20,), dtype='int8')
        assert out.dtype == 'int8'
        f = function([low, high], out)

        val0 = f(0, 9)
        assert val0.dtype == 'int8'

        val1 = f(255, 257)
        assert val1.dtype == 'int8'
        assert numpy.all(abs(val1) <= 1)
Example #24
    def build_set_function(self):

        index_new = T.lscalar('index_new')
        dataset_new = T.lscalar('dataset_new')

        updates = [(self.__index, index_new), (self.__dataset, dataset_new)]

        set_function = theano.function(
            inputs=[index_new, dataset_new],
            outputs=[],
            updates=updates
        )

        return set_function
Example #25
    def build_norm_estimation_functions(self, data_sets):
        (corpus_feats, _) = data_sets.get_shared()

        start_idx = T.lscalar("start_idx")  # index to a [mini]batch
        end_idx = T.lscalar("end_idx")  # index to a [mini]batch

        # computes zeroth-, first- and second-order statistics over a slice of the corpus features
        norm_func = theano.function(
            inputs=[start_idx, end_idx],
            outputs=[self.zeroth_order_stats(self.x), self.first_order_stats(self.x), self.second_order_stats(self.x)],
            givens={self.x: corpus_feats[start_idx:end_idx]},
        )

        return norm_func
Example #26
    def test_dtype(self):
        rng_R = random_state_type()
        low = tensor.lscalar()
        high = tensor.lscalar()
        post_r, out = random_integers(rng_R, low=low, high=high, size=(20,), dtype="int8")
        assert out.dtype == "int8"
        f = compile.function([rng_R, low, high], [post_r, out])

        rng = numpy.random.RandomState(utt.fetch_seed())
        rng0, val0 = f(rng, 0, 9)
        assert val0.dtype == "int8"

        rng1, val1 = f(rng0, 255, 257)
        assert val1.dtype == "int8"
        assert numpy.all(abs(val1) <= 1)
Example #27
 def test_infer_shape(self):
     x = tensor.lscalar()
     self._compile_and_check([x], [self.op(x)],
                             [numpy.random.random_integers(3, 50, size=())],
                             self.op_class)
     self._compile_and_check([x], [self.op(x)], [0], self.op_class)
     self._compile_and_check([x], [self.op(x)], [1], self.op_class)
Example #28
    def pretraining_functions(self, train_set_x, batch_size):

       
        index = T.lscalar('index') 
        corruption_level = T.scalar('corruption')  
        learning_rate = T.scalar('lr')  
        batch_begin = index * batch_size
        batch_end = batch_begin + batch_size
        
        pretrain_fns = []
        for dA in self.dA_layers:
            cost, updates = dA.get_cost_updates(corruption_level,
                                                learning_rate)
            fn = theano.function(
                inputs=[
                    index,
                    theano.In(corruption_level, value=0.1),
                    theano.In(learning_rate, value=0.1)
                ],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin: batch_end]
                }
            )
            
            pretrain_fns.append(fn)

        return pretrain_fns
Example #29
    def get_train_fn(self, dataX, batch_size=1, k=1):
        """
        dataX: theano shared data
        """
        learning_rate = T.scalar('lr')
        Beta = T.scalar('beta')
        Gamma = T.scalar('gamma')
        Sparseness = T.scalar('sparseness')

        cost, updates = self._get_cost_update(lr=learning_rate,
                                              beta=Beta,
                                              gamma=Gamma,
                                              s_constrain=Sparseness,
                                              k=k)

        index = T.lscalar('index')

        fn = theano.function(inputs=[index,
                                     theano.Param(learning_rate, default=0.01),
                                     theano.Param(Beta, default=0.1),
                                     theano.Param(Gamma, default=0.0001),
                                     theano.Param(Sparseness, default=0.05)],
                             outputs=cost,
                             updates=updates,
                             givens={self.x: dataX[index * batch_size:(index + 1) * batch_size]},
                             name='train_rbm_S_L2')
        return fn
Example #30
def fine_train(nn,datasets,learning_Rate,batch_sizes,epochs):
	train_set_x, train_set_y = datasets[0]
	n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_sizes
	
	train_set_y = T.cast(train_set_y, 'float64')
	index = T.lscalar()
	x = T.matrix('x')
	y = T.matrix('y')
	min_batch_cost = []
	if nn is None:
		mynn = ForwordNN(x,y,n_in,n_out,hidden_sizes)
	else:
		mynn=nn
	cost,update = mynn.get_cost_update(x,y,learning_Rate)
	train_nn = theano.function([index],
				cost,
				updates = update,
				givens = {
							x:train_set_x[index*batch_sizes:(index+1)*batch_sizes,:],
							y:train_set_y[index*batch_sizes:(index+1)*batch_sizes,:]
						}
				)
	for num_epochs in range(epochs):
		t1=time.time()
		for num_batch in xrange(n_batches):
			min_batch_cost.append(train_nn(num_batch))
		t2=time.time()
		print 'The %d/%dth training,takes %f seconds,cost is %f' %(num_epochs+1,epochs,(t2-t1),np.mean(min_batch_cost))
	return mynn	
Example #31
def sgd_optimization_mnist(learning_rate=0.13,
                           n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):

    # load the dataset
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute the number of minibatches
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    index = T.lscalar()  # index to a [mini]batch

    # allocate symbolic variables for the data X and labels y
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    # build the logistic regression classifier: each sample has 28 * 28 features, 10 output classes
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost to minimize: negative log-likelihood
    cost = classifier.negative_log_likelihood(y)

    # compute the misclassification errors on the test set
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the misclassification errors on the validation set
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of the cost with respect to W and b
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # parameter update rules
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compile the training function
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # "early-stopping" parameters
    patience = 5000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):  # start iterating
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute the zero-one loss on the validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss

                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of'
                           ' best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    # save the parameters of the best model
                    with open('best_model.pkl', 'w') as f:
                        cPickle.dump(classifier, f)

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
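A hedged, self-contained sketch of the patience-based early stopping used in the example above (the patience value is kept tiny and the loss sequence is fabricated so the demo actually stops):

patience = 4                       # look at this many iterations regardless
patience_increase = 2
improvement_threshold = 0.995
best_loss = float('inf')

for it, loss in enumerate([0.90, 0.70, 0.69, 0.695, 0.694, 0.693]):
    if loss < best_loss:
        if loss < best_loss * improvement_threshold:   # "significant" improvement
            patience = max(patience, it * patience_increase)
        best_loss = loss
    if patience <= it:
        print('early stop at iteration %d, best loss %.3f' % (it, best_loss))
        break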
Example #32
def test_dA(learning_rate=0.01,
            training_epochs=15,
            dataset="",
            modelfile="",
            batch_size=20,
            output_folder='dA_plots',
            n_visible=1346,
            n_hidden=100,
            beta=0,
            rho=0.5,
            noise=0.3,
            linear=False,
            lost_func='KL',
            loader=None):

    data = map(lambda x: x.partition(' ')[2], open(dataset))
    train_set_x, n_visible = loader.load_training_data(data)

    print >> sys.stderr, "number of training example", len(train_set_x)
    print >> sys.stderr, "batch size", batch_size

    print >> sys.stderr, "number of visible nodes", n_visible
    print >> sys.stderr, "number of hidden nodes", n_hidden

    print >> sys.stderr, "corruption_level", noise
    print >> sys.stderr, "sparse rate", rho, "weight", beta

    print >> sys.stderr, "learning rate", learning_rate
    # compute number of minibatches for training, validation and testing
    n_train_batches = len(train_set_x) / batch_size
    #print(n_train_batches)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    data_x = numpy.array([[0 for i in range(n_visible)]
                          for j in range(batch_size)])
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                             borrow=True)

    #####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2**30))

    da = dA(numpy_rng=rng,
            theano_rng=theano_rng,
            input=x,
            n_visible=n_visible,
            n_hidden=n_hidden)

    cost, updates = da.get_cost_updates(corruption_level=noise,
                                        learning_rate=learning_rate,
                                        beta=beta,
                                        rho=rho,
                                        linear=linear,
                                        lost_func=lost_func)

    train_da = theano.function([], cost, updates=updates, givens={x: shared_x})

    start_time = time.clock()

    # TRAINING #
    for epoch in xrange(training_epochs):
        # go through trainng set
        c = []
        for batch_index in xrange(n_train_batches):
            sub = train_set_x[batch_index * batch_size:(1 + batch_index) *
                              batch_size]
            sub = numpy.array(sub)
            shared_x.set_value(sub)
            c.append(train_da())
        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> sys.stderr, (' ran for %.2fm' % (training_time / 60.))

    modelfile = gzip.open(modelfile, "wb")
    cPickle.dump([n_visible, n_hidden], modelfile)
    cPickle.dump([da.W, da.b, da.b_prime], modelfile)
    modelfile.close()
Example #33
    def learning_feature(
        self,
        train_set,
        n_epochs, learning_rate, batch_size,
        corruption_level, balance_coef
    ):
        # perform `denoising`
        tilde_x = self.get_corrupted_input(self.x, corruption_level)
        # map the corrupted input to hidden layer
        y = T.nnet.sigmoid(T.dot(tilde_x, self.W) + self.b)
        # maps back hidden representation to unsupervised reconstruction
        z1 = T.nnet.sigmoid(T.dot(y, self.Wu) + self.bu)
        L1 = T.mean(-T.sum(self.x * T.log(z1) + (1 - self.x) * T.log(1 - z1), axis=1))

        # perform one-sided regression to fit the cost !
        z2 = T.dot(y, self.Ws) + self.bs
        cost_vector = T.matrix('cost_vector')
        Z_nk = T.matrix('Z_nk')
        # hinge form: xi = T.maximum((Z_nk * (z2 - cost_vector)), 0.); L2 = T.sum(xi)
        # smooth logistic loss (an upper bound on the hinge above)
        delta = T.log(1 + T.exp(Z_nk * (z2 - cost_vector)))
        L2 = T.sum(delta)

        # symbolic variable for balance_coef
        bc = T.scalar('bc')

        cost = L1 + bc * L2

        gparams = T.grad(cost, self.params)

        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        batch_index = T.lscalar('batch_index')

        train_set_x, train_set_y, train_set_c = train_set

        train_set_z = np.zeros(train_set_c.shape) - 1
        for i in xrange(train_set_z.shape[0]):
            train_set_z[i][train_set_y[i]] = 1

        train_set_x = make_shared_data(train_set_x)
        train_set_c = make_shared_data(train_set_c)
        train_set_z = make_shared_data(train_set_z)

        pretrain_model = theano.function(
            inputs=[batch_index, bc],
            outputs=[cost, L1, L2], # TODO: debug
            updates=updates,
            givens={
                self.x: train_set_x[batch_index * batch_size: (batch_index + 1) * batch_size],
                cost_vector: train_set_c[batch_index * batch_size: (batch_index + 1) * batch_size],
                Z_nk: train_set_z[batch_index * batch_size: (batch_index + 1) * batch_size]
            },
            name='pretrain_model'
        )

        n_batches = train_set_x.get_value().shape[0] / batch_size
        for epoch in xrange(n_epochs):
            epoch_cost = 0.
            L1_cost = 0.
            L2_cost = 0.
            for batch in xrange(n_batches):
                batch_cost = pretrain_model(batch, balance_coef)
                epoch_cost += batch_cost[0]
                L1_cost += batch_cost[1]
                L2_cost += batch_cost[2]
            epoch_cost /= n_batches
            L1_cost /= n_batches
            L2_cost /= n_batches
            print '        epoch #%d, loss = (%f, %f, %f)' % (epoch + 1, epoch_cost, L1_cost, L2_cost)

        y_new = T.nnet.sigmoid(T.dot(self.x, self.W) + self.b)

        transform_data = theano.function(
            inputs=[],
            outputs=y_new,
            givens={
                self.x: train_set_x
            },
            name='trainform_data'
        )

        return [transform_data(), train_set_y, train_set_c.get_value()]
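The smooth logistic term used above upper-bounds the commented-out hinge term, since log(1 + e^z) >= max(z, 0) for every real z; a quick numeric check (values are arbitrary):

import numpy as np

z = np.linspace(-5.0, 5.0, 11)
hinge = np.maximum(z, 0.0)
logistic = np.log1p(np.exp(z))
print(np.all(logistic >= hinge))    # True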
Example #34
def test_mlp_parity(learning_rate=0.01,
                    L1_reg=0.00,
                    L2_reg=0.0001,
                    n_epochs=100,
                    batch_size=64,
                    n_hidden=500,
                    n_hiddenLayers=1,
                    verbose=False):

    reader = csv.reader(open("joint_knee.csv", "rb"), delimiter=',')

    x = list(reader)
    #print x
    result = numpy.array(x)

    #print result.shape

    def score_to_numeric(x, a):
        if (x == 'Hospice - Home'):
            return 11
        if (x == 'Psychiatric Hospital or Unit of Hosp'):
            return 10
        if (x == 'Hospice - Medical Facility'):
            return 9
        if (x == 'Expired'):
            return 8
        if (x == 'Facility w/ Custodial/Supportive Care'):
            return 7
        if (x.lower() == 'left against medical advice'):
            return 6
        if (x.lower() == 'short-term hospital'):
            return 5
        if (x.lower() == 'multi-racial' or x.lower() == 'home or self care'):
            return 4
        if (x.lower() == 'other race' or x.lower() == 'emergency'
                or x.lower() == 'skilled nursing home'
                or x.lower() == 'not available'):
            return 3
        if (x.lower() == 'm' or x.lower() == 'black/african american'
                or x.lower() == 'urgent'
                or x.lower() == 'inpatient rehabilitation facility'):
            return 2
        if (x.lower() == 'f' or x.lower() == 'white' or x.lower() == 'elective'
                or x.lower() == 'home w/ home health services'):
            return 1
        if (a == 1):
            return int(x[:2])
        if (a == 2):
            return float(x[1:])
        else:
            return float(x)

    rownum = 0
    for row in result:
        # Save header row.
        if rownum == 0:
            rownum += 1
            header = row
            for i in range(0, len(header)):
                if header[i].lower() == 'gender':
                    gender = i
                if header[i].lower() == 'race':
                    race = i
                if header[i].lower() == 'type of admission':
                    admi = i
                if header[i].lower() == 'patient disposition':
                    disp = i
                if header[i].lower() == 'age group':
                    age = i
                if header[i].lower() == 'total charges':
                    price = i
        else:
            row[gender] = score_to_numeric(row[gender], 0)
            row[race] = score_to_numeric(row[race], 0)
            row[admi] = score_to_numeric(row[admi], 0)
            row[disp] = score_to_numeric(row[disp], 0)
            row[age] = score_to_numeric(row[age], 1)
            row[price] = score_to_numeric(row[price], 2)
            for i in range(0, len(row)):
                row[i] = float(row[i])
                #y = row[i].astype(numpy.float)
                #row[i] = y
                #print type(row[i])

    #print type(result)
    #result = numpy.array(result).astype('float')
    #print result[1:(len(result)),1:]
    res = result[1:(len(result)), 1:].astype(numpy.float)
    for i in range(len(res)):
        for j in range(len(res[0])):
            if (j == 9):
                res[i, j] = int(round(res[i, j] / 10000))
            else:
                res[i, j] = int(round(res[i, j]))

    myset = set(res[:, 9])
    nout = len(myset)

    y = res[:, 9]
    #print y
    x = res[:, 0:9]

    iris = load_iris()
    clf = ExtraTreesClassifier()
    clf = clf.fit(x, y)
    model = SelectFromModel(clf, prefit=True)
    X_new = model.transform(x)
    data = np.c_[X_new, y]

    totallen = len(data)
    numpy.random.shuffle(data)
    training, validation, testing = data[:totallen / 2, :], data[totallen / 2:(
        3 * totallen / 4), :], data[(3 * totallen / 4):, :]

    l = len(data[0]) - 1

    train_set = [training[:, 0:l], training[:, l]]
    valid_set = [validation[:, 0:l], validation[:, l]]
    test_set = [testing[:, 0:l], testing[:, l]]

    #print train_set
    #print valid_set
    #print test_set

    # Convert raw dataset to Theano shared variables.
    train_set_x, train_set_y = shared_dataset(train_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    test_set_x, test_set_y = shared_dataset(test_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = myMLP(rng=rng,
                       input=x,
                       n_in=l,
                       n_hidden=n_hidden,
                       n_out=len(myset),
                       n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model, n_train_batches,
             n_valid_batches, n_test_batches, n_epochs, verbose)

    y_p_train = theano.function(inputs=[],
                                outputs=[classifier.logRegressionLayer.y_pred],
                                givens={x: train_set_x})

    y_predict = theano.function(inputs=[],
                                outputs=[classifier.logRegressionLayer.y_pred],
                                givens={x: test_set_x})
    y_pred1 = y_p_train()
    y_pred2 = y_predict()

    return y_pred1, y_pred2
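The shared_dataset helper used above is not shown in this snippet. A minimal sketch of what such a helper typically looks like in these tutorial-style examples is given below; the exact signature is an assumption, but the pattern (store the design matrix and labels in theano.shared, cast the labels to int32) matches how train_set_x/train_set_y are used here.

import numpy
import theano
import theano.tensor as T

def shared_dataset(data_xy, borrow=True):
    # hypothetical sketch: wrap an (inputs, labels) pair in theano.shared
    # variables; labels are stored as floatX and cast to int32 so they can
    # be used as class indices in the cost and error expressions
    data_x, data_y = data_xy
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                             borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')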
Example #35
0
def _construct_mlp(datasets,
                   learning_rate=0.01,
                   L1_reg=0.00,
                   L2_reg=0.0001,
                   n_epochs=1000,
                   batch_size=20,
                   n_hidden=200):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    Note: Parameters need tuning.

    :type datasets: tuple
    :param datasets: (inputs, targets)

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: number of examples in one batch

    :type n_hidden: int
    :param n_hidden: number of hidden units to be used in class HiddenLayer

     """
    inputs, targets = datasets
    temp_train_set_x = []
    temp_train_set_y = []
    train_set_x = []
    train_set_y = []
    valid_set_x = []
    valid_set_y = []
    test_set_x = []
    test_set_y = []

    # stratified shuffle split to separate the test set from a temporary
    # train set, which still contains both validation and train data
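    # (note: this assumes the pre-0.18 sklearn.cross_validation API, where
    # StratifiedShuffleSplit(y, n_iter, test_size) is itself iterable over
    # (train_index, test_index) pairs)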
    skf = StratifiedShuffleSplit(targets, 1, 0.2)
    for temp_train_index, test_index in skf:
        # print("TEMP_TRAIN:", temp_train_index, "TEST:", test_index)
        temp_train_set_x.append(inputs[temp_train_index])
        temp_train_set_y.append(targets[temp_train_index])
        test_set_x.append(inputs[test_index])
        test_set_y.append(targets[test_index])

    # convert from list-wrapping array to array
    test_set_x = test_set_x[0]
    test_set_y = test_set_y[0]
    temp_train_set_x = temp_train_set_x[0]
    temp_train_set_y = temp_train_set_y[0]

    # stratified shuffle split to separate valid and train
    skf = StratifiedShuffleSplit(temp_train_set_y, 1, 0.25)
    for train_index, valid_index in skf:
        # print("TRAIN: ", train_index, ", VALID: ", valid_index)
        train_set_x.append(temp_train_set_x[train_index])
        train_set_y.append(temp_train_set_y[train_index])
        valid_set_x.append(temp_train_set_x[valid_index])
        valid_set_y.append(temp_train_set_y[valid_index])

    # convert from list-wrapping array to array
    train_set_x = train_set_x[0]
    train_set_y = train_set_y[0]
    valid_set_x = valid_set_x[0]
    valid_set_y = valid_set_y[0]

    # check shape
    # print("train_set_x shape: " + str(train_set_x.shape))
    # print("train_set_y shape: " + str(train_set_y.shape))
    # print("valid_set_x shape: " + str(valid_set_x.shape))
    # print("valid_set_y shape: " + str(valid_set_y.shape))
    # print("test_set_x shape: " + str(test_set_x.shape))
    # print("test_set_y shape: " + str(test_set_y.shape))

    # convert to theano.shared variable
    train_set_x = theano.shared(value=train_set_x, name='train_set_x')
    train_set_y = theano.shared(value=train_set_y, name='train_set_y')
    valid_set_x = theano.shared(value=valid_set_x, name='valid_set_x')
    valid_set_y = theano.shared(value=valid_set_y, name='valid_set_y')
    test_set_x = theano.shared(value=test_set_x, name='test_set_x')
    test_set_y = theano.shared(value=test_set_y, name='test_set_y')

    # compute number of minibatches for training, validation and testing
    n_train_batches = int(train_set_x.get_value().shape[0] / batch_size)
    n_valid_batches = int(valid_set_x.get_value().shape[0] / batch_size)
    n_test_batches = int(test_set_x.get_value().shape[0] / batch_size)

    # check batch
    # print("n_train_batches:" + str(n_train_batches))
    # print("n_valid_batches:" + str(n_valid_batches))
    # print("n_test_batches:" + str(n_test_batches))

    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.lvector('y')  # the labels are presented as 1D vector of [int] labels

    # set a random state that is related to the time
    # noinspection PyUnresolvedReferences
    rng = numpy.random.RandomState(int((time.time())))

    # construct the MLP class
    classifier = MLP(rng=rng,
                     input_=x,
                     n_in=_std_height * _std_width,
                     n_hidden=n_hidden,
                     n_out=len(_captcha_provider.chars))

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        },
        mode='FAST_RUN')

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        },
        mode='FAST_RUN')

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        mode='FAST_RUN')

    print('... training')

    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant

    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many minibatches before checking the network
    # on the validation set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.time()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        for minibatch_index in range(n_train_batches):
            # noinspection PyUnusedLocal
            minibatch_avg_cost = train_model(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch {0}, minibatch {1}/{2}, validation error {3}'.
                      format(epoch, minibatch_index + 1, n_train_batches,
                             this_validation_loss * 100.))
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    print(
                        '    epoch {0}, minibatch {1}/{2}, test error of best '
                        'model {3}'.format(epoch, minibatch_index + 1,
                                           n_train_batches, test_score * 100))

            if patience <= iteration:
                done_looping = True
                break

    end_time = time.time()
    print('Optimization complete. Best validation score of {0} obtained at '
          'iteration {1}, with test performance {2}'.format(
              best_validation_loss * 100, best_iter + 1, test_score * 100))
    print('Time used for training the mlp is', end_time - start_time)
    return classifier
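The function returns the trained classifier but does not compile a prediction function. A minimal sketch of how one could be compiled afterwards, assuming the MLP class exposes a logRegressionLayer with a y_pred output as in the other examples here (compile_predictor and its arguments are hypothetical names):

import theano

def compile_predictor(classifier, symbolic_input, data_x):
    # hypothetical helper: `symbolic_input` must be the same T.matrix the
    # classifier was built on, and `data_x` a theano.shared design matrix;
    # the compiled function returns the predicted class index for each row
    return theano.function(
        inputs=[],
        outputs=classifier.logRegressionLayer.y_pred,
        givens={symbolic_input: data_x})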
Example #36
0
def validate(conf, net_weights):

    logger.info("... loading data")
    logger.debug("Theano.config.floatX is %s" % theano.config.floatX)

    path = conf['data']['location']
    batch_size = 1
    assert (type(batch_size) is int)
    logger.info('Batch size %d' % (batch_size))

    try:
        x_train_allscales = try_pickle_load(path + 'x_' + conf['run-dataset'] +
                                            '.bin')
        x_train = x_train_allscales[0]  # first scale
        y_train = try_pickle_load(path + 'y_' + conf['run-dataset'] + '.bin')
    except IOError:
        logger.error("Unable to load Theano dataset from %s", path)
        exit(1)

    y_valid = try_pickle_load(path + 'y_validation.bin')
    logger.debug("Validation labels path: %s", path + 'y_validation.bin')
    n_classes = int(max(y_train.max(), y_valid.max()) + 1)
    logger.info("Dataset has %d classes", n_classes)

    image_shape = (x_train.shape[-2], x_train.shape[-1])
    logger.info("Image shape is %s", image_shape)

    logger.info('Dataset has %d images' % x_train.shape[0])
    logger.info('Input data has shape of %s ', x_train.shape)

    # compute number of minibatches
    n_train_batches = x_train.shape[0] // batch_size

    logger.info("Number of train batches %d" % n_train_batches)

    logger.info("... building network")

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # input is presented as (batch, channel, x, y)
    x0 = T.tensor4('x')
    x2 = T.tensor4('x')
    x4 = T.tensor4('x')
    # matrix row - batch index, column label of pixel
    # every column is a list of pixel labels (image matrix reshaped to list)
    y = T.imatrix('y')

    # create all layers
    builder_name = conf['network']['builder-name']
    layers, out_shape, conv_out = get_net_builder(builder_name)(
        x0,
        x2,
        x4,
        y,
        batch_size,
        classes=n_classes,
        image_shape=image_shape,
        nkerns=conf['network']['layers'][:3],
        seed=conf['network']['seed'],
        activation=lReLU,
        bias=0.001,
        sparse=False)
    logger.info("Image out shape is %s", out_shape)

    y_train_shape = (y_train.shape[0], out_shape[0], out_shape[1])

    # resize marked images to out_size of the network
    y_train_downscaled = np.empty(y_train_shape)
    # for i in xrange(y_train.shape[0]):
    #     y_train_downscaled[i] = resize_marked_image(y_train[i], out_shape)

    x_train_shared, y_train_shared = \
        shared_dataset((x_train,
                        y_train_downscaled))
    x2_train_shared = theano.shared(x_train_allscales[1], borrow=True)
    x4_train_shared = theano.shared(x_train_allscales[2], borrow=True)

    ###############
    # BUILD MODEL #
    ###############
    logger.info("... building model")

    layers, new_layers = extend_net_w1l_drop(
        conv_out,
        conf['network']['layers'][-2] * 3,
        layers,
        n_classes,
        nkerns=conf['network']['layers'][-1:],
        seed=conf['network']['seed'],
        activation=lReLU,
        bias=0.001)

    test_model = theano.function(
        [index], [layers[0].y_pred],
        givens={
            x0: x_train_shared[index * batch_size:(index + 1) * batch_size],
            x2: x2_train_shared[index * batch_size:(index + 1) * batch_size],
            x4: x4_train_shared[index * batch_size:(index + 1) * batch_size]
        })

    # try to load weights
    try:
        if net_weights is not None:
            for net_weight, layer in zip(net_weights, layers):
                layer.set_weights(net_weight)
            logger.info("Loaded net weights from file.")
            net_weights = None
    except Exception:
        logger.error("Incompatible network to load weights into")
        exit(1)

    set_layers_training_mode(layers, 0)

    logger.info("---> Results - no postprocessing")
    start_time = time.clock()
    validation = [
        test_model(i)[0].reshape(NET_OUT_SHAPE)
        for i in xrange(n_train_batches)
    ]
    end_time = time.clock()
    logfiles_path = conf['data']['location'] +\
        'samples_' + conf['run-dataset'] + '.log'
    logger.info("Validated %d images in %.2f seconds", n_train_batches,
                end_time - start_time)
    get_stats(validation, y_train, layers[0].n_classes,
              conf['data']['dont-care-classes'], logfiles_path,
              conf['run-dataset'])

    logger.info("---> Results - superpixels")
    stats_func = lambda p: get_stats(validation,
                                     y_train,
                                     layers[0].n_classes,
                                     conf['data']['dont-care-classes'],
                                     logfiles_path,
                                     conf['run-dataset'],
                                     postproc=oversegment,
                                     postproc_params=p,
                                     show=False,
                                     log=False)
    start_time = time.clock()
    best_params = find_best_superpixel_params(stats_func)
    end_time = time.clock()
    logger.info("Done in %.2f seconds", end_time - start_time)
    logger.info("Best params are %s", best_params)

    #   run one more time with params, log output this time
    get_stats(validation,
              y_train,
              layers[0].n_classes,
              conf['data']['dont-care-classes'],
              logfiles_path,
              conf['run-dataset'],
              postproc=oversegment,
              postproc_params=best_params,
              show=False)
Example #37
0
    def SGD(self, training_data, epochs, mini_batch_size, eta,
            validation_data, test_data, lmbda=0.0):
        """Train the network using mini-batch stochastic gradient descent."""
        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data

        # compute number of minibatches for training, validation and testing
        num_training_batches = size(training_data)/mini_batch_size
        num_validation_batches = size(validation_data)/mini_batch_size
        num_test_batches = size(test_data)/mini_batch_size

        # define the (regularized) cost function, symbolic gradients, and updates
        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = self.layers[-1].cost(self)+\
               0.5*lmbda*l2_norm_squared/num_training_batches
        grads = T.grad(cost, self.params)
        updates = [(param, param-eta*grad)
                   for param, grad in zip(self.params, grads)]

        # define functions to train a mini-batch, and to compute the
        # accuracy in validation and test mini-batches.
        i = T.lscalar() # mini-batch index
        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={
                self.x:
                training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        self.test_mb_predictions = theano.function(
            [i], self.layers[-1].y_out,
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        # Do the actual training
        best_validation_accuracy = 0.0
        for epoch in xrange(epochs):
            for minibatch_index in xrange(num_training_batches):
                iteration = num_training_batches*epoch+minibatch_index
                if iteration % 1000 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                cost_ij = train_mb(minibatch_index)
                if (iteration+1) % num_training_batches == 0:
                    validation_accuracy = np.mean(
                        [validate_mb_accuracy(j) for j in xrange(num_validation_batches)])
                    print("Epoch {0}: validation accuracy {1:.2%}".format(
                        epoch, validation_accuracy))
                    if validation_accuracy >= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        if test_data:
                            test_accuracy = np.mean(
                                [test_mb_accuracy(j) for j in xrange(num_test_batches)])
                            print('The corresponding test accuracy is {0:.2%}'.format(
                                test_accuracy))
        print("Finished training network.")
        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
            best_validation_accuracy, best_iteration))
        print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))
Example #38
0
def test_multilayer_perceptron(learning_rate=0.01,
                               L1_reg=0.00,
                               L2_reg=0.0001,
                               n_epochs=1000,
                               dataset='mnist.pkl.gz',
                               batch_size=20,
                               n_hidden=500):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz


   """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MultiLayerPerceptron class
    classifier = MultiLayerPerceptron(rng=rng,
                                      input=x,
                                      n_in=28 * 28,
                                      n_hidden=n_hidden,
                                      n_out=10)

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)
    # end-snippet-4

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # start-snippet-5
    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(
        ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' %
         ((end_time - start_time) / 60.)),
        file=sys.stderr)
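The patience-based early stopping used throughout these examples follows the same pattern every time. Below is a minimal, Theano-free sketch of just that control flow; the function name and the validate() callable are hypothetical, while the parameter names and the update of `patience` mirror the tutorial code above.

def patience_early_stopping(validate, n_train_batches, n_epochs,
                            patience=10000, patience_increase=2,
                            improvement_threshold=0.995):
    # validate() is assumed to return the current validation loss
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = float('inf')
    for epoch in range(1, n_epochs + 1):
        for minibatch_index in range(n_train_batches):
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            if (iteration + 1) % validation_frequency == 0:
                this_loss = validate()
                if this_loss < best_validation_loss:
                    # a sufficiently large improvement extends the patience budget
                    if this_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)
                    best_validation_loss = this_loss
            if patience <= iteration:
                return best_validation_loss
    return best_validation_loss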
Example #39
0
File: cnn.py Project: MartinHua/cnn
def evaluate_lenet5(learning_rate=0.1,
                    n_epochs=200,
                    nkerns=[20, 50],
                    batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]

    n_train_batches //= batch_size
    n_valid_batches //= batch_size

    index = T.lscalar()

    x = T.matrix('x')
    y = T.ivector('y')

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(rng=rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(rng=rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    layer2 = HiddenLayer(rng=rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=300,
                         activation=T.tanh)

    layer3 = LogisticRegression(input=layer2.output, n_in=300, n_out=10)

    cost = layer3.negative_log_likelihood(y)

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    params = layer3.params + layer2.params + layer1.params + layer0.params

    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995

    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:

                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    fo = open('best_cnn_model.pkl', 'wb')
                    pickle.dump([[layer0.W, layer0.b], [layer1.W, layer1.b],
                                 [layer2.W, layer2.b], [layer3.W, layer3.b]],
                                fo)
                    fo.close()
            if patience <= iter:
                done_looping = True
                break

    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, ' %
          (best_validation_loss * 100., best_iter + 1))
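The best model is saved by pickling the layers' shared variables directly. A minimal sketch of reading them back, assuming the same Theano installation is available when unpickling and that the layer ordering matches the dump above:

import pickle

with open('best_cnn_model.pkl', 'rb') as f:
    saved_params = pickle.load(f)
# saved_params[i] is the [W, b] pair of layer i (theano.shared variables),
# in the order [layer0, layer1, layer2, layer3] used when saving
layer0_W = saved_params[0][0].get_value()
layer0_b = saved_params[0][1].get_value()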
Example #40
0
def sgd_optimization_mnist(learning_rate=0.13,
                           n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                    (epoch, minibatch_index + 1, n_train_batches,
                    this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of best'
                           ' model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
Example #41
0
def test_rbm(learning_rate=0.1, training_epochs=15,
             dataset='mnist.pkl.gz', batch_size=20,
             n_chains=20, n_samples=10, output_folder='rbm_plots',
             n_hidden=500):
    """
    Demonstrate how to train and afterwards sample from it using Theano.

    This is demonstrated on MNIST.

    :param learning_rate: learning rate used for training the RBM

    :param training_epochs: number of epochs used for training

    :param dataset: path to the pickled dataset

    :param batch_size: size of a batch used to train the RBM

    :param n_chains: number of parallel Gibbs chains to be used for sampling

    :param n_samples: number of samples to plot for each chain

    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # initialize storage for the persistent chain (state = hidden
    # layer of chain)
    persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden),
                                                 dtype=theano.config.floatX),
                                     borrow=True)

    # construct the RBM class
    rbm = RBM(input=x, n_visible=28 * 28,
              n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng)

    # get the cost and the updates corresponding to one step of persistent
    # contrastive divergence (PCD) with k=15 Gibbs steps; the persistent_chain
    # shared variable carries the chain state across minibatches
    cost, updates = rbm.get_cost_updates(lr=learning_rate,
                                         persistent=persistent_chain, k=15)

    #################################
    #     Training the RBM          #
    #################################
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    # it is ok for a theano function to have no output
    # the purpose of train_rbm is solely to update the RBM parameters
    train_rbm = theano.function([index], cost,
           updates=updates,
           givens={x: train_set_x[index * batch_size:
                                  (index + 1) * batch_size]},
           name='train_rbm')

    plotting_time = 0.
    start_time = time.clock()

    # go through training epochs
    for epoch in xrange(training_epochs):

        print 'starting epoch %d... ' % epoch
        # go through the training set
        mean_cost = []
        for batch_index in xrange(n_train_batches):
            print 'batch: %d' % batch_index
            mean_cost += [train_rbm(batch_index)]

        print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)

        # Plot filters after each training epoch
        plotting_start = time.clock()
        # Construct image from the weight matrix
        image = PIL.Image.fromarray(tile_raster_images(
                 X=rbm.W.get_value(borrow=True).T,
                 img_shape=(28, 28), tile_shape=(10, 10),
                 tile_spacing=(1, 1)))
        image.save('filters_at_epoch_%i.png' % epoch)
        plotting_stop = time.clock()
        plotting_time += (plotting_stop - plotting_start)

    end_time = time.clock()

    pretraining_time = (end_time - start_time) - plotting_time

    print ('Training took %f minutes' % (pretraining_time / 60.))

    #################################
    #     Sampling from the RBM     #
    #################################
    # find out the number of test samples
    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]

    # pick random test examples, with which to initialize the persistent chain
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(numpy.asarray(
            test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
            dtype=theano.config.floatX))

    plot_every = 1000
    # define one step of Gibbs sampling (mf = mean-field) define a
    # function that does `plot_every` steps before returning the
    # sample for plotting
    [presig_hids, hid_mfs, hid_samples, presig_vis,
     vis_mfs, vis_samples], updates =  \
                        theano.scan(rbm.gibbs_vhv,
                                outputs_info=[None,  None, None, None,
                                              None, persistent_vis_chain],
                                n_steps=plot_every)

    # add to updates the shared variable that takes care of our persistent
    # chain :.
    updates.update({persistent_vis_chain: vis_samples[-1]})
    # construct the function that implements our persistent chain.
    # we generate the "mean field" activations for plotting and the actual
    # samples for reinitializing the state of our persistent chain
    sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]],
                                updates=updates,
                                name='sample_fn')

    # create a space to store the image for plotting ( we need to leave
    # room for the tile_spacing as well)
    image_data = numpy.zeros((29 * n_samples + 1, 29 * n_chains - 1),
                             dtype='uint8')
    for idx in xrange(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_mf, vis_sample = sample_fn()
        print ' ... plotting sample ', idx
        image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
                X=vis_mf,
                img_shape=(28, 28),
                tile_shape=(1, n_chains),
                tile_spacing=(1, 1))
        # construct image

    image = PIL.Image.fromarray(image_data)
    image.save('samples.png')
    os.chdir('../')
Example #42
0
def evaluate_lenet5(learning_rate=0.10,
                    n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=[16, 16, 16, 12, 12, 12],
                    batch_size=500):

    rng = numpy.random.RandomState(32324)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    index = T.lscalar()  # index for each mini batch
    train_epoch = T.lscalar('train_epoch')

    x = T.matrix('x')
    y = T.ivector('y')

    # ------------------------------- Building Model ----------------------------------
    print "...Building the model"

    layer_0_input = x.reshape((batch_size, 1, 28, 28))

    # output image size = (28-5+1)/1 = 24
    layer_0 = LeNetConvPoolLayer(rng,
                                 input=layer_0_input,
                                 image_shape=(batch_size, 1, 28, 28),
                                 filter_shape=(nkerns[0], 1, 5, 5),
                                 poolsize=(1, 1))

    #output image size = (24-3+1) = 22
    layer_1 = LeNetConvPoolLayer(rng,
                                 input=layer_0.output,
                                 image_shape=(batch_size, nkerns[0], 24, 24),
                                 filter_shape=(nkerns[1], nkerns[0], 3, 3),
                                 poolsize=(1, 1))

    #output image size = (22-3+1)/2 = 10
    layer_2 = LeNetConvPoolLayer(rng,
                                 input=layer_1.output,
                                 image_shape=(batch_size, nkerns[1], 22, 22),
                                 filter_shape=(nkerns[2], nkerns[1], 3, 3),
                                 poolsize=(2, 2))

    #output image size = (10-3+1)/2 = 4
    layer_3 = LeNetConvPoolLayer(rng,
                                 input=layer_2.output,
                                 image_shape=(batch_size, nkerns[2], 10, 10),
                                 filter_shape=(nkerns[3], nkerns[2], 3, 3),
                                 poolsize=(2, 2))

    #output image size = (4-3+2+1) = 4
    layer_4 = LeNetConvPoolLayer(rng,
                                 input=layer_3.output,
                                 image_shape=(batch_size, nkerns[3], 4, 4),
                                 filter_shape=(nkerns[4], nkerns[3], 3, 3),
                                 poolsize=(1, 1),
                                 border_mode=1)

    #output image size = (4-3+2+1)/2 = 2  (border_mode=1 pads by one pixel)
    layer_5 = LeNetConvPoolLayer(rng,
                                 input=layer_4.output,
                                 image_shape=(batch_size, nkerns[4], 4, 4),
                                 filter_shape=(nkerns[5], nkerns[4], 3, 3),
                                 poolsize=(2, 2),
                                 border_mode=1)

    # make the input to hidden layer 2 dimensional
    layer_6_input = layer_5.output.flatten(2)

    layer_6 = HiddenLayer(rng,
                          input=layer_6_input,
                          n_in=nkerns[5] * 2 * 2,
                          n_out=200,
                          activation=T.tanh)

    layer_7 = LogReg(input=layer_6.output, n_in=200, n_out=10)

    teacher_p_y_given_x = theano.shared(numpy.asarray(
        pickle.load(open('prob_best_model.pkl', 'rb')),
        dtype=theano.config.floatX),
                                        borrow=True)
    p_y_given_x = T.matrix('p_y_given_x')
    e = theano.shared(value=0, name='e', borrow=True)

    cost = (layer_7.neg_log_likelihood(y) +
            2.0 / e * T.mean(-T.log(layer_7.p_y_given_x) * p_y_given_x -
                             layer_7.p_y_given_x * T.log(p_y_given_x)))
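    # the second term softly matches the student's output distribution to the
    # precomputed teacher distribution p_y_given_x (a symmetrised cross-entropy),
    # weighted by 2/e; the shared variable e is replaced by the current epoch
    # (train_epoch) via givens in train_model, so the teacher's influence
    # shrinks as training progresses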

    tg = theano.shared(numpy.asarray(pickle.load(
        open('modified_guided_data.pkl', 'rb')),
                                     dtype=theano.config.floatX),
                       borrow=True)
    guiding_weights = T.tensor4('guiding_weights')
    #guide_cost = T.mean(-T.log(layer_3.output)*guiding_weights - layer_3.output*T.log(guiding_weights))
    guide_cost = T.mean((layer_3.output - guiding_weights)**2)
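    # guide_cost is the mean squared error between layer_3's feature maps and
    # the precomputed guiding maps in tg, i.e. the intermediate layer is
    # trained to match the guidance data loaded above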
    test_model = theano.function(
        [index],
        layer_7.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer_7.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # list of parameters

    params = layer_7.params + layer_6.params + layer_5.params + layer_4.params + layer_3.params + layer_2.params + layer_1.params + layer_0.params
    params_gl = layer_3.params + layer_2.params + layer_1.params + layer_0.params
    # import pdb
    # pdb.set_trace()
    grads_gl = T.grad(guide_cost, params_gl)
    updates_gl = [(param_i, param_i - learning_rate * grad_i)
                  for param_i, grad_i in zip(params_gl, grads_gl)]

    grads = T.grad(cost, params)
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index, train_epoch],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            p_y_given_x: teacher_p_y_given_x[index],
            e: train_epoch
        })
    train_till_guided_layer = theano.function(
        [index],
        guide_cost,
        updates=updates_gl,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            guiding_weights: tg[index]
        },
        on_unused_input='ignore')

    # -----------------------------------------Starting Training ------------------------------

    print('..... Training ')

    # for early stopping
    patience = 10000
    patience_increase = 2

    improvement_threshold = 0.95

    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf  # initialising loss to be infinite
    best_itr = 0
    test_score = 0

    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            if epoch < n_epochs / 5:
                cost_ij_guided = train_till_guided_layer(minibatch_index)
            cost_ij = train_model(minibatch_index, epoch)

            if (iter + 1) % validation_frequency == 0:
                # compute loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                # import pdb
                # pdb.set_trace()

                with open('Student_6_terminal_out_2', 'a+') as f_:
                    f_.write(
                        'epoch %i, minibatch %i/%i, validation error %f %% \n'
                        % (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))

                # check with best validation score till now
                if this_validation_loss < best_validation_loss:

                    # improve
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_itr = iter

                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    with open('Student_6_terminal_out_2', 'a+') as f_:
                        f_.write(
                            'epoch %i, minibatch %i/%i, testing error %f %%\n'
                            % (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))
                    with open('best_model_7layer_2.pkl', 'wb') as f:
                        pickle.dump(params, f)
                    with open('Results_student_6_2.txt', 'wb') as f1:
                        f1.write(str(test_score * 100) + '\n')
            #if patience <= iter:
            #	done_looping = True
            #	break

    end_time = timeit.default_timer()
    with open('Student_6_terminal_out_2', 'a+') as f_:
        f_.write('Optimization complete\n')
        f_.write(
            'Best validation score of %f %% obtained at iteration %i with test performance %f %% \n'
            % (best_validation_loss * 100., best_itr, test_score * 100))
        f_.write('The code ran for %.2fm\n' % ((end_time - start_time) / 60.))
Example #43
0
n_train_batches = n_batches

print 'Number of song for training in single chunk file: ' + str(
    n_train_batches)

###########################################################
###########################################################

############ CONSTRUCTING MODEL ARCHITECTURE ##############
###########################################################

print 'Building model...'

# allocate symbolic variables for the data

index = T.lscalar()  # index to a [mini]batch
x = T.matrix('x')  # the data is presented as a vector of inputs with many exchangeable examples of this vector
rng = numpy.random.RandomState(1234)

# Reshape matrix of rasterized images of shape (batch_size, 1000 * 60)
# to a 4D tensor, compatible with our LeNetConvPoolLayer
layer0_input = x.reshape((minibatch_size, 1, 1000, 60))

layer0 = LeNetConvPoolLayer(rng,
                            input=layer0_input,
                            image_shape=(minibatch_size, 1, 1000, 60),
                            filter_shape=(layer0_filters, 1, 5, 5),
                            poolsize=(5, 1),
                            dim2=1)
def test_Highway_Momentum_output(datasets, learning_rate=0.1, lr_decay=0.95,
                                 momentum=0.9, n_epochs=200, n_hidden=10,
                                 n_hiddenLayers=1, n_highwayLayers=5,
                                 activation_hidden=T.nnet.nnet.relu,
                                 activation_highway=T.nnet.nnet.sigmoid,
                                 b_T=-5, L1_reg=0, L2_reg=0, batch_size=500,
                                 verbose=False, early_stopping=True):
    
    rng = numpy.random.RandomState(23455)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
        
    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    
    n_in = train_set_x.get_value(borrow=True).shape[1]
    
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    itr = T.fscalar()  # index to an iteration

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    
    highway_net = HighwayNetwork(
        rng=rng, 
        input=x,
        n_in=n_in, 
        n_hidden=n_hidden, 
        n_out=10, 
        n_hiddenLayers=n_hiddenLayers, 
        n_highwayLayers = n_highwayLayers,
        activation_hidden = activation_hidden,
        activation_highway = activation_highway,
        b_T = b_T
    )
    
    print('... building the model')
    
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = ( highway_net.logRegressionLayer.negative_log_likelihood(y)
        #+ L1_reg * L1
        #+ L2_reg * L2_sqr
    )
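    # Note: L1_reg and L2_reg are accepted by this function, but the penalty
    # terms above are commented out; enabling them would first require building
    # L1 and L2_sqr from the network's weight matrices (they are not defined in
    # this snippet).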
            
    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch    
    test_model = theano.function(
        inputs=[index],
        outputs=highway_net.logRegressionLayer.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=highway_net.logRegressionLayer.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    updates = MomentumG(cost, highway_net.params, itr, lr_base=learning_rate, 
                                lr_decay=lr_decay, momentum=momentum)
    
    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index,itr],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        },
        on_unused_input='ignore'
    )
    
    gate_output = theano.function(inputs=[index],
                                  outputs=highway_net.gate_outputs,
                                  givens={
                                    x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                 })
    
    block_output = theano.function(inputs=[index],
                                   outputs=highway_net.block_outputs,
                                   givens={
                                    x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                 })
        
    result = train_nn(train_model, validate_model, test_model, 
                      n_train_batches, n_valid_batches, n_test_batches, n_epochs, 
                      gate_output, block_output, verbose, early_stopping)
             
    res =  pd.DataFrame([result.RunningTime, result.BestXEntropy, result.TestPerformance, result.BestValidationScore,
                         n_epochs, result.N_Epochs, activation_hidden, activation_highway, L2_reg, L1_reg,
                         batch_size, result.N_Iterations, n_hidden, n_hiddenLayers, n_highwayLayers, learning_rate, lr_decay, momentum, result.Patience],                         
                        index=['Running time','XEntropy','Test performance','Best Validation score',
                                 'Max epochs','N epochs','Activation function - hidden', 'Activation function - highway','L2_reg parameter',
                                 'L1_reg parameter','Batch size','Iterations', 'Hidden neurons per layer', 'Hidden Layers', 'Highway Layers', 
                                 'Learning rate', 'lr_decay', 'momentum', 'Patience']).transpose()
    
    res.to_csv('Results.csv',mode='a',index=None,header=False)
    idx = pd.read_csv('Results.csv').index.values[-1]
    
    pickle.dump(result.XEntropy,open("cross_entropy"+str(idx)+".p","wb"))
    print('Cross entropy is stored in cross_entropy'+str(idx)+'.p') 
    
    return highway_net.params, result.Gate_outputs, result.Block_outputs 
Example #45
0
W = csp(X_train, Y_train)
V = np.ones((301, 1))
sc = classify_csp(W, V, X_train, Y_train, X_test, Y_test)

# Fine tune CSP pipeline
# Note input data dim: [batches, time, channel]
# Filter dim: [channel_in, channel_out]
X_train_T = theano.shared(X_train.transpose(2, 0, 1))
X_test_T = theano.shared(X_test.transpose(2, 0, 1))
Y_train_T = T.cast(theano.shared(Y_train[0, :]), 'int32')
Y_test_T = T.cast(theano.shared(Y_test[0, :]), 'int32')

lr = .01  # learning rate
batch_size = 28
epochs = 1700
index = T.lscalar('index')
y = T.ivector('y')
X = T.tensor3('X')
csp_w = theano.shared(W)   # CSP spatial filters
avg_v = theano.shared(V)   # temporal weights (initialised to ones)
proj_csp = T.tensordot(X, csp_w, axes=[2, 0])            # project channels through the CSP filters
layer0_out = T.pow(proj_csp, 2)                          # square the filtered signals
variance = T.tensordot(layer0_out, avg_v, axes=[1, 0])   # sum over time -> variance per CSP component
layer1_out = T.log(variance)[:, :, 0]                    # log-variance features
layer2 = LogisticRegression(input=layer1_out, n_in=26, n_out=2)
loss = layer2.negative_log_likelihood(y) + .01 * T.sum(T.pow(avg_v, 2))

with open('params_dnn_al.pkl', 'rb') as f:
    params_model = cPickle.load(f)
csp_w.set_value(params_model[0].get_value())
avg_v.set_value(params_model[1].get_value())
Example #46
0
    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on
        a batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                         it has to contain three pairs, `train`,
                         `valid`, `test` in this order, where each pair
                         is formed of two Theano variables, one for the
                         datapoints, the other for the labels

        :type batch_size: int
        :param batch_size: size of a minibatch

        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage
        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for training, validation and testing
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches //= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches //= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = [(param, param - gparam * learning_rate)
                   for param, gparam in zip(self.params, gparams)]

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x:
                train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                train_set_y[index * batch_size:(index + 1) * batch_size]
            },
            name='train')

        test_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x:
                test_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: test_set_y[index * batch_size:(index + 1) * batch_size]
            },
            name='test')

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x:
                valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                valid_set_y[index * batch_size:(index + 1) * batch_size]
            },
            name='valid')

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in range(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in range(n_test_batches)]

        return train_fn, valid_score, test_score
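
A minimal sketch of how the three returned callables are typically consumed; the object name `sda`, the batch size, and the epoch loop below are illustrative assumptions, not part of the original code:

train_fn, valid_score, test_score = sda.build_finetune_functions(
    datasets=datasets, batch_size=20, learning_rate=0.1)

n_train_batches = datasets[0][0].get_value(borrow=True).shape[0] // 20
for epoch in range(10):
    for minibatch_index in range(n_train_batches):
        minibatch_cost = train_fn(minibatch_index)
    print('epoch %d, mean validation error %f' %
          (epoch, float(numpy.mean(valid_score()))))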
Example #47
0
def test_dA(learning_rate=0.1, training_epochs=15,
            dataset='mnist.pkl.gz',
            batch_size=20, output_folder='dA_plots'):

    """
    This demo is tested on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used for training the Denoising
                          AutoEncoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the pickled dataset

    """
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)
    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=28 * 28,
        n_hidden=500
    )

    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=learning_rate
    )
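    # Note: with corruption_level=0. this first model is a plain autoencoder;
    # get_cost_updates is expected to zero out a random corruption_level
    # fraction of each input (via theano_rng) before encoding, so no noise is
    # injected here.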

    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through the training set
        c = []
        for batch_index in xrange(n_train_batches):            
            c.append(train_da(batch_index))

        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> sys.stderr, ('The no corruption code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((training_time) / 60.))
    image = Image.fromarray(
        tile_raster_images(X=da.W.get_value(borrow=True).T,
                           img_shape=(28, 28), tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_0.png')

    #####################################
    # BUILDING THE MODEL CORRUPTION 30% #
    #####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=28 * 28,
        n_hidden=500
    )

    cost, updates = da.get_cost_updates(
        corruption_level=0.3,
        learning_rate=learning_rate
    )

    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through the training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> sys.stderr, ('The 30% corruption code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % (training_time / 60.))

    image = Image.fromarray(tile_raster_images(
        X=da.W.get_value(borrow=True).T,
        img_shape=(28, 28), tile_shape=(10, 10),
        tile_spacing=(1, 1)))
    image.save('filters_corruption_30.png')

    os.chdir('../')
Example #48
0
def test_mlp_dropout(p, learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=500,
                     batch_size=20, n_hidden=500, verbose=True, acttest=T.tanh):


    # load the dataset; download the dataset if it is not present
    f = open("dropout.txt",'w')
    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model',file=f)
    print('... building the model')


    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of # [int] labels
    
    
    training_enabled = T.iscalar('training_enabled') # pseudo boolean for switching between training and prediction

    rng = numpy.random.RandomState(1234)
    
    #TODO: create an object of DropoutHiddenLayer class
    #hiddenlayer_one = ...... 
    hiddenlayer_one=DropoutHiddenLayer(
            rng=rng,
            is_train=training_enabled,
            input=x,
            n_in=32*32*3,
            n_out=n_hidden, 
            p=p
    )
        
    #TODO: create an object of DropoutHiddenLayer class
    #hiddenlayer_two = ......      
    hiddenlayer_two=DropoutHiddenLayer(
            rng=rng,
            is_train=training_enabled,
            input=hiddenlayer_one.output,
            n_in=n_hidden,
            n_out=n_hidden,  
            p=p
    )
    # The logistic regression layer gets as input the hidden units
    # of the hidden layer
    #TODO: create an object of LogisticRegression class
    #logRegressionLayer = ......
    logRegressionLayer = LogisticRegression(
            input=hiddenlayer_two.output,
            n_in=n_hidden,
            n_out=10
    )
    # L1 norm ; one regularization option is to enforce L1 norm to
    # be small
    #TODO: Define the expression for L1
    #L1 = ......
    L1 =  (
        abs(hiddenlayer_one.W).sum() + abs(hiddenlayer_two.W).sum() + 
        abs(logRegressionLayer.W).sum()
    )
    # square of L2 norm ; one regularization option is to enforce
    # square of L2 norm to be small
    #TODO: Define the expression for L2_sqr
    #L2_sqr = ......
    L2_sqr = (
              (hiddenlayer_one.W ** 2).sum()
            + (hiddenlayer_two.W ** 2).sum()
            + (logRegressionLayer.W ** 2).sum()
        )
    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    #TODO: Define the expression for negative_log_likelihood
    #negative_log_likelihood = ......
    negative_log_likelihood = (
            logRegressionLayer.negative_log_likelihood
        )
    
    # same holds for the function computing the number of errors
    #TODO: Define the expression for errors
    #errors = ......
    errors=logRegressionLayer.errors
    # the parameters of the model are the parameters of the two layer it is
    # made out of
    #TODO: Define the expression for params
    #params = ......
    params = hiddenlayer_one.params + hiddenlayer_two.params + logRegressionLayer.params
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically  
    #TODO: Define the expression for cost
    #cost = ......
    cost = (
            negative_log_likelihood(y)
            + L1_reg * L1
            + L2_reg * L2_sqr
        )
    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        },

    )

    validate_model = theano.function(
        inputs=[index],
        outputs=errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        },

    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    momentum =theano.shared(numpy.cast[theano.config.floatX](0.5), name='momentum')
    updates = []
    for param in  params:
        param_update = theano.shared(param.get_value()*numpy.cast[theano.config.floatX](0.))    
        updates.append((param, param - learning_rate*param_update))
        updates.append((param_update, momentum*param_update + (numpy.cast[theano.config.floatX](1.) - momentum)*T.grad(cost, param)))
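    # These updates implement momentum as an exponential moving average of the
    # gradient: v <- momentum * v + (1 - momentum) * grad, param <- param - lr * v.
    # Because Theano applies all updates of one call simultaneously, the
    # parameter step uses the velocity from the previous call.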
        
        
    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](1)
        },

    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    print('... training',file=f)

    print('p=%f'%p)
    print('p=%f'%p,file=f)

    # early-stopping parameters
    patience = 20000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                if verbose:
                    print(
                        'epoch %i, minibatch %i/%i, validation error %f %%' %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            this_validation_loss * 100.
                        )
                    )
                    print(
                        'epoch %i, minibatch %i/%i, validation error %f %%' %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            this_validation_loss * 100.
                        ),file=f
                    )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i
                                   in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    if verbose:
                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))
                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.),file=f)

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.),file=f)
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),file=f)
Example #49
0
train_loss_nonorm = l6.error(normalisation=False)
train_loss = l6.error()  # but compute and print this!
valid_loss = l6.error(dropout_active=False)
all_parameters = layers.all_parameters(l6)
all_bias_parameters = layers.all_bias_parameters(l6)

xs_shared = [
    theano.shared(np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
    for _ in xrange(num_input_representations)
]
y_shared = theano.shared(np.zeros((1, 1), dtype=theano.config.floatX))

learning_rate = theano.shared(
    np.array(LEARNING_RATE_SCHEDULE[0], dtype=theano.config.floatX))

idx = T.lscalar('idx')

givens = {
    l0.input_var: xs_shared[0][idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE],
    l0_45.input_var: xs_shared[1][idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE],
    l6.target_var: y_shared[idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE],
}

# updates = layers.gen_updates(train_loss, all_parameters, learning_rate=LEARNING_RATE, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
updates_nonorm = layers.gen_updates_nesterov_momentum_no_bias_decay(
    train_loss_nonorm,
    all_parameters,
    all_bias_parameters,
    learning_rate=learning_rate,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY)
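
A hedged sketch of the compilation step that usually follows this setup; only `idx`, `givens`, `train_loss_nonorm`, and `updates_nonorm` come from the snippet above, and the exact outputs returned are an assumption:

iter_train_nonorm = theano.function([idx], train_loss_nonorm,
                                    givens=givens, updates=updates_nonorm)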
Example #50
0
def sgd_optimization_mnist(learning_rate=0.13,
                           n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    print '... building the model'

    index = T.lscalar()

    x = T.matrix('x')
    y = T.ivector('y')

    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    cost = classifier.negative_log_likelihood(y)

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print '... training the model'

    patience = 5000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = np.inf
    test_score = 0.
    start_time = timeit.default_timer()

    copy_reg.pickle(types.MethodType, _pickle_method)

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss

                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of'
                           ' best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

                    with open('best_logReg_model.pkl', 'w') as f:
                        cPickle.dump(classifier,
                                     f,
                                     protocol=cPickle.HIGHEST_PROTOCOL)

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
Example #51
0
def test_dA_joint(learning_rate=0.01,
                  training_epochs=15000,
                  dataset='mnist.pkl.gz',
                  batch_size=5,
                  output_folder='dA_plots'):
    """
    This demo is tested on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used for training the Denoising
                          AutoEncoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the pickled dataset

    """
    ##datasets = load_data(dataset)
    #from SdA_mapping import load_data_half
    #datasets = load_data_half(dataset)
    print 'loading data'
    datasets, x_mean, y_mean, x_std, y_std = load_vc()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    print 'loaded data'

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x1 = T.matrix('x1')  # the data is presented as rasterized images
    x2 = T.matrix('x2')  # the data is presented as rasterized images
    cor_reg = T.scalar('cor_reg')
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)
    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2**30))

    #da = dA_joint(
    #numpy_rng=rng,
    #theano_rng=theano_rng,
    #input1=x1,
    #input2=x2,

    #n_visible1=28 * 28/2,
    #n_visible2=28 * 28/2,

    #n_hidden=500
    #)
    print 'initialize functions'

    da = dA_joint(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input1=x1,
        input2=x2,
        cor_reg=cor_reg,

        #n_visible1=28 * 28/2,
        #n_visible2=28 * 28/2,
        n_visible1=24,
        n_visible2=24,
        n_hidden=50)

    cost, updates = da.get_cost_updates(corruption_level=0.3,
                                        learning_rate=learning_rate)
    cor_reg_val = numpy.float32(5.0)
    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x1: train_set_x[index * batch_size:(index + 1) * batch_size],
            x2: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    fprop_x1 = theano.function([],
                               outputs=da.output1,
                               givens={x1: test_set_x},
                               name='fprop_x1')
    fprop_x2 = theano.function([],
                               outputs=da.output2,
                               givens={x2: test_set_y},
                               name='fprop_x2')
    fprop_x1t = theano.function([],
                                outputs=da.output1,
                                givens={x1: train_set_x},
                                name='fprop_x1')
    fprop_x2t = theano.function([],
                                outputs=da.output2,
                                givens={x2: train_set_y},
                                name='fprop_x2')
    rec_x1 = theano.function([],
                             outputs=da.rec1,
                             givens={x1: test_set_x},
                             name='rec_x1')
    rec_x2 = theano.function([],
                             outputs=da.rec2,
                             givens={x2: test_set_y},
                             name='rec_x2')
    fprop_x1_to_x2 = theano.function([],
                                     outputs=da.reg,
                                     givens={x1: test_set_x},
                                     name='fprop_x12x2')
    updates_reg = [(da.cor_reg, da.cor_reg + theano.shared(numpy.float32(0.1)))
                   ]
    update_reg = theano.function([], updates=updates_reg)
    print 'initialize functions ended'

    start_time = time.clock()

    ############
    # TRAINING #
    ############
    print 'training started'
    X1 = test_set_x.eval()
    X1 *= x_std
    X1 += x_mean
    X2 = test_set_y.eval()
    X2 *= y_std
    X2 += y_mean
    from dcca_numpy import cor_cost
    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through the training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        #cor_reg_val += 1
        #da.cor_reg = theano.shared(cor_reg_val)
        update_reg()

        X1H = rec_x1()
        X2H = rec_x2()
        X1H *= x_std
        X1H += x_mean
        X2H *= y_std
        X2H += y_mean
        H1 = fprop_x1()
        H2 = fprop_x2()
        print 'Training epoch', epoch
        print 'Reconstruction ', numpy.mean(numpy.mean((X1H-X1)**2,1)),\
              numpy.mean(numpy.mean((X2H-X2)**2,1))

        if epoch % 5 == 2:  # pretrain middle layer
            print '... pre-training MIDDLE layer'
            H1t = fprop_x1t()
            H2t = fprop_x2t()
            h1 = T.matrix('h1')  # hidden representation of the first view
            h2 = T.matrix('h2')  # hidden representation of the second view
            from mlp import HiddenLayer
            numpy_rng = numpy.random.RandomState(89677)
            log_reg = HiddenLayer(numpy_rng, h1, 50, 50, activation=T.tanh)

            if 1:  # for middle layer
                learning_rate = 0.1

                #H1=theano.shared(H1)
                #H2=theano.shared(H2)
                # compute the gradients with respect to the model parameters
                logreg_cost = log_reg.mse(h2)

                gparams = T.grad(logreg_cost, log_reg.params)

                # compute list of fine-tuning updates
                updates = [(param, param - gparam * learning_rate)
                           for param, gparam in zip(log_reg.params, gparams)]

                train_fn_middle = theano.function(inputs=[],
                                                  outputs=logreg_cost,
                                                  updates=updates,
                                                  givens={
                                                      h1: theano.shared(H1t),
                                                      h2: theano.shared(H2t)
                                                  },
                                                  name='train_middle')
            epoch = 0
            while epoch < 100:
                print epoch, train_fn_middle()
                epoch += 1

            ##X2H=fprop_x1_to_x2()
            X2H = numpy.tanh(H1.dot(log_reg.W.eval()) + log_reg.b.eval())
            X2H = numpy.tanh(X2H.dot(da.W2_prime.eval()) + da.b2_prime.eval())

            X2H *= y_std
            X2H += y_mean
            print 'Regression ', numpy.mean(numpy.mean((X2H - X2)**2, 1))

        print 'Correlation ', cor_cost(H1, H2)
    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> sys.stderr, ('The no corruption code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((training_time) / 60.))
    image = Image.fromarray(
        tile_raster_images(X=da.W1.get_value(borrow=True).T,
                           img_shape=(28, 14),
                           tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_0.png')

    from matplotlib import pyplot as pp
    pp.plot(H1[:10, :2], 'b')
    pp.plot(H2[:10, :2], 'r')
    pp.show()

    print 'Final correlation ', cor_cost(H1, H2)
Example #52
0
def run_mnl():
    """ Discrete choice model estimation with Theano

    Setup
    -----
    step 1: Load variables from csv file
    step 2: Define hyperparameters used in the computation
    step 3: define symbolic Theano tensors
    step 4: build model and define cost function
    step 5: define gradient calculation algorithm
    step 6: define Theano symbolic functions
    step 7: run main estimation loop for n iterations
    step 8: perform analytics and model statistics

    """
    # compile and import dataset from csv#
    d_x_ng, d_x_g, d_y, avail, d_ind = extractdata(csvString)
    data_x_ng = shared(np.asarray(d_x_ng, dtype=floatX), borrow=True)
    data_x_g = shared(np.asarray(d_x_g, dtype=floatX), borrow=True)
    data_y = T.cast(shared(np.asarray(d_y - 1, dtype=floatX), borrow=True),
                    'int32')
    data_av = shared(np.asarray(avail, dtype=floatX), borrow=True)

    sz_n = d_x_g.shape[0]  # number of samples
    sz_k = d_x_g.shape[1]  # number of generic variables
    sz_m = d_x_ng.shape[2]  # number of non-generic variables
    sz_i = d_x_ng.shape[1]  # number of alternatives

    sz_minibatch = sz_n  # model hyperparameters
    learning_rate = 0.3
    momentum = 0.9

    x_ng = T.tensor3('data_x_ng')  # symbolic theano tensors
    x_g = T.matrix('data_x_g')
    y = T.ivector('data_y')
    av = T.matrix('data_av')

    index = T.lscalar('index')

    # construct model
    model = Logistic(sz_i,
                     av,
                     input=[x_ng, x_g],
                     n_in=[(sz_m, ), (sz_k, sz_i)])

    cost = -model.loglikelihood(y)

    # calculate the gradients wrt to the loss function
    grads = T.grad(cost=cost, wrt=model.params)
    opt = optimizers.adadelta(model.params, model.masks, momentum)

    updates = opt.updates(model.params, grads, learning_rate)

    # hessian function
    fn_hessian = function(inputs=[],
                          outputs=T.hessian(cost=cost, wrt=model.params),
                          givens={
                              x_ng: data_x_ng,
                              x_g: data_x_g,
                              y: data_y,
                              av: data_av
                          },
                          on_unused_input='ignore')

    # null loglikelihood function
    fn_null = function(inputs=[],
                       outputs=model.loglikelihood(y),
                       givens={
                           x_ng: data_x_ng,
                           x_g: data_x_g,
                           y: data_y,
                           av: data_av
                       },
                       on_unused_input='ignore')

    # compile the theano functions
    fn_estimate = function(
        name='estimate',
        inputs=[index],
        outputs=[model.loglikelihood(y),
                 model.errors(y)],
        updates=updates,
        givens={
            x_ng:
            data_x_ng[index * sz_minibatch:T.min(((index + 1) * sz_minibatch,
                                                  sz_n))],
            x_g:
            data_x_g[index * sz_minibatch:T.min(((index + 1) * sz_minibatch,
                                                 sz_n))],
            y:
            data_y[index * sz_minibatch:T.min(((index + 1) * sz_minibatch,
                                               sz_n))],
            av:
            data_av[index * sz_minibatch:T.min(((index + 1) * sz_minibatch,
                                                sz_n))]
        },
        allow_input_downcast=True,
        on_unused_input='ignore',
    )
    """ Main estimation process loop """
    print('Begin estimation...')

    epoch = 0  # process loop parameters
    sz_epoches = 9999
    sz_batches = np.ceil(sz_n / sz_minibatch).astype(np.int32)
    done_looping = False
    patience = 300
    patience_inc = 10
    best_loglikelihood = -np.inf
    null_Loglikelihood = fn_null()
    start_time = timeit.default_timer()

    while epoch < sz_epoches and done_looping is False:
        epoch_error = []
        epoch_loglikelihood = []
        for i in range(sz_batches):
            (batch_loglikelihood, batch_error) = fn_estimate(i)
            epoch_error.append(batch_error)
            epoch_loglikelihood.append(batch_loglikelihood)

        this_loglikelihood = np.sum(epoch_loglikelihood)
        print('@ iteration %d loglikelihood: %.3f' %
              (epoch, this_loglikelihood))

        if this_loglikelihood > best_loglikelihood:
            if this_loglikelihood > 0.997 * best_loglikelihood:
                patience += patience_inc
            best_loglikelihood = this_loglikelihood
            with open('best_model.pkl', 'wb') as f:
                pickle.dump(model, f)

        if epoch > patience:
            done_looping = True

        epoch += 1

    final_Loglikelihood = best_loglikelihood
    rho_square = 1. - (final_Loglikelihood / null_Loglikelihood)
    end_time = timeit.default_timer()
    """ Analytics and model statistics """
    print('... solving Hessians')
    h = np.hstack([np.diagonal(mat) for mat in fn_hessian()])
    n_est_params = np.count_nonzero(h)
    aic = 2 * n_est_params - 2 * final_Loglikelihood
    bic = np.log(sz_n) * n_est_params - 2 * final_Loglikelihood
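    # Worked check with illustrative numbers (not from this dataset): with
    # LL_null = -1000, LL_final = -700 and 5 estimated parameters on 1000
    # samples, rho_square = 1 - (-700 / -1000) = 0.30,
    # AIC = 2*5 - 2*(-700) = 1410, and BIC = log(1000)*5 - 2*(-700) ~= 1434.5.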

    print('@iteration %d, run time %.3f ' % (epoch, end_time - start_time))
    print('Null Loglikelihood: %.3f' % null_Loglikelihood)
    print('Final Loglikelihood: %.3f' % final_Loglikelihood)
    print('rho square %.3f' % rho_square)
    print('AIC %.3f' % aic)
    print('BIC %.3f' % bic)

    with open('best_model.pkl', 'rb') as f:
        best_model = pickle.load(f)

    run_analytics(best_model, h)
Example #53
0
batch_size = 100

datasets = ds.load_mnist("../data/mnist.pkl.gz")
train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]

n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

print "[MESSAGE] The data is loaded"

X = T.matrix("data")
y = T.ivector("label")
idx = T.lscalar()
dropout_rate = T.fscalar()

layer_0 = ReLULayer(in_dim=784, out_dim=500)
layer_1 = ReLULayer(in_dim=500, out_dim=200)
layer_2 = SoftmaxLayer(in_dim=200, out_dim=10)

dropout = multi_dropout([(batch_size, 784), (batch_size, 500),
                         (batch_size, 200)], dropout_rate)

model = FeedForward(layers=[layer_0, layer_1, layer_2], dropout=dropout)
model_test = FeedForward(layers=[layer_0, layer_1, layer_2])
#model=FeedForward(layers=[layer_0, layer_1, layer_2]);

out = model.fprop(X)
out_test = model_test.fprop(X)
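
The snippet stops before any Theano functions are compiled. A minimal, hedged continuation is sketched below; the cross-entropy cost and the error measure are additions for illustration and are not taken from the original code:

# Symbolic cost on the dropout model and a compiled error function on the
# dropout-free test model (assumed continuation, see note above).
cost = T.mean(T.nnet.categorical_crossentropy(out, y))
errors = T.mean(T.neq(T.argmax(out_test, axis=1), y))

test_fn = theano.function(
    inputs=[idx],
    outputs=errors,
    givens={X: test_set_x[idx * batch_size:(idx + 1) * batch_size],
            y: test_set_y[idx * batch_size:(idx + 1) * batch_size]})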
Example #54
0
 def make_node(self, *args):
     # HERE `args` must be THEANO VARIABLES
     return gof.Apply(op=self, inputs=args, outputs=[tensor.lscalar()])
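     # gof.Apply records that applying this Op to `args` produces a single
     # output, a 0-d int64 tensor (tensor.lscalar()).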
Example #55
0
def test_mlp(datasets, learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             batch_size=20, n_hidden=500):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

   """

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    # compute number of minibatches for training, validation
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size + 1
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size + 1

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    dim = train_set_x.get_value(borrow=True).shape[1]
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=dim,
        n_hidden=n_hidden,
        n_out=2
    )

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )
    # end-snippet-4

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.y_pred,
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size]
        }
    )

    # start-snippet-5
    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists A = [a1, a2, a3, a4] and B = [b1, b2, b3, b4] of the
    # same length, zip generates a list C of the same size, where each
    # element is a pair formed from the two lists:
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    best_fscore = 0
    start_time = time.clock()

    epoch = 0
    while epoch < n_epochs:
        epoch += 1
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)

        # compute f-score on validation set
        y_preds = [validate_model(i) for i in range(n_valid_batches)]
        y_pred = [pij for pi in y_preds for pij in pi]
        y_real = valid_set_y.get_value(borrow=True)
        fscore = f_score(y_real, y_pred)
        print('epoch {0:d}, fscore {1:f} %'.format(epoch, fscore * 100.))

        # if we got the best validation score until now
        if fscore > best_fscore:
            best_fscore = fscore
            print('-----Best score: {0:f}-----'.format(best_fscore))

    end_time = time.clock()
    print('Optimization complete with best validation score of {0:.1f} %,'
          .format(best_fscore * 100.))
    print('The code for file ' +
          os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Example #56
0
# predictions path
predictions_dir = utils.get_dir_path('model-predictions',
                                     pathfinder.METADATA_PATH)
outputs_path = predictions_dir + '/%s' % config_name
utils.auto_make_dir(outputs_path)

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % config_name)
sys.stderr = sys.stdout

# builds model and sets its parameters
model = config().build_model()

x_shared = nn.utils.shared_empty(dim=len(model.l_in.shape))
idx_z = T.lscalar('idx_z')
idx_y = T.lscalar('idx_y')
idx_x = T.lscalar('idx_x')

window_size = config().window_size
stride = config().stride
n_windows = config().n_windows

givens = {}
givens[model.l_in.input_var] = x_shared

get_predictions_patch = theano.function([],
                                        nn.layers.get_output(
                                            model.l_out, deterministic=True),
                                        givens=givens,
                                        on_unused_input='ignore')
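# Usage note (a hedged reading of the pattern above, not from the original
# file): because the input is routed through the shared variable x_shared via
# `givens` rather than passed as a function argument, a window is evaluated by
# first loading it with x_shared.set_value(patch) and then calling
# get_predictions_patch() with no arguments.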
Example #57
0
    #building on top of logistic regression
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    #cost function with L2 Regularization of lambda = 0.01
    cost = (classifier.negative_log_likelihood(y) + L2_reg * classifier.L2_sqr)

    #taking the cost function to evaluate gradient
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)
    #updating the weights and errors vector
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # creating a training function that computes the cost and updates the parameter of the model based on the rules
    index = T.lscalar()

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
Example #58
0
def test_mlp(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=1000,
             dataset='mnist.pkl.gz',
             batch_size=20,
             n_hidden=500):

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute the number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    #########
    # MODEL #
    #########
    print '... building the model'

    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    rng = numpy.random.RandomState(1234)

    # instantiate the MLP
    classifier = MLP(rng=rng,
                     input=x,
                     n_in=28 * 28,
                     n_hidden=n_hidden,
                     n_out=10)
    # loss function
    cost = classifier.negative_log_likelihood(y) \
         + L1_reg * classifier.L1 \
         + L2_reg * classifier.L2_sqr

    # function computing the errors the model makes on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # gradient of the loss with respect to theta and parameter updates
    gparams = []
    for param in classifier.params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)
    updates = []
    for param, gparam in zip(classifier.params, gparams):
        updates.append((param, param - learning_rate * gparam))

    # train_model function that returns the loss AND updates the parameters
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995  # only a relative improvement > threshold is considered significant
    validation_frequency = min(n_train_batches, patience / 2)

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on the validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we have the best validation score so far
                if this_validation_loss < best_validation_loss:
                    # increase patience if the loss improvement is significant
                    if this_validation_loss < best_validation_loss *  \
                           improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # evaluate on the test set
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
def train_conv_net(use_test,
                   perf_or_predict,
                   datasets,
                   U,
                   img_w=300,
                   filter_hs=[3, 4, 5],
                   hidden_units=[100, 3],
                   dropout_rate=[0.5],
                   shuffle_batch=True,
                   n_epochs=25,
                   batch_size=50,
                   lr_decay=0.95,
                   conv_non_linear="relu",
                   activations=[Iden],
                   sqr_norm_lim=9,
                   non_static=True):
    """
    Train a simple conv net
    img_h = sentence length (padded where necessary)
    img_w = word vector length (300 for word2vec)
    filter_hs = filter window sizes    
    hidden_units = [x,y] x is the number of feature maps (per filter window), and y is the penultimate layer
    sqr_norm_lim = s^2 in the paper
    lr_decay = adadelta decay parameter
    """
    rng = np.random.RandomState(3435)
    img_h = len(datasets[0][0]) - 1
    filter_w = img_w
    feature_maps = hidden_units[0]
    filter_shapes = []
    pool_sizes = []
    test_real_size = datasets[1].shape[0]
    test_vote_array = np.zeros((datasets[1].shape[0], 10))
    for filter_h in filter_hs:

        filter_shapes.append((feature_maps, 1, filter_h, filter_w))

        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))
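        # since filter_w == img_w, the pool covers the whole (img_h - filter_h + 1) x 1
        # feature map, i.e. max-over-time pooling that keeps one value per feature map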
    parameters = [("image shape", img_h, img_w),
                  ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units), ("dropout", dropout_rate),
                  ("batch_size", batch_size), ("non_static", non_static),
                  ("learn_decay", lr_decay),
                  ("conv_non_linear", conv_non_linear),
                  ("non_static", non_static), ("sqr_norm_lim", sqr_norm_lim),
                  ("shuffle_batch", shuffle_batch)]
    print parameters

    #define model architecture
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    Words = theano.shared(value=U, name="Words")
    zero_vec_tensor = T.vector()
    zero_vec = np.zeros(img_w)

    set_zero = theano.function([zero_vec_tensor],
                               updates=[
                                   (Words,
                                    T.set_subtensor(Words[0, :],
                                                    zero_vec_tensor))
                               ])
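    # set_zero is called after each gradient step (see the training loop below)
    # to keep row 0 of Words (the padding token's vector) fixed at zero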

    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))
    conv_layers = []
    layer1_inputs = []

    for i in xrange(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng,
                                        input=layer0_input,
                                        image_shape=(batch_size, 1, img_h,
                                                     img_w),
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_hs)
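    # the MLP input is the concatenation of all feature maps, so hidden_units[0]
    # is overwritten with feature_maps * len(filter_hs) before building the classifier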
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)

    #define parameters of the model and update functions using adadelta
    params = classifier.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        #if word vectors are allowed to change, add them as model parameters
        params += [Words]

    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6,
                                        sqr_norm_lim)
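    # note: the adadelta updates are computed w.r.t. the dropout cost, while the
    # plain (non-dropout) cost is what train_model reports below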

    #shuffle dataset and assign to mini batches. if the dataset size is not a multiple
    #of the mini-batch size, replicate extra data (chosen at random)
    np.random.seed()

    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        train_set = np.random.permutation(datasets[0])
        extra_data = train_set[:extra_data_num]
        new_data = np.append(datasets[0], extra_data, axis=0)
    else:
        new_data = datasets[0]

    if use_test == 1 and datasets[1].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[1].shape[0] % batch_size
        extra_data = datasets[1][:extra_data_num]
        datasets[1] = np.append(datasets[1], extra_data, axis=0)

    new_data = np.random.permutation(new_data)

    n_batches = new_data.shape[0] / batch_size

    n_train_batches = int(np.round(n_batches * 0.9))

    if use_test == 1:
        n_test_batches = int(np.round(datasets[1].shape[0] / batch_size))
    #divide train set into train/val sets
    test_set_x_4check = datasets[1][:, :img_h]

    test_set_y_4check = np.asarray(datasets[1][:, -1], "int32")
    train_set = new_data[:n_train_batches * batch_size, :]
    val_set = new_data[n_train_batches * batch_size:, :]

    train_set_x, train_set_y = shared_dataset(
        (train_set[:, :img_h], train_set[:, -1]))
    val_set_x, val_set_y = shared_dataset((val_set[:, :img_h], val_set[:, -1]))
    test_set_x, test_set_y = shared_dataset(
        (datasets[1][:, :img_h], datasets[1][:, -1]))
    n_val_batches = n_batches - n_train_batches
    val_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: val_set_x[index * batch_size:(index + 1) * batch_size],
            y: val_set_y[index * batch_size:(index + 1) * batch_size]
        })
    #compile theano functions to get train/val/test errors
    test_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
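    # note: despite its name, this "test_model" evaluates classifier.errors on the
    # training set; it is used below to compute train_perf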
    train_model = theano.function(
        [index],
        cost,
        updates=grad_updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    get_test_label = theano.function(
        [index],
        classifier.testlabel(),
        givens={x: test_set_x[index * batch_size:(index + 1) * batch_size]})

    test_pred_layers = []
    if use_test == 1:
        test_size = batch_size
    else:
        test_size = datasets[1].shape[0]
    test_layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (test_size, 1, img_h, Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_y_pred = classifier.predict(test_layer1_input)
    test_error = T.mean(T.neq(test_y_pred, y))
    test_model_all = theano.function([x, y], test_error)
    get_test_result = theano.function([x], test_y_pred)
    #start training over mini-batches
    print '... training'
    epoch = 0
    best_test_perf = 0
    final_test_perf = 0
    predict_vector = []
    val_perf = 0
    test_perf = 0
    cost_epoch = 0
    while (epoch < n_epochs):
        epoch = epoch + 1
        if shuffle_batch:

            for minibatch_index in np.random.permutation(
                    range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)

                set_zero(zero_vec)
        else:
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        train_losses = [test_model(i) for i in xrange(n_train_batches)]
        train_perf = 1 - np.mean(train_losses)
        val_losses = [val_model(i) for i in xrange(n_val_batches)]
        val_perf = 1 - np.mean(val_losses)
        print('epoch %i, train perf %f %%, val perf %f' %
              (epoch, train_perf * 100., val_perf * 100.))
        if epoch >= 6 and epoch % 2 == 0:
            if use_test == 1:
                test_result = []
                for minibatch_index in xrange(n_test_batches):
                    test_result_tmp = get_test_label(minibatch_index)
                    test_result_tmp = np.array(test_result_tmp)
                    test_result.append(test_result_tmp)
                test_result = np.array(test_result)
                test_result = test_result.reshape(
                    (n_test_batches * batch_size, 1))
                for i in range(test_real_size):
                    test_vote_array[i][test_result[i]] += 1
                sum_4_test = 0
                for i in range(test_real_size):
                    if test_result[i] == test_set_y_4check[i]:
                        sum_4_test += 1
                test_perf = float(sum_4_test) / test_real_size
                if test_perf > best_test_perf:
                    best_test_perf = test_perf
                print("test_perf: " + str(test_perf))
            if use_test == 0:
                test_result = get_test_result(test_set_x_4check)
                test_result = np.array(test_result)
                for i in range(test_real_size):
                    test_vote_array[i][test_result[i]] += 1
                sum_4_test = 0
                for i in range(test_real_size):
                    if test_result[i] == test_set_y_4check[i]:
                        sum_4_test += 1
                test_perf = float(sum_4_test) / test_real_size
                if test_perf > best_test_perf:
                    best_test_perf = test_perf
                print("test_perf: " + str(test_perf))

    if epoch == n_epochs:
        if perf_or_predict == 0:
            final_test_perf = vote_for_answer(test_vote_array,
                                              test_set_y_4check,
                                              perf_or_predict)
            return final_test_perf
        if perf_or_predict == 1:
            predict_vector = vote_for_answer(test_vote_array,
                                             test_set_y_4check,
                                             perf_or_predict)
            return predict_vector
def train_FSRCNN(train_set_x,train_set_y,valid_set_x,valid_set_y,test_set_x,test_set_y,
    n_train_batches, n_valid_batches, n_test_batches, n_epochs, batch_size,lr,upsampling_factor=4):
    #x holds flattened low-res input patches (reshaped to (batch_size,3,8,8) below);
    #y holds flattened ground-truth patches (reshaped to (batch_size,3,33,33))
    x = T.matrix('x')
    y = T.matrix('y')

    theano.config.optimizer = 'fast_compile'
    #print "theano optimizer: " + str(theano.config.optimizer)

    rng = np.random.RandomState(11111)
    index = T.lscalar() 

    reshaped_input = x.reshape((batch_size,3,8,8))
    reshaped_gt = y.reshape((batch_size,3,33,33))

    learning_rate = theano.shared(np.cast[theano.config.floatX](lr))

    #Upsampling layer now done in preprocessing to save compute
    #upsampled_input = T.nnet.abstract_conv.bilinear_upsampling(reshaped_input,upsampling_factor,batch_size=batch_size,num_input_channels=3)
    # r_fun = theano.function([index],upsampled_input.shape,givens = {
    #         x: train_set_x[index * batch_size: (index + 1) * batch_size]
    #         })
    # theano.printing.debugprint(r_fun(0))
    
    #Filter params
    f1 = 9
    f2 = 5
    f3 = 10
    input_image_size = 8
    output_len = input_image_size + f3 -1
    #output_len = 16
    #Conv for Patch extraction
    #print('batch size', batch_size)
    conv1 = Conv_Layer_ReLU(rng, reshaped_input, image_shape=(batch_size,3,input_image_size,input_image_size),filter_shape = (64,3,f1,f1))
    conv1_len = input_image_size 
    #Conv for Non linear mapping
    #print('conv1 done....')
    conv2 = Conv_Layer_ReLU(rng, conv1.output, image_shape=(batch_size,64,conv1_len,conv1_len),filter_shape = (32,64,f2,f2))
    conv2_len = conv1_len
    #Conv for Reconstruction
    #conv2_output =  conv2.output.repeat(2,2)
    #conv2_output =  conv2_output.repeat(2,3)
    #conv3 = Conv_Layer_ReLU(rng, conv2.output, image_shape=(batch_size,32,conv2_len*2,conv2_len*2),filter_shape = (3,32,f3,f3))
    #model_output = conv3.output
    
    conv3 = De_Conv_Layer_ReLU(rng, conv2.output, image_shape=(batch_size,32,conv2_len,conv2_len),filter_shape = (3,32,f3,f3))
    model_output = conv3.output
    
    #print(model_output.shape)
    #grab center pixels
    #print('output len...', output_len)
    center_start = (33 - output_len) / 2
    center_end = 33 - center_start
    sub_y = reshaped_gt[:,:,center_start:center_end,center_start:center_end]
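    # crop the central output_len x output_len window of the 33x33 ground truth so
    # it matches the spatial size of the deconv output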
    #sub_y = reshaped_gt
    #MSE between center pixels of prediction and ground truth
    cost = T.mean((sub_y-model_output) ** 2)
    cost2 = 1.0/batch_size * T.sum((sub_y - model_output) ** 2)
    #PSNR of a patch is based on color space
    MSE_per_pixel = cost2/(output_len*output_len*3)
    psnr = 20 * T.log10(255) - 10 * T.log10(MSE_per_pixel)
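    # PSNR for 8-bit images: 10*log10(255^2 / MSE_per_pixel), written here as
    # 20*log10(255) - 10*log10(MSE_per_pixel)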
    reconstucted_imgs = model_output

    #Per-channel cost (alternative, not used)
    # costs = []
    # for d in sub_y.shape[0]:
    #     channel_cost = cost = 1.0/batch_size * T.sum((sub_y[d,:,:]-model_output[d,:,:]) ** 2)
    #     costs.append(channel_cost)

    params = conv3.params + conv2.params + conv1.params

    #ADAM optimizer
    beta1 = theano.shared(np.cast[theano.config.floatX](0.9), name='beta1')
    beta2 = theano.shared(np.cast[theano.config.floatX](0.999), name='beta2')
    eps = theano.shared(np.cast[theano.config.floatX](1e-8), name='eps')

    updates = []
    for param in params:
        m = theano.shared(param.get_value()*np.cast[theano.config.floatX](0.))    
        v = theano.shared(param.get_value()*np.cast[theano.config.floatX](0.))    
        new_m = beta1 * m + (np.cast[theano.config.floatX](1.) - beta1) * T.grad(cost, param)
        new_v = beta2 * v + (np.cast[theano.config.floatX](1.) - beta2) * T.sqr(T.grad(cost, param))
        updates.append((m, new_m))
        updates.append((v, new_v))
        updates.append((param, param - learning_rate*new_m/(T.sqrt(new_v) + eps)))
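    # note: this Adam variant omits the usual bias-correction terms
    # (dividing new_m and new_v by 1 - beta1**t and 1 - beta2**t)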

    #RMSProp
    # updates = []

    # for param in params:
    #     cache = theano.shared(param.get_value()*np.cast[theano.config.floatX](0.))    
    #     rms_decay = np.cast[theano.config.floatX](0.999)
    #     eps =theano.shared(np.cast[theano.config.floatX](1e-8)) 
    #     clip_grad = T.grad(cost,param)
 
    #     # if T.ge(1.0,clip_grad):
    #     #     clip_grad = np.cast[theano.config.floatX](1.0)
    #     # if T.le(-1,clip_grad):
    #     #     clip_grad = np.cast[theano.config.floatX](-1.0)
    #     new_cache = rms_decay * cache + (np.cast[theano.config.floatX](1.0) - rms_decay) * clip_grad**2
    #     updates.append((cache, new_cache))
    #     updates.append((param,param - learning_rate * clip_grad/(T.sqrt(new_cache) + eps)))

    #nesterov momentum
    # updates = []
    # mu = np.cast[theano.config.floatX](.9)
    # for param in params:
    #     v_prev = theano.shared(param.get_value()*np.cast[theano.config.floatX](0.))    
    #     v = theano.shared(param.get_value()*np.cast[theano.config.floatX](0.))   
    #     clip_grad = T.grad(cost,param)
 
    #     if T.ge(np.cast[theano.config.floatX](1.0),clip_grad):
    #         clip_grad = np.cast[theano.config.floatX](1.0)
    #     if T.le(np.cast[theano.config.floatX](-1.0),clip_grad):
    #         clip_grad = np.cast[theano.config.floatX](-1.0)
    #     new_v_prev = v
    #     new_v = mu * v - learning_rate * clip_grad

    #     updates.append((v_prev, new_v_prev))
    #     updates.append((v, new_v))
    #     updates.append((param,param - mu * new_v_prev + (np.cast[theano.config.floatX](1.0) + mu) * new_v))


    #SGD
    # clip_thresh = 1.0
    # for param in params:
    #     clip_grad = T.grad(cost,param)
    #     if T.ge(clip_thresh,clip_grad):
    #         clip_grad = np.cast[theano.config.floatX](clip_thresh)
    #     if T.le(-clip_thresh,clip_grad):
    #         clip_grad = np.cast[theano.config.floatX](-clip_thresh)
    #     updates = [
    #         (param, param - learning_rate * clip_grad)
    #     ]


    
    #Theano function compilation
    #if necessary, saved weights could be loaded here
    test_model = theano.function(
        [index],
        [cost,MSE_per_pixel,psnr,reconstucted_imgs],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        [cost,MSE_per_pixel,psnr,reconstucted_imgs],
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    train_model = theano.function(
        [index],
        [cost,MSE_per_pixel,psnr],
        updates=updates,
        givens={
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        })

    decay_learning_rate_function = theano.function([],learning_rate,updates = [(learning_rate,learning_rate * .995)])
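    # each call to decay_learning_rate_function multiplies the shared learning rate
    # by 0.995; it is handed to train_nn below so the rate can be annealed during training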

    train_nn(train_model, validate_model, test_model,
            n_train_batches, n_valid_batches, n_test_batches, n_epochs,output_len,decay_learning_rate_function,
            verbose = True)

    return validate_model,test_model