Example 1
def load_cls_for_simclr(cfg):
    simCLR, _, _ = load_simclr(cfg)
    logit = LogisticRegression(simCLR, cfg.cls.dataset.n_classes)
    if cfg.cls.load:
        model_fp = os.path.join(
            cfg.cls.model_path, "checkpoint_{}.tar".format(cfg.cls.epoch_num)
        )
        logit.load_state_dict(torch.load(model_fp, map_location=cfg.cls.device.type))
    cfg_adam = cfg.cls.optim.adam
    optimizer = torch.optim.Adam(logit.parameters(), lr=cfg_adam.lr)  # TODO: LARS
    scheduler = None
    return logit, optimizer, scheduler
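
For context, `LogisticRegression(simCLR, cfg.cls.dataset.n_classes)` above is presumably a linear probe wrapped around the (frozen) SimCLR encoder. A minimal PyTorch sketch of that assumed interface; the class body and the `n_features` argument are illustrative guesses, not taken from the repository:

import torch
import torch.nn as nn

class LogisticRegression(nn.Module):
    # Sketch only: a single linear layer ("logistic regression") on top of a frozen encoder.
    def __init__(self, encoder, n_classes, n_features=64):  # n_features is a hypothetical argument
        super().__init__()
        self.encoder = encoder
        for p in self.encoder.parameters():  # freeze the representation
            p.requires_grad = False
        self.fc = nn.Linear(n_features, n_classes)

    def forward(self, x):
        with torch.no_grad():
            h = self.encoder(x)  # assumes the encoder returns a flat feature vector
        return self.fc(h)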
Example 2
 def __init__(self,K,node_num, nfeat, nhid, nclass, sampleSize, dropout,trainAttention):
     super(GAT, self).__init__()
     self.gc1 = GraphConvolution(K, node_num, nfeat, nhid, sampleSize[1],'False','True',trainAttention)
     self.gc2 = GraphConvolution(1, node_num, K*nhid, 14*nclass, sampleSize[0],'False','False',trainAttention)
     #self.gc3 = GraphConvolution(1, node_num, 4*7*nclass, 7*nclass, 'False','False')
     self.gc6 = LogisticRegression(14*nclass,1)
     self.dropout = dropout
Example 3
 def __init__(self, numpy_rng, theano_rng=None, y=None, 
              alpha=0.9, sample_rate=0.1, n_ins=784,
              hidden_layers_sizes=[500, 500], n_outs=10,
              corruption_levels=[0.1, 0.1],
              allX=None,allY=None,srng=None):
     self.sigmoid_layers = []
     self.sugar_layers = []
     self.params = []
     self.n_layers = len(hidden_layers_sizes)
     self.allXs = []
     if y is None:
         self.y = tensor.ivector(name='y')
     else:
         self.y = y
     assert self.n_layers > 0
     if not theano_rng:
         theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
     self.x = tensor.matrix('x')
     for i in xrange(self.n_layers):
         if i == 0:
             input_size = n_ins
         else:
             input_size = hidden_layers_sizes[i - 1]
         if i == 0:
             layer_input = self.x
         else:
             layer_input = self.sigmoid_layers[-1].output
         if i == 0:
             self.allXs.append(allX)
         else:
             self.allXs.append(tensor.dot(self.allXs[i-1], self.sigmoid_layers[-1].W) + self.sigmoid_layers[-1].b)
         sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                     input=layer_input,
                                     n_in=input_size,
                                     n_out=hidden_layers_sizes[i],
                                     activation=tensor.nnet.sigmoid)
         self.sigmoid_layers.append(sigmoid_layer)
         self.params.extend(sigmoid_layer.params)
         sugar_layer = sugar(numpy_rng=numpy_rng,
                             alpha=alpha,
                             sample_rate=sample_rate,
                             x=layer_input,
                             y=self.y,
                             n_visible=input_size,
                             n_hidden=hidden_layers_sizes[i],
                             W=sigmoid_layer.W,
                             bhid=sigmoid_layer.b,
                             allX=self.allXs[i],
                             allY=allY,
                             srng=srng)
         self.sugar_layers.append(sugar_layer)
     self.logLayer = LogisticRegression(
                      input=self.sigmoid_layers[-1].output,
                      n_in=hidden_layers_sizes[-1], n_out=n_outs)
     self.params.extend(self.logLayer.params)
     self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
     self.errors = self.logLayer.errors(self.y)
Example 4
    def __init__(self,rng,params,cost_function='mse',optimizer = RMSprop):

        lr=params["lr"]
        batch_size=params["batch_size"]
        n_output=params['n_output']
        corruption_level=params["corruption_level"]

        X = T.matrix(name="input",dtype=dtype) # batch of sequence of vector
        Y = T.matrix(name="output",dtype=dtype) # batch of sequence of vector
        is_train = T.iscalar('is_train') # pseudo boolean for switching between training and prediction
        bin_noise=rng.binomial(size=(batch_size,n_output/3,1), n=1,p=1 - corruption_level,dtype=theano.config.floatX)
        #bin_noise_3d= T.reshape(T.concatenate((bin_noise, bin_noise,bin_noise),axis=1),(batch_size,n_output/3,3))
        bin_noise_3d= T.concatenate((bin_noise, bin_noise,bin_noise),axis=2)

        noise= rng.normal(size=(batch_size,n_output), std=0.03, avg=0.0,dtype=theano.config.floatX)
        noise_bin=T.reshape(noise,(batch_size,n_output/3,3))*bin_noise_3d
        X_train=T.reshape(noise_bin,(batch_size,n_output))+X

        X_tilde= T.switch(T.neq(is_train, 0), X_train, X)

        W_1_e =u.init_weight(shape=(n_output,1024),rng=rng,name="w_hid",sample="glorot")
        b_1_e=u.init_bias(1024,rng)

        W_2_e =u.init_weight(shape=(1024,2048),rng=rng,name="w_hid",sample="glorot")
        b_2_e=u.init_bias(2048,rng)

        W_2_d = W_2_e.T
        b_2_d=u.init_bias(1024,rng)

        W_1_d = W_1_e.T
        b_1_d=u.init_bias(n_output,rng)

        h_1_e=HiddenLayer(rng,X_tilde,0,0, W=W_1_e,b=b_1_e,activation=nn.relu)
        h_2_e=HiddenLayer(rng,h_1_e.output,0,0, W=W_2_e,b=b_2_e,activation=nn.relu)
        h_2_d=HiddenLayer(rng,h_2_e.output,0,0, W=W_2_d,b=b_2_d,activation=u.do_nothing)
        h_1_d=LogisticRegression(rng,h_2_d.output,0,0, W=W_1_d,b=b_1_d)

        self.output = h_1_d.y_pred

        self.params =h_1_e.params+h_2_e.params
        self.params.append(b_2_d)
        self.params.append(b_1_d)

        cost=get_err_fn(self,cost_function,Y)
        L2_reg=0.0001
        L2_sqr = theano.shared(0.)
        for param in self.params:
            L2_sqr += T.sum(param ** 2)

        cost += L2_reg*L2_sqr

        _optimizer = optimizer(cost, self.params, lr=lr)
        self.train = theano.function(inputs=[X,Y,is_train],outputs=cost,updates=_optimizer.getUpdates(),allow_input_downcast=True)
        self.predictions = theano.function(inputs = [X,is_train], outputs = self.output,allow_input_downcast=True)
        self.mid_layer = theano.function(inputs = [X,is_train], outputs = h_2_e.output,allow_input_downcast=True)
        self.n_param=count_params(self.params)
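
To clarify the corruption step above: a per-point binary mask of shape (batch_size, n_output/3, 1) is tiled across the three coordinates of each point and multiplied into Gaussian noise, so each 3-D point is either perturbed as a whole or left untouched. A small self-contained numpy sketch of the same masking logic (shapes and values are illustrative, not taken from the snippet):

import numpy as np

rng = np.random.RandomState(0)
batch_size, n_output, corruption_level = 4, 12, 0.1    # n_output // 3 = 4 three-dimensional points
X = np.zeros((batch_size, n_output))                   # stand-in for a clean input batch

mask = rng.binomial(n=1, p=1 - corruption_level, size=(batch_size, n_output // 3, 1))
mask3d = np.concatenate([mask, mask, mask], axis=2)    # same mask entry for the x, y, z of a point
noise = rng.normal(loc=0.0, scale=0.03, size=(batch_size, n_output))
masked = noise.reshape(batch_size, n_output // 3, 3) * mask3d
X_train = masked.reshape(batch_size, n_output) + X     # the corrupted input fed in when is_train != 0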
Example 5
    def __init__(self, nkerns=[48], miniBatchSize=200):
        rng = numpy.random.RandomState(23455)
        nClasses = 2
        nMaxPool = 2
        nHidden = 200

        self.p = 65
        #self.x = T.tensor3('x')     # membrane data set
        self.x = T.tensor4('x')  # membrane mini-batch
        self.y = T.ivector('y')  # 1D vector of [int] labels
        self.miniBatchSize = miniBatchSize

        # Reshape matrix of rasterized images to a 4D tensor,
        # compatible with the LeNetConvPoolLayer
        #layer0_input = self.x.reshape((self.miniBatchSize, 1, self.p, self.p))
        layer0_input = self.x

        #--------------------------------------------------
        # LAYER 0
        # layer0 convolution+max pool reduces image dimensions by:
        # 65 -> 62 -> 31
        #--------------------------------------------------
        fs0 = 4  # filter size, layer 0
        os0 = (self.p - fs0 + 1) / nMaxPool  # image out size 0
        assert (os0 == 31)
        layer0 = LeNetConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=(self.miniBatchSize, 1, self.p,
                                                 self.p),
                                    filter_shape=(nkerns[0], 1, fs0, fs0),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 1
        # Fully connected sigmoidal layer, goes from
        # X  -> 200
        #--------------------------------------------------
        layer1_input = layer0.output.flatten(2)
        layer1 = HiddenLayer(rng,
                             input=layer1_input,
                             n_in=nkerns[0] * os0 * os0,
                             n_out=nHidden,
                             activation=T.tanh)

        #--------------------------------------------------
        # LAYER 2
        # Classification via a logistic regression layer
        # 200 -> 2
        #--------------------------------------------------
        # classify the values of the fully-connected sigmoidal layer
        layer2 = LogisticRegression(input=layer1.output,
                                    n_in=nHidden,
                                    n_out=nClasses)

        self.layers = (layer0, layer1, layer2)
Example 6
def fit_logistic(image_size=(28, 28),
                 datasets='../data/mnist.pkl.gz', outpath='../output/mnist_logistic_regression.params',
                 learning_rate=0.13, n_epochs=1000, batch_size=600,
                 patience=5000, patience_increase=2, improvement_threshold=0.995):

    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')


    classifier = LogisticRegression(
        input=x,
        n_in=reduce(np.multiply, image_size),
        n_out=10
    )
    cost = classifier.negative_log_likelihood(y)
    learner = SupervisedMSGD(
        index,
        x,
        y,
        batch_size,
        learning_rate,
        load_data(datasets),
        outpath,
        classifier,
        cost
    )

    best_validation_loss, best_iter, epoch, elapsed_time = learner.fit(
        n_epochs=n_epochs,
        patience=patience,
        patience_increase=patience_increase,
        improvement_threshold=improvement_threshold
    )
    display_results(best_validation_loss, elapsed_time, epoch)

    return learner
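
Several of the Theano-based examples here (e.g. 3, 5, 6 and 9) depend on a `LogisticRegression` layer exposing `negative_log_likelihood` and `errors`. A minimal sketch in the style of the Theano deep-learning tutorial, shown only to make the assumed interface explicit; the actual layer in each repository may differ:

import numpy as np
import theano
import theano.tensor as T

class LogisticRegression(object):
    # Multi-class softmax classifier; a sketch of the interface assumed above.
    def __init__(self, input, n_in, n_out):
        self.W = theano.shared(np.zeros((n_in, n_out), dtype=theano.config.floatX),
                               name='W', borrow=True)
        self.b = theano.shared(np.zeros((n_out,), dtype=theano.config.floatX),
                               name='b', borrow=True)
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        # mean cross-entropy of the correct labels
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        # fraction of misclassified examples
        return T.mean(T.neq(self.y_pred, y))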
Example 7
def cifar_fast_net(batch_size=128,n_epochs=300,test_frequency=13, learning_rate=0.001):

    rng1 = numpy.random.RandomState(23455)
    rng2 = numpy.random.RandomState(12423)
    rng3 = numpy.random.RandomState(23245)
    rng4 = numpy.random.RandomState(12123)
    rng5 = numpy.random.RandomState(25365)
    rng6 = numpy.random.RandomState(15323)
    train_set_x, train_set_y = load_cifar_data(['data_batch_1','data_batch_2','data_batch_3','data_batch_4'])
    valid_set_x, valid_set_y = load_cifar_data(['data_batch_5'],WHICHSET='valid')
    test_set_x, test_set_y = load_cifar_data(['test_batch'],WHICHSET='test')

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    index = T.lscalar()

    x = T.matrix('x')
    y = T.ivector('y')

    img_input = x.reshape((batch_size,3,32,32))
    img_input = img_input.dimshuffle(1,2,3,0)
####define the layers:
    conv_pool1 = LeNetConvPoolLayer(rng=rng1,input=img_input,
                                    filter_shape=(3,5,5,32),
                                    image_shape=(3,32,32,batch_size),
                                    activation='vshape',
                                    poolsize=(3,3),poolstride=2,pad=2,
                                    convstride=1,initW=0.0001,initB=0,partial_sum=4,
                                    pooling='max',
                                    epsW=0.001,
                                    epsB=0.002,
                                    momW=0.9,
                                    momB=0.9,
                                    wc=0.004
                                    )

    conv_pool2 = LeNetConvPoolLayer(rng=rng2,input=conv_pool1.output,
                                    filter_shape=(32,5,5,32),
                                    image_shape=(32,16,16,batch_size),
                                    activation='vshape',
                                    poolsize=(3,3),poolstride=2,pad=2,
                                    convstride=1,initW=0.01,initB=0,partial_sum=4,
                                    pooling='average',
                                    epsW=0.001,
                                    epsB=0.002,
                                    momW=0.9,
                                    momB=0.9,
                                    wc=0.004)
    conv_pool3 = LeNetConvPoolLayer(rng=rng3,input=conv_pool2.output,
                                    filter_shape=(32,5,5,64),
                                    image_shape=(32,8,8,batch_size),
                                    activation='vshape',
                                    poolsize=(3,3),poolstride=2,pad=2,
                                    convstride=1,initW=0.01,initB=0,partial_sum=4,
                                    pooling='average',
                                    epsW=0.001,
                                    epsB=0.002,
                                    momW=0.9,
                                    momB=0.9,
                                    wc=0.004)

    layer4_input = conv_pool3.output.dimshuffle(3,0,1,2).flatten(2)
    #fc_64 = HiddenLayer(rng=rng4,input=layer4_input,n_in=64*4*4,n_out=64,initW=0.1,initB=0)
    fc_64 = HiddenLayer(rng=rng4,input=layer4_input,n_in=64*4*4,n_out=64,initW=0.1,initB=0,
                        epsW=0.001,
                        epsB=0.002,
                        momW=0.9,
                        momB=0.9,
                        wc=0.03)
    fc_10 = LogisticRegression(input=fc_64.output,rng=rng5,n_in=64,n_out=10,initW=0.1,
                               epsW=0.001,
                                epsB=0.002,
                                momW=0.9,
                                momB=0.9,
                                wc=0.03)
####build the models:
    cost = fc_10.negative_log_likelihood(y)
    test_model = theano.function([index], fc_10.errors(y),
             givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function([index], fc_10.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    Ws = [conv_pool1.W, conv_pool2.W, conv_pool3.W, fc_64.W, fc_10.W]
    pgradWs = [conv_pool1.grad_W, conv_pool2.grad_W, conv_pool3.grad_W, fc_64.grad_W, fc_10.grad_W]

    bs = [conv_pool1.b, conv_pool2.b, conv_pool3.b, fc_64.b, fc_10.b]
    pgradbs = [conv_pool1.grad_b, conv_pool2.grad_b, conv_pool3.grad_b, fc_64.grad_b, fc_10.grad_b]

    momWs = [conv_pool1.momW, conv_pool2.momW, conv_pool3.momW, fc_64.momW, fc_10.momW]
    momBs = [conv_pool1.momB, conv_pool2.momB, conv_pool3.momB, fc_64.momB, fc_10.momB]
    wcs = [conv_pool1.wc, conv_pool2.wc, conv_pool3.wc, fc_64.wc, fc_10.wc]
    epsWs = [conv_pool1.epsW, conv_pool2.epsW, conv_pool3.epsW, fc_64.epsW, fc_10.epsW]
    epsBs = [conv_pool1.epsB, conv_pool2.epsB, conv_pool3.epsB, fc_64.epsB, fc_10.epsB]

    gradWs = T.grad(cost, Ws)
    gradbs = T.grad(cost, bs)
    updates = []
    for W_i, gradW_i, momW_i, wc_i, epsW_i, pgW_i in zip(Ws,gradWs,momWs,wcs, epsWs,pgradWs):
        grad_i = - epsW_i*gradW_i - wc_i*epsW_i*W_i + momW_i*pgW_i
        updates.append((W_i, W_i+grad_i))
        updates.append((pgW_i,grad_i))

    for b_i, gradb_i, momB_i, epsB_i, pgB_i in zip(bs,gradbs,momBs, epsBs,pgradbs):
        grad_i = - epsB_i*gradb_i + momB_i*pgB_i
        updates.append((b_i, b_i+grad_i))
        updates.append((pgB_i,grad_i))
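    # The two loops above implement a momentum + weight-decay update
    # (cuda-convnet-style naming: eps = learning rate, mom = momentum, wc = weight cost):
    #     v <- mom * v - eps * (grad + wc * W)    for weights; biases get no weight decay
    #     W <- W + v
    # with pgradWs / pgradbs holding the previous step's velocity v.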

    train_model = theano.function([index], cost, updates=updates,
          givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        # below: reduce the learning rate by a factor of 10 at epoch 50
        ###########################################

        if epoch == 50:
            epsWs = [k/10.0 for k in epsWs]
            epsBs = [k/10.0 for k in epsBs]
            print 'reduce eps by a factor of 10'
            updates = []
            for W_i, gradW_i, momW_i, wc_i, epsW_i, pgW_i in zip(Ws,gradWs,momWs,wcs, epsWs,pgradWs):
                grad_i = - epsW_i*gradW_i - wc_i*epsW_i*W_i + momW_i*pgW_i
                updates.append((W_i, W_i+grad_i))
                updates.append((pgW_i,grad_i))

            for b_i, gradb_i, momB_i, epsB_i, pgB_i in zip(bs,gradbs,momBs, epsBs,pgradbs):
                grad_i = - epsB_i*gradb_i + momB_i*pgB_i
                updates.append((b_i, b_i+grad_i))
                updates.append((pgB_i,grad_i))
            train_model = theano.function([index], cost, updates=updates,
              givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size]})

        ##############################################
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    conv_pool1.bestW = conv_pool1.W.get_value().copy()
                    conv_pool1.bestB = conv_pool1.b.get_value().copy()
                    conv_pool2.bestW = conv_pool2.W.get_value().copy()
                    conv_pool2.bestB = conv_pool2.b.get_value().copy()
                    conv_pool3.bestW = conv_pool3.W.get_value().copy()
                    conv_pool3.bestB = conv_pool3.b.get_value().copy()
                    fc_64.bestW = fc_64.W.get_value().copy()
                    fc_64.bestB = fc_64.b.get_value().copy()
                    fc_10.bestW = fc_10.W.get_value().copy()
                    fc_10.bestB = fc_10.b.get_value().copy()

                    ##saving current best
                    print 'saving current best params..'
                    current_params = (conv_pool1.bestW,conv_pool1.bestB,conv_pool2.bestW,
                    conv_pool2.bestB,conv_pool3.bestW,conv_pool3.bestB,fc_64.bestW,fc_64.bestB,
                    fc_10.bestW,fc_10.bestB,momWs,momBs,epsWs,epsBs,wcs)
                    outfile = open('current_best_params.pkl', 'wb')
                    cPickle.dump(current_params,outfile)
                    outfile.close()


                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Example 8
                                     poolsize=(1, kmaxEntities))
    layers.append(cnnEntities)
    hidden_in = 2 * (2 * nkernsContext * kmaxContext +
                     nkernsEntities * kmaxEntities)
hiddenLayer = HiddenLayer(rng=rng, n_in=hidden_in, n_out=hiddenUnits)
layers.append(hiddenLayer)
hiddenLayerET = HiddenLayer(rng=rng,
                            n_in=2 * nkernsContext * kmaxContext +
                            nkernsEntities * kmaxEntities,
                            n_out=hiddenUnitsET)
layers.append(hiddenLayerET)
randomInit = False
if doCRF:
    randomInit = True
outputLayer = LogisticRegression(n_in=hiddenUnits,
                                 n_out=numClasses,
                                 rng=rng,
                                 randomInit=randomInit)
layers.append(outputLayer)
outputLayerET = LogisticRegression(n_in=hiddenUnitsET,
                                   n_out=numClassesET,
                                   rng=rng,
                                   randomInit=randomInit)
layers.append(outputLayerET)
if doCRF:
    crfLayer = CRF(numClasses=numClasses + numClassesET,
                   rng=rng,
                   batchsizeVar=batchsizeVar,
                   sequenceLength=3)
    layers.append(crfLayer)

x1_resh = x1.reshape((batchsizeVar * numPerBag, contextsize))
Example 9
    def build_model(self, flag_preserve_params=False):

        logging.info('... building the model')

        # allocate symbolic variables for the data
        self.index = T.lscalar()  # index to a [mini]batch
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

        # Since we are dealing with a one hidden layer MLP, this will translate
        # into a HiddenLayer with a tanh activation function connected to the
        # LogisticRegression layer; the activation function can be replaced by
        # sigmoid or any other nonlinear function
        self.hiddenLayer = HiddenLayer(rng=self.rng,
                                       input=self.x,
                                       n_in=self.n_in,
                                       n_out=self.n_hidden,
                                       activation=self.hidden_activation)

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=self.n_hidden,
            n_out=self.n_out,
            activation=self.logreg_activation)

        # L1 norm ; one regularization option is to enforce L1 norm to
        # be small
        self.L1 = abs(self.hiddenLayer.W).sum() \
            + abs(self.logRegressionLayer.W).sum()

        # square of L2 norm ; one regularization option is to enforce
        # square of L2 norm to be small
        self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
            + (self.logRegressionLayer.W ** 2).sum()

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors

        # the parameters of the model are the parameters of the two layer it is
        # made out of
        self.params = self.hiddenLayer.params + self.logRegressionLayer.params

        self.cost = self.negative_log_likelihood(self.y) \
            + self.alpha_l1 * self.L1 \
            + self.alpha_l2 * self.L2_sqr

        self.grads = T.grad(self.cost, self.params)

        # fixed batch size based prediction
        self.predict_proba_batch = theano.function(
            [self.x], self.logRegressionLayer.p_y_given_x)
        self.predict_batch = theano.function(
            [self.x], T.argmax(self.logRegressionLayer.p_y_given_x, axis=1))
        self.predict_cost_batch = theano.function([self.x, self.y],
                                                  self.cost,
                                                  allow_input_downcast=True)
Example 10
def train_cifar(learning_rate_base=1.0,batch_size=128,n_epochs=200,test_frequency=1300, check_point_frequency=5000,show_progress_frequency=100):
    check_point_path = '/home/chensi/mylocal/sichen/data/check_points/'
    parser = optparse.OptionParser()
    parser.add_option("-f",dest="filename", default='None')

    (options, args) = parser.parse_args()

    #defining the rngs
    rng1 = numpy.random.RandomState(23455)
    rng2 = numpy.random.RandomState(12423)
    rng3 = numpy.random.RandomState(23245)
    rng4 = numpy.random.RandomState(12123)
    rng5 = numpy.random.RandomState(25365)


    train_set_x, train_set_y = load_cifar_data(['data_batch_1','data_batch_2','data_batch_3','data_batch_4','data_batch_5'])
    test_set_x, test_set_y = load_cifar_data(['test_batch'],WHICHSET='test')

    n_training_batches = train_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    n_training_batches /= batch_size
    n_test_batches /= batch_size

    index = T.lscalar()

    x = T.matrix('x')
    y = T.ivector('y')

    img_input = x.reshape((batch_size,3,32,32)) #bc01
    img_input = img_input.dimshuffle(1,2,3,0) #c01b

    #####################
    #defining the layers#
    #####################

    if options.filename == 'None':
        print 'start new training...'
        print 'building model...'
        conv1_input = img_input
        conv_pool1 = LeNetConvPoolLayer(rng=rng1,input=conv1_input,
                                    filter_shape=(3,5,5,32),
                                    image_shape=(3,32,32,batch_size),
                                    activation='relu',
                                    poolsize=(3,3),poolstride=2,pad=2,
                                    convstride=1,initW=0.0001,initB=0,partial_sum=4,
                                    pooling='max',
                                    epsW=0.001,
                                    epsB=0.002,
                                    momW=0.9,
                                    momB=0.9,
                                    wc=0.004,
                                    name='conv1'
                                    )
        conv_pool2_input = drop_out_layer(rng2,conv_pool1.output,0.5)
        conv_pool2 = LeNetConvPoolLayer(rng=rng2,input=conv_pool2_input,
                                        filter_shape=(32,5,5,32),
                                        image_shape=(32,16,16,batch_size),
                                        activation='relu',
                                        poolsize=(3,3),poolstride=2,pad=2,
                                        convstride=1,initW=0.01,initB=0,partial_sum=4,
                                        pooling='average',
                                        epsW=0.001,
                                        epsB=0.002,
                                        momW=0.9,
                                        momB=0.9,
                                        wc=0.004,
                                        name='conv2')
        conv_pool3_input = drop_out_layer(rng3,conv_pool2.output,0.5)
        conv_pool3 = LeNetConvPoolLayer(rng=rng3,input=conv_pool3_input,
                                        filter_shape=(32,5,5,64),
                                        image_shape=(32,8,8,batch_size),
                                        activation='relu',
                                        poolsize=(3,3),poolstride=2,pad=2,
                                        convstride=1,initW=0.01,initB=0,partial_sum=4,
                                        pooling='average',
                                        epsW=0.001,
                                        epsB=0.002,
                                        momW=0.9,
                                        momB=0.9,
                                        wc=0.004,
                                        name='conv3')

        layer4_input = conv_pool3.output.dimshuffle(3,0,1,2).flatten(2)
        #fc_64 = HiddenLayer(rng=rng4,input=layer4_input,n_in=64*4*4,n_out=64,initW=0.1,initB=0)

        fc_2_input = drop_out_layer(rng1,input=layer4_input,p=0.5)
        fc_2 = LogisticRegression(input=fc_2_input,rng=rng5,n_in=64*4*4,n_out=10,initW=0.01,
                                   epsW=0.001,
                                    epsB=0.002,
                                    momW=0.9,
                                    momB=0.9,
                                    wc=1.0,
                                    name='fc2')
    else:
        print 'resume training %s...' % options.filename

        params_file = open(check_point_path+options.filename,'rb')
        params = cPickle.load(params_file)
        params_file.close()
        layer1_W = theano.shared(params[0],borrow=True)
        layer1_b = theano.shared(params[1],borrow=True)
        layer2_W = theano.shared(params[2],borrow=True)
        layer2_b = theano.shared(params[3],borrow=True)
        layer3_W = theano.shared(params[4],borrow=True)
        layer3_b = theano.shared(params[5],borrow=True)
        fc10_W = theano.shared(params[6],borrow=True)
        fc10_b = theano.shared(params[7],borrow=True)
        print 'building model...'

        conv_pool1 = LeNetConvPoolLayer(rng=rng1,input=img_input,
                                    filter_shape=(3,5,5,32),
                                    image_shape=(3,32,32,batch_size),
                                    poolsize=(3,3),poolstride=2,pad=2,
                                    convstride=1,initW=0.0001,initB=0,partial_sum=4,
                                    activation='relu',
                                    pooling='max',
                                    epsW=0.001,
                                    epsB=0.001,
                                    momW=0.9,
                                    momB=0.9,
                                    wc=0.004,
                                    name='conv1',
                                    W1=layer1_W,
                                    b1=layer1_b
                                    )
        conv_pool2_input = drop_out_layer(rng2,conv_pool1.output,0.5)
        conv_pool2 = LeNetConvPoolLayer(rng=rng2,input=conv_pool2_input,
                                        filter_shape=(32,5,5,32),
                                        image_shape=(32,16,16,batch_size),
                                        poolsize=(3,3),poolstride=2,pad=2,
                                        convstride=1,initW=0.01,initB=0,partial_sum=4,
                                        pooling='average',
                                        activation='relu',
                                        epsW=0.001,
                                        epsB=0.001,
                                        momW=0.9,
                                        momB=0.9,
                                        wc=0.004,
                                        name='conv2',
                                        W1=layer2_W,
                                        b1=layer2_b
                                        )
        conv_pool3_input = drop_out_layer(rng3,conv_pool2.output,0.5)
        conv_pool3 = LeNetConvPoolLayer(rng=rng3,input=conv_pool3_input,
                                        filter_shape=(32,5,5,64),
                                        image_shape=(32,8,8,batch_size),
                                        poolsize=(3,3),poolstride=2,pad=2,
                                        convstride=1,initW=0.01,initB=0,partial_sum=4,
                                        pooling='average',
                                        activation='relu',
                                        epsW=0.001,
                                        epsB=0.001,
                                        momW=0.9,
                                        momB=0.9,
                                        wc=0.004,
                                        name='conv3',
                                        W1=layer3_W,
                                        b1=layer3_b
                                        )

        layer4_input = conv_pool3.output.dimshuffle(3,0,1,2).flatten(2)
        #fc_64 = HiddenLayer(rng=rng4,input=layer4_input,n_in=64*4*4,n_out=64,initW=0.1,initB=0)

        fc_2_input = drop_out_layer(rng1,input=layer4_input,p=0.5)

        fc_2 = LogisticRegression(input=fc_2_input,rng=rng5,n_in=64*4*4,n_out=10,initW=0.01,
                                   epsW=0.001,
                                    epsB=0.001,
                                    momW=0.9,
                                    momB=0.9,
                                    wc=1.0,
                                    W=fc10_W,
                                    b=fc10_b,
                                    name='fc2'
                                    )

    all_layers = [conv_pool1,conv_pool2,conv_pool3,fc_2]
#############################################
############### test model###################
#############################################
    print 'building test model...'
    conv1_input_test = img_input
    conv_pool1_test = LeNetConvPoolLayer(rng=rng1,input=conv1_input_test,
                                filter_shape=(3,5,5,32),
                                image_shape=(3,32,32,batch_size),
                                activation='relu',
                                poolsize=(3,3),poolstride=2,pad=2,
                                convstride=1,initW=0.0001,initB=0,partial_sum=4,
                                pooling='max',
                                W1=conv_pool1.W*0.5,
                                b1=conv_pool1.b,
                                name='conv1'
                                )

    conv_pool2_test = LeNetConvPoolLayer(rng=rng2,input=conv_pool1_test.output,
                                    filter_shape=(32,5,5,32),
                                    image_shape=(32,16,16,batch_size),
                                    activation='relu',
                                    poolsize=(3,3),poolstride=2,pad=2,
                                    convstride=1,initW=0.01,initB=0,partial_sum=4,
                                    pooling='average',
                                    W1=conv_pool2.W*0.5,
                                    b1=conv_pool2.b,
                                    name='conv2')
    conv_pool3_test = LeNetConvPoolLayer(rng=rng3,input=conv_pool2_test.output,
                                    filter_shape=(32,5,5,64),
                                    image_shape=(32,8,8,batch_size),
                                    activation='relu',
                                    poolsize=(3,3),poolstride=2,pad=2,
                                    convstride=1,initW=0.01,initB=0,partial_sum=4,
                                    pooling='average',
                                    W1=conv_pool3.W*0.5,
                                    b1=conv_pool3.b,
                                    name='conv3')

    layer4_input_test = conv_pool3_test.output.dimshuffle(3,0,1,2).flatten(2)


    fc_2_test = LogisticRegression(input=layer4_input_test,rng=rng5,n_in=64*4*4,n_out=10,initW=0.01,
                                W=fc_2.W,
                                b=fc_2.b,
                                name='fc2')


    #cost_test = fc_2_test.negative_log_likelihood(y)



    test_model = theano.function(inputs=[index], outputs=fc_2_test.errors(y),
                                 givens={
                                     x:test_set_x[index*batch_size: (index+1)*batch_size],
                                     y:test_set_y[index*batch_size: (index+1)*batch_size]
                                 })
########train model
    cost = fc_2.negative_log_likelihood(y)
    Ws = []
    pgradWs = []

    bs = []
    pgradbs = []

    momWs = []
    mombs = []

    epsWs = []
    epsbs = []
    wcs = []

    for i in range(len(all_layers)):
        Ws.append(all_layers[i].W)
        pgradWs.append(all_layers[i].grad_W)
        bs.append(all_layers[i].b)
        pgradbs.append(all_layers[i].grad_b)
        momWs.append(all_layers[i].momW)
        mombs.append(all_layers[i].momB)
        epsWs.append(all_layers[i].epsW)
        epsbs.append(all_layers[i].epsB)
        wcs.append(all_layers[i].wc)

    gradWs = T.grad(cost, Ws)
    gradbs = T.grad(cost, bs)
    updates = []
    for W_i, gradW_i, momW_i, wc_i, epsW_i, pgW_i in zip(Ws, gradWs, momWs, wcs, epsWs, pgradWs):
        epsW_i *= learning_rate_base
        grad_i = - epsW_i*gradW_i - wc_i*epsW_i*W_i + momW_i*pgW_i

        updates.append((W_i, W_i+grad_i))
        updates.append((pgW_i, grad_i))

    for b_i, gradb_i, momb_i, epsb_i, pgb_i in zip(bs, gradbs, mombs, epsbs, pgradbs):
        grad_i = - epsb_i*gradb_i + momb_i*pgb_i
        updates.append((b_i, b_i+grad_i))
        updates.append((pgb_i,grad_i))

    train_model = theano.function(inputs=[index],outputs=[cost,fc_2.errors(y)],updates=updates,
                                  givens={
                                      x: train_set_x[index*batch_size:(index+1)*batch_size],
                                      y: train_set_y[index*batch_size:(index+1)*batch_size]
                                  })

    #############
    #train model#
    #############
    print 'training...'


    best_validation_loss = numpy.inf
    best_epoch = 0

    epoch = 0

    pweights = []
    pbias = []
    for i in range(len(all_layers)):
        pweights.append(numpy.mean(numpy.abs(all_layers[i].W.get_value()[0,:])))
        pbias.append(numpy.mean(numpy.abs(all_layers[i].b.get_value())))
    time_start = time.time()
    start_time = time.time()
    while(epoch<n_epochs):
        epoch = epoch + 1
        for minibatch_index in range(n_training_batches):

            iter = (epoch-1)*n_training_batches + minibatch_index
            train_out = train_model(minibatch_index)
            if iter % show_progress_frequency == 0:
                time_end = time.time()
                print 'epoch: %d, batch_num: %d, cost: %f, training_error: %f, (%f seconds)' % (epoch, minibatch_index, train_out[0], train_out[1], time_end-time_start)
                time_start = time.time()

            if (iter+1) % test_frequency == 0:
                time1 = time.time()
                test_losses = [test_model(i) for i in range(n_test_batches)]
                this_test_loss = numpy.mean(test_losses)
                print '=====================testing output==========================='
                print 'epoch: %d, batch_num: %d, test_error: %f ' % (epoch, minibatch_index, this_test_loss*100.)
                for i in range(len(all_layers)):
                    weights = numpy.mean(numpy.abs(all_layers[i].W.get_value()[0,:]))
                    bias = numpy.mean(numpy.abs(all_layers[i].b.get_value()))

                    print 'Layer: %s, weights[0]: %e [%e]' % (all_layers[i].name, weights*1.00, weights-pweights[i])
                    print 'Layer: %s, bias: %e [%e]' % (all_layers[i].name, bias*1.00, bias-pbias[i])
                    pweights[i] = weights
                    pbias[i] = bias
                if this_test_loss < best_validation_loss:
                    best_epoch = epoch
                    best_validation_loss = this_test_loss
                    best_params = []
                    for i in range(len(all_layers)):
                        best_params.append(all_layers[i].W.get_value().copy())
                        best_params.append(all_layers[i].b.get_value().copy())
                    outfile_name = check_point_path+'current_best_params.pkl'
                    outfile = open(outfile_name,'wb')
                    cPickle.dump(best_params,outfile)
                    outfile.close()
                    print 'saved best params to %s' % outfile_name
                time2 = time.time()
                print '==================================================(%f seconds)' % (time2-time1)
            if (iter+1) % check_point_frequency == 0:
                print '~~~~~~~~~~~~~~~~~~saving check_point~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
                time1 = time.time()
                current_params = []
                for i in range(len(all_layers)):
                    current_params.append(all_layers[i].W.get_value().copy())
                    current_params.append(all_layers[i].b.get_value().copy())
                outfile_name = check_point_path + 'current_params_' + str(time.localtime().tm_mon) + '_' + str(time.localtime().tm_mday) \
                + '_' + str(time.localtime().tm_hour) + '_' + str(time.localtime().tm_min) + '_' + str(time.localtime().tm_sec)+'.pkl'
                outfile = open(outfile_name,'wb')
                cPickle.dump(current_params,outfile)
                outfile.close()
                print 'saved check_point to %s' % outfile_name
                time2 = time.time()
                print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~(%f seconds)' % (time2-time1)

    end_time = time.time()
    print 'Best test score is %f at epoch %d. Total time:%f hour' % (best_validation_loss * 100., best_epoch, (end_time-start_time)/3600.)
Example 11
class DBN(object):
    def __init__(self,
                 input,
                 output,
                 n_in,
                 hidden_layers_sizes,
                 n_out,
                 dropout=None,
                 optimizer=SGD,
                 is_train=0):

        self.dense_layers = []
        self.rbm_layers = []
        self.params = []
        self.consider_constants = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        self.rng = np.random.RandomState(888)
        self.theano_rng = RandomStreams(self.rng.randint(2**30))

        for i in range(self.n_layers):
            if i == 0:
                input_size = n_in
                layer_input = input
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.dense_layers[-1].output

            dense_layer = DenseLayer(rng=self.rng,
                                     theano_rng=self.theano_rng,
                                     input=layer_input,
                                     n_in=input_size,
                                     n_out=hidden_layers_sizes[i],
                                     activation=T.nnet.softplus,
                                     dropout=dropout,
                                     is_train=is_train)

            rbm_layer = RBM(input=layer_input,
                            rng=self.rng,
                            theano_rng=self.theano_rng,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=dense_layer.W,
                            hbias=dense_layer.b,
                            dropout=dropout,
                            h_activation=T.nnet.softplus,
                            optimizer=optimizer,
                            is_train=is_train)

            self.dense_layers.append(dense_layer)
            self.rbm_layers.append(rbm_layer)
            self.params.extend(dense_layer.params)

            if dense_layer.consider_constant is not None:
                self.consider_constants.extend(dense_layer.consider_constant)
            # end-for

        self.logistic_layer = LogisticRegression(
            input=self.dense_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_out)

        self.params.extend(self.logistic_layer.params)

        self.finetune_cost = self.logistic_layer.negative_loglikelihood(output)
        self.finetune_errors = self.logistic_layer.errors(output)

        self.input = input
        self.output = output
        self.is_train = is_train

        # model updates
        self.finetune_opt = optimizer(self.params)

    def _finetune_updates(self, learning_rate):
        return self.finetune_opt.update(self.finetune_cost, self.params,
                                        learning_rate, self.consider_constants)

    def build_pretraining_functions(self, datasets, batch_size, k=1):

        train_set_x = datasets[0][0]
        valid_set_x = datasets[1][0]

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.scalar('learning_rate')

        self.rbm_pretraining_fns = []
        self.rbm_pretraining_errors = []

        batch_begin = index * batch_size
        batch_end = batch_begin + batch_size

        for n, rbm_layer in enumerate(self.rbm_layers):

            persistent_chain = theano.shared(value=np.zeros(
                shape=(batch_size, rbm_layer.n_hidden),
                dtype=theano.config.floatX),
                                             borrow=True)

            rbm_cost, rbm_updates = rbm_layer.get_cost_updates(
                learning_rate, persistent_chain, k)

            train_rbm = theano.function(inputs=[index, learning_rate],
                                        outputs=rbm_cost,
                                        updates=rbm_updates,
                                        givens={
                                            self.input:
                                            train_set_x[batch_begin:batch_end],
                                            rbm_layer.is_train:
                                            T.cast(1, 'int32')
                                        },
                                        name='train_rbm' + '_' + str(n))
            self.rbm_pretraining_fns.append(train_rbm)

            validate_rbm = theano.function(
                inputs=[index],
                outputs=rbm_layer.get_valid_error(),
                givens={
                    self.input: valid_set_x[batch_begin:batch_end],
                    rbm_layer.is_train: T.cast(0, 'int32')
                },
                name='valid_rbm' + '_' + str(n))
            self.rbm_pretraining_errors.append(validate_rbm)
            # end-for

        return self.rbm_pretraining_fns, self.rbm_pretraining_errors

    def build_finetune_functions(self, datasets, batch_size):

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.scalar('learning_rate')

        batch_begin = index * batch_size
        batch_end = batch_begin + batch_size

        test_model = theano.function(inputs=[index],
                                     outputs=self.finetune_errors,
                                     givens={
                                         self.input:
                                         test_set_x[batch_begin:batch_end],
                                         self.output:
                                         test_set_y[batch_begin:batch_end],
                                         self.is_train:
                                         T.cast(0, 'int32')
                                     })

        validate_model = theano.function(
            inputs=[index],
            outputs=self.finetune_errors,
            givens={
                self.input: valid_set_x[batch_begin:batch_end],
                self.output: valid_set_y[batch_begin:batch_end],
                self.is_train: T.cast(0, 'int32')
            })

        train_model = theano.function(
            inputs=[index, learning_rate],
            outputs=self.finetune_cost,
            updates=self._finetune_updates(learning_rate),
            givens={
                self.input: train_set_x[batch_begin:batch_end],
                self.output: train_set_y[batch_begin:batch_end],
                self.is_train: T.cast(1, 'int32')
            })

        return train_model, validate_model, test_model
    def __init__(self, configfile, train=False):

        self.slotList = [
            "N", "per:age", "per:alternate_names", "per:children",
            "per:cause_of_death", "per:date_of_birth", "per:date_of_death",
            "per:employee_or_member_of", "per:location_of_birth",
            "per:location_of_death", "per:locations_of_residence",
            "per:origin", "per:schools_attended", "per:siblings", "per:spouse",
            "per:title", "org:alternate_names", "org:date_founded",
            "org:founded_by", "org:location_of_headquarters", "org:members",
            "org:parents", "org:top_members_employees"
        ]

        typeList = [
            "O", "PERSON", "LOCATION", "ORGANIZATION", "DATE", "NUMBER"
        ]

        self.config = readConfig(configfile)

        self.addInputSize = 1
        logger.info("additional mlp input")

        wordvectorfile = self.config["wordvectors"]
        logger.info("wordvectorfile " + wordvectorfile)
        networkfile = self.config["net"]
        logger.info("networkfile " + networkfile)
        hiddenunits = int(self.config["hidden"])
        logger.info("hidden units " + str(hiddenunits))
        hiddenunitsNer = hiddenunits
        if "hiddenunitsNER" in self.config:
            hiddenunitsNer = int(self.config["hiddenunitsNER"])
        representationsizeNER = 50
        if "representationsizeNER" in self.config:
            representationsizeNER = int(self.config["representationsizeNER"])
        learning_rate = float(self.config["lrate"])
        logger.info("learning rate " + str(learning_rate))
        if train:
            self.batch_size = int(self.config["batchsize"])
        else:
            self.batch_size = 1
        logger.info("batch size " + str(self.batch_size))
        self.filtersize = [1, int(self.config["filtersize"])]
        nkerns = [int(self.config["nkerns"])]
        logger.info("nkerns " + str(nkerns))
        pool = [1, int(self.config["kmax"])]

        self.contextsize = int(self.config["contextsize"])
        logger.info("contextsize " + str(self.contextsize))

        if self.contextsize < self.filtersize[1]:
            logger.info("setting filtersize to " + str(self.contextsize))
            self.filtersize[1] = self.contextsize
        logger.info("filtersize " + str(self.filtersize))

        sizeAfterConv = self.contextsize - self.filtersize[1] + 1

        sizeAfterPooling = -1
        if sizeAfterConv < pool[1]:
            logger.info("setting poolsize to " + str(sizeAfterConv))
            pool[1] = sizeAfterConv
        sizeAfterPooling = pool[1]
        logger.info("kmax pooling: k = " + str(pool[1]))

        # reading word vectors
        self.wordvectors, self.vectorsize = readWordvectors(wordvectorfile)

        self.representationsize = self.vectorsize + 1

        rng = numpy.random.RandomState(
            23455
        )  # not relevant, parameters will be overwritten by stored model anyways
        if train:
            seed = rng.get_state()[1][0]
            logger.info("seed: " + str(seed))

        numSFclasses = 23
        numNERclasses = 6

        # allocate symbolic variables for the data
        self.index = T.lscalar()  # index to a [mini]batch
        self.xa = T.matrix('xa')  # left context
        self.xb = T.matrix('xb')  # middle context
        self.xc = T.matrix('xc')  # right context
        self.y = T.imatrix('y')  # label (only present in training)
        self.yNER1 = T.imatrix(
            'yNER1')  # label for first entity (only present in training)
        self.yNER2 = T.imatrix(
            'yNER2')  # label for second entity (only present in training)
        ishape = [self.representationsize,
                  self.contextsize]  # this is the size of context matrices

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        logger.info('... building the model')

        # Reshape input matrix to be compatible with LeNetConvPoolLayer
        layer0a_input = self.xa.reshape(
            (self.batch_size, 1, ishape[0], ishape[1]))
        layer0b_input = self.xb.reshape(
            (self.batch_size, 1, ishape[0], ishape[1]))
        layer0c_input = self.xc.reshape(
            (self.batch_size, 1, ishape[0], ishape[1]))

        y_reshaped = self.y.reshape((self.batch_size, 1))
        yNER1reshaped = self.yNER1.reshape((self.batch_size, 1))
        yNER2reshaped = self.yNER2.reshape((self.batch_size, 1))

        # Construct convolutional pooling layer:
        filter_shape = (nkerns[0], 1, self.representationsize,
                        self.filtersize[1])
        poolsize = (pool[0], pool[1])
        fan_in = numpy.prod(filter_shape[1:])
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        # the convolution weight matrix
        convW = theano.shared(numpy.asarray(rng.uniform(low=-W_bound,
                                                        high=W_bound,
                                                        size=filter_shape),
                                            dtype=theano.config.floatX),
                              borrow=True)
        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
        convB = theano.shared(value=b_values, borrow=True)

        self.layer0a = LeNetConvPoolLayer(rng,
                                          W=convW,
                                          b=convB,
                                          input=layer0a_input,
                                          image_shape=(self.batch_size, 1,
                                                       ishape[0], ishape[1]),
                                          filter_shape=filter_shape,
                                          poolsize=poolsize)
        self.layer0b = LeNetConvPoolLayer(rng,
                                          W=convW,
                                          b=convB,
                                          input=layer0b_input,
                                          image_shape=(self.batch_size, 1,
                                                       ishape[0], ishape[1]),
                                          filter_shape=filter_shape,
                                          poolsize=poolsize)
        self.layer0c = LeNetConvPoolLayer(rng,
                                          W=convW,
                                          b=convB,
                                          input=layer0c_input,
                                          image_shape=(self.batch_size, 1,
                                                       ishape[0], ishape[1]),
                                          filter_shape=filter_shape,
                                          poolsize=poolsize)

        layer0aflattened = self.layer0a.output.flatten(2).reshape(
            (self.batch_size, nkerns[0] * sizeAfterPooling))
        layer0bflattened = self.layer0b.output.flatten(2).reshape(
            (self.batch_size, nkerns[0] * sizeAfterPooling))
        layer0cflattened = self.layer0c.output.flatten(2).reshape(
            (self.batch_size, nkerns[0] * sizeAfterPooling))
        layer0outputSF = T.concatenate(
            [layer0aflattened, layer0bflattened, layer0cflattened], axis=1)
        layer0outputSFsize = 3 * (nkerns[0] * sizeAfterPooling)

        layer0outputNER1 = T.concatenate([layer0aflattened, layer0bflattened],
                                         axis=1)
        layer0outputNER2 = T.concatenate([layer0bflattened, layer0cflattened],
                                         axis=1)
        layer0outputNERsize = 2 * (nkerns[0] * sizeAfterPooling)

        layer2ner1 = HiddenLayer(rng,
                                 input=layer0outputNER1,
                                 n_in=layer0outputNERsize,
                                 n_out=hiddenunitsNer,
                                 activation=T.tanh)
        layer2ner2 = HiddenLayer(rng,
                                 input=layer0outputNER2,
                                 n_in=layer0outputNERsize,
                                 n_out=hiddenunitsNer,
                                 activation=T.tanh,
                                 W=layer2ner1.W,
                                 b=layer2ner1.b)

        # concatenate additional features to sentence representation
        self.additionalFeatures = T.matrix('additionalFeatures')
        self.additionalFeatsShaped = self.additionalFeatures.reshape(
            (self.batch_size, 1))

        layer2SFinput = T.concatenate(
            [layer0outputSF, self.additionalFeatsShaped], axis=1)
        layer2SFinputSize = layer0outputSFsize + self.addInputSize

        layer2SF = HiddenLayer(rng,
                               input=layer2SFinput,
                               n_in=layer2SFinputSize,
                               n_out=hiddenunits,
                               activation=T.tanh)

        # classify the values of the fully-connected sigmoidal layer
        layer3rel = LogisticRegression(input=layer2SF.output,
                                       n_in=hiddenunits,
                                       n_out=numSFclasses)
        layer3et = LogisticRegression(input=layer2ner1.output,
                                      n_in=hiddenunitsNer,
                                      n_out=numNERclasses)

        scoresForR1 = layer3rel.getScores(layer2SF.output)
        scoresForE1 = layer3et.getScores(layer2ner1.output)
        scoresForE2 = layer3et.getScores(layer2ner2.output)

        self.crfLayer = CRF(numClasses=numSFclasses + numNERclasses,
                            rng=rng,
                            batchsizeVar=self.batch_size,
                            sequenceLength=3)

        scores = T.zeros((self.batch_size, 3, numSFclasses + numNERclasses))
        scores = T.set_subtensor(scores[:, 0, numSFclasses:], scoresForE1)
        scores = T.set_subtensor(scores[:, 1, :numSFclasses], scoresForR1)
        scores = T.set_subtensor(scores[:, 2, numSFclasses:], scoresForE2)
        self.scores = scores

        self.y_conc = T.concatenate([
            yNER1reshaped + numSFclasses, y_reshaped,
            yNER2reshaped + numSFclasses
        ],
                                    axis=1)

        # create a list of all model parameters
        self.paramList = [
            self.crfLayer.params, layer3rel.params, layer3et.params,
            layer2SF.params, layer2ner1.params, self.layer0a.params
        ]
        self.params = []
        for p in self.paramList:
            self.params += p
            logger.info(p)

        if not train:
            self.gotNetwork = 1
            # load parameters
            if not os.path.isfile(networkfile):
                logger.error("network file does not exist")
                self.gotNetwork = 0
            else:
                save_file = open(networkfile, 'rb')
                for p in self.params:
                    p.set_value(cPickle.load(save_file), borrow=False)
                save_file.close()

        self.relation_scores_global = self.crfLayer.getProbForClass(
            self.scores, numSFclasses)
        self.predictions_global = self.crfLayer.getPrediction(self.scores)
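
The score tensor built above interleaves entity-type and relation scores along a length-3 sequence (entity 1, relation, entity 2) so that the CRF layer can decode them jointly. A minimal numpy sketch of that layout, with small hypothetical values for numSFclasses and numNERclasses:

import numpy as np

batch_size, numSFclasses, numNERclasses = 2, 4, 3
scoresForE1 = 1 * np.ones((batch_size, numNERclasses))  # hypothetical entity-1 scores
scoresForR1 = 2 * np.ones((batch_size, numSFclasses))   # hypothetical relation scores
scoresForE2 = 3 * np.ones((batch_size, numNERclasses))  # hypothetical entity-2 scores

scores = np.zeros((batch_size, 3, numSFclasses + numNERclasses))
scores[:, 0, numSFclasses:] = scoresForE1  # step 0: entity-1 scores in the NER columns
scores[:, 1, :numSFclasses] = scoresForR1  # step 1: relation scores in the SF columns
scores[:, 2, numSFclasses:] = scoresForE2  # step 2: entity-2 scores in the NER columns
print(scores.shape)  # (2, 3, 7)
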
Esempio n. 13
0
    def __init__(self,
                 numpy_rng,
                 train_set_x,
                 train_set_y,
                 hidden_layers_sizes,
                 n_ins=784,
                 n_outs=10):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: np.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type train_set_x: theano.shared float32
        :param train_set_x: Training data set, shape (n_samples, n_pixels)

        :type train_set_y: theano.shared, int32
        :param train_set_y: ground-truth labels for the training data, shape (n_samples,)

        :type n_ins: int
        :param n_ins: dimension of the input to the SAE

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
               at least one value
        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.AE_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.train_set_x = train_set_x
        self.train_set_y = train_set_y

        assert self.n_layers > 0

        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

        for i in xrange(self.n_layers):  # used to be n layers

            # construct the sigmoid layer = encoder stack
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(
                rng=numpy_rng,
                input=layer_input,
                n_in=(n_ins if i == 0 else hidden_layers_sizes[i - 1]),
                n_out=hidden_layers_sizes[i],
                activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            # init the DA_layer, takes weights from sigmoid layer
            AE_layer = AutoEncoder(
                numpy_rng=numpy_rng,
                input=layer_input,
                n_visible=(n_ins if i == 0 else hidden_layers_sizes[i - 1]),
                n_hidden=hidden_layers_sizes[i],
                W=sigmoid_layer.W,
                bhid=sigmoid_layer.b)

            self.AE_layers.append(AE_layer)

        # on top of the layers
        # log layer for fine-tuning
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)
Esempio n. 14
0
class StackedAutoEncoder(object):
    """Stacked auto-encoder class (SAE)
    Adopted from:
    https://github.com/lisa-lab/DeepLearningTutorials/blob/master/code/SdA.py

    A stacked autoencoder (SAE) model is obtained by stacking several
    AEs. The hidden layer of the AE at layer `i` becomes the input of
    the AE at layer `i+1`. The first layer AE gets as input the input of
    the SAE, and the hidden layer of the last AE represents the output.
    Note that after pretraining, the SAE is dealt with as a normal MLP,
    the AEs are only used to initialize the weights.
    """
    def __init__(self,
                 numpy_rng,
                 train_set_x,
                 train_set_y,
                 hidden_layers_sizes,
                 n_ins=784,
                 n_outs=10):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: np.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type train_set_x: theano.shared float32
        :param train_set_x: Training data set, shape (n_samples, n_pixels)

        :type train_set_y: theano.shared, int32
        :param train_set_y: ground-truth labels for the training data, shape (n_samples,)

        :type n_ins: int
        :param n_ins: dimension of the input to the SAE

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
               at least one value
        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.AE_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.train_set_x = train_set_x
        self.train_set_y = train_set_y

        assert self.n_layers > 0

        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

        for i in xrange(self.n_layers):  # used to be n layers

            # construct the sigmoid layer = encoder stack
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(
                rng=numpy_rng,
                input=layer_input,
                n_in=(n_ins if i == 0 else hidden_layers_sizes[i - 1]),
                n_out=hidden_layers_sizes[i],
                activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            # init the DA_layer, takes weights from sigmoid layer
            AE_layer = AutoEncoder(
                numpy_rng=numpy_rng,
                input=layer_input,
                n_visible=(n_ins if i == 0 else hidden_layers_sizes[i - 1]),
                n_hidden=hidden_layers_sizes[i],
                W=sigmoid_layer.W,
                bhid=sigmoid_layer.b)

            self.AE_layers.append(AE_layer)

        # on top of the layers
        # log layer for fine-tuning
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, batch_size):
        """
        Generates a list of functions to time each AE training.

        :type batch_size: int
        :param batch_size: size of a [mini]batch
        """

        index = T.lscalar('index')  # index to a minibatch

        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        forward_backward_step = []
        forward_step_fns = []
        i = 0
        for AE in self.AE_layers:

            # get the cost and the updates list
            cost = AE.get_cost_updates()

            params = AE.params
            shared_cost = theano.shared(np.float32(0.0))
            forward_step_fns.append(
                theano.function([index], [],
                                updates=[(shared_cost, cost)],
                                givens={
                                    self.x:
                                    self.train_set_x[batch_begin:batch_end],
                                }))
            grads_temp = T.grad(cost, params)

            # This is both forward and backward
            forward_backward_step.append(
                theano.function([index],
                                grads_temp,
                                givens={
                                    self.x:
                                    self.train_set_x[batch_begin:batch_end],
                                }))
            i += 1

        return forward_backward_step, forward_step_fns

    def build_finetune_functions(self, batch_size):

        index = T.lscalar('index')  # index to a [mini]batch
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        cost = self.finetune_cost
        shared_cost = theano.shared(np.float32(0.0))
        forward_mlp = theano.function(
            [index], [],
            updates=[(shared_cost, cost)],
            givens={
                self.x: self.train_set_x[batch_begin:batch_end],
                self.y: self.train_set_y[batch_begin:batch_end],
            })

        grads_temp = T.grad(cost, self.params)

        # This is both forward and backward
        forward_backward_mlp = theano.function(
            [index],
            grads_temp,
            givens={
                self.x: self.train_set_x[batch_begin:batch_end],
                self.y: self.train_set_y[batch_begin:batch_end],
            })

        return forward_mlp, forward_backward_mlp
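
A minimal driver for the stacked auto-encoder above, as a sketch only: it assumes Theano is installed, that the StackedAutoEncoder class (with its AutoEncoder, HiddenLayer and LogisticRegression dependencies) is importable, and uses hypothetical toy data sizes. Note that the returned functions only evaluate costs and gradients, which matches their stated purpose of timing rather than applying parameter updates.

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(1234)
# hypothetical toy data: 1000 samples of 784 pixels, 10 classes
train_x = theano.shared(np.asarray(rng.rand(1000, 784),
                                   dtype=theano.config.floatX), borrow=True)
train_y = T.cast(theano.shared(rng.randint(10, size=1000), borrow=True), 'int32')

sae = StackedAutoEncoder(numpy_rng=rng,
                         train_set_x=train_x,
                         train_set_y=train_y,
                         hidden_layers_sizes=[500, 200],
                         n_ins=784,
                         n_outs=10)

batch_size = 100
backward_fns, forward_fns = sae.pretraining_functions(batch_size=batch_size)
for fwd, fwd_bwd in zip(forward_fns, backward_fns):
    fwd(0)       # forward pass of one AE on minibatch 0
    fwd_bwd(0)   # forward + backward pass (gradients only, no update)

forward_mlp, forward_backward_mlp = sae.build_finetune_functions(batch_size)
forward_mlp(0)
forward_backward_mlp(0)
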
Esempio n. 15
0
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
            filter_shape=(60, flt_channels, 3, 3), poolsize=(2, 2))


    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                image_shape=(batch_size, 60, layer2_w, layer2_h),
                filter_shape=(90, 60, 3, 3), poolsize=(2, 2))
    layer3_input = layer2.output.flatten(2)


    layer3 = HiddenLayer(rng, input=layer3_input, n_in=90 * layer3_w * layer3_h  ,
                         n_out=500, activation=T.tanh)
  

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)   # change the number of output labels


    cost = layer4.negative_log_likelihood(y)

    classify = theano.function([index], outputs=layer4.get_output_labels(y),
                               givens={
                                   x: test_set_x[index * batch_size: (index + 1) * batch_size],
                                   y: test_set_y[index * batch_size: (index + 1) * batch_size]})
           
    # load weights  
    print 'loading weights state'
    f = file('weights.save', 'rb')
    loaded_objects = []
    for i in range(5):
        loaded_objects.append(cPickle.load(f))
Esempio n. 16
0
def train_nnet(learning_rate=0.1, n_epochs=2,
                    dataset='mnist.pkl.gz',
                    nkerns=[20, 50], batch_size=500):
    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    data = T.matrix('x')
    result = T.matrix('y')  # the labels for this model are presented as a matrix

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    init_input = data.reshape((batch_size, 1, 16, 16))

    # Check for pkl file holding old weights
    old_weights = [[None, None]] * 4
    try:
        old_weights = pickle.load(open(sys.argv[1], "rb"))
    except FileNotFoundError as e:
        print(e)
    except IndexError:
        pass

    # The input was already reshaped above to a 4D tensor of shape
    # (batch_size, 1, 16, 16), compatible with our LeNetConvPoolLayer.

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (16-5+1, 16-5+1) = (12, 12)
    # maxpooling reduces this further to (12/2, 12/2) = (6, 6)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 6, 6)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=init_input,
        image_shape=(batch_size, 1, 16, 16),
        filter_shape=(nkerns[0], 1, 5, 5),
        oldWeights=old_weights[0][0],
        oldBias=old_weights[0][1],
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (6-3+1, 6-3+1) = (4, 4)
    # maxpooling reduces this further to (4/2, 4/2) = (2, 2)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 2, 2)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 6, 6),
        filter_shape=(nkerns[1], nkerns[0], 3, 3),
        oldWeights=old_weights[1][0],
        oldBias=old_weights[1][1],
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 2 * 2),
    # or (500, 50 * 2 * 2) = (500, 200) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 2 * 2,
        n_out=64,
        oldWeights=old_weights[2][0],
        oldBias=old_weights[2][1],
        activation=T.tanh
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(
        input=layer2.output,
        n_in=64,
        n_out=256,
        oldWeights=old_weights[3][0],
        oldBias=old_weights[3][1],
    )

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(result)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(result),
        givens={
            data: test_set_x[index * batch_size: (index + 1) * batch_size],
            result: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(result),
        givens={
            data: valid_set_x[index * batch_size: (index + 1) * batch_size],
            result: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            data: train_set_x[index * batch_size: (index + 1) * batch_size],
            result: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    done = False
    for epoch in range(1, n_epochs + 1):
        for minibatch_index in range(int(n_train_batches)):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(int(n_valid_batches))]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase) 
                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(int(n_test_batches))
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done = True
                break
        if done:
            break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    weights = []
    weights.append([layer0.W.get_value(), layer0.b.get_value()])
    weights.append([layer1.W.get_value(), layer1.b.get_value()])
    weights.append([layer2.W.get_value(), layer2.b.get_value()])
    weights.append([layer3.W.get_value(), layer3.b.get_value()])

    pickle.dump(weights, open("mlp.pkl", "wb"))
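
The size bookkeeping in the comments above follows the usual valid-convolution-plus-pooling arithmetic. A tiny sketch of that calculation for the 16x16 inputs used here:

def conv_pool_out(size, filter_size, pool_size):
    # valid convolution shrinks the map, non-overlapping pooling divides it
    return (size - filter_size + 1) // pool_size

size = 16
size = conv_pool_out(size, 5, 2)  # layer0: 16 -> 12 -> 6
size = conv_pool_out(size, 3, 2)  # layer1: 6 -> 4 -> 2
print(size)  # 2, so the hidden layer sees nkerns[1] * 2 * 2 inputs per example
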
Esempio n. 17
0
    def __init__(self,
                 input,
                 output,
                 n_in,
                 hidden_layers_sizes,
                 n_out,
                 dropout=None,
                 optimizer=SGD,
                 is_train=0):

        self.dense_layers = []
        self.rbm_layers = []
        self.params = []
        self.consider_constants = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        self.rng = np.random.RandomState(888)
        self.theano_rng = RandomStreams(self.rng.randint(2**30))

        for i in range(self.n_layers):
            if i == 0:
                input_size = n_in
                layer_input = input
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.dense_layers[-1].output

            dense_layer = DenseLayer(rng=self.rng,
                                     theano_rng=self.theano_rng,
                                     input=layer_input,
                                     n_in=input_size,
                                     n_out=hidden_layers_sizes[i],
                                     activation=T.nnet.softplus,
                                     dropout=dropout,
                                     is_train=is_train)

            rbm_layer = RBM(input=layer_input,
                            rng=self.rng,
                            theano_rng=self.theano_rng,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=dense_layer.W,
                            hbias=dense_layer.b,
                            dropout=dropout,
                            h_activation=T.nnet.softplus,
                            optimizer=optimizer,
                            is_train=is_train)

            self.dense_layers.append(dense_layer)
            self.rbm_layers.append(rbm_layer)
            self.params.extend(dense_layer.params)

            if dense_layer.consider_constant is not None:
                self.consider_constants.extend(dense_layer.consider_constant)
            # end-for

        self.logistic_layer = LogisticRegression(
            input=self.dense_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_out)

        self.params.extend(self.logistic_layer.params)

        self.finetune_cost = self.logistic_layer.negative_loglikelihood(output)
        self.finetune_errors = self.logistic_layer.errors(output)

        self.input = input
        self.output = output
        self.is_train = is_train

        # model updates
        self.finetune_opt = optimizer(self.params)
Esempio n. 18
0
def train_rep(
    learning_rate=0.002,
    L1_reg=0.0002,
    L2_reg=0.005,
    n_epochs=200,
    nkerns=[20, 50],
    batch_size=25,
):

    rng = numpy.random.RandomState(23455)

    train_dir = "../out/h5/"
    valid_dir = "../out/h5/"

    weights_dir = "./weights/"

    print("... load input data")
    filename = train_dir + "rep_train_data_1.gzip.h5"
    datasets = load_initial_data(filename)
    train_set_x, train_set_y, shared_train_set_y = datasets

    filename = valid_dir + "rep_valid_data_1.gzip.h5"
    datasets = load_initial_data(filename)
    valid_set_x, valid_set_y, shared_valid_set_y = datasets

    mydatasets = load_initial_test_data()
    test_set_x, test_set_y, shared_test_set_y, valid_ds = mydatasets

    # compute number of minibatches for training, validation and testing
    n_all_train_batches = 30000
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_all_train_batches /= batch_size
    n_train_batches /= batch_size
    n_valid_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix("x")  # the data is presented as rasterized images
    y = T.ivector("y")  # the labels are presented as 1D vector of
    # [int] labels

    # image size
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) / 2
    layer1_h = (layer0_h - 4) / 2
    layer2_w = (layer1_w - 2) / 2
    layer2_h = (layer1_h - 2) / 2
    layer3_w = (layer2_w - 2) / 2
    layer3_h = (layer2_h - 2) / 2

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # image sizes
    batchsize = batch_size
    in_channels = 20
    in_width = 50
    in_height = 50
    # filter sizes
    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=signals_shape,
        filter_shape=filters_shape,
        poolsize=(2, 2),
    )

    # TODO: in case of flt_time < in_time the output dimension will be different
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
        filter_shape=(60, flt_channels, 3, 3),
        poolsize=(2, 2),
    )

    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, 60, layer2_w, layer2_h),
        filter_shape=(90, 60, 3, 3),
        poolsize=(2, 2),
    )
    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=90 * layer3_w * layer3_h,
        n_out=500,
        activation=T.tanh,
    )

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batch_size : (index + 1) * batch_size],
            y: test_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size : (index + 1) * batch_size],
            y: valid_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    # create a list of all model parameters to be fit by gradient descent
    params = (
        layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
    )

    # symbolic Theano variable that represents the L1 regularization term
    L1 = (
        T.sum(abs(layer4.params[0]))
        + T.sum(abs(layer3.params[0]))
        + T.sum(abs(layer2.params[0]))
        + T.sum(abs(layer1.params[0]))
        + T.sum(abs(layer0.params[0]))
    )
    # symbolic Theano variable that represents the squared L2 term
    L2_sqr = (
        T.sum(layer4.params[0] ** 2)
        + T.sum(layer3.params[0] ** 2)
        + T.sum(layer2.params[0] ** 2)
        + T.sum(layer1.params[0] ** 2)
        + T.sum(layer0.params[0] ** 2)
    )
    # the loss
    cost = layer4.negative_log_likelihood(y) + L1_reg * L1 + L2_reg * L2_sqr

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size : (index + 1) * batch_size],
            y: train_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")

    start_time = time.clock()

    epoch = 0
    done_looping = False
    cost_ij = 0
    train_files_num = 600
    val_files_num = 100

    startc = time.clock()
    while (epoch < n_epochs) and (not done_looping):
        endc = time.clock()
        print(("epoch %i, took %.2f minutes" % (epoch, (endc - startc) / 60.0)))
        startc = time.clock()
        epoch = epoch + 1
        for nTrainSet in range(1, train_files_num + 1):
            # load next train data
            if nTrainSet % 50 == 0:
                print("training @ nTrainSet =  ", nTrainSet, ", cost = ", cost_ij)
            filename = train_dir + "rep_train_data_" + str(nTrainSet) + ".gzip.h5"
            datasets = load_next_data(filename)
            ns_train_set_x, ns_train_set_y = datasets
            train_set_x.set_value(ns_train_set_x, borrow=True)
            shared_train_set_y.set_value(
                numpy.asarray(ns_train_set_y, dtype=theano.config.floatX), borrow=True
            )
            n_train_batches = train_set_x.get_value(borrow=True).shape[0]
            n_train_batches /= batch_size

            # train
            for minibatch_index in range(n_train_batches):

                # training itself
                # --------------------------------------
                cost_ij = train_model(minibatch_index)
                # -------------------------

        # at the end of each epoch run validation
        this_validation_loss = 0
        for nValSet in range(1, val_files_num + 1):
            filename = valid_dir + "rep_valid_data_" + str(nValSet) + ".gzip.h5"
            datasets = load_next_data(filename)
            ns_valid_set_x, ns_valid_set_y = datasets
            valid_set_x.set_value(ns_valid_set_x, borrow=True)
            shared_valid_set_y.set_value(
                numpy.asarray(ns_valid_set_y, dtype=theano.config.floatX), borrow=True
            )
            n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
            n_valid_batches /= batch_size

            # compute zero-one loss on validation set
            validation_losses = [validate_model(i) for i in range(n_valid_batches)]
            this_validation_loss += numpy.mean(validation_losses)
        this_validation_loss /= val_files_num
        print((
            "epoch %i, minibatch %i/%i, validation error %f %%"
            % (
                epoch,
                minibatch_index + 1,
                n_train_batches,
                this_validation_loss * 100.0,
            )
        ))

        # save snapshots
        print("saving weights state, epoch = ", epoch)
        f = file(weights_dir + "weights_epoch" + str(epoch) + ".save", "wb")
        state_L0 = layer0.__getstate__()
        pickle.dump(state_L0, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L1 = layer1.__getstate__()
        pickle.dump(state_L1, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L2 = layer2.__getstate__()
        pickle.dump(state_L2, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L3 = layer3.__getstate__()
        pickle.dump(state_L3, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L4 = layer4.__getstate__()
        pickle.dump(state_L4, f, protocol=pickle.HIGHEST_PROTOCOL)
        f.close()

    end_time = time.clock()
    print ("Optimization complete.")
    print((
        "The code for file "
        + os.path.split(__file__)[1]
        + " ran for %.2fm" % ((end_time - start_time) / 60.0)
    ), file=sys.stderr)
Esempio n. 19
0
  def __init__(self, configfile, train = False):

    self.config = readConfig(configfile)

    self.addInputSize = 1
    logger.info("additional mlp input")

    wordvectorfile = self.config["wordvectors"]
    logger.info("wordvectorfile " + str(wordvectorfile))
    networkfile = self.config["net"]
    logger.info("networkfile " + str(networkfile))
    hiddenunits = int(self.config["hidden"])
    logger.info("hidden units " + str(hiddenunits))
    hiddenunitsNER = 50
    if "hiddenunitsNER" in self.config:
      hiddenunitsNER = int(self.config["hiddenunitsNER"])
    logger.info("hidden units NER " + str(hiddenunitsNER))
    learning_rate = float(self.config["lrate"])
    logger.info("learning rate " + str(learning_rate))
    if train:
      self.batch_size = int(self.config["batchsize"])
    else:
      self.batch_size = 1
    logger.info("batch size " + str(self.batch_size))
    self.filtersize = [1,int(self.config["filtersize"])]
    nkerns = [int(self.config["nkerns"])]
    logger.info("nkerns " + str(nkerns))
    pool = [1, int(self.config["kmax"])]

    self.contextsize = int(self.config["contextsize"])
    logger.info("contextsize " + str(self.contextsize))

    if self.contextsize < self.filtersize[1]:
      logger.info("setting filtersize to " + str(self.contextsize))
      self.filtersize[1] = self.contextsize
    logger.info("filtersize " + str(self.filtersize))

    sizeAfterConv = self.contextsize - self.filtersize[1] + 1

    sizeAfterPooling = -1
    if sizeAfterConv < pool[1]:
      logger.info("setting poolsize to " + str(sizeAfterConv))
      pool[1] = sizeAfterConv
    sizeAfterPooling = pool[1]
    logger.info("kmax pooling: k = " + str(pool[1]))

    # reading word vectors
    self.wordvectors, self.vectorsize = readWordvectors(wordvectorfile)

    self.representationsize = self.vectorsize + 1

    rng = numpy.random.RandomState(23455)
    if train:
      seed = rng.get_state()[1][0]
      logger.info("seed: " + str(seed))

    # allocate symbolic variables for the data
    self.index = T.lscalar()  # index to a [mini]batch
    self.xa = T.matrix('xa')   # left context
    self.xb = T.matrix('xb')   # middle context
    self.xc = T.matrix('xc')   # right context
    self.y = T.imatrix('y')   # label (only present in training)
    self.yNER1 = T.imatrix('yNER1') # label for first entity
    self.yNER2 = T.imatrix('yNER2') # label for second entity
    ishape = [self.representationsize, self.contextsize]  # this is the size of context matrices

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    logger.info('... building the model')

    # Reshape input matrix to be compatible with our LeNetConvPoolLayer
    layer0a_input = self.xa.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    layer0b_input = self.xb.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    layer0c_input = self.xc.reshape((self.batch_size, 1, ishape[0], ishape[1]))

    self.y_reshaped = self.y.reshape((self.batch_size, 1))
    yNER1reshaped = self.yNER1.reshape((self.batch_size, 1))
    yNER2reshaped = self.yNER2.reshape((self.batch_size, 1))

    # Construct convolutional pooling layer:
    filter_shape = (nkerns[0], 1, self.representationsize, self.filtersize[1])
    poolsize=(pool[0], pool[1])
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
              numpy.prod(poolsize))
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    # the convolution weight matrix
    convW = theano.shared(numpy.asarray(
           rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
           dtype=theano.config.floatX),
                               borrow=True)
    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    convB = theano.shared(value=b_values, borrow=True)

    self.layer0a = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0a_input,
            image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
            filter_shape=filter_shape, poolsize=poolsize)
    self.layer0b = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0b_input,
            image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
            filter_shape=filter_shape, poolsize=poolsize)
    self.layer0c = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0c_input,
            image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
            filter_shape=filter_shape, poolsize=poolsize)

    #layer0_output = T.concatenate([self.layer0a.output, self.layer0b.output, self.layer0c.output], axis = 3)
    layer0aflattened = self.layer0a.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0bflattened = self.layer0b.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0cflattened = self.layer0c.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0_output = T.concatenate([layer0aflattened, layer0bflattened, layer0cflattened], axis = 1)

    self.layer1a = HiddenLayer(rng = rng, input = self.yNER1, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh)
    self.layer1b = HiddenLayer(rng = rng, input = self.yNER2, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh, W = self.layer1a.W, b = self.layer1a.b)


    layer2_input = T.concatenate([layer0_output, self.layer1a.output, self.layer1b.output], axis = 1)
    layer2_inputSize = 3 * nkerns[0] * sizeAfterPooling + 2 * hiddenunitsNER

    self.additionalFeatures = T.matrix('additionalFeatures')
    additionalFeatsShaped = self.additionalFeatures.reshape((self.batch_size, 1))
    layer2_input = T.concatenate([layer2_input, additionalFeatsShaped], axis = 1)
    layer2_inputSize += self.addInputSize

    self.layer2 = HiddenLayer(rng, input=layer2_input, n_in=layer2_inputSize,
                         n_out=hiddenunits, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    self.layer3 = LogisticRegression(input=self.layer2.output, n_in=hiddenunits, n_out=23)

    # create a list of all model parameters
    self.paramList = [self.layer3.params, self.layer2.params, self.layer1a.params, self.layer0a.params]
    self.params = []
    for p in self.paramList:
      self.params += p
      logger.info(p)

    if not train:
      self.gotNetwork = 1
      # load parameters
      if not os.path.isfile(networkfile):
        logger.error("network file does not exist")
        self.gotNetwork = 0
      else:
        save_file = open(networkfile, 'rb')
        for p in self.params:
          p.set_value(cPickle.load(save_file), borrow=False)
        save_file.close()
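
readConfig is not shown here, so its on-disk format is unknown; whatever that format is, the constructor above expects the resulting mapping to contain at least the keys below. The values are hypothetical and kept as strings because the code passes them through int() and float():

config = {
    "wordvectors": "embeddings.txt",  # path to the word vector file
    "net": "network.save",            # file the trained parameters are loaded from
    "hidden": "100",                  # hidden units of the sentence-level layer
    "hiddenunitsNER": "50",           # optional; defaults to 50 when missing
    "lrate": "0.01",                  # learning rate
    "batchsize": "10",                # minibatch size used when train=True
    "filtersize": "3",                # convolution filter width
    "nkerns": "100",                  # number of convolution kernels
    "kmax": "3",                      # k of the k-max pooling
    "contextsize": "40",              # width of each context matrix
}
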
Esempio n. 20
0
def build_lenet(config):
    rng = np.random.RandomState(23455)

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector

    image_width = config.image_width
    batch_size = config.batch_size
    image_size = image_width**2

    x_shared = T.cast(theano.shared(np.random.rand(batch_size, image_size),
                                    borrow=True), theano.config.floatX)
    y_shared = T.cast(theano.shared(np.random.randint(config.ydim,
                                                      size=batch_size),
                                    borrow=True), 'int32')

    layer0_input = x.reshape((batch_size, 1, image_width, image_width))

    # construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, image_width, image_width),
        filter_shape=(config.num_kerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, config.num_kerns[0], 12, 12),
        filter_shape=(config.num_kerns[1], config.num_kerns[0], 5, 5),
        poolsize=(2, 2)
    )

    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=config.num_kerns[1] * 4 * 4,
        n_out=500,
        activation=relu
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500,
                                n_out=config.ydim)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a list of all model parameters to be fit by gradient descent
    params_W = [layer3.W, layer2.W, layer1.W, layer0.W]
    params_b = [layer3.b, layer2.b, layer1.b, layer0.b]
    params = params_W + params_b

    shared_cost = theano.shared(np.float32(0.0))
    grads_temp = T.grad(cost, params)
    start_compilation = time.time()
    forward_step = theano.function([], [], updates=[(shared_cost, cost)],
                                   givens={x: x_shared, y: y_shared})
    forward_backward_step = theano.function([], grads_temp,
                                            givens={x: x_shared, y: y_shared})
    print 'compilation time: %.4f s' % (time.time() - start_compilation)
    return forward_step, forward_backward_step
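
A small usage sketch for the builder above. The second conv layer and the hidden layer hard-code 12x12 and 4x4 feature maps, which corresponds to 28x28 inputs, so this hypothetical config uses image_width=28:

import time

class Config(object):
    image_width = 28
    batch_size = 64
    ydim = 10
    num_kerns = [20, 50]

forward_step, forward_backward_step = build_lenet(Config())

start = time.time()
for _ in range(10):
    forward_step()
print('10 forward passes: %.4f s' % (time.time() - start))

start = time.time()
for _ in range(10):
    forward_backward_step()
print('10 forward+backward passes: %.4f s' % (time.time() - start))
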
Esempio n. 21
0
class StackedAutoEncoder(object):
    """Stacked auto-encoder class (SAE)
    Adopted from:
    https://github.com/lisa-lab/DeepLearningTutorials/blob/master/code/SdA.py

    A stacked autoencoder (SAE) model is obtained by stacking several
    AEs. The hidden layer of the AE at layer `i` becomes the input of
    the AE at layer `i+1`. The first layer AE gets as input the input of
    the SAE, and the hidden layer of the last AE represents the output.
    Note that after pretraining, the SAE is dealt with as a normal MLP,
    the AEs are only used to initialize the weights.
    """

    def __init__(
        self,
        numpy_rng,
        train_set_x,
        train_set_y,
        hidden_layers_sizes,
        n_ins=784,
        n_outs=10
    ):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: np.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type train_set_x: theano.shared float32
        :param train_set_x: Training data set, shape (n_samples, n_pixels)

        :type train_set_y: theano.shared, int32
        :param train_set_y: ground-truth labels for the training data, shape (n_samples,)

        :type n_ins: int
        :param n_ins: dimension of the input to the SAE

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
               at least one value
        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.AE_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.train_set_x = train_set_x
        self.train_set_y = train_set_y

        assert self.n_layers > 0

        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

        for i in xrange(self.n_layers):     # used to be n layers

            # construct the sigmoid layer = encoder stack
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=(n_ins if i == 0 else
                                              hidden_layers_sizes[i-1]),
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            # init the DA_layer, takes weights from sigmoid layer
            AE_layer = AutoEncoder(
                numpy_rng=numpy_rng,
                input=layer_input,
                n_visible=(n_ins if i == 0 else hidden_layers_sizes[i-1]),
                n_hidden=hidden_layers_sizes[i],
                W=sigmoid_layer.W,
                bhid=sigmoid_layer.b)

            self.AE_layers.append(AE_layer)

        # on top of the layers
        # log layer for fine-tuning
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs
        )
        self.params.extend(self.logLayer.params)
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, batch_size):
        """
        Generates a list of functions to time each AE training.

        :type batch_size: int
        :param batch_size: size of a [mini]batch
        """

        index = T.lscalar('index')  # index to a minibatch

        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        forward_backward_step = []
        forward_step_fns = []
        i = 0
        for AE in self.AE_layers:

            # get the cost and the updates list
            cost = AE.get_cost_updates()

            params = AE.params
            shared_cost = theano.shared(np.float32(0.0))
            forward_step_fns.append(
                theano.function(
                    [index], [],
                    updates=[(shared_cost, cost)],
                    givens={
                            self.x: self.train_set_x[batch_begin: batch_end],
                            }))
            grads_temp = T.grad(cost, params)

            # This is both forward and backward
            forward_backward_step.append(
                theano.function(
                    [index], grads_temp,
                    givens={
                            self.x: self.train_set_x[batch_begin: batch_end],
                            }))
            i += 1

        return forward_backward_step, forward_step_fns

    def build_finetune_functions(self, batch_size):

        index = T.lscalar('index')  # index to a [mini]batch
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        cost = self.finetune_cost
        shared_cost = theano.shared(np.float32(0.0))
        forward_mlp = theano.function(
            [index], [],
            updates=[(shared_cost, cost)],
            givens={
                    self.x: self.train_set_x[batch_begin: batch_end],
                    self.y: self.train_set_y[batch_begin: batch_end],
                    })

        grads_temp = T.grad(cost, self.params)

        # This is both forward and backward
        forward_backward_mlp = theano.function(
            [index], grads_temp,
            givens={
                    self.x: self.train_set_x[batch_begin: batch_end],
                    self.y: self.train_set_y[batch_begin: batch_end],
                    })

        return forward_mlp, forward_backward_mlp
Esempio n. 22
0
    def __init__(
        self,
        numpy_rng,
        train_set_x,
        train_set_y,
        hidden_layers_sizes,
        n_ins=784,
        n_outs=10
    ):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: np.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type train_set_x: theano.shared float32
        :param train_set_x: Training data set, shape (n_samples, n_pixels)

        :type train_set_y: theano.shared, int32
        :param train_set_y: ground-truth labels for the training data, shape (n_samples,)

        :type n_ins: int
        :param n_ins: dimension of the input to the SAE

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
               at least one value
        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.AE_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.train_set_x = train_set_x
        self.train_set_y = train_set_y

        assert self.n_layers > 0

        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

        for i in xrange(self.n_layers):     # used to be n layers

            # construct the sigmoid layer = encoder stack
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=(n_ins if i == 0 else
                                              hidden_layers_sizes[i-1]),
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            # init the DA_layer, takes weights from sigmoid layer
            AE_layer = AutoEncoder(
                numpy_rng=numpy_rng,
                input=layer_input,
                n_visible=(n_ins if i == 0 else hidden_layers_sizes[i-1]),
                n_hidden=hidden_layers_sizes[i],
                W=sigmoid_layer.W,
                bhid=sigmoid_layer.b)

            self.AE_layers.append(AE_layer)

        # on top of the layers
        # log layer for fine-tuning
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs
        )
        self.params.extend(self.logLayer.params)
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)
Esempio n. 23
0
def prepare_network():

    rng = numpy.random.RandomState(23455)

    print('Preparing Theano model...')

    mydatasets = load_initial_test_data()
    test_set_x, test_set_y, shared_test_set_y, valid_ds = mydatasets
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    # allocate symbolic variables for the data
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # image size
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) // 2
    layer1_h = (layer0_h - 4) // 2
    layer2_w = (layer1_w - 2) // 2
    layer2_h = (layer1_h - 2) // 2
    layer3_w = (layer2_w - 2) // 2
    layer3_h = (layer2_h - 2) // 2

    ######################
    # BUILD NETWORK #
    ######################
    # image sizes
    batchsize = 1
    in_channels = 20
    in_width = 50
    in_height = 50
    #filter sizes
    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=signals_shape,
                                filter_shape=filters_shape,
                                poolsize=(2, 2))

    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batchsize, flt_channels, layer1_w,
                                             layer1_h),
                                filter_shape=(60, flt_channels, 3, 3),
                                poolsize=(2, 2))

    layer2 = LeNetConvPoolLayer(rng,
                                input=layer1.output,
                                image_shape=(batchsize, 60, layer2_w,
                                             layer2_h),
                                filter_shape=(90, 60, 3, 3),
                                poolsize=(2, 2))
    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=90 * layer3_w * layer3_h,
                         n_out=500,
                         activation=T.tanh)

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    cost = layer4.negative_log_likelihood(y)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batchsize:(index + 1) * batchsize],
            y: test_set_y[index * batchsize:(index + 1) * batchsize]
        })

    print('Loading network weights...')
    weightFile = '../live_count/weights.save'
    f = open(weightFile, 'rb')
    loaded_objects = []
    for i in range(5):
        loaded_objects.append(pickle.load(f))
    f.close()
    layer0.__setstate__(loaded_objects[0])
    layer1.__setstate__(loaded_objects[1])
    layer2.__setstate__(loaded_objects[2])
    layer3.__setstate__(loaded_objects[3])
    layer4.__setstate__(loaded_objects[4])

    return test_set_x, test_set_y, shared_test_set_y, valid_ds, classify, batchsize
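
A minimal sketch of how the returned pieces might be used, assuming the helpers above are importable; the exact content of each prediction depends on get_output_labels() in the custom LogisticRegression:

test_set_x, test_set_y, shared_test_set_y, valid_ds, classify, batchsize = prepare_network()

n_samples = test_set_x.get_value(borrow=True).shape[0]
predictions = [classify(i) for i in range(n_samples // batchsize)]
print('classified %d batches of size %d' % (len(predictions), batchsize))
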
Esempio n. 24
0
class lenet5(object):
    
    def __init__(self, learning_rate=0.1, n_epochs=200, nkerns=[20, 50], 
                 batch_size=500, img_size=28, img_dim=1, filtersize=(5, 5), 
                 poolsize=(2, 2), num_hidden=500, num_class=10, shuffle=True, 
                 cost_type ='nll_softmax', 
                 alpha_l1 = 0, alpha_l2 = 0, alpha_entropy=0,
                 rng = np.random.RandomState(23455),
                 logreg_activation=T.nnet.softmax,
                 hidden_activation=relu,
                 conv_activation=relu):
        """ Demonstrates lenet on MNIST dataset

        :type learning_rate: float
        :param learning_rate: learning rate used (factor for the stochastic
                              gradient)

        :type n_epochs: int
        :param n_epochs: maximal number of epochs to run the optimizer

        :type batch_size: int
        :param batch_size: number of examples in a minibatch

        :type nkerns: list of ints
        :param nkerns: number of kernels on each layer
        """
        
        #####################
        # assign parameters #
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs
        self.nkerns = nkerns
        self.batch_size = batch_size
        self.img_size = img_size
        self.img_dim = img_dim
        self.filtersize = filtersize
        self.poolsize = poolsize
        self.num_hidden = num_hidden
        self.num_class = num_class
        self.shuffle = shuffle
        self.cost_type = cost_type
        self.alpha_l1 = alpha_l1
        self.alpha_l2 = alpha_l2
        self.alpha_entropy = alpha_entropy
        self.rng = rng
        self.logreg_activation = logreg_activation
        self.conv_activation = conv_activation
        self.hidden_activation = hidden_activation
        # assign parameters #
        #####################
        
        # call build model to build theano and other expressions
        self.build_model()
        self.build_functions()
    # end def __init__     


    def build_model(self, flag_preserve_params=False):
    
        
        ###################
        # build the model #
        logging.info('... building the model')
        
        # allocate symbolic variables for the data
        self.index = T.lscalar()  # index to a [mini]batch
        self.x = T.matrix('x')   # the data is presented as rasterized images
        
        # self.y = T.ivector('y') would hold the labels as a 1D vector of
        # [int] class labels given by the data.

        # Here y is declared as a matrix of "target features" instead, so the
        # ssd_* cost types can regress against intermediate layer values; the
        # nll_softmax branch below rebinds it to an int vector of labels.
        self.y = T.matrix('y')
        

        
        # Reshape matrix of rasterized images of shape (batch_size,28*28)
        # to a 4D tensor, compatible with our LeNetConvPoolLayer
        self.layer0_input = self.x.reshape((self.batch_size, self.img_dim, self.img_size, self.img_size))

        # Construct the first convolutional pooling layer:
        # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
        # maxpooling reduces this further to (24/2,24/2) = (12,12)
        # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
        self.layer0 = LeNetConvPoolLayer(self.rng, input=self.layer0_input,
                                         image_shape=(self.batch_size, self.img_dim, self.img_size, self.img_size),
                                         filter_shape=(self.nkerns[0], self.img_dim, 
                                                       self.filtersize[0], self.filtersize[0]),
                                         poolsize=(self.poolsize[0], self.poolsize[0]),
                                         activation=self.conv_activation)

        # Construct the second convolutional pooling layer
        # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
        # maxpooling reduces this further to (8/2,8/2) = (4,4)
        # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
        
        self.img_size1 = (self.img_size - self.filtersize[0] + 1) / self.poolsize[0]
        
        self.layer1 = LeNetConvPoolLayer(self.rng, input=self.layer0.output,
                                         image_shape=(self.batch_size, self.nkerns[0], 
                                                      self.img_size1, self.img_size1),
                                         filter_shape=(self.nkerns[1], self.nkerns[0], 
                                                       self.filtersize[1], self.filtersize[1]), 
                                         poolsize=(self.poolsize[1], self.poolsize[1]),
                                         activation=self.conv_activation)

        # the HiddenLayer being fully-connected, it operates on 2D matrices of
        # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
        # This will generate a matrix of shape (batch_size, nkerns[1]*4*4)
        self.layer2_input = self.layer1.output.flatten(2)
        
        self.img_size2 = (self.img_size1 - self.filtersize[1] + 1) / self.poolsize[1]
        # construct a fully-connected sigmoidal layer
        self.layer2 = HiddenLayer(self.rng, input=self.layer2_input, 
                                  n_in=self.nkerns[1] * self.img_size2 * self.img_size2,
                                  n_out=self.num_hidden, 
                                  activation=self.hidden_activation)

        # classify the values of the fully-connected sigmoidal layer
        self.layer3 = LogisticRegression(input=self.layer2.output, 
                                         n_in=self.num_hidden, 
                                         n_out=self.num_class,
                                         activation=self.logreg_activation)
        
        
        # regularization term
        self.decay_hidden = self.alpha_l1 * abs(self.layer2.W).sum() + \
            self.alpha_l2 * (self.layer2.W ** 2).sum()
            
        self.decay_softmax = self.alpha_l1 * abs(self.layer3.W).sum() + \
            self.alpha_l2 * (self.layer3.W ** 2).sum()
        
        
        # there are several choices of cost model
        if self.cost_type == 'nll_softmax':
            # the cost we minimize during training is the NLL of the model
            self.y = T.ivector('y')  # index involved so has to use integer
            self.cost = self.layer3.negative_log_likelihood(self.y) + \
                self.decay_hidden + self.decay_softmax + \
                self.alpha_entropy * self.layer3.p_y_entropy
                
                
        elif self.cost_type == 'ssd_softmax':
            self.cost = T.mean((self.layer3.p_y_given_x - self.y) ** 2) + \
                self.decay_hidden + self.decay_softmax
            
        elif self.cost_type == 'ssd_hidden':
            self.cost = T.mean((self.layer2.output - self.y) ** 2) + \
                self.decay_hidden
        
        elif self.cost_type == 'ssd_conv':
            self.cost = T.mean((self.layer2_input - self.y) ** 2)
        
        # create a list of all model parameters to be fit by gradient descent

        # Preserve the existing parameters, if any, so the theano functions can
        # be rebuilt without losing trained weights; note this only makes sense
        # if the network structure itself does not change.
        if flag_preserve_params and hasattr(self, 'params'):
            params_temp = copy.deepcopy(self.params)
        else:
            params_temp = None
        
        self.params = self.layer3.params + self.layer2.params + self.layer1.params + self.layer0.params
            
        # if needed, assign old parameters
        if flag_preserve_params and (params_temp is not None):
            for ind in range(len(params_temp)):
                self.params[ind].set_value(params_temp[ind].get_value(), borrow=True)


        # create a list of gradients for all model parameters
        self.grads = T.grad(self.cost, self.params, disconnected_inputs='warn')
        
        # error function from the last layer logistic regression
        self.errors = self.layer3.errors 
        # the line above makes the object unpicklable with cPickle; the
        # __getstate__ / __setstate__ methods below work around this
        
        # build the model #
        ###################

    # end def build_model       

    def build_functions(self):
        # prediction methods
        
        self.fcns = {}

        self.fcns['predict_proba_batch'] = theano.function([self.x], self.layer3.p_y_given_x)
        self.fcns['predict_batch'] = theano.function([self.x], T.argmax(self.layer3.p_y_given_x, axis=1))
        self.fcns['predict_hidden_batch'] = theano.function([self.x], self.layer2.output)
        self.fcns['predict_convout_batch'] = theano.function([self.x], self.layer2_input)
        # self.predict_proba_batch = theano.function([self.x], self.layer3.p_y_given_x)
        # self.predict_batch = theano.function([self.x], T.argmax(self.layer3.p_y_given_x, axis=1))
        # self.predict_hidden_batch = theano.function([self.x], self.layer2.output)
        # self.predict_convout_batch = theano.function([self.x], self.layer2_input)
        
        # cost function for a single batch
        # suitable for negative_log_likelihood input y
        self.fcns['predict_cost_batch'] = theano.function([self.x, self.y], self.cost, allow_input_downcast=True)
        
        # predict entropy
        # this function is for debugging purposes
        self.fcns['predict_entropy_batch'] = theano.function([self.x], self.layer3.p_y_entropy)
        
    
    
    def predict_cost(self, X, y):
        return cost_batch_to_any_size(self.batch_size, self.fcns['predict_cost_batch'], X, y)
    # end def predict_cost  


    def predict_proba(self, X):
        return batch_to_anysize(self.batch_size, self.fcns['predict_proba_batch'], X)
    # end def predict_proba
   
    
    def predict(self, X):
        return batch_to_anysize(self.batch_size, self.fcns['predict_batch'], X)
    # end def predict
    
    
    def predict_hidden(self, X):
        return batch_to_anysize(self.batch_size, self.fcns['predict_hidden_batch'], X)
    # end def predict_hidden
    
    
    def predict_convout(self, X):
        return batch_to_anysize(self.batch_size, self.fcns['predict_convout_batch'], X)
    # end def predict_convout
       
    
    # copy weight parameters from another lenet5
    def copy_weights(self, clf):
        
        # check whether the two network architectures match before copying
        if (type(clf) is lenet5 and self.nkerns == clf.nkerns
                and self.img_size == clf.img_size
                and self.filtersize == clf.filtersize
                and self.poolsize == clf.poolsize
                and self.num_hidden == clf.num_hidden
                and self.num_class == clf.num_class):
            self.set_weights(clf.params)
        else:
            print "Weights not copied: the input classifier doesn't match this classifier's architecture"
    # end def copy_params
    
    
    def set_weights(self, params_other):
        '''
        Set weights from another trained network or from an early-stopping
        snapshot. Use this function with caution: it does not check whether
        the weights are safe to copy (i.e. that the architectures match).
        '''
        for ind in range(len(params_other)):
            self.params[ind].set_value(params_other[ind].get_value(), borrow=True)   
    # end def set_weights    
    
        
    #################################
    # dealing with cPickle problems #
    
    def __getstate__(self):
        print '__getstate__ executed'


        saved_weights = []

        for param in self.params:
            saved_weights.append(param.get_value())

        list_to_del = ["index", "x", "y", "layer0_input",
                       "layer0", "img_size1", "layer1", "layer2_input",
                       "img_size2", "layer2", "layer3", "decay_hidden",
                       "decay_softmax", "cost",
                       "params", "grads",
                       "errors", "fcns", ]

        state = self.__dict__.copy()

        state['saved_weights'] = saved_weights
        for key in state.keys():
            if key in list_to_del:
                del state[key]
        # del state['errors']
        # del state['fcns']
        return state
    # end def __getstate__
        
        
    def __setstate__(self, state):
        print '__setstate__ executed'
        self.__dict__ = state
        # self.errors = self.layer3.errors
         
        self.build_model()
        self.build_functions()

        for ind in range(len(state['saved_weights'])):
            self.params[ind].set_value(state['saved_weights'][ind])
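A minimal usage sketch for the lenet5 class above, assuming the repository's relu, LeNetConvPoolLayer, HiddenLayer, LogisticRegression and the batch_to_anysize / cost_batch_to_any_size helpers are importable; the data below is made-up MNIST-shaped noise:

import numpy as np

X = np.random.rand(1000, 28 * 28).astype('float32')      # rasterized 28x28 images
y = np.random.randint(0, 10, size=1000).astype('int32')  # integer class labels

clf = lenet5(batch_size=500, nkerns=[20, 50], num_class=10)

# single-batch prediction through the compiled function dictionary
proba = clf.fcns['predict_proba_batch'](X[:clf.batch_size])

# arbitrary-size prediction and cost via the batching wrappers
labels = clf.predict(X)
nll = clf.predict_cost(X, y)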
Esempio n. 25
0
    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, 60, layer2_w, layer2_h),
        filter_shape=(90, 60, 3, 3),
        poolsize=(2, 2),
    )
    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=90 * layer3_w * layer3_h,
        n_out=500,
        activation=T.tanh,
    )

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    cost = layer4.negative_log_likelihood(y)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
        },
    )

    # load weights
    print("loading weights state")
    f = open("weights.save", "rb")
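The snippet stops just after opening the weights file; in the first example of this listing the loading continues by unpickling one state object per layer and handing it to each layer's __setstate__, roughly as follows (a sketch mirroring that earlier code, not part of this fragment):

    loaded_objects = []
    for i in range(5):
        loaded_objects.append(pickle.load(f))
    f.close()

    layer0.__setstate__(loaded_objects[0])
    layer1.__setstate__(loaded_objects[1])
    layer2.__setstate__(loaded_objects[2])
    layer3.__setstate__(loaded_objects[3])
    layer4.__setstate__(loaded_objects[4])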
Esempio n. 26
0
    def build_model(self, flag_preserve_params=False):
    
        
        ###################
        # build the model #
        logging.info('... building the model')
        
        # allocate symbolic variables for the data
        self.index = T.lscalar()  # index to a [mini]batch
        self.x = T.matrix('x')   # the data is presented as rasterized images
        
        # self.y = T.ivector('y') would hold the labels as a 1D vector of
        # [int] class labels given by the data.

        # Here y is declared as a matrix of "target features" instead, so the
        # ssd_* cost types can regress against intermediate layer values; the
        # nll_softmax branch below rebinds it to an int vector of labels.
        self.y = T.matrix('y')
        

        
        # Reshape matrix of rasterized images of shape (batch_size,28*28)
        # to a 4D tensor, compatible with our LeNetConvPoolLayer
        self.layer0_input = self.x.reshape((self.batch_size, self.img_dim, self.img_size, self.img_size))

        # Construct the first convolutional pooling layer:
        # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
        # maxpooling reduces this further to (24/2,24/2) = (12,12)
        # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
        self.layer0 = LeNetConvPoolLayer(self.rng, input=self.layer0_input,
                                         image_shape=(self.batch_size, self.img_dim, self.img_size, self.img_size),
                                         filter_shape=(self.nkerns[0], self.img_dim, 
                                                       self.filtersize[0], self.filtersize[0]),
                                         poolsize=(self.poolsize[0], self.poolsize[0]),
                                         activation=self.conv_activation)

        # Construct the second convolutional pooling layer
        # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
        # maxpooling reduces this further to (8/2,8/2) = (4,4)
        # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
        
        self.img_size1 = (self.img_size - self.filtersize[0] + 1) / self.poolsize[0]
        
        self.layer1 = LeNetConvPoolLayer(self.rng, input=self.layer0.output,
                                         image_shape=(self.batch_size, self.nkerns[0], 
                                                      self.img_size1, self.img_size1),
                                         filter_shape=(self.nkerns[1], self.nkerns[0], 
                                                       self.filtersize[1], self.filtersize[1]), 
                                         poolsize=(self.poolsize[1], self.poolsize[1]),
                                         activation=self.conv_activation)

        # the HiddenLayer being fully-connected, it operates on 2D matrices of
        # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
        # This will generate a matrix of shape (batch_size, nkerns[1]*4*4)
        self.layer2_input = self.layer1.output.flatten(2)
        
        self.img_size2 = (self.img_size1 - self.filtersize[1] + 1) / self.poolsize[1]
        # construct a fully-connected sigmoidal layer
        self.layer2 = HiddenLayer(self.rng, input=self.layer2_input, 
                                  n_in=self.nkerns[1] * self.img_size2 * self.img_size2,
                                  n_out=self.num_hidden, 
                                  activation=self.hidden_activation)

        # classify the values of the fully-connected sigmoidal layer
        self.layer3 = LogisticRegression(input=self.layer2.output, 
                                         n_in=self.num_hidden, 
                                         n_out=self.num_class,
                                         activation=self.logreg_activation)
        
        
        # regularization term
        self.decay_hidden = self.alpha_l1 * abs(self.layer2.W).sum() + \
            self.alpha_l2 * (self.layer2.W ** 2).sum()
            
        self.decay_softmax = self.alpha_l1 * abs(self.layer3.W).sum() + \
            self.alpha_l2 * (self.layer3.W ** 2).sum()
        
        
        # there are several choices of cost model
        if self.cost_type == 'nll_softmax':
            # the cost we minimize during training is the NLL of the model
            self.y = T.ivector('y')  # index involved so has to use integer
            self.cost = self.layer3.negative_log_likelihood(self.y) + \
                self.decay_hidden + self.decay_softmax + \
                self.alpha_entropy * self.layer3.p_y_entropy
                
                
        elif self.cost_type == 'ssd_softmax':
            self.cost = T.mean((self.layer3.p_y_given_x - self.y) ** 2) + \
                self.decay_hidden + self.decay_softmax
            
        elif self.cost_type == 'ssd_hidden':
            self.cost = T.mean((self.layer2.output - self.y) ** 2) + \
                self.decay_hidden
        
        elif self.cost_type == 'ssd_conv':
            self.cost = T.mean((self.layer2_input - self.y) ** 2)
        
        # create a list of all model parameters to be fit by gradient descent

        # Preserve the existing parameters, if any, so the theano functions can
        # be rebuilt without losing trained weights; note this only makes sense
        # if the network structure itself does not change.
        if flag_preserve_params and hasattr(self, 'params'):
            params_temp = copy.deepcopy(self.params)
        else:
            params_temp = None
        
        self.params = self.layer3.params + self.layer2.params + self.layer1.params + self.layer0.params
            
        # if needed, assign old parameters
        if flag_preserve_params and (params_temp is not None):
            for ind in range(len(params_temp)):
                self.params[ind].set_value(params_temp[ind].get_value(), borrow=True)


        # create a list of gradients for all model parameters
        self.grads = T.grad(self.cost, self.params, disconnected_inputs='warn')
        
        # error function from the last layer logistic regression
        self.errors = self.layer3.errors 
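Each cost_type branch above pairs the cost with a different shape and meaning for y. A compact summary, plus an illustrative call using the lenet5 class shown earlier in this listing (X and H_target are hypothetical arrays):

# 'nll_softmax': y is an int vector of class labels (NLL + L1/L2 decay + entropy term)
# 'ssd_softmax': y is a matrix of target class probabilities (MSE against layer3.p_y_given_x)
# 'ssd_hidden' : y is a matrix of target hidden activations (MSE against layer2.output)
# 'ssd_conv'   : y is a matrix of target flattened conv features (MSE against layer2_input)

net = lenet5(cost_type='ssd_hidden', num_hidden=500)
mse = net.predict_cost(X, H_target)   # H_target: float matrix of shape (n_samples, 500)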
Esempio n. 27
0
class deep_sugar(object):
    def __init__(self, numpy_rng, theano_rng=None, y=None, 
                 alpha=0.9, sample_rate=0.1, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 corruption_levels=[0.1, 0.1],
                 allX=None,allY=None,srng=None):
        self.sigmoid_layers = []
        self.sugar_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.allXs = []
        if y is None:
            self.y = tensor.ivector(name='y')
        else:
            self.y = y
        assert self.n_layers > 0
        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        self.x = tensor.matrix('x')
        self.y = tensor.ivector('y')
        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output
            if i == 0:
                self.allXs.append(allX)
            else:
                self.allXs.append(tensor.dot(self.allXs[i-1], self.sigmoid_layers[-1].W) + self.sigmoid_layers[-1].b)
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=tensor.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            sugar_layer = sugar(numpy_rng=numpy_rng,
                                alpha=alpha,
                                sample_rate=sample_rate,
                                x=layer_input,
                                y=self.y,
                                n_visible=input_size,
                                n_hidden=hidden_layers_sizes[i],
                                W=sigmoid_layer.W,
                                bhid=sigmoid_layer.b,
                                allX=self.allXs[i],
                                allY=allY,
                                srng=srng)
            self.sugar_layers.append(sugar_layer)
        self.logLayer = LogisticRegression(
                         input=self.sigmoid_layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs)
        self.params.extend(self.logLayer.params)
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)
        
    def pretraining_functions(self, train_set_x, train_set_y, batch_size):
        index = tensor.lscalar('index')
        corruption_level = tensor.scalar('corruption')
        learning_rate = tensor.scalar('lr')
        switch = tensor.iscalar('switch')
        n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        batch_begin = index * batch_size
        batch_end = batch_begin + batch_size
        pretrain_fns = []
        for sugar_layer in self.sugar_layers:
            cost, updates = sugar_layer.get_cost_updates(corruption_level,
                                                         learning_rate,
                                                         switch)
            fn = function(inputs=[index,
                                         Param(corruption_level, default=0.2),
                                         Param(learning_rate, default=0.1),
                                         Param(switch, default=1)],
                                 outputs=[cost],
                                 updates=updates,
                                 givens={self.x: train_set_x[batch_begin:batch_end],
                                         self.y: train_set_y[batch_begin:batch_end]}, on_unused_input='ignore')
            pretrain_fns.append(fn)
        return pretrain_fns
        
    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y)   = datasets[2]
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches /= batch_size
        index = tensor.lscalar('index')  
        gparams = tensor.grad(self.finetune_cost, self.params)
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate))
        train_fn = function(inputs=[index],
              outputs=self.finetune_cost,
              updates=updates,
              givens={
                self.x: train_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:
                                    (index + 1) * batch_size]})
        test_score_i = function([index], self.errors,
                 givens={
                   self.x: test_set_x[index * batch_size:
                                      (index + 1) * batch_size],
                   self.y: test_set_y[index * batch_size:
                                      (index + 1) * batch_size]})
        valid_score_i = function([index], self.errors,
              givens={
                 self.x: valid_set_x[index * batch_size:
                                     (index + 1) * batch_size],
                 self.y: valid_set_y[index * batch_size:
                                     (index + 1) * batch_size]})
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]
        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]
        return train_fn, valid_score, test_score
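A sketch of how the pretraining functions above are typically driven, greedily one sugar layer at a time; model is assumed to be an instance of deep_sugar, train_set_x / train_set_y are theano shared datasets, numpy is assumed imported, and the epoch count is illustrative:

batch_size = 20
pretrain_fns = model.pretraining_functions(train_set_x, train_set_y, batch_size)
n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

# greedy layer-wise pretraining, one compiled function per sugar layer;
# corruption level, learning rate and switch fall back to their Param defaults
for layer_idx, layer_fn in enumerate(pretrain_fns):
    for epoch in xrange(15):
        costs = [layer_fn(i) for i in xrange(n_train_batches)]
        print 'pretraining layer %d, epoch %d, cost %f' % (layer_idx, epoch,
                                                           numpy.mean(costs))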
Esempio n. 28
0
 def __init__(self,
              numpy_rng,
              theano_rng=None,
              y=None,
              alpha=0.9,
              sample_rate=0.1,
              n_ins=784,
              hidden_layers_sizes=[500, 500],
              n_outs=10,
              corruption_levels=[0.1, 0.1],
              allX=None,
              allY=None,
              srng=None):
     self.sigmoid_layers = []
     self.sugar_layers = []
     self.params = []
     self.n_layers = len(hidden_layers_sizes)
     self.allXs = []
     if y is None:
         self.y = tensor.ivector(name='y')
     else:
         self.y = y
     assert self.n_layers > 0
     if not theano_rng:
         theano_rng = RandomStreams(numpy_rng.randint(2**30))
     self.x = tensor.matrix('x')
     self.y = tensor.ivector('y')
     for i in xrange(self.n_layers):
         if i == 0:
             input_size = n_ins
         else:
             input_size = hidden_layers_sizes[i - 1]
         if i == 0:
             layer_input = self.x
         else:
             layer_input = self.sigmoid_layers[-1].output
         if i == 0:
             self.allXs.append(allX)
         else:
             self.allXs.append(
                 tensor.dot(self.allXs[i - 1], self.sigmoid_layers[-1].W) +
                 self.sigmoid_layers[-1].b)
         sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                     input=layer_input,
                                     n_in=input_size,
                                     n_out=hidden_layers_sizes[i],
                                     activation=tensor.nnet.sigmoid)
         self.sigmoid_layers.append(sigmoid_layer)
         self.params.extend(sigmoid_layer.params)
         sugar_layer = sugar(numpy_rng=numpy_rng,
                             alpha=alpha,
                             sample_rate=sample_rate,
                             x=layer_input,
                             y=self.y,
                             n_visible=input_size,
                             n_hidden=hidden_layers_sizes[i],
                             W=sigmoid_layer.W,
                             bhid=sigmoid_layer.b,
                             allX=self.allXs[i],
                             allY=allY,
                             srng=srng)
         self.sugar_layers.append(sugar_layer)
     self.logLayer = LogisticRegression(
         input=self.sigmoid_layers[-1].output,
         n_in=hidden_layers_sizes[-1],
         n_out=n_outs)
     self.params.extend(self.logLayer.params)
     self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
     self.errors = self.logLayer.errors(self.y)
Esempio n. 29
0
    def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):

        lr = params["lr"]
        batch_size = params["batch_size"]
        sequence_length = params["seq_length"]

        # symbolic variables for one minibatch
        X = T.matrix(name="input", dtype=dtype)  # batch of sequences of vectors
        Y = T.matrix(name="output", dtype=dtype)  # batch of sequences of vectors
        is_train = T.iscalar(
            'is_train'
        )  # pseudo boolean for switching between training and prediction

        #CNN global parameters.
        subsample = (1, 1)
        p_1 = 0.5
        border_mode = "same"
        cnn_batch_size = batch_size
        pool_size = (2, 2)

        #Layer1: conv2+pool+drop
        filter_shape = (128, 1, 10, 10)
        input_shape = (cnn_batch_size, 1, 144, 176)  # (samples, channels, rows, cols)
        input = X.reshape(input_shape)
        c1 = ConvLayer(rng,
                       input,
                       filter_shape,
                       input_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p1 = PoolLayer(c1.output,
                       pool_size=pool_size,
                       input_shape=c1.output_shape)
        dl1 = DropoutLayer(rng, input=p1.output, prob=p_1, is_train=is_train)

        #Layer2: conv2+pool
        subsample = (1, 1)
        filter_shape = (256, p1.output_shape[1], 3, 3)
        c2 = ConvLayer(rng,
                       dl1.output,
                       filter_shape,
                       p1.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p2 = PoolLayer(c2.output,
                       pool_size=pool_size,
                       input_shape=c2.output_shape)

        #Layer3: conv2+pool
        filter_shape = (256, p2.output_shape[1], 3, 3)
        c3 = ConvLayer(rng,
                       p2.output,
                       filter_shape,
                       p2.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p3 = PoolLayer(c3.output,
                       pool_size=pool_size,
                       input_shape=c3.output_shape)

        #Layer4: conv2+pool
        filter_shape = (128, p3.output_shape[1], 3, 3)
        c4 = ConvLayer(rng,
                       p3.output,
                       filter_shape,
                       p3.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p4 = PoolLayer(c4.output,
                       pool_size=pool_size,
                       input_shape=c4.output_shape)

        #Layer5: hidden
        n_in = reduce(lambda x, y: x * y, p4.output_shape[1:])
        x_flat = p4.output.flatten(2)

        h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)

        #Layer6: logistic regression output layer
        lreg = LogisticRegression(rng, h1.output, 1024, params['n_output'])
        self.output = lreg.y_pred

        self.params = c1.params + c2.params + c3.params + c4.params + h1.params + lreg.params

        cost = get_err_fn(self, cost_function, Y)
        L2_reg = 0.0001
        L2_sqr = theano.shared(0.)
        for param in self.params:
            L2_sqr += (T.sum(param[0]**2) + T.sum(param[1]**2))

        cost += L2_reg * L2_sqr

        _optimizer = optimizer(cost, self.params, lr=lr)
        self.train = theano.function(inputs=[X, Y, is_train],
                                     outputs=cost,
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X, is_train],
                                           outputs=self.output,
                                           allow_input_downcast=True)
        self.n_param = count_params(self.params)
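A short usage sketch for the model above (here called ConvNetRegressor purely as a placeholder, since the snippet only shows __init__): train and predictions are the theano functions compiled at the end, and the is_train flag (1 while training, 0 at prediction time) is what switches the dropout layer on and off. Shapes follow input_shape = (cnn_batch_size, 1, 144, 176) flattened to a matrix; all values below are made up:

import numpy as np

# 'ConvNetRegressor' is a placeholder name; n_output and lr are assumed params
params = {"lr": 1e-4, "batch_size": 10, "seq_length": 1, "n_output": 32}
model = ConvNetRegressor(np.random.RandomState(0), params, cost_function='mse')

X_batch = np.random.rand(10, 144 * 176).astype('float32')   # 10 flattened frames
Y_batch = np.random.rand(10, 32).astype('float32')           # 10 target vectors

cost = model.train(X_batch, Y_batch, 1)    # is_train=1: dropout active
preds = model.predictions(X_batch, 0)      # is_train=0: deterministic forward pass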
Esempio n. 30
0
class deep_sugar(object):
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 y=None,
                 alpha=0.9,
                 sample_rate=0.1,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10,
                 corruption_levels=[0.1, 0.1],
                 allX=None,
                 allY=None,
                 srng=None):
        self.sigmoid_layers = []
        self.sugar_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.allXs = []
        if y is None:
            self.y = tensor.ivector(name='y')
        else:
            self.y = y
        assert self.n_layers > 0
        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        self.x = tensor.matrix('x')
        self.y = tensor.ivector('y')
        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output
            if i == 0:
                self.allXs.append(allX)
            else:
                self.allXs.append(
                    tensor.dot(self.allXs[i - 1], self.sigmoid_layers[-1].W) +
                    self.sigmoid_layers[-1].b)
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=tensor.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            sugar_layer = sugar(numpy_rng=numpy_rng,
                                alpha=alpha,
                                sample_rate=sample_rate,
                                x=layer_input,
                                y=self.y,
                                n_visible=input_size,
                                n_hidden=hidden_layers_sizes[i],
                                W=sigmoid_layer.W,
                                bhid=sigmoid_layer.b,
                                allX=self.allXs[i],
                                allY=allY,
                                srng=srng)
            self.sugar_layers.append(sugar_layer)
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, train_set_y, batch_size):
        index = tensor.lscalar('index')
        corruption_level = tensor.scalar('corruption')
        learning_rate = tensor.scalar('lr')
        switch = tensor.iscalar('switch')
        n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        batch_begin = index * batch_size
        batch_end = batch_begin + batch_size
        pretrain_fns = []
        for sugar_layer in self.sugar_layers:
            cost, updates = sugar_layer.get_cost_updates(corruption_level,
                                                         learning_rate, switch)
            fn = function(inputs=[
                index,
                Param(corruption_level, default=0.2),
                Param(learning_rate, default=0.1),
                Param(switch, default=1)
            ],
                          outputs=[cost],
                          updates=updates,
                          givens={
                              self.x: train_set_x[batch_begin:batch_end],
                              self.y: train_set_y[batch_begin:batch_end]
                          },
                          on_unused_input='ignore')
            pretrain_fns.append(fn)
        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches /= batch_size
        index = tensor.lscalar('index')
        gparams = tensor.grad(self.finetune_cost, self.params)
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate))
        train_fn = function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x:
                train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                train_set_y[index * batch_size:(index + 1) * batch_size]
            })
        test_score_i = function(
            [index],
            self.errors,
            givens={
                self.x:
                test_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: test_set_y[index * batch_size:(index + 1) * batch_size]
            })
        valid_score_i = function(
            [index],
            self.errors,
            givens={
                self.x:
                valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                valid_set_y[index * batch_size:(index + 1) * batch_size]
            })

        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score, test_score
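valid_score and test_score returned above are plain Python closures that evaluate every validation/test minibatch and return a list of per-batch error rates. A typical finetuning loop with early-stopping-style bookkeeping (sketch; model, datasets and numpy are assumed as before, the hyperparameters are illustrative):

train_fn, valid_score, test_score = model.build_finetune_functions(
    datasets, batch_size=20, learning_rate=0.1)
(train_set_x, train_set_y) = datasets[0]
n_train_batches = train_set_x.get_value(borrow=True).shape[0] / 20

best_validation_loss = numpy.inf
for epoch in xrange(100):
    for minibatch_index in xrange(n_train_batches):
        train_fn(minibatch_index)
    this_validation_loss = numpy.mean(valid_score())
    if this_validation_loss < best_validation_loss:
        best_validation_loss = this_validation_loss
        test_loss = numpy.mean(test_score())
        print 'epoch %d: new best validation error %f, test error %f' % (
            epoch, best_validation_loss, test_loss)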
Esempio n. 31
0
    def __init__(self, nkerns=[48, 48, 48, 48], miniBatchSize=200):
        rng = numpy.random.RandomState(23455)
        nClasses = 2
        nMaxPool = 2
        nHidden = 200

        self.p = 95
        #self.x = T.tensor3('x')     # membrane data set
        self.x = T.tensor4('x')  # membrane mini-batch
        self.y = T.ivector('y')  # labels := 1D vector of [int] labels
        self.miniBatchSize = miniBatchSize

        # Reshape matrix of rasterized images # to a 4D tensor,
        # compatible with the LeNetConvPoolLayer
        #layer0_input = self.x.reshape((self.miniBatchSize, 1, self.p, self.p))
        layer0_input = self.x

        #--------------------------------------------------
        # LAYER 0
        # layer0 convolution+max pool reduces image dimensions by:
        # 95 -> 92 -> 46
        #--------------------------------------------------
        fs0 = 4  # filter size, layer 0
        os0 = (self.p - fs0 + 1) / nMaxPool  # image out size 0
        assert (os0 == 46)
        layer0 = LeNetConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=(self.miniBatchSize, 1, self.p,
                                                 self.p),
                                    filter_shape=(nkerns[0], 1, fs0, fs0),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 1
        # layer1 convolution+max pool reduces image dimensions by:
        # 46 -> 42 -> 21
        #--------------------------------------------------
        fs1 = 5  # filter size, layer 1
        os1 = (os0 - fs1 + 1) / nMaxPool  # image out size 1
        assert (os1 == 21)
        layer1 = LeNetConvPoolLayer(rng,
                                    input=layer0.output,
                                    image_shape=(self.miniBatchSize, nkerns[0],
                                                 os0, os0),
                                    filter_shape=(nkerns[1], nkerns[0], fs1,
                                                  fs1),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 2
        # layer2 convolution+max pool reduces image dimensions by:
        # 21 -> 18 -> 9
        #--------------------------------------------------
        fs2 = 4
        os2 = (os1 - fs2 + 1) / nMaxPool
        assert (os2 == 9)
        layer2 = LeNetConvPoolLayer(rng,
                                    input=layer1.output,
                                    image_shape=(self.miniBatchSize, nkerns[1],
                                                 os1, os1),
                                    filter_shape=(nkerns[2], nkerns[1], fs2,
                                                  fs2),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 3
        # layer3 convolution+max pool reduces image dimensions by:
        # 9 -> 6 -> 3
        #--------------------------------------------------
        fs3 = 4
        os3 = (os2 - fs3 + 1) / nMaxPool
        assert (os3 == 3)
        layer3 = LeNetConvPoolLayer(rng,
                                    input=layer2.output,
                                    image_shape=(self.miniBatchSize, nkerns[2],
                                                 os2, os2),
                                    filter_shape=(nkerns[3], nkerns[2], fs3,
                                                  fs3),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 4
        # Fully connected sigmoidal layer, goes from
        # 3*3*48 ~ 450 -> 200
        #--------------------------------------------------
        layer4_input = layer3.output.flatten(2)
        layer4 = HiddenLayer(rng,
                             input=layer4_input,
                             n_in=nkerns[3] * os3 * os3,
                             n_out=nHidden,
                             activation=T.tanh)

        #--------------------------------------------------
        # LAYER 5
        # Classification via a logistic regression layer
        # 200 -> 2
        #--------------------------------------------------
        # classify the values of the fully-connected sigmoidal layer
        layer5 = LogisticRegression(input=layer4.output,
                                    n_in=nHidden,
                                    n_out=nClasses)

        self.layers = (layer0, layer1, layer2, layer3, layer4, layer5)
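The first example in this listing restores such a network by unpickling one state object per layer and calling __setstate__; a matching save sketch for the layer tuple built above, assuming the layer classes also provide a compatible __getstate__ (the helper name and file name are made up):

import cPickle as pickle

def save_weights(net, filename='weights.save'):
    # dump one state object per layer, in order, so the file can be read back
    # with pickle.load(...) followed by layer.__setstate__(...) as shown earlier
    f = open(filename, 'wb')
    for layer in net.layers:   # (layer0, layer1, layer2, layer3, layer4, layer5)
        pickle.dump(layer.__getstate__(), f, protocol=pickle.HIGHEST_PROTOCOL)
    f.close()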
Esempio n. 32
0
def test_conv(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512], kern_shape=[9,7],
            batch_size=200, verbose=False, loadmodel=False):
    """
    learning_rate: term for the gradient 

    n_epochs: maximal number of epochs before exiting

    nkerns: number of kernels on each layer
    
    kern_shape: list of numbers with the dimensions of the kernels

    batch_szie: number of examples in minibatch.

    verbose: to print out epoch summary or not to
    
    loadmodel: load parameters from a saved .npy file

    """
    
    # Folder for saving and loading parameters
    folder='results'
    # Seed the random generator
    rng = numpy.random.RandomState(1990)

    # Load the dataset
    datasets = load_faceScrub(theano_shared=True)
    
    # Functions for saving and loading parameters
    def save(folder):
        for param in params:
            print (str(param.name))
            numpy.save(os.path.join(folder,
                       param.name + '.npy'), param.get_value())

    def load(folder):
        for param in params:
            param.set_value(numpy.load(os.path.join(folder,
                            param.name + '.npy')))


    # Access the train, validation and test sets
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ###############
    # BUILD MODEL #
    ###############
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 1 * 100 * 100)
    # to a 4D tensor, which is expected by theano
    layer0_input = x.reshape((batch_size, 1, 100, 100))
    
    # First convolutional pooling layer
    layer0 = ConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 100, 100),
        filter_shape=(nkerns[0], 1, kern_shape[0], kern_shape[0]),
        poolsize=(2, 2),
        idx=0
    )

    # Second layer
    layer1 = ConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 46, 46),
        filter_shape=(nkerns[1], nkerns[0], kern_shape[1], kern_shape[1]),
        poolsize=(2, 2),
        idx=1
    )
    
    # Flatten input for the fully connected layer
    layer2_input = layer1.output.flatten(2)

    # Fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 20 * 20,
        n_out=500,
        activation=T.tanh
    )
    

    # Output layer
    layer3 = LogisticRegression(
        input=layer2.output,
        n_in=500,
        n_out=530
    )

    # Cost function
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # Calculate validation error
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # Parameter list which needs update
    params = layer3.params + layer2.params + layer1.params + layer0.params
    
    # Optionally start from previously saved parameters
    if loadmodel:
        load(folder)
        

    # Gradient of costfunction w.r.t. parameters
    grads = T.grad(cost, params)

    # Gradient decent for every parameters
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    # Theano function for calculating the cost and updating the model
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )


    print('... training')
    train_net(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
    
    # Save parameters after training
    save(folder)
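Because save() writes one .npy file per named parameter into the results folder, a trained model can later be rebuilt and continued (or just re-evaluated) by calling the function again with loadmodel=True; the hyperparameters below are illustrative:

# fresh training run; parameters end up under results/
test_conv(learning_rate=0.1, n_epochs=200, batch_size=200, verbose=True)

# later: rebuild the same architecture, load the saved .npy parameters, keep training
test_conv(learning_rate=0.01, n_epochs=50, batch_size=200, verbose=True, loadmodel=True)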
Esempio n. 33
0
    def __init__(self,
                 nkerns=[48, 48, 48],
                 miniBatchSize=200,
                 nHidden=200,
                 nClasses=2,
                 nMaxPool=2,
                 nChannels=1):
        """
        nClasses : the number of target classes (e.g. 2 for binary classification)
        nMaxPool : number of pixels to max pool
        nChannels : number of input channels (e.g. 1 for single grayscale channel)
        """
        rng = numpy.random.RandomState(23455)

        self.p = 65
        self.miniBatchSize = miniBatchSize

        # Note: self.x and self.y will be re-bound to a subset of the
        # training/validation/test data dynamically by the update
        # stage of the appropriate function.
        self.x = T.tensor4('x')  # membrane mini-batch
        self.y = T.ivector('y')  # 1D vector of [int] labels

        # We now assume the input will already be reshaped to the
        # proper size (i.e. we don't need a theano resize op here).
        layer0_input = self.x

        #--------------------------------------------------
        # LAYER 0
        # layer0 convolution+max pool reduces image dimensions by:
        # 65 -> 62 -> 31
        #--------------------------------------------------
        fs0 = 4  # conv. filter size, layer 0
        os0 = (self.p - fs0 + 1) / nMaxPool  # image out size 0
        assert (os0 == 31)
        layer0 = LeNetConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=(self.miniBatchSize, nChannels,
                                                 self.p, self.p),
                                    filter_shape=(nkerns[0], nChannels, fs0,
                                                  fs0),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 1
        # layer1 convolution+max pool reduces image dimensions by:
        # 31 -> 28 -> 14
        #--------------------------------------------------
        fs1 = 4  # filter size, layer 1
        os1 = (os0 - fs1 + 1) / nMaxPool  # image out size 1
        assert (os1 == 14)
        layer1 = LeNetConvPoolLayer(rng,
                                    input=layer0.output,
                                    image_shape=(self.miniBatchSize, nkerns[0],
                                                 os0, os0),
                                    filter_shape=(nkerns[1], nkerns[0], fs1,
                                                  fs1),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 2
        # layer2 convolution+max pool reduces image dimensions by:
        # 14 -> 10 -> 5
        #--------------------------------------------------
        fs2 = 5
        os2 = (os1 - fs2 + 1) / nMaxPool
        assert (os2 == 5)
        layer2 = LeNetConvPoolLayer(rng,
                                    input=layer1.output,
                                    image_shape=(self.miniBatchSize, nkerns[1],
                                                 os1, os1),
                                    filter_shape=(nkerns[2], nkerns[1], fs2,
                                                  fs2),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 3
        # Fully connected sigmoidal layer, goes from
        # 5*5*48  -> 200
        #--------------------------------------------------
        layer3_input = layer2.output.flatten(2)
        layer3 = HiddenLayer(rng,
                             input=layer3_input,
                             n_in=nkerns[2] * os2 * os2,
                             n_out=nHidden,
                             activation=T.tanh)

        #--------------------------------------------------
        # LAYER 4
        # Classification via a logistic regression layer
        # 200 -> 2
        #--------------------------------------------------
        # classify the values of the fully-connected sigmoidal layer
        layer4 = LogisticRegression(input=layer3.output,
                                    n_in=nHidden,
                                    n_out=nClasses)

        self.layers = (layer0, layer1, layer2, layer3, layer4)
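As in the other examples, the LogisticRegression layer at the end supplies the cost and error expressions; a sketch of how a training function could be wired onto the class above (the shared dataset variables train_set_x / train_set_y and the 0.1 learning rate are assumptions, theano and T are assumed imported as elsewhere, everything else comes from the class):

        # hypothetical continuation inside __init__:
        self.params = [p for layer in self.layers for p in layer.params]
        self.cost = layer4.negative_log_likelihood(self.y)

        grads = T.grad(self.cost, self.params)
        updates = [(p, p - 0.1 * g) for p, g in zip(self.params, grads)]

        index = T.lscalar()
        self.train_model = theano.function(
            [index], self.cost, updates=updates,
            givens={
                self.x: train_set_x[index * miniBatchSize:(index + 1) * miniBatchSize],
                self.y: train_set_y[index * miniBatchSize:(index + 1) * miniBatchSize]})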
Esempio n. 34
0
class SentConv(object):
    def __init__(self,
                 learning_rate=0.1,
                 L1_reg=0.00,
                 L2_reg=0.0001,
                 filter_hs=[3, 4, 5],
                 filter_num=100,
                 n_hidden=100,
                 n_out=2,
                 word_idx_map=None,
                 wordvec=None,
                 k=300,
                 adjust_input=False):
        """
        :type learning_rate: float
        :param learning_rate: learning rate used (factor for the stochastic
        gradient)

        :type L1_reg: float
        :param L1_reg: L1-norm's weight when added to the cost (see
        regularization)

        :type L2_reg: float
        :param L2_reg: L2-norm's weight when added to the cost (see
        regularization)
        """
        self.learning_rate = learning_rate
        self.L1_reg = L1_reg
        self.L2_reg = L2_reg
        self.word_idx_map = word_idx_map
        rng = np.random.RandomState(3435)
        self.rng = rng
        self.k = k
        self.filter_num = filter_num
        self.filter_hs = filter_hs
        # Can be assigned at the fit step.
        self.batch_size = None

        self.epoch = 0

        self.Words = theano.shared(value=wordvec, name="Words")
        X = T.matrix('X')
        Y = T.ivector('Y')
        self.X = X
        self.Y = Y

        layer0_input = self.Words[T.cast(X.flatten(), dtype='int32')].reshape(
            (X.shape[0], X.shape[1], self.Words.shape[1]))
        self.layer0_input = layer0_input
        c_max_list = []
        self.conv_layer_s = []
        test_case = []

        for filter_h in filter_hs:
            conv_layer = ConvLayer(rng, layer0_input, filter_h=filter_h, filter_num=filter_num, k=k)
            self.conv_layer_s.append(conv_layer)
            c_max_list.append(conv_layer.c_max)
        max_pooling_out = T.concatenate(c_max_list, axis=1)
        max_pooling_out_size = filter_num * len(filter_hs)

        self.hidden_layer = HiddenLayer(rng, max_pooling_out, max_pooling_out_size, n_hidden)

        self.lr_layer = LogisticRegression(
            input=self.hidden_layer.output,
            n_in=n_hidden,
            n_out=n_out,
        )
        # L1 norm ; one regularization option is to enforce L1 norm to
        # be small
        self.L1 = (
            sum([abs(conv_layer.W).sum() for conv_layer in self.conv_layer_s])
            + abs(self.hidden_layer.W).sum()
            + abs(self.lr_layer.W).sum()
        )

        # square of L2 norm ; one regularization option is to enforce
        # square of L2 norm to be small
        self.L2_sqr = (
            sum([(conv_layer.W ** 2).sum() for conv_layer in self.conv_layer_s])
            + (self.hidden_layer.W ** 2).sum()
            + (self.lr_layer.W ** 2).sum()
        )



        # the cost we minimize during training is the negative log likelihood of
        # the model plus the regularization terms (L1 and L2); cost is expressed
        # here symbolically
        self.cost = (
            self.negative_log_likelihood(Y)
            + self.L1_reg * self.L1
            + self.L2_reg * self.L2_sqr
        )

        # the parameters of the model are the parameters of the layers it is
        # made out of
        self.params = []
        # also adjust the input word vectors
        if adjust_input:
            self.params.append(self.Words)
        for conv_layer in self.conv_layer_s:
            self.params += conv_layer.params
        self.params += self.hidden_layer.params
        self.params += self.lr_layer.params

    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    def negative_log_likelihood(self, Y):
        return self.lr_layer.negative_log_likelihood(Y)

    # same holds for the function computing the number of errors
    def errors(self, Y):
        return self.lr_layer.errors(Y)

    def fit(self, datasets, batch_size=50, n_epochs=400):
        train_x, train_y, valid_x, valid_y = datasets
        self.batch_size = batch_size

        # compute number of minibatches for training, validation and testing
        train_len = train_x.get_value(borrow=True).shape[0]
        valid_len = valid_x.get_value(borrow=True).shape[0]
        n_train_batches = train_len / batch_size
        if train_len % batch_size != 0:
            n_train_batches += 1
        n_valid_batches = valid_len / batch_size
        if valid_len % batch_size != 0:
            n_valid_batches += 1

        print 'number of training mini-batches: %s' % n_train_batches

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print '... building the model'

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch
        X = self.X
        Y = self.Y
        learn_rate = T.scalar('Learning Rate')

        # compute the gradient of cost with respect to theta (stored in params)
        # the resulting gradients will be stored in a list gparams
        gparams = [T.grad(self.cost, param) for param in self.params]

        # specify how to update the parameters of the model as a list of
        # (variable, update expression) pairs

        # given two lists of the same length, A = [a1, a2, a3, a4] and
        # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
        # element is a pair formed from the two lists :
        #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
        updates = [
            (param, param - learn_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]
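        # i.e. plain mini-batch SGD: param <- param - learn_rate * d(cost)/d(param)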

        # compiling a Theano function `train_model` that returns the cost, but
        # in the same time updates the parameter of the model based on the rules
        # defined in `updates`
        train_model = theano.function(
            inputs=[index, learn_rate],
            outputs=self.cost,
            updates=updates,
            givens={
                X: train_x[index * batch_size: (index + 1) * batch_size],
                Y: train_y[index * batch_size: (index + 1) * batch_size]
            }
        )

        test_train_model = theano.function(
            inputs=[index],
            outputs=self.errors(Y),
            givens={
                X: train_x[index * batch_size: (index + 1) * batch_size],
                Y: train_y[index * batch_size: (index + 1) * batch_size]
            }
        )

        validate_model = theano.function(
            inputs=[index],
            outputs=self.errors(Y),
            givens={
                X: valid_x[index * batch_size:(index + 1) * batch_size],
                Y: valid_y[index * batch_size:(index + 1) * batch_size]
            }
        )

        ###############
        # TRAIN MODEL #
        ###############
        print '... training'

        # early-stopping parameters
        patience = 1000000  # look at this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is
                               # found
        improvement_threshold = 0.9999  # a relative improvement of this much is
                                        # considered significant
        validation_frequency = min(n_train_batches, patience / 2)
                                      # go through this many
                                      # minibatche before checking the network
                                      # on the validation set; in this case we
                                      # check every epoch

        best_validation_loss = np.inf
        best_iter = 0
        test_score = 0.
        start_time = timeit.default_timer()

        done_looping = False
        last_cost = np.inf
        sys.stdout.flush()
        logger.info('already trained number of epochs: %s' % self.epoch)
        epoch = self.epoch
        while (epoch < n_epochs) and (not done_looping):
            epoch += 1
            avg_cost_list = []
            for minibatch_index in xrange(n_train_batches):

                minibatch_avg_cost = train_model(minibatch_index, self.learning_rate)
                avg_cost_list.append(minibatch_avg_cost)
                # print self.lr_layer.W.get_value()
                # iteration number
                iter = (epoch - 1) * n_train_batches + minibatch_index

                if (iter + 1) % validation_frequency == 0:
                    # print self.lr_layer.W.get_value()
                    # print self.lr_layer.b.get_value()

                    # train_losses = [test_train_model(i) for i in xrange(n_train_batches)]
                    # this_train_loss = np.mean(train_losses)


                    # # compute zero-one loss on validation set
                    # validation_losses = [validate_model(i) for i
                    #                      in xrange(n_valid_batches)]
                    # this_validation_loss = np.mean(validation_losses)
                    # train_all_precison, train_label_precision, train_label_recall = \
                    #     self.test(train_x, train_y.eval())
                    # this_train_loss = 1 - train_all_precison

                    valid_all_precision, valid_label_precision, valid_label_recall = \
                        self.test(valid_x, valid_y.eval())
                    this_validation_loss = 1 - valid_all_precision

                    avg_cost = np.mean(avg_cost_list)
                    if avg_cost >= last_cost:
                        self.learning_rate *= 0.95
                    last_cost = avg_cost
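                    # simple decay schedule: whenever the average training cost
                    # fails to drop between validation checks, shrink the
                    # learning rate by 5% (e.g. 0.1 -> 0.095 -> 0.09025 ...).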


                    logger.info(
                        'epoch %i, learning rate: %f, avg_cost: %f, valid P: %f %%, valid_1_P: %s, valid_1_R: %s' %
                        (
                            epoch,
                            self.learning_rate,
                            avg_cost,
                            # (1 - this_train_loss) * 100,
                            (1 - this_validation_loss) * 100.,
                            # train_label_precision[1],
                            # train_label_recall[1],
                            valid_label_precision[1],
                            valid_label_recall[1]
                        )
                    )
                    sys.stdout.flush()

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        # improve patience if the loss improvement is good enough
                        if (
                            this_validation_loss < best_validation_loss *
                            improvement_threshold
                        ):
                            # extend patience to at least patience_increase
                            # times the current iteration
                            patience = max(patience, iter * patience_increase)

                        best_validation_loss = this_validation_loss
                        best_iter = iter

                if patience <= iter:
                    done_looping = True
                    break
                self.epoch = epoch

        end_time = timeit.default_timer()
        logger.info(('Optimization complete. Best validation score of %f %% '
               'obtained at iteration %i') %
              (( 1 - best_validation_loss) * 100., best_iter + 1))
        logger.info('The code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))

    def save(self, path):
        with open(path, 'wb') as f:
            pickle.dump(self, f, -1)
        logger.info('save model to path %s' % path)
        return None

    @classmethod
    def load(cls, path):
        with open(path, 'rb') as f:
            return pickle.load(f)

    def predict(self, shared_x, batch_size=None):
        if not batch_size:
            batch_size = self.batch_size
        shared_x_len = shared_x.get_value(borrow=True).shape[0]
        n_batches = shared_x_len / batch_size
        if shared_x_len % batch_size != 0:
            n_batches += 1

        index = T.lscalar()  # index to a [mini]batch
        X = self.X

        predict_model = theano.function(
            inputs=[index],
            outputs=self.lr_layer.y_pred,
            givens={
                X: shared_x[index * batch_size:(index + 1) * batch_size]
            }
        )
        pred_y = np.concatenate([predict_model(i) for i in range(n_batches)])
        return pred_y

    def test(self, shared_x, data_y, out_path=None):
        pred_y = self.predict(shared_x)
        if out_path:
            with codecs.open(out_path, 'wb') as f:
                f.writelines(['%s\t%s\n' % (x, y) for x, y in zip(data_y, pred_y)])
        return evaluate(data_y, pred_y)

    def test_from_file(self, path, out_path=None, encoding='utf-8'):
        data_x = []
        data_y = []
        with codecs.open(path, 'rb', encoding=encoding) as f:
            for i, line in enumerate(f):
                tokens = line.strip('\n').split('\t')
                if len(tokens) != 2:
                    raise ValueError('invalid line %s' % (i+1))
                label = int(tokens[0])
                sent = tokens[1]
                s = get_idx_from_sent(sent, self.word_idx_map)
                data_x.append(s)
                data_y.append(label)
        shared_x = theano.shared(
            value=np.asarray(data_x, dtype=theano.config.floatX),
            borrow=True
        )
        return self.test(shared_x, data_y, out_path=out_path)
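
# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original example).  The class name
# `TextCNN` is hypothetical (the snippet does not show the class statement),
# and `datasets`, `wordvec` and `word_idx_map` are assumed to be prepared
# elsewhere: `datasets` is the (train_x, train_y, valid_x, valid_y) tuple of
# shared variables that fit() expects.
#
#   clf = TextCNN(filter_hs=[3, 4, 5], filter_num=100, n_hidden=100, n_out=2,
#                 word_idx_map=word_idx_map, wordvec=wordvec, k=300)
#   clf.fit(datasets, batch_size=50, n_epochs=100)
#   clf.save('cnn_model.pkl')
#   clf = TextCNN.load('cnn_model.pkl')
#   precision, label_p, label_r = clf.test(valid_x, valid_y.eval())
# ---------------------------------------------------------------------------
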
class CNN:

  def __init__(self, configfile, train = False):

    self.config = readConfig(configfile)

    self.addInputSize = 1
    logger.info("additional mlp input")

    wordvectorfile = self.config["wordvectors"]
    logger.info("wordvectorfile " + str(wordvectorfile))
    networkfile = self.config["net"]
    logger.info("networkfile " + str(networkfile))
    hiddenunits = int(self.config["hidden"])
    logger.info("hidden units " + str(hiddenunits))
    hiddenunitsNER = 50
    if "hiddenunitsNER" in self.config:
      hiddenunitsNER = int(self.config["hiddenunitsNER"])
    logger.info("hidden units NER " + str(hiddenunitsNER))
    learning_rate = float(self.config["lrate"])
    logger.info("learning rate " + str(learning_rate))
    if train:
      self.batch_size = int(self.config["batchsize"])
    else:
      self.batch_size = 1
    logger.info("batch size " + str(self.batch_size))
    self.filtersize = [1,int(self.config["filtersize"])]
    nkerns = [int(self.config["nkerns"])]
    logger.info("nkerns " + str(nkerns))
    pool = [1, int(self.config["kmax"])]

    self.contextsize = int(self.config["contextsize"])
    logger.info("contextsize " + str(self.contextsize))

    if self.contextsize < self.filtersize[1]:
      logger.info("setting filtersize to " + str(self.contextsize))
      self.filtersize[1] = self.contextsize
    logger.info("filtersize " + str(self.filtersize))

    sizeAfterConv = self.contextsize - self.filtersize[1] + 1
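    # width of a "valid" convolution over the context window:
    # contextsize - filtersize + 1 columns remain after the filter slides over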

    sizeAfterPooling = -1
    if sizeAfterConv < pool[1]:
      logger.info("setting poolsize to " + str(sizeAfterConv))
      pool[1] = sizeAfterConv
    sizeAfterPooling = pool[1]
    logger.info("kmax pooling: k = " + str(pool[1]))

    # reading word vectors
    self.wordvectors, self.vectorsize = readWordvectors(wordvectorfile)

    self.representationsize = self.vectorsize + 1

    rng = numpy.random.RandomState(23455)
    if train:
      seed = rng.get_state()[1][0]
      logger.info("seed: " + str(seed))

    # allocate symbolic variables for the data
    self.index = T.lscalar()  # index to a [mini]batch
    self.xa = T.matrix('xa')   # left context
    self.xb = T.matrix('xb')   # middle context
    self.xc = T.matrix('xc')   # right context
    self.y = T.imatrix('y')   # label (only present in training)
    self.yNER1 = T.imatrix('yNER1') # label for first entity
    self.yNER2 = T.imatrix('yNER2') # label for second entity
    ishape = [self.representationsize, self.contextsize]  # this is the size of context matrices

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    logger.info('... building the model')

    # Reshape input matrix to be compatible with our LeNetConvPoolLayer
    layer0a_input = self.xa.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    layer0b_input = self.xb.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    layer0c_input = self.xc.reshape((self.batch_size, 1, ishape[0], ishape[1]))

    self.y_reshaped = self.y.reshape((self.batch_size, 1))
    yNER1reshaped = self.yNER1.reshape((self.batch_size, 1))
    yNER2reshaped = self.yNER2.reshape((self.batch_size, 1))

    # Construct convolutional pooling layer:
    filter_shape = (nkerns[0], 1, self.representationsize, self.filtersize[1])
    poolsize=(pool[0], pool[1])
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
              numpy.prod(poolsize))
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
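    # uniform initialisation in [-W_bound, W_bound] with the usual
    # Glorot-style bound sqrt(6 / (fan_in + fan_out)), where fan_out is
    # scaled down by the pooling size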
    # the convolution weight matrix
    convW = theano.shared(numpy.asarray(
           rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
           dtype=theano.config.floatX),
                               borrow=True)
    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    convB = theano.shared(value=b_values, borrow=True)

    self.layer0a = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0a_input,
            image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
            filter_shape=filter_shape, poolsize=poolsize)
    self.layer0b = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0b_input,
            image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
            filter_shape=filter_shape, poolsize=poolsize)
    self.layer0c = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0c_input,
            image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
            filter_shape=filter_shape, poolsize=poolsize)
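    # the three context CNNs (left / middle / right) share the same filter
    # weights and biases, since convW and convB are passed to all of them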

    #layer0_output = T.concatenate([self.layer0a.output, self.layer0b.output, self.layer0c.output], axis = 3)
    layer0aflattened = self.layer0a.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0bflattened = self.layer0b.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0cflattened = self.layer0c.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0_output = T.concatenate([layer0aflattened, layer0bflattened, layer0cflattened], axis = 1)

    self.layer1a = HiddenLayer(rng = rng, input = self.yNER1, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh)
    self.layer1b = HiddenLayer(rng = rng, input = self.yNER2, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh, W = self.layer1a.W, b = self.layer1a.b)
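    # the two entity-type inputs go through the same hidden layer: layer1b
    # reuses layer1a's W and b, so both NER embeddings share parameters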


    layer2_input = T.concatenate([layer0_output, self.layer1a.output, self.layer1b.output], axis = 1)
    layer2_inputSize = 3 * nkerns[0] * sizeAfterPooling + 2 * hiddenunitsNER

    self.additionalFeatures = T.matrix('additionalFeatures')
    additionalFeatsShaped = self.additionalFeatures.reshape((self.batch_size, 1))
    layer2_input = T.concatenate([layer2_input, additionalFeatsShaped], axis = 1)
    layer2_inputSize += self.addInputSize

    self.layer2 = HiddenLayer(rng, input=layer2_input, n_in=layer2_inputSize,
                         n_out=hiddenunits, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    self.layer3 = LogisticRegression(input=self.layer2.output, n_in=hiddenunits, n_out=23)

    # create a list of all model parameters
    self.paramList = [self.layer3.params, self.layer2.params, self.layer1a.params, self.layer0a.params]
    self.params = []
    for p in self.paramList:
      self.params += p
      logger.info(p)

    if not train:
      self.gotNetwork = 1
      # load parameters
      if not os.path.isfile(networkfile):
        logger.error("network file does not exist")
        self.gotNetwork = 0
      else:
        save_file = open(networkfile, 'rb')
        for p in self.params:
          p.set_value(cPickle.load(save_file), borrow=False)
        save_file.close()


  def classify(self, candidateAndFillerAndOffsetList, slot):
    ##############
    # TEST MODEL #
    ##############

    logger.info('... testing')

    index = T.lscalar()  # index to a [mini]batch

    if self.gotNetwork == 0:
      return []

    inputMatrixDev_a, inputMatrixDev_b, inputMatrixDev_c, lengthListDev_a, lengthListDev_b, lengthListDev_c, inputFeaturesDev, _ = getInput(candidateAndFillerAndOffsetList, self.representationsize, self.contextsize, self.filtersize, self.wordvectors, self.vectorsize)
    # create input matrix and save them in valid_set

    slot2types, binarizer, numTypes = getSlot2Types()

    yner1Dev = []
    yner2Dev = []
    type1bin = binarizer.transform([slot2types[slot][0]])
    type2bin = binarizer.transform([slot2types[slot][1]])
    
    dt = theano.config.floatX

    for item in range(len(inputMatrixDev_a)):
      yner1Dev.append(type1bin)
      yner2Dev.append(type2bin)
    yner1DevNumpy = numpy.array(yner1Dev, dtype = numpy.int32)
    yner2DevNumpy = numpy.array(yner2Dev, dtype = numpy.int32)

    valid_set_xa = theano.shared(numpy.matrix(inputMatrixDev_a, dtype = dt))
    valid_set_xb = theano.shared(numpy.matrix(inputMatrixDev_b, dtype = dt))
    valid_set_xc = theano.shared(numpy.matrix(inputMatrixDev_c, dtype = dt))
    valid_mlp = theano.shared(numpy.matrix(inputFeaturesDev, dtype = dt))
    valid_yner1 = theano.shared(yner1DevNumpy.reshape(yner1DevNumpy.shape[0], yner1DevNumpy.shape[2]))
    valid_yner2 = theano.shared(yner2DevNumpy.reshape(yner2DevNumpy.shape[0], yner2DevNumpy.shape[2]))

    # compute number of minibatches for testing
    n_valid_batches = valid_set_xa.get_value(borrow=True).shape[0]
    n_valid_batches /= self.batch_size

    input_dict = {}
    input_dict[self.xa] = valid_set_xa[index * self.batch_size: (index + 1) * self.batch_size]
    input_dict[self.xb] = valid_set_xb[index * self.batch_size: (index + 1) * self.batch_size]
    input_dict[self.xc] = valid_set_xc[index * self.batch_size: (index + 1) * self.batch_size]
    input_dict[self.yNER1] = valid_yner1[index * self.batch_size: (index + 1) * self.batch_size]
    input_dict[self.yNER2] = valid_yner2[index * self.batch_size: (index + 1) * self.batch_size]
    input_dict[self.additionalFeatures] = valid_mlp[index * self.batch_size: (index + 1) * self.batch_size]

    test_model_confidence = theano.function([index], self.layer3.results(), givens = input_dict)

    resultList = [test_model_confidence(i) for i in xrange(n_valid_batches)]

    return resultList
def start(inputfile):
    global in_time, out_time, cooldown_in_time, cooldown_out_time, classify
    global global_counter, winner_stride, cur_state, in_frame_num, actions_counter
    global test_set_x, test_set_y, shared_test_set_y
    rng = numpy.random.RandomState(23455)

    # ####################### build start ########################

    # create an empty shared variables to be filled later

    data_x = numpy.zeros([1, 20 * 50 * 50])
    data_y = numpy.zeros(20)
    train_set = (data_x, data_y)
    (test_set_x, test_set_y, shared_test_set_y) = \
        shared_dataset(train_set)

    print 'building ... '
    batch_size = 1

    # allocate symbolic variables for the data

    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # image size

    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) / 2
    layer1_h = (layer0_h - 4) / 2
    layer2_w = (layer1_w - 2) / 2
    layer2_h = (layer1_h - 2) / 2
    layer3_w = (layer2_w - 2) / 2
    layer3_h = (layer2_h - 2) / 2
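    # each conv + 2x2-pool stage shrinks the map to (w - filter + 1) / 2, so
    # (with Python 2 integer division): 50 -> 23 -> 10 -> 4 pixels per side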

    # #####################
    # BUILD ACTUAL MODEL #
    # #####################

    # image sizes

    batchsize = batch_size
    in_channels = 20
    in_width = 50
    in_height = 50

    # filter sizes

    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=signals_shape,
                                filter_shape=filters_shape,
                                poolsize=(2, 2))

    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, flt_channels,
                                             layer1_w, layer1_h),
                                filter_shape=(60, flt_channels, 3, 3),
                                poolsize=(2, 2))

    layer2 = LeNetConvPoolLayer(rng,
                                input=layer1.output,
                                image_shape=(batch_size, 60, layer2_w,
                                             layer2_h),
                                filter_shape=(90, 60, 3, 3),
                                poolsize=(2, 2))
    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=90 * layer3_w * layer3_h,
                         n_out=500,
                         activation=T.tanh)

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    cost = layer4.negative_log_likelihood(y)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # load weights

    print 'loading weights state'
    f = open('weights.save', 'rb')
    loaded_objects = []
    for i in range(5):
        loaded_objects.append(cPickle.load(f))
    f.close()
    layer0.__setstate__(loaded_objects[0])
    layer1.__setstate__(loaded_objects[1])
    layer2.__setstate__(loaded_objects[2])
    layer3.__setstate__(loaded_objects[3])
    layer4.__setstate__(loaded_objects[4])

    # ####################### build done ########################

    fromCam = False

    if fromCam:
        print 'using camera input'
        cap = cv2.VideoCapture(0)
    else:
        print 'using input file: ', inputfile
        cap = cv2.VideoCapture(inputfile)

    # my timing

    frame_rate = 5
    frame_interval_ms = 1000 / frame_rate

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    video_writer = cv2.VideoWriter('../out/live_out.avi', fourcc, frame_rate,
                                   (640, 480))

    frame_counter = 0
    (ret, frame) = cap.read()

    proFrame = process_single_frame(frame)

    # init detectors

    st_a_det = RepDetector(proFrame, detector_strides[0])
    st_b_det = RepDetector(proFrame, detector_strides[1])
    st_c_det = RepDetector(proFrame, detector_strides[2])

    frame_wise_counts = []
    while True:

        in_frame_num += 1
        if in_frame_num % 2 == 1:
            continue

        (ret, frame) = cap.read()
        if ret == 0:
            print 'unable to read frame'
            break
        proFrame = process_single_frame(frame)

        # handle stride A
        if frame_counter % st_a_det.stride_number == 0:
            st_a_det.count(proFrame)

        # handle stride B
        if frame_counter % st_b_det.stride_number == 0:
            st_b_det.count(proFrame)

        # handle stride C
        if frame_counter % st_c_det.stride_number == 0:
            st_c_det.count(proFrame)

        # display result on the video

        blue_color = (130, 0, 0)
        green_color = (0, 130, 0)
        red_color = (0, 0, 130)
        orange_color = (0, 140, 0xFF)

        out_time = in_frame_num / 60
        if cur_state == state.IN_REP and (out_time - in_time < 4
                                          or global_counter < 5):
            draw_str(frame, (20, 120),
                     ' new hypothesis (%d) ' % global_counter, orange_color,
                     1.5)
        if cur_state == state.IN_REP and out_time - in_time >= 4 \
            and global_counter >= 5:
            draw_str(
                frame, (20, 120), 'action %d: counting... %d' %
                (actions_counter, global_counter), green_color, 2)
        if cur_state == state.COOLDOWN and global_counter >= 5:
            draw_str(
                frame, (20, 120), 'action %d: done. final counting: %d' %
                (actions_counter, global_counter), blue_color, 2)
        # print "pls", global_counter
        frame_wise_counts.append(global_counter)

        # print 'action %d: done. final counting: %d' % (actions_counter, global_counter)
    print "Dhruv", frame_wise_counts, global_counter
    return frame_wise_counts
Example n. 37
0
  layer2_inputSize = layer1outputsize
  layer2_input = layer1flattened
elif combinationMethod == "noAtt":
  layer2_inputSize = layer0outputsize
  layer2_input = layer0flattened
else: # concatenation
  layer2_inputSize = layer0outputsize + layer1outputsize
  layer2_input = T.concatenate([layer0flattened, layer1flattened], axis = 1)

if useHiddenLayer: 
  # construct a fully-connected sigmoidal layer
  layer2 = HiddenLayer(rng, input=layer2_input, n_in=layer2_inputSize,
                         n_out=hiddenunits, activation=T.tanh)

  # classify the values of the fully-connected sigmoidal layer
  layer3 = LogisticRegression(input=layer2.output, n_in=hiddenunits, n_out=2)
else:
  # classify the values of the fully-connected sigmoidal layer
  layer3 = LogisticRegression(input=layer2_input, n_in=layer2_inputSize, n_out=2)

# create a list of all model non-bricks parameters
paramList = [layer3.params]
if useHiddenLayer:
  paramList.append(layer2.params)
if combinationMethod != "noAtt":
  paramList.append(layer1.params)
# params from layer0 already have the blocks role
params = []
for p in paramList:
  for i, p_part in enumerate(p):
    if i == 0:
Example n. 38
0
    def __init__(self,
                 learning_rate=0.1,
                 L1_reg=0.00,
                 L2_reg=0.0001,
                 filter_hs=[3, 4, 5],
                 filter_num=100,
                 n_hidden=100,
                 n_out=2,
                 word_idx_map=None,
                 wordvec=None,
                 k=300,
                 adjust_input=False):
        """
        :type learning_rate: float
        :param learning_rate: learning rate used (factor for the stochastic
        gradient)

        :type L1_reg: float
        :param L1_reg: L1-norm's weight when added to the cost (see
        regularization)

        :type L2_reg: float
        :param L2_reg: L2-norm's weight when added to the cost (see
        regularization)
        """
        self.learning_rate = learning_rate
        self.L1_reg = L1_reg
        self.L2_reg = L2_reg
        self.word_idx_map = word_idx_map
        rng = np.random.RandomState(3435)
        self.rng = rng
        self.k = k
        self.filter_num = filter_num
        self.filter_hs = filter_hs
        # Can be assigned at the fit step.
        self.batch_size = None

        self.epoch = 0

        self.Words = theano.shared(value=wordvec, name="Words")
        X = T.matrix('X')
        Y = T.ivector('Y')
        self.X = X
        self.Y = Y

        layer0_input = self.Words[T.cast(X.flatten(), dtype='int32')].reshape((X.shape[0], X.shape[1], self.Words.shape[1]))
        self.layer0_input = layer0_input
        c_max_list = []
        self.conv_layer_s = []
        test_case = []

        for filter_h in filter_hs:
            conv_layer = ConvLayer(rng, layer0_input, filter_h=filter_h, filter_num=filter_num, k=k)
            self.conv_layer_s.append(conv_layer)
            c_max_list.append(conv_layer.c_max)
        max_pooling_out = T.concatenate(c_max_list, axis=1)
        max_pooling_out_size = filter_num * len(filter_hs)

        self.hidden_layer = HiddenLayer(rng, max_pooling_out, max_pooling_out_size, n_hidden)

        self.lr_layer = LogisticRegression(
            input=self.hidden_layer.output,
            n_in=n_hidden,
            n_out=n_out,
        )
        # L1 norm ; one regularization option is to enforce L1 norm to
        # be small
        self.L1 = (
            sum([abs(conv_layer.W).sum() for conv_layer in self.conv_layer_s])
            + abs(self.hidden_layer.W).sum()
            + abs(self.lr_layer.W).sum()
        )

        # square of L2 norm ; one regularization option is to enforce
        # square of L2 norm to be small
        self.L2_sqr = (
            sum([(conv_layer.W ** 2).sum() for conv_layer in self.conv_layer_s])
            + (self.hidden_layer.W ** 2).sum()
            + (self.lr_layer.W ** 2).sum()
        )



        # the cost we minimize during training is the negative log likelihood of
        # the model plus the regularization terms (L1 and L2); cost is expressed
        # here symbolically
        self.cost = (
            self.negative_log_likelihood(Y)
            + self.L1_reg * self.L1
            + self.L2_reg * self.L2_sqr
        )

        # the parameters of the model are the parameters of all the layers it
        # is made out of
        self.params = []
        # also adjust the input word vectors
        if adjust_input:
            self.params.append(self.Words)
        for conv_layer in self.conv_layer_s:
            self.params += conv_layer.params
        self.params += self.hidden_layer.params
        self.params += self.lr_layer.params