def __init__(self, cropsize, batch_size, nkerns=[10, 10, 10], filters=[11, 6, 4]):
    self.X_batch, self.y_batch = T.tensor4('x'), T.matrix('y')
    self.layers, self.params = [], []
    rng = np.random.RandomState(23455)

    layer0 = LeNetConvPoolLayer(rng, input=self.X_batch,
                                image_shape=(batch_size, 1, cropsize, cropsize),
                                filter_shape=(nkerns[0], 1, filters[0], filters[0]),
                                poolsize=(2, 2))
    self.layers += [layer0]
    self.params += layer0.params
    # 400 - 11 + 1 = 390 / 2 = 195
    map_size = (cropsize - filters[0] + 1) / 2

    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batch_size, nkerns[0], map_size, map_size),
                                filter_shape=(nkerns[1], nkerns[0], filters[1], filters[1]),
                                poolsize=(2, 2))
    self.layers += [layer1]
    self.params += layer1.params
    # 195 - 6 + 1 = 190 / 2 = 95
    map_size = (map_size - filters[1] + 1) / 2

    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                                image_shape=(batch_size, nkerns[1], map_size, map_size),
                                filter_shape=(nkerns[2], nkerns[1], filters[2], filters[2]),
                                poolsize=(2, 2))
    self.layers += [layer2]
    self.params += layer2.params
    # 95 - 4 + 1 = 92 / 2 = 46
    map_size = (map_size - filters[2] + 1) / 2

    layer3_input = layer2.output.flatten(2)
    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(rng, input=layer3_input,
                         n_in=nkerns[2] * map_size * map_size,
                         n_out=1, activation=None)
    self.layers += [layer3]
    self.params += layer3.params

    nparams = np.sum([p.get_value().flatten().shape[0] for p in self.params])
    print "model contains %i parameters" % nparams

    self.output = self.layers[-1].output
def __init__(self, nkerns=[48], miniBatchSize=200):
    rng = numpy.random.RandomState(23455)
    nClasses = 2
    nMaxPool = 2
    nHidden = 200

    self.p = 65
    #self.x = T.tensor3('x')   # membrane data set
    self.x = T.tensor4('x')    # membrane mini-batch
    self.y = T.ivector('y')    # 1D vector of [int] labels
    self.miniBatchSize = miniBatchSize

    # Reshape matrix of rasterized images
    # to a 4D tensor, compatible with the LeNetConvPoolLayer
    #layer0_input = self.x.reshape((self.miniBatchSize, 1, self.p, self.p))
    layer0_input = self.x

    #--------------------------------------------------
    # LAYER 0
    # layer0 convolution+max pool reduces image dimensions by:
    # 65 -> 62 -> 31
    #--------------------------------------------------
    fs0 = 4                              # filter size, layer 0
    os0 = (self.p - fs0 + 1) / nMaxPool  # image out size 0
    assert (os0 == 31)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(self.miniBatchSize, 1, self.p, self.p),
                                filter_shape=(nkerns[0], 1, fs0, fs0),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 1
    # Fully connected sigmoidal layer, goes from
    # X -> 200
    #--------------------------------------------------
    layer1_input = layer0.output.flatten(2)
    layer1 = HiddenLayer(rng, input=layer1_input,
                         n_in=nkerns[0] * os0 * os0,
                         n_out=nHidden,
                         activation=T.tanh)

    #--------------------------------------------------
    # LAYER 2
    # Classification via a logistic regression layer
    # 200 -> 2
    #--------------------------------------------------
    # classify the values of the fully-connected sigmoidal layer
    layer2 = LogisticRegression(input=layer1.output,
                                n_in=nHidden,
                                n_out=nClasses)

    self.layers = (layer0, layer1, layer2)
def __init__(self, nkerns=[48, 48, 48, 48], miniBatchSize=200):
    rng = numpy.random.RandomState(23455)
    nClasses = 2
    nMaxPool = 2
    nHidden = 200

    self.p = 95
    #self.x = T.tensor3('x')   # membrane data set
    self.x = T.tensor4('x')    # membrane mini-batch
    self.y = T.ivector('y')    # labels := 1D vector of [int] labels
    self.miniBatchSize = miniBatchSize

    # Reshape matrix of rasterized images
    # to a 4D tensor, compatible with the LeNetConvPoolLayer
    #layer0_input = self.x.reshape((self.miniBatchSize, 1, self.p, self.p))
    layer0_input = self.x

    #--------------------------------------------------
    # LAYER 0
    # layer0 convolution+max pool reduces image dimensions by:
    # 95 -> 92 -> 46
    #--------------------------------------------------
    fs0 = 4                              # filter size, layer 0
    os0 = (self.p - fs0 + 1) / nMaxPool  # image out size 0
    assert (os0 == 46)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(self.miniBatchSize, 1, self.p, self.p),
                                filter_shape=(nkerns[0], 1, fs0, fs0),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 1
    # layer1 convolution+max pool reduces image dimensions by:
    # 46 -> 42 -> 21
    #--------------------------------------------------
    fs1 = 5                           # filter size, layer 1
    os1 = (os0 - fs1 + 1) / nMaxPool  # image out size 1
    assert (os1 == 21)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(self.miniBatchSize, nkerns[0], os0, os0),
                                filter_shape=(nkerns[1], nkerns[0], fs1, fs1),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 2
    # layer2 convolution+max pool reduces image dimensions by:
    # 21 -> 18 -> 9
    #--------------------------------------------------
    fs2 = 4
    os2 = (os1 - fs2 + 1) / nMaxPool
    assert (os2 == 9)
    # input feature maps = nkerns[1], the previous layer's output
    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                                image_shape=(self.miniBatchSize, nkerns[1], os1, os1),
                                filter_shape=(nkerns[2], nkerns[1], fs2, fs2),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 3
    # layer3 convolution+max pool reduces image dimensions by:
    # 9 -> 6 -> 3
    #--------------------------------------------------
    fs3 = 4
    os3 = (os2 - fs3 + 1) / nMaxPool
    assert (os3 == 3)
    layer3 = LeNetConvPoolLayer(rng, input=layer2.output,
                                image_shape=(self.miniBatchSize, nkerns[2], os2, os2),
                                filter_shape=(nkerns[3], nkerns[2], fs3, fs3),
                                poolsize=(nMaxPool, nMaxPool))

    #--------------------------------------------------
    # LAYER 4
    # Fully connected sigmoidal layer, goes from
    # 3*3*48 ~ 450 -> 200
    #--------------------------------------------------
    layer4_input = layer3.output.flatten(2)
    layer4 = HiddenLayer(rng, input=layer4_input,
                         n_in=nkerns[3] * os3 * os3,
                         n_out=nHidden,
                         activation=T.tanh)

    #--------------------------------------------------
    # LAYER 5
    # Classification via a logistic regression layer
    # 200 -> 2
    #--------------------------------------------------
    # classify the values of the fully-connected sigmoidal layer
    layer5 = LogisticRegression(input=layer4.output,
                                n_in=nHidden,
                                n_out=nClasses)

    self.layers = (layer0, layer1, layer2, layer3, layer4, layer5)
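The constructors above all repeat the same output-size arithmetic, a valid convolution followed by non-overlapping max pooling. A minimal, self-contained sketch of that calculation (not part of the original code, shown only to make the asserted 95 -> 46 -> 21 -> 9 -> 3 progression explicit):

# Hypothetical helper, mirroring os = (p - fs + 1) / nMaxPool above
# (// is explicit integer division; the Python 2 code relies on plain /).
def conv_pool_out_size(in_size, filter_size, pool_size=2):
    return (in_size - filter_size + 1) // pool_size

size = 95
for f in [4, 5, 4, 4]:
    size = conv_pool_out_size(size, f)
    print(size)   # 46, 21, 9, 3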
def cifar_fast_net(batch_size=128, n_epochs=300, test_frequency=13, learning_rate=0.001):

    rng1 = numpy.random.RandomState(23455)
    rng2 = numpy.random.RandomState(12423)
    rng3 = numpy.random.RandomState(23245)
    rng4 = numpy.random.RandomState(12123)
    rng5 = numpy.random.RandomState(25365)
    rng6 = numpy.random.RandomState(15323)

    train_set_x, train_set_y = load_cifar_data(['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4'])
    valid_set_x, valid_set_y = load_cifar_data(['data_batch_5'], WHICHSET='valid')
    test_set_x, test_set_y = load_cifar_data(['test_batch'], WHICHSET='test')

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    img_input = x.reshape((batch_size, 3, 32, 32))
    img_input = img_input.dimshuffle(1, 2, 3, 0)

    #### define the layers:
    conv_pool1 = LeNetConvPoolLayer(rng=rng1, input=img_input,
                                    filter_shape=(3, 5, 5, 32),
                                    image_shape=(3, 32, 32, batch_size),
                                    activation='vshape',
                                    poolsize=(3, 3), poolstride=2, pad=2,
                                    convstride=1, initW=0.0001, initB=0, partial_sum=4,
                                    pooling='max',
                                    epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.004)

    conv_pool2 = LeNetConvPoolLayer(rng=rng2, input=conv_pool1.output,
                                    filter_shape=(32, 5, 5, 32),
                                    image_shape=(32, 16, 16, batch_size),
                                    activation='vshape',
                                    poolsize=(3, 3), poolstride=2, pad=2,
                                    convstride=1, initW=0.01, initB=0, partial_sum=4,
                                    pooling='average',
                                    epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.004)

    conv_pool3 = LeNetConvPoolLayer(rng=rng3, input=conv_pool2.output,
                                    filter_shape=(32, 5, 5, 64),
                                    image_shape=(32, 8, 8, batch_size),
                                    activation='vshape',
                                    poolsize=(3, 3), poolstride=2, pad=2,
                                    convstride=1, initW=0.01, initB=0, partial_sum=4,
                                    pooling='average',
                                    epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.004)

    layer4_input = conv_pool3.output.dimshuffle(3, 0, 1, 2).flatten(2)

    #fc_64 = HiddenLayer(rng=rng4, input=layer4_input, n_in=64*4*4, n_out=64, initW=0.1, initB=0)
    fc_64 = HiddenLayer(rng=rng4, input=layer4_input, n_in=64*4*4, n_out=64, initW=0.1, initB=0,
                        epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.03)
    fc_10 = LogisticRegression(input=fc_64.output, rng=rng5, n_in=64, n_out=10, initW=0.1,
                               epsW=0.001, epsB=0.002, momW=0.9, momB=0.9, wc=0.03)

    #### build the models:
    cost = fc_10.negative_log_likelihood(y)

    test_model = theano.function([index], fc_10.errors(y),
                                 givens={
                                     x: test_set_x[index * batch_size: (index + 1) * batch_size],
                                     y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function([index], fc_10.errors(y),
                                     givens={
                                         x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                                         y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    Ws = [conv_pool1.W, conv_pool2.W, conv_pool3.W, fc_64.W, fc_10.W]
    pgradWs = [conv_pool1.grad_W, conv_pool2.grad_W, conv_pool3.grad_W, fc_64.grad_W, fc_10.grad_W]
    bs = [conv_pool1.b, conv_pool2.b, conv_pool3.b, fc_64.b, fc_10.b]
    pgradbs = [conv_pool1.grad_b, conv_pool2.grad_b, conv_pool3.grad_b, fc_64.grad_b, fc_10.grad_b]

    momWs = [conv_pool1.momW, conv_pool2.momW, conv_pool3.momW, fc_64.momW, fc_10.momW]
    momBs = [conv_pool1.momB, conv_pool2.momB, conv_pool3.momB, fc_64.momB, fc_10.momB]
    wcs = [conv_pool1.wc, conv_pool2.wc, conv_pool3.wc, fc_64.wc, fc_10.wc]
    epsWs = [conv_pool1.epsW, conv_pool2.epsW, conv_pool3.epsW, fc_64.epsW, fc_10.epsW]
    epsBs = [conv_pool1.epsB, conv_pool2.epsB, conv_pool3.epsB, fc_64.epsB, fc_10.epsB]

    gradWs = T.grad(cost, Ws)
    gradbs = T.grad(cost, bs)

    updates = []
    for W_i, gradW_i, momW_i, wc_i, epsW_i, pgW_i in zip(Ws, gradWs, momWs, wcs, epsWs, pgradWs):
        grad_i = - epsW_i * gradW_i - wc_i * epsW_i * W_i + momW_i * pgW_i
        updates.append((W_i, W_i + grad_i))
        updates.append((pgW_i, grad_i))

    for b_i, gradb_i, momB_i, epsB_i, pgB_i in zip(bs, gradbs, momBs, epsBs, pgradbs):
        grad_i = - epsB_i * gradb_i + momB_i * pgB_i
        updates.append((b_i, b_i + grad_i))
        updates.append((pgB_i, grad_i))

    train_model = theano.function([index], cost, updates=updates,
                                  givens={
                                      x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                      y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000              # look at this many examples regardless
    patience_increase = 2         # wait this much longer when a new best is found
    improvement_threshold = 0.995 # a relative improvement of this much is
                                  # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many minibatches before
                                  # checking the network on the validation set;
                                  # in this case we check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        # below is the code for reducing the learning rate
        ###########################################
        if epoch == 50:
            epsWs = [k / 10.0 for k in epsWs]
            epsBs = [k / 10.0 for k in epsBs]
            print 'reduce eps by a factor of 10'
            updates = []
            for W_i, gradW_i, momW_i, wc_i, epsW_i, pgW_i in zip(Ws, gradWs, momWs, wcs, epsWs, pgradWs):
                grad_i = - epsW_i * gradW_i - wc_i * epsW_i * W_i + momW_i * pgW_i
                updates.append((W_i, W_i + grad_i))
                updates.append((pgW_i, grad_i))
            for b_i, gradb_i, momB_i, epsB_i, pgB_i in zip(bs, gradbs, momBs, epsBs, pgradbs):
                grad_i = - epsB_i * gradb_i + momB_i * pgB_i
                updates.append((b_i, b_i + grad_i))
                updates.append((pgB_i, grad_i))
            train_model = theano.function([index], cost, updates=updates,
                                          givens={
                                              x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                              y: train_set_y[index * batch_size: (index + 1) * batch_size]})
        ##############################################

        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    conv_pool1.bestW = conv_pool1.W.get_value().copy()
                    conv_pool1.bestB = conv_pool1.b.get_value().copy()
                    conv_pool2.bestW = conv_pool2.W.get_value().copy()
                    conv_pool2.bestB = conv_pool2.b.get_value().copy()
                    conv_pool3.bestW = conv_pool3.W.get_value().copy()
                    conv_pool3.bestB = conv_pool3.b.get_value().copy()
                    fc_64.bestW = fc_64.W.get_value().copy()
                    fc_64.bestB = fc_64.b.get_value().copy()
                    fc_10.bestW = fc_10.W.get_value().copy()
                    fc_10.bestB = fc_10.b.get_value().copy()

                    ## saving current best
                    print 'saving current best params..'
                    current_params = (conv_pool1.bestW, conv_pool1.bestB, conv_pool2.bestW,
                                      conv_pool2.bestB, conv_pool3.bestW, conv_pool3.bestB,
                                      fc_64.bestW, fc_64.bestB, fc_10.bestW, fc_10.bestB,
                                      momWs, momBs, epsWs, epsBs, wcs)
                    outfile = file('current_best_params.pkl', 'wb')
                    cPickle.dump(current_params, outfile)
                    outfile.close()

                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
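The parameter updates in cifar_fast_net combine a learning rate (epsW/epsB), weight decay (wc) and momentum applied to the previous step (pgradW/pgradB). A minimal NumPy sketch of one such weight step, using hypothetical names and assuming the gradient has already been computed (not part of the original code):

# step = -eps*grad - wc*eps*W + mom*prev_step;  W += step
import numpy

def momentum_wd_step(W, grad, prev_step, eps=0.001, wc=0.004, mom=0.9):
    step = -eps * grad - wc * eps * W + mom * prev_step
    return W + step, step

W = numpy.zeros((5, 10))
prev = numpy.zeros_like(W)
grad = numpy.random.randn(5, 10)
W, prev = momentum_wd_step(W, grad, prev)  # prev is reused on the next call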
embeddings = theano.shared(numpy.array(wordvectors, dtype=theano.config.floatX)).dimshuffle(1, 0)

batchsizeVar = numSamples.shape[0]
y_resh = y.reshape((batchsizeVar, ))  # rel: e1 -> e2
y1ET_resh = y1ET.reshape((batchsizeVar, ))
y2ET_resh = y2ET.reshape((batchsizeVar, ))
numSamples_resh = numSamples.reshape((batchsizeVar, ))

layers = []
cnnContext = LeNetConvPoolLayer(rng=rng,
                                filter_shape=(nkernsContext, 1, representationsize, filtersizeContext),
                                poolsize=(1, kmaxContext))
layers.append(cnnContext)

if "middleContext" in config:
    hidden_in = nkernsContext * kmaxContext
else:
    cnnEntities = LeNetConvPoolLayer(rng=rng,
                                     filter_shape=(nkernsEntities, 1, representationsize, filtersizeEntities),
                                     poolsize=(1, kmaxEntities))
    layers.append(cnnEntities)
    hidden_in = 2 * (2 * nkernsContext * kmaxContext + nkernsEntities * kmaxEntities)

hiddenLayer = HiddenLayer(rng=rng, n_in=hidden_in, n_out=hiddenUnits)
in_height = 50

# filter sizes
flt_channels = 40
flt_time = 20
flt_width = 5
flt_height = 5

signals_shape = (batchsize, in_channels, in_height, in_width)
filters_shape = (flt_channels, in_channels, flt_height, flt_width)

layer0_input = x.reshape(signals_shape)

layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=signals_shape,
    filter_shape=filters_shape,
    poolsize=(2, 2),
)

layer1 = LeNetConvPoolLayer(
    rng,
    input=layer0.output,
    image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
    filter_shape=(60, flt_channels, 3, 3),
    poolsize=(2, 2),
)

layer2 = LeNetConvPoolLayer(
    rng,
    input=layer1.output,
def prepare_network():

    rng = numpy.random.RandomState(23455)

    print('Preparing Theano model...')

    mydatasets = load_initial_test_data()
    test_set_x, test_set_y, shared_test_set_y, valid_ds = mydatasets
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    # allocate symbolic variables for the data
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # image size
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) // 2
    layer1_h = (layer0_h - 4) // 2
    layer2_w = (layer1_w - 2) // 2
    layer2_h = (layer1_h - 2) // 2
    layer3_w = (layer2_w - 2) // 2
    layer3_h = (layer2_h - 2) // 2

    ######################
    #   BUILD NETWORK    #
    ######################
    # image sizes
    batchsize = 1
    in_channels = 20
    in_width = 50
    in_height = 50
    # filter sizes
    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=signals_shape,
                                filter_shape=filters_shape, poolsize=(2, 2))

    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batchsize, flt_channels, layer1_w, layer1_h),
                                filter_shape=(60, flt_channels, 3, 3), poolsize=(2, 2))

    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                                image_shape=(batchsize, 60, layer2_w, layer2_h),
                                filter_shape=(90, 60, 3, 3), poolsize=(2, 2))

    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(rng, input=layer3_input, n_in=90 * layer3_w * layer3_h,
                         n_out=500, activation=T.tanh)

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    cost = layer4.negative_log_likelihood(y)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batchsize:(index + 1) * batchsize],
            y: test_set_y[index * batchsize:(index + 1) * batchsize]
        })

    print('Loading network weights...')
    weightFile = '../live_count/weights.save'
    f = open(weightFile, 'rb')
    loaded_objects = []
    for i in range(5):
        loaded_objects.append(pickle.load(f))
    f.close()

    layer0.__setstate__(loaded_objects[0])
    layer1.__setstate__(loaded_objects[1])
    layer2.__setstate__(loaded_objects[2])
    layer3.__setstate__(loaded_objects[3])
    layer4.__setstate__(loaded_objects[4])

    return test_set_x, test_set_y, shared_test_set_y, valid_ds, classify, batchsize
def __init__(self, configfile, train=False):

    self.slotList = [
        "N", "per:age", "per:alternate_names", "per:children", "per:cause_of_death",
        "per:date_of_birth", "per:date_of_death", "per:employee_or_member_of",
        "per:location_of_birth", "per:location_of_death", "per:locations_of_residence",
        "per:origin", "per:schools_attended", "per:siblings", "per:spouse", "per:title",
        "org:alternate_names", "org:date_founded", "org:founded_by",
        "org:location_of_headquarters", "org:members", "org:parents",
        "org:top_members_employees"
    ]
    typeList = ["O", "PERSON", "LOCATION", "ORGANIZATION", "DATE", "NUMBER"]

    self.config = readConfig(configfile)

    self.addInputSize = 1
    logger.info("additional mlp input")

    wordvectorfile = self.config["wordvectors"]
    logger.info("wordvectorfile " + wordvectorfile)
    networkfile = self.config["net"]
    logger.info("networkfile " + networkfile)
    hiddenunits = int(self.config["hidden"])
    logger.info("hidden units " + str(hiddenunits))
    hiddenunitsNer = hiddenunits
    if "hiddenunitsNER" in self.config:
        hiddenunitsNer = int(self.config["hiddenunitsNER"])
    representationsizeNER = 50
    if "representationsizeNER" in self.config:
        representationsizeNER = int(self.config["representationsizeNER"])
    learning_rate = float(self.config["lrate"])
    logger.info("learning rate " + str(learning_rate))
    if train:
        self.batch_size = int(self.config["batchsize"])
    else:
        self.batch_size = 1
    logger.info("batch size " + str(self.batch_size))
    self.filtersize = [1, int(self.config["filtersize"])]
    nkerns = [int(self.config["nkerns"])]
    logger.info("nkerns " + str(nkerns))
    pool = [1, int(self.config["kmax"])]

    self.contextsize = int(self.config["contextsize"])
    logger.info("contextsize " + str(self.contextsize))

    if self.contextsize < self.filtersize[1]:
        logger.info("setting filtersize to " + str(self.contextsize))
        self.filtersize[1] = self.contextsize
    logger.info("filtersize " + str(self.filtersize))

    sizeAfterConv = self.contextsize - self.filtersize[1] + 1

    sizeAfterPooling = -1
    if sizeAfterConv < pool[1]:
        logger.info("setting poolsize to " + str(sizeAfterConv))
        pool[1] = sizeAfterConv
    sizeAfterPooling = pool[1]
    logger.info("kmax pooling: k = " + str(pool[1]))

    # reading word vectors
    self.wordvectors, self.vectorsize = readWordvectors(wordvectorfile)
    self.representationsize = self.vectorsize + 1

    rng = numpy.random.RandomState(23455)  # not relevant, parameters will be overwritten by stored model anyway
    if train:
        seed = rng.get_state()[1][0]
        logger.info("seed: " + str(seed))

    numSFclasses = 23
    numNERclasses = 6

    # allocate symbolic variables for the data
    self.index = T.lscalar()         # index to a [mini]batch
    self.xa = T.matrix('xa')         # left context
    self.xb = T.matrix('xb')         # middle context
    self.xc = T.matrix('xc')         # right context
    self.y = T.imatrix('y')          # label (only present in training)
    self.yNER1 = T.imatrix('yNER1')  # label for first entity (only present in training)
    self.yNER2 = T.imatrix('yNER2')  # label for second entity (only present in training)
    ishape = [self.representationsize, self.contextsize]  # this is the size of the context matrices

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    logger.info('... building the model')

    # Reshape input matrix to be compatible with LeNetConvPoolLayer
    layer0a_input = self.xa.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    layer0b_input = self.xb.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    layer0c_input = self.xc.reshape((self.batch_size, 1, ishape[0], ishape[1]))

    y_reshaped = self.y.reshape((self.batch_size, 1))
    yNER1reshaped = self.yNER1.reshape((self.batch_size, 1))
    yNER2reshaped = self.yNER2.reshape((self.batch_size, 1))

    # Construct convolutional pooling layer:
    filter_shape = (nkerns[0], 1, self.representationsize, self.filtersize[1])
    poolsize = (pool[0], pool[1])
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize))
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))

    # the convolution weight matrix
    convW = theano.shared(numpy.asarray(
        rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
        dtype=theano.config.floatX), borrow=True)
    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
    convB = theano.shared(value=b_values, borrow=True)

    self.layer0a = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0a_input,
                                      image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
                                      filter_shape=filter_shape, poolsize=poolsize)
    self.layer0b = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0b_input,
                                      image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
                                      filter_shape=filter_shape, poolsize=poolsize)
    self.layer0c = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0c_input,
                                      image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
                                      filter_shape=filter_shape, poolsize=poolsize)

    layer0aflattened = self.layer0a.output.flatten(2).reshape(
        (self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0bflattened = self.layer0b.output.flatten(2).reshape(
        (self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0cflattened = self.layer0c.output.flatten(2).reshape(
        (self.batch_size, nkerns[0] * sizeAfterPooling))

    layer0outputSF = T.concatenate([layer0aflattened, layer0bflattened, layer0cflattened], axis=1)
    layer0outputSFsize = 3 * (nkerns[0] * sizeAfterPooling)

    layer0outputNER1 = T.concatenate([layer0aflattened, layer0bflattened], axis=1)
    layer0outputNER2 = T.concatenate([layer0bflattened, layer0cflattened], axis=1)
    layer0outputNERsize = 2 * (nkerns[0] * sizeAfterPooling)

    layer2ner1 = HiddenLayer(rng, input=layer0outputNER1, n_in=layer0outputNERsize,
                             n_out=hiddenunitsNer, activation=T.tanh)
    layer2ner2 = HiddenLayer(rng, input=layer0outputNER2, n_in=layer0outputNERsize,
                             n_out=hiddenunitsNer, activation=T.tanh,
                             W=layer2ner1.W, b=layer2ner1.b)

    # concatenate additional features to the sentence representation
    self.additionalFeatures = T.matrix('additionalFeatures')
    self.additionalFeatsShaped = self.additionalFeatures.reshape((self.batch_size, 1))

    layer2SFinput = T.concatenate([layer0outputSF, self.additionalFeatsShaped], axis=1)
    layer2SFinputSize = layer0outputSFsize + self.addInputSize

    layer2SF = HiddenLayer(rng, input=layer2SFinput, n_in=layer2SFinputSize,
                           n_out=hiddenunits, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3rel = LogisticRegression(input=layer2SF.output, n_in=hiddenunits, n_out=numSFclasses)
    layer3et = LogisticRegression(input=layer2ner1.output, n_in=hiddenunitsNer, n_out=numNERclasses)

    scoresForR1 = layer3rel.getScores(layer2SF.output)
    scoresForE1 = layer3et.getScores(layer2ner1.output)
    scoresForE2 = layer3et.getScores(layer2ner2.output)

    self.crfLayer = CRF(numClasses=numSFclasses + numNERclasses, rng=rng,
                        batchsizeVar=self.batch_size, sequenceLength=3)

    scores = T.zeros((self.batch_size, 3, numSFclasses + numNERclasses))
    scores = T.set_subtensor(scores[:, 0, numSFclasses:], scoresForE1)
    scores = T.set_subtensor(scores[:, 1, :numSFclasses], scoresForR1)
    scores = T.set_subtensor(scores[:, 2, numSFclasses:], scoresForE2)
    self.scores = scores

    self.y_conc = T.concatenate([yNER1reshaped + numSFclasses, y_reshaped,
                                 yNER2reshaped + numSFclasses], axis=1)

    # create a list of all model parameters
    self.paramList = [self.crfLayer.params, layer3rel.params, layer3et.params,
                      layer2SF.params, layer2ner1.params, self.layer0a.params]
    self.params = []
    for p in self.paramList:
        self.params += p
        logger.info(p)

    if not train:
        self.gotNetwork = 1
        # load parameters
        if not os.path.isfile(networkfile):
            logger.error("network file does not exist")
            self.gotNetwork = 0
        else:
            save_file = open(networkfile, 'rb')
            for p in self.params:
                p.set_value(cPickle.load(save_file), borrow=False)
            save_file.close()

    self.relation_scores_global = self.crfLayer.getProbForClass(self.scores, numSFclasses)
    self.predictions_global = self.crfLayer.getPrediction(self.scores)
def train_rep(learning_rate=0.002, L1_reg=0.0002, L2_reg=0.005, n_epochs=200,
              nkerns=[20, 50], batch_size=25):

    rng = numpy.random.RandomState(23455)

    train_dir = "../out/h5/"
    valid_dir = "../out/h5/"
    weights_dir = "./weights/"

    print("... load input data")
    filename = train_dir + "rep_train_data_1.gzip.h5"
    datasets = load_initial_data(filename)
    train_set_x, train_set_y, shared_train_set_y = datasets

    filename = valid_dir + "rep_valid_data_1.gzip.h5"
    datasets = load_initial_data(filename)
    valid_set_x, valid_set_y, shared_valid_set_y = datasets

    mydatasets = load_initial_test_data()
    test_set_x, test_set_y, shared_test_set_y, valid_ds = mydatasets

    # compute number of minibatches for training, validation and testing
    n_all_train_batches = 30000
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_all_train_batches /= batch_size
    n_train_batches /= batch_size
    n_valid_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix("x")    # the data is presented as rasterized images
    y = T.ivector("y")   # the labels are presented as a 1D vector of [int] labels

    # image size
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) / 2
    layer1_h = (layer0_h - 4) / 2
    layer2_w = (layer1_w - 2) / 2
    layer2_h = (layer1_h - 2) / 2
    layer3_w = (layer2_w - 2) / 2
    layer3_h = (layer2_h - 2) / 2

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # image sizes
    batchsize = batch_size
    in_channels = 20
    in_width = 50
    in_height = 50
    # filter sizes
    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=signals_shape,
        filter_shape=filters_shape,
        poolsize=(2, 2),
    )

    # TODO: in case of flt_time < in_time the output dimension will be different
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
        filter_shape=(60, flt_channels, 3, 3),
        poolsize=(2, 2),
    )

    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, 60, layer2_w, layer2_h),
        filter_shape=(90, 60, 3, 3),
        poolsize=(2, 2),
    )

    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=90 * layer3_w * layer3_h,
        n_out=500,
        activation=T.tanh,
    )

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batch_size : (index + 1) * batch_size],
            y: test_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size : (index + 1) * batch_size],
            y: valid_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    # create a list of all model parameters to be fit by gradient descent
    params = (
        layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
    )

    # symbolic Theano variable that represents the L1 regularization term
    L1 = (
        T.sum(abs(layer4.params[0]))
        + T.sum(abs(layer3.params[0]))
        + T.sum(abs(layer2.params[0]))
        + T.sum(abs(layer1.params[0]))
        + T.sum(abs(layer0.params[0]))
    )

    # symbolic Theano variable that represents the squared L2 term
    L2_sqr = (
        T.sum(layer4.params[0] ** 2)
        + T.sum(layer3.params[0] ** 2)
        + T.sum(layer2.params[0] ** 2)
        + T.sum(layer1.params[0] ** 2)
        + T.sum(layer0.params[0] ** 2)
    )

    # the loss
    cost = layer4.negative_log_likelihood(y) + L1_reg * L1 + L2_reg * L2_sqr

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size : (index + 1) * batch_size],
            y: train_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")

    start_time = time.clock()

    epoch = 0
    done_looping = False
    cost_ij = 0
    train_files_num = 600
    val_files_num = 100

    startc = time.clock()
    while (epoch < n_epochs) and (not done_looping):
        endc = time.clock()
        print(("epoch %i, took %.2f minutes" % (epoch, (endc - startc) / 60.0)))
        startc = time.clock()
        epoch = epoch + 1
        for nTrainSet in range(1, train_files_num + 1):
            # load next train data
            if nTrainSet % 50 == 0:
                print("training @ nTrainSet = ", nTrainSet, ", cost = ", cost_ij)
            filename = train_dir + "rep_train_data_" + str(nTrainSet) + ".gzip.h5"
            datasets = load_next_data(filename)
            ns_train_set_x, ns_train_set_y = datasets
            train_set_x.set_value(ns_train_set_x, borrow=True)
            shared_train_set_y.set_value(
                numpy.asarray(ns_train_set_y, dtype=theano.config.floatX), borrow=True
            )
            n_train_batches = train_set_x.get_value(borrow=True).shape[0]
            n_train_batches /= batch_size

            # train
            for minibatch_index in range(n_train_batches):
                # training itself
                # --------------------------------------
                cost_ij = train_model(minibatch_index)
                # -------------------------

        # at the end of each epoch run validation
        this_validation_loss = 0
        for nValSet in range(1, val_files_num + 1):
            filename = valid_dir + "rep_valid_data_" + str(nValSet) + ".gzip.h5"
            datasets = load_next_data(filename)
            ns_valid_set_x, ns_valid_set_y = datasets
            valid_set_x.set_value(ns_valid_set_x, borrow=True)
            shared_valid_set_y.set_value(
                numpy.asarray(ns_valid_set_y, dtype=theano.config.floatX), borrow=True
            )
            n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
            n_valid_batches /= batch_size

            # compute zero-one loss on validation set
            validation_losses = [validate_model(i) for i in range(n_valid_batches)]
            this_validation_loss += numpy.mean(validation_losses)
        this_validation_loss /= val_files_num
        print((
            "epoch %i, minibatch %i/%i, validation error %f %%"
            % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.0)
        ))

        # save snapshots
        print("saving weights state, epoch = ", epoch)
        f = file(weights_dir + "weights_epoch" + str(epoch) + ".save", "wb")
        state_L0 = layer0.__getstate__()
        pickle.dump(state_L0, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L1 = layer1.__getstate__()
        pickle.dump(state_L1, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L2 = layer2.__getstate__()
        pickle.dump(state_L2, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L3 = layer3.__getstate__()
        pickle.dump(state_L3, f, protocol=pickle.HIGHEST_PROTOCOL)
        state_L4 = layer4.__getstate__()
        pickle.dump(state_L4, f, protocol=pickle.HIGHEST_PROTOCOL)
        f.close()

    end_time = time.clock()
    print("Optimization complete.")
    print((
        "The code for file " + os.path.split(__file__)[1]
        + " ran for %.2fm" % ((end_time - start_time) / 60.0)
    ), file=sys.stderr)
W_bound = numpy.sqrt(6. / (fan_in + fan_out))
# the convolution weight matrix
convW = theano.shared(numpy.asarray(
    rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
    dtype=theano.config.floatX), name='conv_W', borrow=True)

# the bias is a 1D tensor -- one bias per output feature map
b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
convB = theano.shared(value=b_values, name='conv_b', borrow=True)

layer0 = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0_input,
                            filter_shape=filter_shape, poolsize=poolsize)
layer0flattened = layer0.output.flatten(2).reshape(
    (batch_size_var, nkerns[0] * sizeAfterPooling))
layer0outputsize = nkerns[0] * sizeAfterPooling

if "internalOnH" in attentionMethod:
    layer1 = AttentionLayer(rng, thisInput=layer0.conv_out_tanh, batchsize=batch_size_var,
                            dim1=nkerns[0], dim2=sizeAfterConv,
                            method=attentionMethod, k=kattention)
def main(args):
    # initial parameters
    embedding_size = args.embedding_size
    mention_context_size = args.mention_context_size
    type_context_size = args.type_context_size
    embedding_file = args.embedding_path
    hidden_units = args.hidden_units
    learning_rate = args.learning_rate
    margin = args.margin
    batch_size = args.batch_size
    n_epochs = args.num_epochs

    relation_size = 82
    nkerns = [500]
    filter_size = [1, 1]
    pool = [1, 1]
    l1 = 0.000001
    l2 = 0.000002
    newbob = False
    network_file = args.model_path
    test_file = args.test
    test_result_file = args.test_result
    label_file = args.ontology_path
    label_file_norm = args.norm_ontology_path
    relation_file = args.relation_path
    train_type_flag = args.seen_types

    tup_representation_size = embedding_size * 2

    # load word vectors
    word_vectors, vector_size = load_word_vec(embedding_file)

    # read train and dev file
    print("start loading train and dev file ... ")
    doc_id_list_test, type_list_test, trigger_list_test, left_word_list_test, relation_list_test, \
        right_word_list_test = load_training_data(test_file)

    print("start loading arg and relation files ... ")
    all_type_list, all_type_structures = load_types_1(label_file_norm)
    rel_index, index_rel = read_relation_index(relation_file)
    type_size = len(all_type_list)

    # using a matrix to represent each relation
    relation_matrix = random_init_rel_vec_factor(
        relation_file, tup_representation_size * tup_representation_size)

    train_types = get_types_for_train(train_type_flag, label_file)

    # prepare data structure
    print("start preparing data structures ... ")
    curSeed = 23455
    rng = numpy.random.RandomState(curSeed)
    seed = rng.get_state()[1][0]
    print("seed: ", seed)

    result_index_test_matrix, result_vector_test_matrix, input_context_test_matrix, input_trigger_test_matrix, \
        relation_binary_test_matrix, pos_neg_test_matrix = input_matrix_1_test(
            type_list_test, trigger_list_test, left_word_list_test, relation_list_test,
            right_word_list_test, embedding_size, mention_context_size, relation_size,
            label_file, word_vectors, rel_index, train_type_flag)

    input_type_matrix, input_type_structure_matrix = type_matrix(
        all_type_list, all_type_structures, embedding_file, type_context_size)

    time1 = time.time()
    dt = theano.config.floatX

    test_set_content = theano.shared(numpy.matrix(input_context_test_matrix, dtype=dt))
    test_set_trigger = theano.shared(numpy.matrix(input_trigger_test_matrix, dtype=dt))
    test_set_relation_binary = theano.shared(numpy.matrix(relation_binary_test_matrix, dtype=dt))
    test_set_posneg = theano.shared(numpy.matrix(pos_neg_test_matrix, dtype=dt))
    test_set_y = theano.shared(numpy.array(result_index_test_matrix, dtype=numpy.dtype(numpy.int32)))
    test_set_y_vector = theano.shared(numpy.matrix(result_vector_test_matrix, dtype=dt))

    train_set_type = theano.shared(numpy.matrix(input_type_matrix, dtype=dt))
    train_set_type_structure = theano.shared(numpy.matrix(input_type_structure_matrix, dtype=dt))
    train_types = theano.shared(numpy.matrix(train_types, dtype=dt))

    # compute number of minibatches for training, validation and testing
    n_test_batches = input_trigger_test_matrix.shape[0]
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()                # index to a [mini]batch
    x_content = T.matrix('x_content')  # the data is presented as rasterized images
    x_trigger = T.matrix('x_trigger')  # the data is presented as rasterized images
    x_relation_binary = T.matrix('x_relation_binary')
    x_pos_neg_flag = T.matrix('x_pos_neg_flag')
    x_type = T.matrix('x_type')
    x_type_structure = T.matrix('x_type_structure')
    y = T.ivector('y')                 # the labels are presented as a 1D vector of [int] labels
    y_vector = T.matrix('y_vector')
    x_train_types = T.matrix('x_train_types')

    i_shape = [tup_representation_size, mention_context_size]  # this is the size of the context matrices

    time2 = time.time()
    print("time for preparing data structures: ", time2 - time1)

    # build actual model
    print('start building the model ... ')
    time1 = time.time()

    rel_w = theano.shared(value=relation_matrix, borrow=True)  # 26*400

    # Construct the mention structure input layer
    layer0_input = x_content.reshape((batch_size, 1, i_shape[0], i_shape[1]))
    layer0_input_binary_relation = x_relation_binary.reshape(
        (batch_size, 1, relation_size, i_shape[1]))  # 100*1*26*5

    # compose amr relation matrix to each tuple
    compose_layer = ComposeLayerMatrix(input=layer0_input,
                                       input_binary_relation=layer0_input_binary_relation,
                                       rel_w=rel_w, rel_vec_size=tup_representation_size)
    layer1_input = compose_layer.output

    # initialize the convolution weight matrix
    filter_shape = (nkerns[0], 1, tup_representation_size, filter_size[1])
    pool_size = (pool[0], pool[1])

    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(pool_size))
    w_bound = numpy.sqrt(6. / (fan_in + fan_out))

    conv_w = theano.shared(numpy.asarray(
        rng.uniform(low=-w_bound, high=w_bound, size=filter_shape),
        dtype=theano.config.floatX), borrow=True)
    b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
    conv_b = theano.shared(value=b_values, borrow=True)

    # conv with pool layer
    layer1_conv = LeNetConvPoolLayer(rng, W=conv_w, b=conv_b, input=layer1_input,
                                     image_shape=(batch_size, 1, i_shape[0], i_shape[1]),
                                     filter_shape=filter_shape, poolsize=pool_size)

    layer1_output = layer1_conv.output
    layer1_flattened = layer1_output.flatten(2)

    trigger_features_shaped = x_trigger.reshape((batch_size, embedding_size))

    layer2_input = T.concatenate([layer1_flattened, trigger_features_shaped], axis=1)

    # Construct the type structure input layer
    layer_type_input = x_type_structure.reshape(
        (type_size, 1, tup_representation_size, type_context_size))
    filter_shape_type = (nkerns[0], 1, tup_representation_size, filter_size[1])
    pool_size_type = (pool[0], pool[1])

    # initialize the implicit relation tensor
    type_tensor_shape = (tup_representation_size, tup_representation_size, tup_representation_size)
    type_tensor_w = theano.shared(numpy.asarray(
        rng.uniform(low=-w_bound, high=w_bound, size=type_tensor_shape),
        dtype=theano.config.floatX), borrow=True)

    # compose relation tensor to each tuple
    compose_type_layer = ComposeLayerTensor(input=layer_type_input, tensor=type_tensor_w)
    layer_type_input1 = compose_type_layer.output

    # conv with pool layer
    layer1_conv_type = LeNetConvPoolLayer(rng, W=conv_w, b=conv_b, input=layer_type_input1,
                                          image_shape=(type_size, 1, tup_representation_size,
                                                       type_context_size),
                                          filter_shape=filter_shape_type,
                                          poolsize=pool_size_type)

    layer1_type_output = layer1_conv_type.output
    layer1_type_flattened = layer1_type_output.flatten(2)

    types_shaped = x_type.reshape((type_size, embedding_size))

    layer2_type_input = T.concatenate([layer1_type_flattened, types_shaped], axis=1)
    layer2_type_input_size = nkerns[0]**pool[1] + embedding_size

    # ranking based max margin loss layer
    train_types_signal = x_train_types.reshape((type_size, 1))
    pos_neg_flag = x_pos_neg_flag.reshape((batch_size, 1))

    layer3 = MaxRankingMarginCosine1(rng=rng, input=layer2_input, input_label=layer2_type_input,
                                     true_label=y_vector, n_in=layer2_type_input_size,
                                     margin=margin, batch_size=batch_size, type_size=type_size,
                                     train_type_signal=train_types_signal,
                                     pos_neg_flag=pos_neg_flag)
    cost = layer3.loss

    # create a list of all model parameters to be fit by gradient descent
    param_list = [compose_layer.params, layer1_conv.params, compose_type_layer.params]
    params = []
    for p in param_list:
        params += p

    # the cost we minimize during training is the NLL of the model
    lambd1 = T.scalar('lambda1', dt)
    lambd2 = T.scalar('lambda2', dt)

    # L1 and L2 regularization possible
    reg2 = 0
    reg1 = 0
    for p in param_list:
        reg2 += T.sum(p[0]**2)
        reg1 += T.sum(abs(p[0]))

    cost += lambd2 * reg2
    cost += lambd1 * reg1

    lr = T.scalar('lr', dt)

    start = index * batch_size
    end = (index + 1) * batch_size

    testVariables = {}
    testVariables[x_content] = test_set_content[start:end]
    testVariables[x_trigger] = test_set_trigger[start:end]
    testVariables[x_relation_binary] = test_set_relation_binary[start:end]
    testVariables[x_type] = train_set_type
    testVariables[x_type_structure] = train_set_type_structure
    testVariables[y] = test_set_y[start:end]
    testVariables[y_vector] = test_set_y_vector[start:end]
    testVariables[x_train_types] = train_types
    testVariables[x_pos_neg_flag] = test_set_posneg[start:end]

    print("length of train variables ", len(testVariables))

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD. Since this model
    # has many parameters, it would be tedious to manually create an update rule for each
    # model parameter. We thus create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - lr * grad_i))

    test_model_confidence = theano.function([index], layer3.results(y),
                                            on_unused_input='ignore',
                                            givens=testVariables)

    time2 = time.time()
    print("time for building the model: ", time2 - time1)

    print("loading saved network")
    netfile = open(network_file)
    relW = cPickle.load(netfile)
    compose_layer.params[0].set_value(relW, borrow=True)

    convolW = cPickle.load(netfile)
    convolB = cPickle.load(netfile)
    layer1_conv.params[0].set_value(convolW, borrow=True)
    layer1_conv.params[1].set_value(convolB, borrow=True)
    layer1_conv_type.params[0].set_value(convolW, borrow=True)
    layer1_conv_type.params[1].set_value(convolB, borrow=True)

    typeW = cPickle.load(netfile)
    compose_type_layer.params[0].set_value(typeW, borrow=True)

    netfile.close()
    print("finish loading network")

    test_batch_size = 100
    all_batches = len(result_index_test_matrix) / test_batch_size

    confidence_prob = []
    confidence_value = []
    confidence_list = []
    confidence = [test_model_confidence(i) for i in xrange(all_batches)]
    for r in range(0, len(confidence)):
        for r1 in range(0, test_batch_size):
            hypo_result = confidence[r][0].item(r1)
            confidence_prob.append(confidence[r][2][r1])
            confidence_value.append(confidence[r][1][r1])
            confidence_list.append(hypo_result)

    y_pred = confidence_list

    f = open(test_result_file, "w")
    for i in range(0, len(y_pred)):
        f.write(str(y_pred[i]) + "\t" + str(confidence_value[i]) + "\t")
        for j in range(0, type_size):
            f.write(str(confidence_prob[i][j]) + " ")
        f.write("\n")
    f.close()
def build_model(self, flag_preserve_params=False):
    ###################
    # build the model #
    ###################
    logging.info('... building the model')

    # allocate symbolic variables for the data
    self.index = T.lscalar()  # index to a [mini]batch
    self.x = T.matrix('x')    # the data is presented as rasterized images
    # self.y = T.ivector('y') # the labels are presented as a 1D vector of
    #                         # [int] labels, used to represent labels given by data

    # the y as features, used for taking in intermediate layer "y" values
    self.y = T.matrix('y')

    # Reshape matrix of rasterized images of shape (batch_size, 28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    self.layer0_input = self.x.reshape((self.batch_size, self.img_dim,
                                        self.img_size, self.img_size))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    self.layer0 = LeNetConvPoolLayer(self.rng, input=self.layer0_input,
                                     image_shape=(self.batch_size, self.img_dim,
                                                  self.img_size, self.img_size),
                                     filter_shape=(self.nkerns[0], self.img_dim,
                                                   self.filtersize[0], self.filtersize[0]),
                                     poolsize=(self.poolsize[0], self.poolsize[0]),
                                     activation=self.conv_activation)

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (nkerns[0], nkerns[1], 4, 4)
    self.img_size1 = (self.img_size - self.filtersize[0] + 1) / self.poolsize[0]
    self.layer1 = LeNetConvPoolLayer(self.rng, input=self.layer0.output,
                                     image_shape=(self.batch_size, self.nkerns[0],
                                                  self.img_size1, self.img_size1),
                                     filter_shape=(self.nkerns[1], self.nkerns[0],
                                                   self.filtersize[1], self.filtersize[1]),
                                     poolsize=(self.poolsize[1], self.poolsize[1]),
                                     activation=self.conv_activation)

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (20, 32*4*4) = (20, 512)
    self.layer2_input = self.layer1.output.flatten(2)

    self.img_size2 = (self.img_size1 - self.filtersize[1] + 1) / self.poolsize[1]

    # construct a fully-connected sigmoidal layer
    self.layer2 = HiddenLayer(self.rng, input=self.layer2_input,
                              n_in=self.nkerns[1] * self.img_size2 * self.img_size2,
                              n_out=self.num_hidden,
                              activation=self.hidden_activation)

    # classify the values of the fully-connected sigmoidal layer
    self.layer3 = LogisticRegression(input=self.layer2.output,
                                     n_in=self.num_hidden, n_out=self.num_class,
                                     activation=self.logreg_activation)

    # regularization terms
    self.decay_hidden = self.alpha_l1 * abs(self.layer2.W).sum() + \
        self.alpha_l2 * (self.layer2.W ** 2).sum()
    self.decay_softmax = self.alpha_l1 * abs(self.layer3.W).sum() + \
        self.alpha_l2 * (self.layer3.W ** 2).sum()

    # there are different choices of cost models
    if self.cost_type == 'nll_softmax':
        # the cost we minimize during training is the NLL of the model
        self.y = T.ivector('y')  # index involved, so it has to be an integer vector
        self.cost = self.layer3.negative_log_likelihood(self.y) + \
            self.decay_hidden + self.decay_softmax + \
            self.alpha_entropy * self.layer3.p_y_entropy
    elif self.cost_type == 'ssd_softmax':
        self.cost = T.mean((self.layer3.p_y_given_x - self.y) ** 2) + \
            self.decay_hidden + self.decay_softmax
    elif self.cost_type == 'ssd_hidden':
        self.cost = T.mean((self.layer2.output - self.y) ** 2) + \
            self.decay_hidden
    elif self.cost_type == 'ssd_conv':
        self.cost = T.mean((self.layer2_input - self.y) ** 2)

    # create a list of all model parameters to be fit by gradient descent.
    # Preserve the old parameters if they exist; this is used to keep parameter
    # values while rebuilding some of the theano functions. Note that parameters
    # should only be kept if the network structure doesn't change.
    if flag_preserve_params and hasattr(self, 'params'):
        params_temp = copy.deepcopy(self.params)
    else:
        params_temp = None

    self.params = self.layer3.params + self.layer2.params + self.layer1.params + self.layer0.params

    # if needed, assign the old parameters
    if flag_preserve_params and (params_temp is not None):
        for ind in range(len(params_temp)):
            self.params[ind].set_value(params_temp[ind].get_value(), borrow=True)

    # create a list of gradients for all model parameters
    self.grads = T.grad(self.cost, self.params, disconnected_inputs='warn')

    # error function from the last layer logistic regression
    self.errors = self.layer3.errors
def build_lenet(config):
    rng = np.random.RandomState(23455)

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as a 1D vector

    image_width = config.image_width
    batch_size = config.batch_size
    image_size = image_width**2

    x_shared = T.cast(theano.shared(np.random.rand(batch_size, image_size),
                                    borrow=True), theano.config.floatX)
    y_shared = T.cast(theano.shared(np.random.randint(config.ydim, size=batch_size),
                                    borrow=True), 'int32')

    layer0_input = x.reshape((batch_size, 1, image_width, image_width))

    # construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, image_width, image_width),
        filter_shape=(config.num_kerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, config.num_kerns[0], 12, 12),
        filter_shape=(config.num_kerns[1], config.num_kerns[0], 5, 5),
        poolsize=(2, 2)
    )

    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=config.num_kerns[1] * 4 * 4,
        n_out=500,
        activation=relu
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=config.ydim)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a list of all model parameters to be fit by gradient descent
    params_W = [layer3.W, layer2.W, layer1.W, layer0.W]
    params_b = [layer3.b, layer2.b, layer1.b, layer0.b]
    params = params_W + params_b

    shared_cost = theano.shared(np.float32(0.0))
    grads_temp = T.grad(cost, params)

    start_compilation = time.time()
    forward_step = theano.function([], [], updates=[(shared_cost, cost)],
                                   givens={x: x_shared, y: y_shared})
    forward_backward_step = theano.function([], grads_temp,
                                            givens={x: x_shared, y: y_shared})
    print 'compilation time: %.4f s' % (time.time() - start_compilation)

    return forward_step, forward_backward_step
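A hypothetical usage sketch for build_lenet (not from the original source). It assumes only that the config object exposes the four attributes the function reads (image_width, batch_size, ydim, num_kerns), and uses image_width = 28 so the hard-coded 12x12 and 4*4 shapes above hold:

# Minimal stand-in config; any object with these attributes would do.
class Config(object):
    image_width = 28
    batch_size = 64
    ydim = 10
    num_kerns = [20, 50]

fwd, fwd_bwd = build_lenet(Config())
fwd()              # one forward pass on the random shared data
grads = fwd_bwd()  # forward + backward pass, returns the gradient arrays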
# the convolution weight matrix
convW = theano.shared(numpy.asarray(
    rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
    dtype=theano.config.floatX), name='conv_W', borrow=True)

# the bias is a 1D tensor -- one bias per output feature map
b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
convB = theano.shared(value=b_values, name='conv_b', borrow=True)

layer0a = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0a_input,
                             image_shape=(xa.shape[0], 1, ishape[0], ishape[1]),
                             filter_shape=filter_shape, poolsize=poolsize)
layer0b = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0b_input,
                             image_shape=(xb.shape[0], 1, ishape[0], ishape[1]),
                             filter_shape=filter_shape, poolsize=poolsize)
layer0c = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0c_input,
def main(args): # initial parameters embedding_size = args.embedding_size arg_context_size = args.arg_context_size role_context_size = args.role_context_size embedding_file = args.embedding_path hidden_units = args.hidden_units learning_rate = args.learning_rate margin = args.margin batch_size = args.batch_size n_epochs = args.num_epochs relation_size = 82 nkerns = [500] filter_size = [1, 1] pool = [1, 1] l1 = 0.001 l2 = 0.002 newbob = False arg_network_file = args.model_path arg_train_file = args.train arg_dev_file = args.dev arg_test_file = args.test arg_label_file = args.ontology_path arg_label_file_norm = args.norm_ontology_path relation_file = args.relation_path train_role_flag = args.seen_args arg_path_file_merge = args.arg_path_file arg_path_file_universal = args.arg_path_file_universal trigger_role_matrix_file = args.trigger_role_matrix tup_representation_size = embedding_size * 2 # load word vectors word_vectors, vector_size = load_word_vec(embedding_file) # read train and dev file print("start loading train and dev file ... ") arg_trigger_list_train, arg_trigger_type_list_train, arg_list_train, arg_path_left_list_train, \ arg_path_rel_list_train, arg_path_right_list_train, arg_role_list_train = load_arg_data(arg_train_file) arg_trigger_list_dev, arg_trigger_type_list_dev, arg_list_dev, arg_path_left_list_dev, \ arg_path_rel_list_dev, arg_path_right_list_dev, arg_role_list_dev = load_arg_data(arg_dev_file) arg_trigger_list_test, arg_trigger_type_list_test, arg_list_test, arg_path_left_list_test, \ arg_path_rel_list_test, arg_path_right_list_test, arg_role_list_test = load_arg_data(arg_test_file) num_examples_per_epoch = len(arg_trigger_list_train) print("start loading arg and relation files ... ") all_type_list, all_type_structures = load_types(arg_label_file_norm) type_size = len(all_type_list) all_arg_role_list, all_type_role_structures, index_2_role, trigger_role_2_index, index_2_norm_role, \ trigger_norm_role_2_index = load_roles_1(arg_path_file_merge) role_size = len(all_arg_role_list) trigger_role_matrix = get_trigger_arg_matrix(trigger_role_matrix_file, type_size, role_size) train_roles = get_roles_for_train_1(train_role_flag, arg_path_file_merge) rel_2_index, index_2_rel = read_relation_index(relation_file) relation_matrix = random_init_rel_vec_factor( relation_file, tup_representation_size * tup_representation_size) print("start preparing data structures ... 
") curSeed = 23455 rng = numpy.random.RandomState(curSeed) seed = rng.get_state()[1][0] print("seed: ", seed) # arg data matrix role_index_train_matrix, role_vector_train_matrix, input_arg_context_train_matrix, input_arg_train_matrix, \ arg_relation_binary_train_matrix, pos_neg_role_train_matrix, limited_roles_train_matrix = \ input_arg_matrix(arg_trigger_list_train, arg_trigger_type_list_train, arg_list_train, arg_path_left_list_train, arg_path_rel_list_train, arg_path_right_list_train, arg_role_list_train, word_vectors, all_arg_role_list, trigger_role_2_index, vector_size, arg_context_size, relation_size, rel_2_index, train_roles, trigger_role_matrix, arg_label_file) role_index_dev_matrix, role_vector_dev_matrix, input_arg_context_dev_matrix, input_arg_dev_matrix, \ arg_relation_binary_dev_matrix, pos_neg_role_dev_matrix, limited_roles_dev_matrix = \ input_arg_matrix_test(arg_trigger_list_dev, arg_trigger_type_list_dev, arg_list_dev, arg_path_left_list_dev, arg_path_rel_list_dev,arg_path_right_list_dev, arg_role_list_dev, word_vectors, all_arg_role_list, trigger_role_2_index, vector_size, arg_context_size, relation_size, rel_2_index, train_roles, trigger_role_matrix, arg_label_file) role_index_test_matrix, role_vector_test_matrix, input_arg_context_test_matrix, input_arg_test_matrix, \ arg_relation_binary_test_matrix, pos_neg_role_test_matrix, limited_roles_test_matrix = \ input_arg_matrix_test(arg_trigger_list_test, arg_trigger_type_list_test, arg_list_test, arg_path_left_list_test, arg_path_rel_list_test, arg_path_right_list_test, arg_role_list_test, word_vectors, all_arg_role_list, trigger_role_2_index, vector_size, arg_context_size, relation_size, rel_2_index, train_roles, trigger_role_matrix, arg_label_file) input_role_matrix, input_role_structure_matrix = role_matrix_1( all_arg_role_list, all_type_role_structures, embedding_file, role_context_size) time1 = time.time() dt = theano.config.floatX ## arg data train_set_content_arg = theano.shared( numpy.matrix(input_arg_context_train_matrix, dtype=dt)) valid_set_content_arg = theano.shared( numpy.matrix(input_arg_context_dev_matrix, dtype=dt)) test_set_content_arg = theano.shared( numpy.matrix(input_arg_context_test_matrix, dtype=dt)) train_set_arg = theano.shared( numpy.matrix(input_arg_train_matrix, dtype=dt)) valid_set_arg = theano.shared(numpy.matrix(input_arg_dev_matrix, dtype=dt)) test_set_arg = theano.shared(numpy.matrix(input_arg_test_matrix, dtype=dt)) train_set_relation_binary_arg = theano.shared( numpy.matrix(arg_relation_binary_train_matrix, dtype=dt)) valid_set_relation_binary_arg = theano.shared( numpy.matrix(arg_relation_binary_dev_matrix, dtype=dt)) test_set_relation_binary_arg = theano.shared( numpy.matrix(arg_relation_binary_test_matrix, dtype=dt)) train_set_posneg_arg = theano.shared( numpy.matrix(pos_neg_role_train_matrix, dtype=dt)) valid_set_posneg_arg = theano.shared( numpy.matrix(pos_neg_role_dev_matrix, dtype=dt)) test_set_posneg_arg = theano.shared( numpy.matrix(pos_neg_role_test_matrix, dtype=dt)) train_set_arg_y = theano.shared( numpy.array(role_index_train_matrix, dtype=numpy.dtype(numpy.int32))) valid_set_arg_y = theano.shared( numpy.array(role_index_dev_matrix, dtype=numpy.dtype(numpy.int32))) test_set_arg_y = theano.shared( numpy.array(role_index_test_matrix, dtype=numpy.dtype(numpy.int32))) train_set_arg_y_vector = theano.shared( numpy.matrix(role_vector_train_matrix, dtype=dt)) valid_set_arg_y_vector = theano.shared( numpy.matrix(role_vector_dev_matrix, dtype=dt)) test_set_arg_y_vector = theano.shared( 
numpy.matrix(role_vector_test_matrix, dtype=dt)) train_set_arg_limited_role = theano.shared( numpy.matrix(limited_roles_train_matrix, dtype=dt)) valid_set_arg_limited_role = theano.shared( numpy.matrix(limited_roles_dev_matrix, dtype=dt)) test_set_arg_limited_role = theano.shared( numpy.matrix(limited_roles_test_matrix, dtype=dt)) train_set_role = theano.shared(numpy.matrix(input_role_matrix, dtype=dt)) train_set_role_structure = theano.shared( numpy.matrix(input_role_structure_matrix, dtype=dt)) train_roles = theano.shared(numpy.matrix(train_roles, dtype=dt)) # compute number of minibatches for training, validation and testing n_train_batches = input_arg_train_matrix.shape[0] n_valid_batches = input_arg_dev_matrix.shape[0] n_test_batches = input_arg_test_matrix.shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size n_test_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x_content_arg = T.matrix('x_content_arg') x_arg = T.matrix('x_arg') x_relation_binary_arg = T.matrix('x_relation_binary_arg') x_pos_neg_flag_arg = T.matrix('x_pos_neg_flag_arg') x_role = T.matrix('x_role') x_role_structure = T.matrix('x_role_structure') x_train_roles = T.matrix('x_train_roles') arg_y = T.ivector('arg_y') arg_y_vector = T.matrix('arg_y_vector') arg_limited_role = T.matrix('arg_limited_role') # [int] labels ishape = [tup_representation_size, arg_context_size] # this is the size of context matrizes time2 = time.time() print("time for preparing data structures: ", time2 - time1) # build the actual model print('start building the model ... ') time1 = time.time() # argument representation layer layer0_arg_input = x_content_arg.reshape( (batch_size, 1, ishape[0], ishape[1])) layer0_input_binary_relation = x_relation_binary_arg.reshape( (batch_size, 1, relation_size, ishape[1])) ## 100*1*26*5 # compose amr relation matrix to each tuple rel_w = theano.shared(value=relation_matrix, borrow=True) ## 26*400 compose_layer = ComposeLayerMatrix( input=layer0_arg_input, input_binary_relation=layer0_input_binary_relation, rel_w=rel_w, rel_vec_size=tup_representation_size) layer1_input = compose_layer.output filter_shape = (nkerns[0], 1, tup_representation_size, filter_size[1]) pool_size = (pool[0], pool[1]) fan_in = numpy.prod(filter_shape[1:]) fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(pool_size)) w_bound = numpy.sqrt(6. 
/ (fan_in + fan_out)) conv_w = theano.shared(numpy.asarray(rng.uniform(low=-w_bound, high=w_bound, size=filter_shape), dtype=theano.config.floatX), borrow=True) b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX) conv_b = theano.shared(value=b_values, borrow=True) layer1_arg_conv = LeNetConvPoolLayer(rng, W=conv_w, b=conv_b, input=layer1_input, image_shape=(batch_size, 1, ishape[0], arg_context_size), filter_shape=filter_shape, poolsize=pool_size) layer1_arg_output = layer1_arg_conv.output layer1_arg_flattened = layer1_arg_output.flatten(2) arg_features_shaped = x_arg.reshape((batch_size, embedding_size)) layer2_arg_input = T.concatenate( [layer1_arg_flattened, arg_features_shaped], axis=1) layer2_arg_input_size = nkerns[0] * pool[1] + embedding_size # arg role representation layer layer_role_input = x_role_structure.reshape( (role_size, 1, tup_representation_size, role_context_size)) filter_shape_role = (nkerns[0], 1, tup_representation_size, filter_size[1]) pool_size_role = (pool[0], pool[1]) # initialize the implicit relation tensor type_tensor_shape = (tup_representation_size, tup_representation_size, tup_representation_size) type_tensor_w = theano.shared(numpy.asarray(rng.uniform( low=-w_bound, high=w_bound, size=type_tensor_shape), dtype=theano.config.floatX), borrow=True) # compose relation tensor to each tuple compose_type_layer = ComposeLayerTensor(input=layer_role_input, tensor=type_tensor_w) layer_type_input1 = compose_type_layer.output layer1_conv_role = LeNetConvPoolLayer(rng, W=conv_w, b=conv_b, input=layer_type_input1, image_shape=(role_size, 1, tup_representation_size, role_context_size), filter_shape=filter_shape_role, poolsize=pool_size_role) layer1_role_output = layer1_conv_role.output layer1_role_flattened = layer1_role_output.flatten(2) role_shaped = x_role.reshape((role_size, embedding_size)) layer2_role_input = T.concatenate([layer1_role_flattened, role_shaped], axis=1) layer2_role_input_size = nkerns[0] * pool[1] + embedding_size # ranking based max margin loss layer train_roles_signal = x_train_roles.reshape((role_size, 1)) pos_neg_flag_arg = x_pos_neg_flag_arg.reshape((batch_size, 1)) limited_role = arg_limited_role.reshape((batch_size, role_size)) layer3 = MaxRankingMarginCosine1Arg1(rng=rng, input=layer2_arg_input, input_label=layer2_role_input, true_label=arg_y_vector, n_in=layer2_arg_input_size, n_in2=layer2_role_input_size, margin=margin, batch_size=batch_size, type_size=role_size, train_type_signal=train_roles_signal, pos_neg_flag=pos_neg_flag_arg, limited_role=limited_role) # cost and parameters update cost = layer3.loss # create a list of all model parameters to be fit by gradient descent param_list = [ layer1_arg_conv.params, compose_layer.params, compose_type_layer.params ] params = [] for p in param_list: params += p # the cost we minimize during training is the max-margin ranking loss of the model lambd1 = T.scalar('lambda1', dt) lambd2 = T.scalar('lambda2', dt) # L1 and L2 regularization possible reg2 = 0 reg1 = 0 for p in param_list: reg2 += T.sum(p[0]**2) reg1 += T.sum(abs(p[0])) print("reg1 ", reg1) print("reg2 ", reg2) cost += lambd2 * reg2 cost += lambd1 * reg1 lr = T.scalar('lr', dt) start = index * batch_size end = (index + 1) * batch_size validVariables = {} validVariables[x_content_arg] = valid_set_content_arg[start:end] validVariables[x_arg] = valid_set_arg[start:end] validVariables[x_role] = train_set_role validVariables[x_role_structure] = train_set_role_structure validVariables[x_relation_binary_arg] = valid_set_relation_binary_arg[
start:end] validVariables[arg_y] = valid_set_arg_y[start:end] validVariables[arg_y_vector] = valid_set_arg_y_vector[start:end] validVariables[x_train_roles] = train_roles validVariables[x_pos_neg_flag_arg] = valid_set_posneg_arg[start:end] validVariables[arg_limited_role] = valid_set_arg_limited_role[start:end] testVariables = {} testVariables[x_content_arg] = test_set_content_arg[start:end] testVariables[x_arg] = test_set_arg[start:end] testVariables[x_role] = train_set_role testVariables[x_role_structure] = train_set_role_structure testVariables[x_relation_binary_arg] = test_set_relation_binary_arg[ start:end] testVariables[arg_y] = test_set_arg_y[start:end] testVariables[arg_y_vector] = test_set_arg_y_vector[start:end] testVariables[x_train_roles] = train_roles testVariables[x_pos_neg_flag_arg] = test_set_posneg_arg[start:end] testVariables[arg_limited_role] = test_set_arg_limited_role[start:end] trainVariables = {} trainVariables[x_content_arg] = train_set_content_arg[start:end] trainVariables[x_arg] = train_set_arg[start:end] trainVariables[x_role] = train_set_role trainVariables[x_role_structure] = train_set_role_structure trainVariables[x_relation_binary_arg] = train_set_relation_binary_arg[ start:end] trainVariables[arg_y] = train_set_arg_y[start:end] trainVariables[arg_y_vector] = train_set_arg_y_vector[start:end] trainVariables[x_train_roles] = train_roles trainVariables[x_pos_neg_flag_arg] = train_set_posneg_arg[start:end] trainVariables[arg_limited_role] = train_set_arg_limited_role[start:end] print("number of train variables: ", len(trainVariables)) # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters (here via the RMSprop-style rule built below rather than plain SGD). Since this model has many parameters, # it would be tedious to manually create an update rule for each model parameter. We thus create the updates # list by automatically looping over all (params[i],grads[i]) pairs. updates = [] rho = 0.9 epsilon = 1e-6 # for param_i, grad_i in zip(params, grads): # updates.append((param_i, param_i - lr * grad_i)) for p, g in zip(params, grads): acc = theano.shared(p.get_value() * 0.) acc_new = rho * acc + (1 - rho) * g**2 gradient_scaling = T.sqrt(acc_new + epsilon) g = g / gradient_scaling updates.append((acc, acc_new)) updates.append((p, p - lr * g)) test_model_confidence = theano.function([index], layer3.results(arg_y), on_unused_input='ignore', givens=testVariables) eval_model_confidence = theano.function([index], layer3.results(arg_y), on_unused_input='ignore', givens=validVariables) train_model = theano.function([index, lr, lambd1, lambd2], [cost, layer3.loss], updates=updates, on_unused_input='ignore', givens=trainVariables) time2 = time.time() print("time for building the model: ", time2 - time1) # train the model print('start training ... 
') time1 = time.time() validation_frequency = num_examples_per_epoch / batch_size # validate after each epoch best_params = [] best_fscore = -1 last_fscore = -1 best_fscore_m1 = -1 best_iter = 0 best_fscoreEval = -1 best_fscore_m1Eval = -1 best_iterEval = 0 start_time = time.clock() epoch = 0 done_looping = False maxNoImprovement = 5 noImprovement = 0 while (epoch < n_epochs) and (not done_looping): print('epoch = ', epoch) epoch = epoch + 1 this_n_train_batches = num_examples_per_epoch / batch_size for minibatch_index in xrange(this_n_train_batches): iter = (epoch - 1) * this_n_train_batches + minibatch_index if iter % 100 == 0: print('training @ iter = ', iter) cost_ij, loss = train_model(minibatch_index, learning_rate, l1, l2) print("cost: ", cost_ij) print("loss1: ", loss) if (iter + 1) % validation_frequency == 0: # test data confidence_eval = [ test_model_confidence(i) for i in xrange(n_test_batches) ] confidence_list_eval = [] for r in range(0, len(confidence_eval)): for r1 in range(0, batch_size): hypo_result_eval = confidence_eval[r][0].item(r1) confidence_list_eval.append(hypo_result_eval) y_pred_eval = confidence_list_eval y_true_eval = role_index_test_matrix[:n_test_batches * batch_size] y_true_eval_2 = [] for i in range(len(y_true_eval)): y_true_eval_2.append(int(y_true_eval[i])) labels1 = [13, 14, 15, 16, 17] this_fscore_eval = f1_score(y_true_eval_2, y_pred_eval, labels=labels1, average='micro') this_fscore_macro_eval = f1_score(y_true_eval_2, y_pred_eval, labels=labels1, average='macro') print( 'EVAL: *** epoch %i, best_validation %f, best_validation_m1 %f, learning_rate %f, ' 'minibatch %i/%i, validation fscore %f %%' % (epoch, best_fscoreEval * 100., best_fscore_m1Eval * 100, learning_rate, minibatch_index + 1, this_n_train_batches, this_fscore_eval * 100.)) if this_fscore_eval > best_fscoreEval: best_fscoreEval = this_fscore_eval best_fscore_m1Eval = this_fscore_macro_eval best_iterEval = iter # dev data confidence = [ eval_model_confidence(i) for i in xrange(n_valid_batches) ] confidence_list = [] for r in range(0, len(confidence)): for r1 in range(0, batch_size): hypo_result = confidence[r][0].item(r1) confidence_list.append(hypo_result) y_pred = confidence_list y_true = role_index_dev_matrix[:n_valid_batches * batch_size] y_true_2 = [] for i in range(len(y_true)): y_true_2.append(int(y_true[i])) labels = [] for i in range(1, role_size): labels.append(i) this_fscore = f1_score(y_true_2, y_pred, labels=labels, average='micro') this_fscore_macro = f1_score(y_true_2, y_pred, labels=labels, average='macro') print( 'epoch %i, best_validation %f, best_validation_m1 %f, learning_rate %f, minibatch %i/%i, ' 'validation fscore %f %%' % (epoch, best_fscore * 100., best_fscore_m1 * 100, learning_rate, minibatch_index + 1, this_n_train_batches, this_fscore * 100.)) # if we got the best validation score until now if this_fscore > best_fscore: best_fscore = this_fscore best_fscore_m1 = this_fscore_macro best_iter = iter best_params = [] for p in param_list: p_param = [] for part in p: p_param.append(part.get_value(borrow=False)) best_params.append(p_param) noImprovement = 0 else: if this_fscore > last_fscore: noImprovement -= 1 noImprovement = max(noImprovement, 0) else: noImprovement += 1 updatestep = minibatch_index + this_n_train_batches * ( epoch - 1) if newbob: # learning rate schedule depending on dev result learning_rate /= 1.2 print("reducing learning rate to ", learning_rate) last_fscore = this_fscore if newbob: # learning rate schedule depending on dev result if 
noImprovement > maxNoImprovement or learning_rate < 0.0000001: done_looping = True break if not newbob: if epoch + 1 > 10: learning_rate /= 1.2 print("reducing learning rate to ", learning_rate) if epoch + 1 > 50: done_looping = True break end_time = time.clock() print('Optimization complete.') print( 'Best validation score of %f %% obtained for c=%i, nk=%i, f=%i, h=%i at iteration %i,' % (best_fscore * 100., arg_context_size, nkerns[0], filter_size[1], hidden_units, best_iter + 1)) time2 = time.time() print("time for training: ", time2 - time1) print('Saving net.') save_file = open(arg_network_file, 'wb') for p in best_params: for p_part in p: cPickle.dump(p_part, save_file, -1) save_file.close()
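# ------------------------------------------------------------------
# Note on the parameter updates used in the training script above:
# the acc/rho/epsilon loop is an RMSprop-style rule (running average
# of squared gradients, each gradient rescaled by its RMS).  The
# following is a minimal, self-contained sketch of the same idea.
# It is a hypothetical helper for illustration only, not part of the
# script above; it assumes `cost` is a scalar Theano expression and
# `params` a list of Theano shared variables.
# ------------------------------------------------------------------
def rmsprop_updates(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    updates = []
    for p, g in zip(params, T.grad(cost, params)):
        acc = theano.shared(p.get_value() * 0.)   # running average of g**2
        acc_new = rho * acc + (1 - rho) * g ** 2
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g / T.sqrt(acc_new + epsilon)))
    return updates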
def __init__(self, nkerns=[48, 48, 48], miniBatchSize=200, nHidden=200, nClasses=2, nMaxPool=2, nChannels=1): """ nClasses : the number of target classes (e.g. 2 for binary classification) nMaxPool : number of pixels to max pool nChannels : number of input channels (e.g. 1 for single grayscale channel) """ rng = numpy.random.RandomState(23455) self.p = 65 self.miniBatchSize = miniBatchSize # Note: self.x and self.y will be re-bound to a subset of the # training/validation/test data dynamically by the update # stage of the appropriate function. self.x = T.tensor4('x') # membrane mini-batch self.y = T.ivector('y') # 1D vector of [int] labels # We now assume the input will already be reshaped to the # proper size (i.e. we don't need a theano resize op here). layer0_input = self.x #-------------------------------------------------- # LAYER 0 # layer0 convolution+max pool reduces image dimensions by: # 65 -> 62 -> 31 #-------------------------------------------------- fs0 = 4 # conv. filter size, layer 0 os0 = (self.p - fs0 + 1) / nMaxPool # image out size 0 assert (os0 == 31) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(self.miniBatchSize, nChannels, self.p, self.p), filter_shape=(nkerns[0], nChannels, fs0, fs0), poolsize=(nMaxPool, nMaxPool)) #-------------------------------------------------- # LAYER 1 # layer1 convolution+max pool reduces image dimensions by: # 31 -> 28 -> 14 #-------------------------------------------------- fs1 = 4 # filter size, layer 1 os1 = (os0 - fs1 + 1) / nMaxPool # image out size 1 assert (os1 == 14) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(self.miniBatchSize, nkerns[0], os0, os0), filter_shape=(nkerns[1], nkerns[0], fs1, fs1), poolsize=(nMaxPool, nMaxPool)) #-------------------------------------------------- # LAYER 2 # layer2 convolution+max pool reduces image dimensions by: # 14 -> 10 -> 5 #-------------------------------------------------- fs2 = 5 os2 = (os1 - fs2 + 1) / nMaxPool assert (os2 == 5) layer2 = LeNetConvPoolLayer(rng, input=layer1.output, image_shape=(self.miniBatchSize, nkerns[1], os1, os1), filter_shape=(nkerns[2], nkerns[1], fs2, fs2), poolsize=(nMaxPool, nMaxPool)) #-------------------------------------------------- # LAYER 3 # Fully connected sigmoidal layer, goes from # 5*5*48 -> 200 #-------------------------------------------------- layer3_input = layer2.output.flatten(2) layer3 = HiddenLayer(rng, input=layer3_input, n_in=nkerns[2] * os2 * os2, n_out=nHidden, activation=T.tanh) #-------------------------------------------------- # LAYER 4 # Classification via a logistic regression layer # 200 -> 2 #-------------------------------------------------- # classify the values of the fully-connected sigmoidal layer layer4 = LogisticRegression(input=layer3.output, n_in=nHidden, n_out=nClasses) self.layers = (layer0, layer1, layer2, layer3, layer4)
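# ------------------------------------------------------------------
# The asserts in the constructor above just restate the spatial
# arithmetic of a "valid" convolution followed by non-overlapping max
# pooling: out = (in - filter + 1) / pool.  A tiny helper
# (hypothetical, illustration only) that reproduces the
# 65 -> 31 -> 14 -> 5 chain for filter sizes (4, 4, 5) and pool 2:
# ------------------------------------------------------------------
def conv_pool_out_sizes(in_size, filter_sizes, pool=2):
    sizes = []
    for f in filter_sizes:
        in_size = (in_size - f + 1) // pool   # valid conv, then max pool
        sizes.append(in_size)
    return sizes

# conv_pool_out_sizes(65, [4, 4, 5]) == [31, 14, 5]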
def start(inputfile): global in_time, out_time, cooldown_in_time, cooldown_out_time, classify global global_counter, winner_stride, cur_state, in_frame_num, actions_counter global test_set_x, test_set_y, shared_test_set_y rng = numpy.random.RandomState(23455) # ####################### build start ######################## # create an empty shared variables to be filled later data_x = numpy.zeros([1, 20 * 50 * 50]) data_y = numpy.zeros(20) train_set = (data_x, data_y) (test_set_x, test_set_y, shared_test_set_y) = \ shared_dataset(train_set) print 'building ... ' batch_size = 1 # allocate symbolic variables for the data index = T.lscalar() x = T.matrix('x') y = T.ivector('y') # image size layer0_w = 50 layer0_h = 50 layer1_w = (layer0_w - 4) / 2 layer1_h = (layer0_h - 4) / 2 layer2_w = (layer1_w - 2) / 2 layer2_h = (layer1_h - 2) / 2 layer3_w = (layer2_w - 2) / 2 layer3_h = (layer2_h - 2) / 2 # ##################### # BUILD ACTUAL MODEL # # ##################### # image sizes batchsize = batch_size in_channels = 20 in_width = 50 in_height = 50 # filter sizes flt_channels = 40 flt_time = 20 flt_width = 5 flt_height = 5 signals_shape = (batchsize, in_channels, in_height, in_width) filters_shape = (flt_channels, in_channels, flt_height, flt_width) layer0_input = x.reshape(signals_shape) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=signals_shape, filter_shape=filters_shape, poolsize=(2, 2)) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, flt_channels, layer1_w, layer1_h), filter_shape=(60, flt_channels, 3, 3), poolsize=(2, 2)) layer2 = LeNetConvPoolLayer(rng, input=layer1.output, image_shape=(batch_size, 60, layer2_w, layer2_h), filter_shape=(90, 60, 3, 3), poolsize=(2, 2)) layer3_input = layer2.output.flatten(2) layer3 = HiddenLayer(rng, input=layer3_input, n_in=90 * layer3_w * layer3_h, n_out=500, activation=T.tanh) layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8) cost = layer4.negative_log_likelihood(y) classify = theano.function( [index], outputs=layer4.get_output_labels(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) # load weights print 'loading weights state' f = file('weights.save', 'rb') loaded_objects = [] for i in range(5): loaded_objects.append(cPickle.load(f)) f.close() layer0.__setstate__(loaded_objects[0]) layer1.__setstate__(loaded_objects[1]) layer2.__setstate__(loaded_objects[2]) layer3.__setstate__(loaded_objects[3]) layer4.__setstate__(loaded_objects[4]) # ####################### build done ######################## fromCam = False if fromCam: print 'using camera input' cap = cv2.VideoCapture(0) else: print 'using input file: ', inputfile cap = cv2.VideoCapture(inputfile) # my timing frame_rate = 5 frame_interval_ms = 1000 / frame_rate fourcc = cv2.VideoWriter_fourcc(*'XVID') video_writer = cv2.VideoWriter('../out/live_out.avi', fourcc, frame_rate, (640, 480)) frame_counter = 0 (ret, frame) = cap.read() proFrame = process_single_frame(frame) # init detectors st_a_det = RepDetector(proFrame, detector_strides[0]) st_b_det = RepDetector(proFrame, detector_strides[1]) st_c_det = RepDetector(proFrame, detector_strides[2]) frame_wise_counts = [] while True: in_frame_num += 1 if in_frame_num % 2 == 1: continue (ret, frame) = cap.read() if ret == 0: print 'unable to read frame' break proFrame = process_single_frame(frame) # handle stride A.... 
if frame_counter % st_a_det.stride_number == 0: st_a_det.count(proFrame) # handle stride B if frame_counter % st_b_det.stride_number == 0: st_b_det.count(proFrame) # handle stride C if frame_counter % st_c_det.stride_number == 0: st_c_det.count(proFrame) # display result on video blue_color = (130, 0, 0) green_color = (0, 130, 0) red_color = (0, 0, 130) orange_color = (0, 140, 0xFF) out_time = in_frame_num / 60 if cur_state == state.IN_REP and (out_time - in_time < 4 or global_counter < 5): draw_str(frame, (20, 120), ' new hypothesis (%d) ' % global_counter, orange_color, 1.5) if cur_state == state.IN_REP and out_time - in_time >= 4 \ and global_counter >= 5: draw_str( frame, (20, 120), 'action %d: counting... %d' % (actions_counter, global_counter), green_color, 2) if cur_state == state.COOLDOWN and global_counter >= 5: draw_str( frame, (20, 120), 'action %d: done. final count: %d' % (actions_counter, global_counter), blue_color, 2) frame_wise_counts.append(global_counter) # print 'action %d: done. final count: %d' % (actions_counter, global_counter) print "frame-wise counts:", frame_wise_counts, "final count:", global_counter return frame_wise_counts
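# ------------------------------------------------------------------
# The loading loop in start() above expects 'weights.save' to hold the
# five layer states pickled in order (layer0 .. layer4).  Below is a
# sketch of the matching save step -- hypothetical, for illustration
# only; it assumes each layer implements __getstate__ symmetrically to
# the __setstate__ calls used above.
# ------------------------------------------------------------------
def save_network_state(layers, path='weights.save'):
    f = file(path, 'wb')
    for layer in layers:                          # e.g. (layer0, ..., layer4)
        cPickle.dump(layer.__getstate__(), f, -1)  # -1: highest pickle protocol
    f.close()

# save_network_state((layer0, layer1, layer2, layer3, layer4))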
def __init__(self, configfile, train = False): self.config = readConfig(configfile) self.addInputSize = 1 logger.info("additional mlp input") wordvectorfile = self.config["wordvectors"] logger.info("wordvectorfile " + str(wordvectorfile)) networkfile = self.config["net"] logger.info("networkfile " + str(networkfile)) hiddenunits = int(self.config["hidden"]) logger.info("hidden units " + str(hiddenunits)) hiddenunitsNER = 50 if "hiddenunitsNER" in self.config: hiddenunitsNER = int(self.config["hiddenunitsNER"]) logger.info("hidden units NER " + str(hiddenunitsNER)) learning_rate = float(self.config["lrate"]) logger.info("learning rate " + str(learning_rate)) if train: self.batch_size = int(self.config["batchsize"]) else: self.batch_size = 1 logger.info("batch size " + str(self.batch_size)) self.filtersize = [1,int(self.config["filtersize"])] nkerns = [int(self.config["nkerns"])] logger.info("nkerns " + str(nkerns)) pool = [1, int(self.config["kmax"])] self.contextsize = int(self.config["contextsize"]) logger.info("contextsize " + str(self.contextsize)) if self.contextsize < self.filtersize[1]: logger.info("setting filtersize to " + str(self.contextsize)) self.filtersize[1] = self.contextsize logger.info("filtersize " + str(self.filtersize)) sizeAfterConv = self.contextsize - self.filtersize[1] + 1 sizeAfterPooling = -1 if sizeAfterConv < pool[1]: logger.info("setting poolsize to " + str(sizeAfterConv)) pool[1] = sizeAfterConv sizeAfterPooling = pool[1] logger.info("kmax pooling: k = " + str(pool[1])) # reading word vectors self.wordvectors, self.vectorsize = readWordvectors(wordvectorfile) self.representationsize = self.vectorsize + 1 rng = numpy.random.RandomState(23455) if train: seed = rng.get_state()[1][0] logger.info("seed: " + str(seed)) # allocate symbolic variables for the data self.index = T.lscalar() # index to a [mini]batch self.xa = T.matrix('xa') # left context self.xb = T.matrix('xb') # middle context self.xc = T.matrix('xc') # right context self.y = T.imatrix('y') # label (only present in training) self.yNER1 = T.imatrix('yNER1') # label for first entity self.yNER2 = T.imatrix('yNER2') # label for second entity ishape = [self.representationsize, self.contextsize] # this is the size of context matrizes ###################### # BUILD ACTUAL MODEL # ###################### logger.info('... building the model') # Reshape input matrix to be compatible with our LeNetConvPoolLayer layer0a_input = self.xa.reshape((self.batch_size, 1, ishape[0], ishape[1])) layer0b_input = self.xb.reshape((self.batch_size, 1, ishape[0], ishape[1])) layer0c_input = self.xc.reshape((self.batch_size, 1, ishape[0], ishape[1])) self.y_reshaped = self.y.reshape((self.batch_size, 1)) yNER1reshaped = self.yNER1.reshape((self.batch_size, 1)) yNER2reshaped = self.yNER2.reshape((self.batch_size, 1)) # Construct convolutional pooling layer: filter_shape = (nkerns[0], 1, self.representationsize, self.filtersize[1]) poolsize=(pool[0], pool[1]) fan_in = numpy.prod(filter_shape[1:]) fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)) W_bound = numpy.sqrt(6. 
/ (fan_in + fan_out)) # the convolution weight matrix convW = theano.shared(numpy.asarray( rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=theano.config.floatX), borrow=True) # the bias is a 1D tensor -- one bias per output feature map b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX) convB = theano.shared(value=b_values, borrow=True) self.layer0a = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0a_input, image_shape=(self.batch_size, 1, ishape[0], ishape[1]), filter_shape=filter_shape, poolsize=poolsize) self.layer0b = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0b_input, image_shape=(self.batch_size, 1, ishape[0], ishape[1]), filter_shape=filter_shape, poolsize=poolsize) self.layer0c = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0c_input, image_shape=(self.batch_size, 1, ishape[0], ishape[1]), filter_shape=filter_shape, poolsize=poolsize) #layer0_output = T.concatenate([self.layer0a.output, self.layer0b.output, self.layer0c.output], axis = 3) layer0aflattened = self.layer0a.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling)) layer0bflattened = self.layer0b.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling)) layer0cflattened = self.layer0c.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling)) layer0_output = T.concatenate([layer0aflattened, layer0bflattened, layer0cflattened], axis = 1) self.layer1a = HiddenLayer(rng = rng, input = self.yNER1, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh) self.layer1b = HiddenLayer(rng = rng, input = self.yNER2, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh, W = self.layer1a.W, b = self.layer1a.b) layer2_input = T.concatenate([layer0_output, self.layer1a.output, self.layer1b.output], axis = 1) layer2_inputSize = 3 * nkerns[0] * sizeAfterPooling + 2 * hiddenunitsNER self.additionalFeatures = T.matrix('additionalFeatures') additionalFeatsShaped = self.additionalFeatures.reshape((self.batch_size, 1)) layer2_input = T.concatenate([layer2_input, additionalFeatsShaped], axis = 1) layer2_inputSize += self.addInputSize self.layer2 = HiddenLayer(rng, input=layer2_input, n_in=layer2_inputSize, n_out=hiddenunits, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer self.layer3 = LogisticRegression(input=self.layer2.output, n_in=hiddenunits, n_out=23) # create a list of all model parameters self.paramList = [self.layer3.params, self.layer2.params, self.layer1a.params, self.layer0a.params] self.params = [] for p in self.paramList: self.params += p logger.info(p) if not train: self.gotNetwork = 1 # load parameters if not os.path.isfile(networkfile): logger.error("network file does not exist") self.gotNetwork = 0 else: save_file = open(networkfile, 'rb') for p in self.params: p.set_value(cPickle.load(save_file), borrow=False) save_file.close()
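# ------------------------------------------------------------------
# The clamping logic in the constructor above (the filter width and the
# k-max pool width are both reduced when the context is too small) can
# be summarised in one place.  A compact restatement -- hypothetical
# helper, illustration only; it returns the width after k-max pooling:
# ------------------------------------------------------------------
def kmax_pool_width(contextsize, filtersize, kmax):
    filtersize = min(filtersize, contextsize)        # filter cannot exceed the context
    size_after_conv = contextsize - filtersize + 1   # width after a valid convolution
    return min(kmax, size_after_conv)                # k-max pooling keeps at most k columns

# e.g. kmax_pool_width(contextsize=50, filtersize=3, kmax=3) == 3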
in_channels = 20 in_width = 50 in_height = 50 #filter sizes flt_channels = 40 flt_time = 20 flt_width = 5 flt_height = 5 signals_shape = (batchsize, in_channels, in_height, in_width) filters_shape = (flt_channels, in_channels, flt_height, flt_width) layer0_input = x.reshape(signals_shape) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=signals_shape, filter_shape=filters_shape, poolsize=(2, 2)) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, flt_channels, layer1_w, layer1_h), filter_shape=(60, flt_channels, 3, 3), poolsize=(2, 2)) layer2 = LeNetConvPoolLayer(rng, input=layer1.output, image_shape=(batch_size, 60, layer2_w, layer2_h), filter_shape=(90, 60, 3, 3), poolsize=(2, 2)) layer3_input = layer2.output.flatten(2) layer3 = HiddenLayer(rng, input=layer3_input, n_in=90 * layer3_w * layer3_h , n_out=500, activation=T.tanh)