예제 #1
0
파일: models.py 프로젝트: mesnilgr/sarco
    def __init__(self,
                 cropsize,
                 batch_size,
                 nkerns=[10, 10, 10],
                 filters=[11, 6, 4]):
        """Stack three conv/pool stages and a single-output dense layer.

        cropsize: side length of the square, one-channel input crops.
        batch_size: number of crops per mini-batch; fixed into the shapes
            handed to every conv stage.
        nkerns: feature-map count of each of the three conv stages.
        filters: square filter side length of each of the three conv stages.

        Populates ``self.X_batch``, ``self.y_batch``, ``self.layers``,
        ``self.params`` and ``self.output``.  The list defaults are shared
        objects, but they are only read here.
        """
        self.X_batch = T.tensor4('x')
        self.y_batch = T.matrix('y')
        self.layers = []
        self.params = []
        rng = np.random.RandomState(23455)

        # Running state while walking down the conv stack.
        signal = self.X_batch  # symbolic input of the next stage
        n_maps = 1             # feature maps entering the next stage
        side = cropsize        # side length of those feature maps

        for k in range(3):
            conv = LeNetConvPoolLayer(rng,
                                      input=signal,
                                      image_shape=(batch_size, n_maps,
                                                   side, side),
                                      filter_shape=(nkerns[k], n_maps,
                                                    filters[k], filters[k]),
                                      poolsize=(2, 2))
            self.layers += [conv]
            self.params += conv.params

            signal = conv.output
            n_maps = nkerns[k]
            # Valid convolution followed by 2x2 pooling.  With the defaults
            # and cropsize=400: 400 -> 195 -> 95 -> 46.
            side = (side - filters[k] + 1) / 2

        # Dense read-out with one unit.  activation=None is passed through
        # to HiddenLayer -- presumably a linear output; confirm there.
        readout = HiddenLayer(rng,
                              input=signal.flatten(2),
                              n_in=n_maps * side * side,
                              n_out=1,
                              activation=None)
        self.layers += [readout]
        self.params += readout.params

        # Total number of scalar parameters across all layers.
        nparams = np.sum(
            [p.get_value().flatten().shape[0] for p in self.params])
        print("model contains %i parameters" % nparams)
        self.output = self.layers[-1].output
예제 #2
0
    def __init__(self, nkerns=[48], miniBatchSize=200):
        """Build a one-conv-stage classifier for 65x65 single-channel tiles.

        nkerns: list whose first entry is the number of conv feature maps.
        miniBatchSize: examples per mini-batch; fixed into the conv shape.

        Sets ``self.p`` (tile side), the symbolic inputs ``self.x`` /
        ``self.y``, ``self.miniBatchSize`` and ``self.layers`` (conv stage,
        tanh hidden layer, logistic-regression output).
        """
        rng = numpy.random.RandomState(23455)
        n_classes = 2
        pool = 2
        n_hidden = 200

        self.p = 65
        self.x = T.tensor4('x')  # membrane mini-batch
        self.y = T.ivector('y')  # 1D vector of [int] labels
        self.miniBatchSize = miniBatchSize

        # ---- Stage 0: convolution + max pooling, 65 -> 62 -> 31 ----
        # self.x is already 4D, so it feeds the conv stage unchanged.
        filt0 = 4
        out0 = (self.p - filt0 + 1) / pool
        assert (out0 == 31)
        conv_stage = LeNetConvPoolLayer(
            rng,
            input=self.x,
            image_shape=(self.miniBatchSize, 1, self.p, self.p),
            filter_shape=(nkerns[0], 1, filt0, filt0),
            poolsize=(pool, pool))

        # ---- Stage 1: fully connected tanh layer, flattened maps -> 200 ----
        dense_stage = HiddenLayer(rng,
                                  input=conv_stage.output.flatten(2),
                                  n_in=nkerns[0] * out0 * out0,
                                  n_out=n_hidden,
                                  activation=T.tanh)

        # ---- Stage 2: logistic regression, 200 -> 2 classes ----
        classifier = LogisticRegression(input=dense_stage.output,
                                        n_in=n_hidden,
                                        n_out=n_classes)

        self.layers = (conv_stage, dense_stage, classifier)
예제 #3
0
    def __init__(self, nkerns=[48, 48, 48, 48], miniBatchSize=200):
        """Build a four-conv-stage classifier for 95x95 single-channel tiles.

        nkerns: feature-map counts of the four conv stages, in order.
        miniBatchSize: examples per mini-batch; fixed into the conv shapes.

        Sets ``self.p`` (tile side), the symbolic inputs ``self.x`` /
        ``self.y``, ``self.miniBatchSize`` and ``self.layers`` (four conv
        stages, a tanh hidden layer and a logistic-regression output).

        Each conv stage declares as its input-channel count the number of
        feature maps produced by the stage before it, so ``nkerns`` entries
        may differ from one another.
        """
        rng = numpy.random.RandomState(23455)
        nClasses = 2
        nMaxPool = 2
        nHidden = 200

        self.p = 95
        self.x = T.tensor4('x')  # membrane mini-batch
        self.y = T.ivector('y')  # labels := 1D vector of [int] labels
        self.miniBatchSize = miniBatchSize

        # self.x is already a 4D tensor, so no reshape is needed before
        # handing it to the first conv stage.
        layer0_input = self.x

        #--------------------------------------------------
        # LAYER 0
        # convolution + max pool: 95 -> 92 -> 46
        #--------------------------------------------------
        fs0 = 4  # filter size, layer 0
        # Floor division keeps the size an int on Python 2 and Python 3.
        os0 = (self.p - fs0 + 1) // nMaxPool  # image out size 0
        assert (os0 == 46)
        layer0 = LeNetConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=(self.miniBatchSize, 1, self.p,
                                                 self.p),
                                    filter_shape=(nkerns[0], 1, fs0, fs0),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 1
        # convolution + max pool: 46 -> 42 -> 21
        #--------------------------------------------------
        fs1 = 5  # filter size, layer 1
        os1 = (os0 - fs1 + 1) // nMaxPool  # image out size 1
        assert (os1 == 21)
        layer1 = LeNetConvPoolLayer(rng,
                                    input=layer0.output,
                                    image_shape=(self.miniBatchSize, nkerns[0],
                                                 os0, os0),
                                    filter_shape=(nkerns[1], nkerns[0], fs1,
                                                  fs1),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 2
        # convolution + max pool: 21 -> 18 -> 9
        #--------------------------------------------------
        fs2 = 4
        os2 = (os1 - fs2 + 1) // nMaxPool
        assert (os2 == 9)
        # Input has nkerns[1] channels (the output maps of layer 1); this
        # must agree with the second entry of filter_shape.
        layer2 = LeNetConvPoolLayer(rng,
                                    input=layer1.output,
                                    image_shape=(self.miniBatchSize, nkerns[1],
                                                 os1, os1),
                                    filter_shape=(nkerns[2], nkerns[1], fs2,
                                                  fs2),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 3
        # convolution + max pool: 9 -> 6 -> 3
        #--------------------------------------------------
        fs3 = 4
        os3 = (os2 - fs3 + 1) // nMaxPool
        assert (os3 == 3)
        # Input has nkerns[2] channels (the output maps of layer 2).
        layer3 = LeNetConvPoolLayer(rng,
                                    input=layer2.output,
                                    image_shape=(self.miniBatchSize, nkerns[2],
                                                 os2, os2),
                                    filter_shape=(nkerns[3], nkerns[2], fs3,
                                                  fs3),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 4
        # Fully connected tanh layer: nkerns[3]*3*3 (432 by default) -> 200
        #--------------------------------------------------
        layer4_input = layer3.output.flatten(2)
        layer4 = HiddenLayer(rng,
                             input=layer4_input,
                             n_in=nkerns[3] * os3 * os3,
                             n_out=nHidden,
                             activation=T.tanh)

        #--------------------------------------------------
        # LAYER 5
        # Classification via a logistic regression layer: 200 -> 2
        #--------------------------------------------------
        layer5 = LogisticRegression(input=layer4.output,
                                    n_in=nHidden,
                                    n_out=nClasses)

        self.layers = (layer0, layer1, layer2, layer3, layer4, layer5)
예제 #4
0
def cifar_fast_net(batch_size=128,n_epochs=300,test_frequency=13, learning_rate=0.001):

    rng1 = numpy.random.RandomState(23455)
    rng2 = numpy.random.RandomState(12423)
    rng3 = numpy.random.RandomState(23245)
    rng4 = numpy.random.RandomState(12123)
    rng5 = numpy.random.RandomState(25365)
    rng6 = numpy.random.RandomState(15323)
    train_set_x, train_set_y = load_cifar_data(['data_batch_1','data_batch_2','data_batch_3','data_batch_4'])
    valid_set_x, valid_set_y = load_cifar_data(['data_batch_5'],WHICHSET='valid')
    test_set_x, test_set_y = load_cifar_data(['test_batch'],WHICHSET='test')

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    index = T.lscalar()

    x = T.matrix('x')
    y = T.ivector('y')

    img_input = x.reshape((batch_size,3,32,32))
    img_input = img_input.dimshuffle(1,2,3,0)
####define the layers:
    conv_pool1 = LeNetConvPoolLayer(rng=rng1,input=img_input,
                                    filter_shape=(3,5,5,32),
                                    image_shape=(3,32,32,batch_size),
                                    activation='vshape',
                                    poolsize=(3,3),poolstride=2,pad=2,
                                    convstride=1,initW=0.0001,initB=0,partial_sum=4,
                                    pooling='max',
                                    epsW=0.001,
                                    epsB=0.002,
                                    momW=0.9,
                                    momB=0.9,
                                    wc=0.004
                                    )

    conv_pool2 = LeNetConvPoolLayer(rng=rng2,input=conv_pool1.output,
                                    filter_shape=(32,5,5,32),
                                    image_shape=(32,16,16,batch_size),
                                    activation='vshape',
                                    poolsize=(3,3),poolstride=2,pad=2,
                                    convstride=1,initW=0.01,initB=0,partial_sum=4,
                                    pooling='average',
                                    epsW=0.001,
                                    epsB=0.002,
                                    momW=0.9,
                                    momB=0.9,
                                    wc=0.004)
    conv_pool3 = LeNetConvPoolLayer(rng=rng3,input=conv_pool2.output,
                                    filter_shape=(32,5,5,64),
                                    image_shape=(32,8,8,batch_size),
                                    activation='vshape',
                                    poolsize=(3,3),poolstride=2,pad=2,
                                    convstride=1,initW=0.01,initB=0,partial_sum=4,
                                    pooling='average',
                                    epsW=0.001,
                                    epsB=0.002,
                                    momW=0.9,
                                    momB=0.9,
                                    wc=0.004)

    layer4_input = conv_pool3.output.dimshuffle(3,0,1,2).flatten(2)
    #fc_64 = HiddenLayer(rng=rng4,input=layer4_input,n_in=64*4*4,n_out=64,initW=0.1,initB=0)
    fc_64 = HiddenLayer(rng=rng4,input=layer4_input,n_in=64*4*4,n_out=64,initW=0.1,initB=0,
                        epsW=0.001,
                        epsB=0.002,
                        momW=0.9,
                        momB=0.9,
                        wc=0.03)
    fc_10 = LogisticRegression(input=fc_64.output,rng=rng5,n_in=64,n_out=10,initW=0.1,
                               epsW=0.001,
                                epsB=0.002,
                                momW=0.9,
                                momB=0.9,
                                wc=0.03)
####build the models:
    cost = fc_10.negative_log_likelihood(y)
    test_model = theano.function([index], fc_10.errors(y),
             givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function([index], fc_10.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    Ws = [conv_pool1.W, conv_pool2.W, conv_pool3.W, fc_64.W, fc_10.W]
    pgradWs = [conv_pool1.grad_W, conv_pool2.grad_W, conv_pool3.grad_W, fc_64.grad_W, fc_10.grad_W]

    bs = [conv_pool1.b, conv_pool2.b, conv_pool3.b, fc_64.b, fc_10.b]
    pgradbs = [conv_pool1.grad_b, conv_pool2.grad_b, conv_pool3.grad_b, fc_64.grad_b, fc_10.grad_b]

    momWs = [conv_pool1.momW, conv_pool2.momW, conv_pool3.momW, fc_64.momW, fc_10.momW]
    momBs = [conv_pool1.momB, conv_pool2.momB, conv_pool3.momB, fc_64.momB, fc_10.momB]
    wcs = [conv_pool1.wc, conv_pool2.wc, conv_pool3.wc, fc_64.wc, fc_10.wc]
    epsWs = [conv_pool1.epsW, conv_pool2.epsW, conv_pool3.epsW, fc_64.epsW, fc_10.epsW]
    epsBs = [conv_pool1.epsB, conv_pool2.epsB, conv_pool3.epsB, fc_64.epsB, fc_10.epsB]

    gradWs = T.grad(cost, Ws)
    gradbs = T.grad(cost, bs)
    updates = []
    for W_i, gradW_i, momW_i, wc_i, epsW_i, pgW_i in zip(Ws,gradWs,momWs,wcs, epsWs,pgradWs):
        grad_i = - epsW_i*gradW_i - wc_i*epsW_i*W_i + momW_i*pgW_i
        updates.append((W_i, W_i+grad_i))
        updates.append((pgW_i,grad_i))

    for b_i, gradb_i, momB_i, epsB_i, pgB_i in zip(bs,gradbs,momBs, epsBs,pgradbs):
        grad_i = - epsB_i*gradb_i + momB_i*pgB_i
        updates.append((b_i, b_i+grad_i))
        updates.append((pgB_i,grad_i))







    train_model = theano.function([index], cost, updates=updates,
          givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        #below is the code for reduce learning_rate
        ###########################################

        if epoch == 50:
            epsWs = [k/10.0 for k in epsWs]
            epsBs = [k/10.0 for k in epsBs]
            print 'reduce eps by a factor of 10'
            updates = []
            for W_i, gradW_i, momW_i, wc_i, epsW_i, pgW_i in zip(Ws,gradWs,momWs,wcs, epsWs,pgradWs):
                grad_i = - epsW_i*gradW_i - wc_i*epsW_i*W_i + momW_i*pgW_i
                updates.append((W_i, W_i+grad_i))
                updates.append((pgW_i,grad_i))

            for b_i, gradb_i, momB_i, epsB_i, pgB_i in zip(bs,gradbs,momBs, epsBs,pgradbs):
                grad_i = - epsB_i*gradb_i + momB_i*pgB_i
                updates.append((b_i, b_i+grad_i))
                updates.append((pgB_i,grad_i))
            train_model = theano.function([index], cost, updates=updates,
              givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size]})

        ##############################################
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    conv_pool1.bestW = conv_pool1.W.get_value().copy()
                    conv_pool1.bestB = conv_pool1.b.get_value().copy()
                    conv_pool2.bestW = conv_pool2.W.get_value().copy()
                    conv_pool2.bestB = conv_pool2.b.get_value().copy()
                    conv_pool3.bestW = conv_pool3.W.get_value().copy()
                    conv_pool3.bestB = conv_pool3.b.get_value().copy()
                    fc_64.bestW = fc_64.W.get_value().copy()
                    fc_64.bestB = fc_64.b.get_value().copy()
                    fc_10.bestW = fc_10.W.get_value().copy()
                    fc_10.bestB = fc_10.b.get_value().copy()

                    ##saving current best
                    print 'saving current best params..'
                    current_params = (conv_pool1.bestW,conv_pool1.bestB,conv_pool2.bestW,
                    conv_pool2.bestB,conv_pool3.bestW,conv_pool3.bestB,fc_64.bestW,fc_64.bestB,
                    fc_10.bestW,fc_10.bestB,momWs,momBs,epsWs,epsBs,wcs)
                    outfile = file('current_best_params.pkl','wb')
                    cPickle.dump(current_params,outfile)
                    outfile.close()


                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
예제 #5
0
# Word vectors as a shared variable, with its two axes swapped.
embeddings = theano.shared(
    numpy.array(wordvectors, dtype=theano.config.floatX)
).dimshuffle(1, 0)

# Flatten the label and sample-count variables to 1D, using the first
# dimension of numSamples as the length.
batchsizeVar = numSamples.shape[0]
y_resh = y.reshape((batchsizeVar, ))  # rel:e1->e2
y1ET_resh = y1ET.reshape((batchsizeVar, ))
y2ET_resh = y2ET.reshape((batchsizeVar, ))

numSamples_resh = numSamples.reshape((batchsizeVar, ))

# The context CNN is always built; its filters span the whole
# representation axis and pooling keeps kmaxContext values per kernel.
cnnContext = LeNetConvPoolLayer(
    rng=rng,
    filter_shape=(nkernsContext, 1, representationsize, filtersizeContext),
    poolsize=(1, kmaxContext))
layers = [cnnContext]

if "middleContext" not in config:
    # An additional CNN for the entities; the hidden layer then takes the
    # larger combined input computed below.
    cnnEntities = LeNetConvPoolLayer(
        rng=rng,
        filter_shape=(nkernsEntities, 1, representationsize,
                      filtersizeEntities),
        poolsize=(1, kmaxEntities))
    layers.append(cnnEntities)
    hidden_in = 2 * (2 * nkernsContext * kmaxContext +
                     nkernsEntities * kmaxEntities)
else:
    # Middle-context-only setup: the hidden layer sees just the pooled
    # context features.
    hidden_in = nkernsContext * kmaxContext

hiddenLayer = HiddenLayer(rng=rng, n_in=hidden_in, n_out=hiddenUnits)
예제 #6
0
    in_height = 50
    # filter sizes
    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=signals_shape,
        filter_shape=filters_shape,
        poolsize=(2, 2),
    )

    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
        filter_shape=(60, flt_channels, 3, 3),
        poolsize=(2, 2),
    )

    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
def prepare_network():
    """Build the classification network and load its saved weights.

    Constructs three conv/pool stages, a tanh hidden layer and an 8-way
    logistic-regression output over 20-channel 50x50 inputs with a batch
    size of 1, compiles a Theano function that classifies one batch of the
    test set by index, and restores the five layers' states from
    ``../live_count/weights.save``.

    Returns:
        (test_set_x, test_set_y, shared_test_set_y, valid_ds, classify,
        batchsize), where the first four come from
        ``load_initial_test_data()`` and ``classify(index)`` evaluates
        ``layer4.get_output_labels(y)`` on batch ``index``.

    Raises:
        IOError/OSError if the weight file cannot be opened; whatever
        ``pickle.load`` raises on a malformed file.
    """
    rng = numpy.random.RandomState(23455)

    print('Preparing Theano model...')

    test_set_x, test_set_y, shared_test_set_y, valid_ds = \
        load_initial_test_data()

    # allocate symbolic variables for the data
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # Feature-map sizes after each conv (valid) + 2x2 pool stage:
    # 50 -> 23 -> 10 -> 4.
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) // 2
    layer1_h = (layer0_h - 4) // 2
    layer2_w = (layer1_w - 2) // 2
    layer2_h = (layer1_h - 2) // 2
    layer3_w = (layer2_w - 2) // 2
    layer3_h = (layer2_h - 2) // 2

    ######################
    # BUILD NETWORK #
    ######################
    # input sizes
    batchsize = 1
    in_channels = 20
    in_width = 50
    in_height = 50
    # filter sizes
    flt_channels = 40
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=signals_shape,
                                filter_shape=filters_shape,
                                poolsize=(2, 2))

    # NOTE(review): image_shape lists width before height here, while
    # signals_shape lists height first; harmless while both are equal.
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batchsize, flt_channels, layer1_w,
                                             layer1_h),
                                filter_shape=(60, flt_channels, 3, 3),
                                poolsize=(2, 2))

    layer2 = LeNetConvPoolLayer(rng,
                                input=layer1.output,
                                image_shape=(batchsize, 60, layer2_w,
                                             layer2_h),
                                filter_shape=(90, 60, 3, 3),
                                poolsize=(2, 2))
    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=90 * layer3_w * layer3_h,
                         n_out=500,
                         activation=T.tanh)

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    # NOTE(review): `cost` is not used below; the call is kept in case
    # negative_log_likelihood has side effects -- confirm and remove.
    cost = layer4.negative_log_likelihood(y)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batchsize:(index + 1) * batchsize],
            y: test_set_y[index * batchsize:(index + 1) * batchsize]
        })

    print('Loading network weights...')
    weightFile = '../live_count/weights.save'
    network_layers = (layer0, layer1, layer2, layer3, layer4)
    # The file holds one pickled state per layer, in layer order.
    # SECURITY: pickle.load executes arbitrary code from the file; only use
    # weight files from a trusted source.
    with open(weightFile, 'rb') as f:
        loaded_objects = [pickle.load(f) for _ in network_layers]
    for layer, state in zip(network_layers, loaded_objects):
        layer.__setstate__(state)

    return test_set_x, test_set_y, shared_test_set_y, valid_ds, classify, batchsize
    def __init__(self, configfile, train=False):

        self.slotList = [
            "N", "per:age", "per:alternate_names", "per:children",
            "per:cause_of_death", "per:date_of_birth", "per:date_of_death",
            "per:employee_or_member_of", "per:location_of_birth",
            "per:location_of_death", "per:locations_of_residence",
            "per:origin", "per:schools_attended", "per:siblings", "per:spouse",
            "per:title", "org:alternate_names", "org:date_founded",
            "org:founded_by", "org:location_of_headquarters", "org:members",
            "org:parents", "org:top_members_employees"
        ]

        typeList = [
            "O", "PERSON", "LOCATION", "ORGANIZATION", "DATE", "NUMBER"
        ]

        self.config = readConfig(configfile)

        self.addInputSize = 1
        logger.info("additional mlp input")

        wordvectorfile = self.config["wordvectors"]
        logger.info("wordvectorfile " + wordvectorfile)
        networkfile = self.config["net"]
        logger.info("networkfile " + networkfile)
        hiddenunits = int(self.config["hidden"])
        logger.info("hidden units " + str(hiddenunits))
        hiddenunitsNer = hiddenunits
        if "hiddenunitsNER" in self.config:
            hiddenunitsNer = int(self.config["hiddenunitsNER"])
        representationsizeNER = 50
        if "representationsizeNER" in self.config:
            representationsizeNER = int(self.config["representationsizeNER"])
        learning_rate = float(self.config["lrate"])
        logger.info("learning rate " + str(learning_rate))
        if train:
            self.batch_size = int(self.config["batchsize"])
        else:
            self.batch_size = 1
        logger.info("batch size " + str(self.batch_size))
        self.filtersize = [1, int(self.config["filtersize"])]
        nkerns = [int(self.config["nkerns"])]
        logger.info("nkerns " + str(nkerns))
        pool = [1, int(self.config["kmax"])]

        self.contextsize = int(self.config["contextsize"])
        logger.info("contextsize " + str(self.contextsize))

        if self.contextsize < self.filtersize[1]:
            logger.info("setting filtersize to " + str(self.contextsize))
            self.filtersize[1] = self.contextsize
        logger.info("filtersize " + str(self.filtersize))

        sizeAfterConv = self.contextsize - self.filtersize[1] + 1

        sizeAfterPooling = -1
        if sizeAfterConv < pool[1]:
            logger.info("setting poolsize to " + str(sizeAfterConv))
            pool[1] = sizeAfterConv
        sizeAfterPooling = pool[1]
        logger.info("kmax pooling: k = " + str(pool[1]))

        # reading word vectors
        self.wordvectors, self.vectorsize = readWordvectors(wordvectorfile)

        self.representationsize = self.vectorsize + 1

        rng = numpy.random.RandomState(
            23455
        )  # not relevant, parameters will be overwritten by stored model anyways
        if train:
            seed = rng.get_state()[1][0]
            logger.info("seed: " + str(seed))

        numSFclasses = 23
        numNERclasses = 6

        # allocate symbolic variables for the data
        self.index = T.lscalar()  # index to a [mini]batch
        self.xa = T.matrix('xa')  # left context
        self.xb = T.matrix('xb')  # middle context
        self.xc = T.matrix('xc')  # right context
        self.y = T.imatrix('y')  # label (only present in training)
        self.yNER1 = T.imatrix(
            'yNER1')  # label for first entity (only present in training)
        self.yNER2 = T.imatrix(
            'yNER2')  # label for second entity (only present in training)
        ishape = [self.representationsize,
                  self.contextsize]  # this is the size of context matrizes

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        logger.info('... building the model')

        # Reshape input matrix to be compatible with LeNetConvPoolLayer
        layer0a_input = self.xa.reshape(
            (self.batch_size, 1, ishape[0], ishape[1]))
        layer0b_input = self.xb.reshape(
            (self.batch_size, 1, ishape[0], ishape[1]))
        layer0c_input = self.xc.reshape(
            (self.batch_size, 1, ishape[0], ishape[1]))

        y_reshaped = self.y.reshape((self.batch_size, 1))
        yNER1reshaped = self.yNER1.reshape((self.batch_size, 1))
        yNER2reshaped = self.yNER2.reshape((self.batch_size, 1))

        # Construct convolutional pooling layer:
        filter_shape = (nkerns[0], 1, self.representationsize,
                        self.filtersize[1])
        poolsize = (pool[0], pool[1])
        fan_in = numpy.prod(filter_shape[1:])
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        # the convolution weight matrix
        convW = theano.shared(numpy.asarray(rng.uniform(low=-W_bound,
                                                        high=W_bound,
                                                        size=filter_shape),
                                            dtype=theano.config.floatX),
                              borrow=True)
        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
        convB = theano.shared(value=b_values, borrow=True)

        self.layer0a = LeNetConvPoolLayer(rng,
                                          W=convW,
                                          b=convB,
                                          input=layer0a_input,
                                          image_shape=(self.batch_size, 1,
                                                       ishape[0], ishape[1]),
                                          filter_shape=filter_shape,
                                          poolsize=poolsize)
        self.layer0b = LeNetConvPoolLayer(rng,
                                          W=convW,
                                          b=convB,
                                          input=layer0b_input,
                                          image_shape=(self.batch_size, 1,
                                                       ishape[0], ishape[1]),
                                          filter_shape=filter_shape,
                                          poolsize=poolsize)
        self.layer0c = LeNetConvPoolLayer(rng,
                                          W=convW,
                                          b=convB,
                                          input=layer0c_input,
                                          image_shape=(self.batch_size, 1,
                                                       ishape[0], ishape[1]),
                                          filter_shape=filter_shape,
                                          poolsize=poolsize)

        layer0aflattened = self.layer0a.output.flatten(2).reshape(
            (self.batch_size, nkerns[0] * sizeAfterPooling))
        layer0bflattened = self.layer0b.output.flatten(2).reshape(
            (self.batch_size, nkerns[0] * sizeAfterPooling))
        layer0cflattened = self.layer0c.output.flatten(2).reshape(
            (self.batch_size, nkerns[0] * sizeAfterPooling))
        layer0outputSF = T.concatenate(
            [layer0aflattened, layer0bflattened, layer0cflattened], axis=1)
        layer0outputSFsize = 3 * (nkerns[0] * sizeAfterPooling)

        layer0outputNER1 = T.concatenate([layer0aflattened, layer0bflattened],
                                         axis=1)
        layer0outputNER2 = T.concatenate([layer0bflattened, layer0cflattened],
                                         axis=1)
        layer0outputNERsize = 2 * (nkerns[0] * sizeAfterPooling)

        layer2ner1 = HiddenLayer(rng,
                                 input=layer0outputNER1,
                                 n_in=layer0outputNERsize,
                                 n_out=hiddenunitsNer,
                                 activation=T.tanh)
        layer2ner2 = HiddenLayer(rng,
                                 input=layer0outputNER2,
                                 n_in=layer0outputNERsize,
                                 n_out=hiddenunitsNer,
                                 activation=T.tanh,
                                 W=layer2ner1.W,
                                 b=layer2ner1.b)

        # concatenate additional features to sentence representation
        self.additionalFeatures = T.matrix('additionalFeatures')
        self.additionalFeatsShaped = self.additionalFeatures.reshape(
            (self.batch_size, 1))

        layer2SFinput = T.concatenate(
            [layer0outputSF, self.additionalFeatsShaped], axis=1)
        layer2SFinputSize = layer0outputSFsize + self.addInputSize

        layer2SF = HiddenLayer(rng,
                               input=layer2SFinput,
                               n_in=layer2SFinputSize,
                               n_out=hiddenunits,
                               activation=T.tanh)

        # classify the values of the fully-connected sigmoidal layer
        layer3rel = LogisticRegression(input=layer2SF.output,
                                       n_in=hiddenunits,
                                       n_out=numSFclasses)
        layer3et = LogisticRegression(input=layer2ner1.output,
                                      n_in=hiddenunitsNer,
                                      n_out=numNERclasses)

        scoresForR1 = layer3rel.getScores(layer2SF.output)
        scoresForE1 = layer3et.getScores(layer2ner1.output)
        scoresForE2 = layer3et.getScores(layer2ner2.output)

        self.crfLayer = CRF(numClasses=numSFclasses + numNERclasses,
                            rng=rng,
                            batchsizeVar=self.batch_size,
                            sequenceLength=3)

        scores = T.zeros((self.batch_size, 3, numSFclasses + numNERclasses))
        scores = T.set_subtensor(scores[:, 0, numSFclasses:], scoresForE1)
        scores = T.set_subtensor(scores[:, 1, :numSFclasses], scoresForR1)
        scores = T.set_subtensor(scores[:, 2, numSFclasses:], scoresForE2)
        self.scores = scores

        self.y_conc = T.concatenate([
            yNER1reshaped + numSFclasses, y_reshaped,
            yNER2reshaped + numSFclasses
        ],
                                    axis=1)

        # create a list of all model parameters
        self.paramList = [
            self.crfLayer.params, layer3rel.params, layer3et.params,
            layer2SF.params, layer2ner1.params, self.layer0a.params
        ]
        self.params = []
        for p in self.paramList:
            self.params += p
            logger.info(p)

        if not train:
            self.gotNetwork = 1
            # load parameters
            if not os.path.isfile(networkfile):
                logger.error("network file does not exist")
                self.gotNetwork = 0
            else:
                save_file = open(networkfile, 'rb')
                for p in self.params:
                    p.set_value(cPickle.load(save_file), borrow=False)
                save_file.close()

        self.relation_scores_global = self.crfLayer.getProbForClass(
            self.scores, numSFclasses)
        self.predictions_global = self.crfLayer.getPrediction(self.scores)
예제 #9
0
def train_rep(
    learning_rate=0.002,
    L1_reg=0.0002,
    L2_reg=0.005,
    n_epochs=200,
    nkerns=[20, 50],
    batch_size=25,
):
    """Train a three-stage conv/pool network with minibatch SGD on HDF5 shards.

    The network is three ``LeNetConvPoolLayer`` stages, a tanh ``HiddenLayer``
    with 500 units and an 8-way ``LogisticRegression``.  Training data is
    streamed shard by shard into the shared variables returned by
    ``load_initial_data``; after every epoch the validation shards are
    evaluated and the state of every layer is pickled to ``./weights/``.

    Args:
        learning_rate: Step size of the plain SGD update.
        L1_reg: Weight of the L1 penalty on the layers' first parameter.
        L2_reg: Weight of the squared-L2 penalty on the layers' first parameter.
        n_epochs: Number of passes over all training shards.
        nkerns: Not read by this function; kept for interface compatibility.
        batch_size: Number of rows per minibatch.
    """
    rng = numpy.random.RandomState(23455)

    train_dir = "../out/h5/"
    valid_dir = "../out/h5/"
    weights_dir = "./weights/"

    print("... load input data")
    filename = train_dir + "rep_train_data_1.gzip.h5"
    train_set_x, train_set_y, shared_train_set_y = load_initial_data(filename)

    filename = valid_dir + "rep_valid_data_1.gzip.h5"
    valid_set_x, valid_set_y, shared_valid_set_y = load_initial_data(filename)

    mydatasets = load_initial_test_data()
    test_set_x, test_set_y, shared_test_set_y, valid_ds = mydatasets

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix("x")  # one row per example; reshaped to 4D below
    y = T.ivector("y")  # integer class labels

    # Feature-map sizes after each conv/pool stage.  Floor division keeps
    # them integers under Python 3 as well (they are used as shape entries
    # and in n_in below).
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) // 2
    layer1_h = (layer0_h - 4) // 2
    layer2_w = (layer1_w - 2) // 2
    layer2_h = (layer1_h - 2) // 2
    layer3_w = (layer2_w - 2) // 2
    layer3_h = (layer2_h - 2) // 2

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    in_channels = 20
    flt_channels = 40
    flt_width = 5
    flt_height = 5

    signals_shape = (batch_size, in_channels, layer0_h, layer0_w)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=signals_shape,
        filter_shape=filters_shape,
        poolsize=(2, 2),
    )

    # TODO: incase of flt_time < in_time the output dimension will be different
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
        filter_shape=(60, flt_channels, 3, 3),
        poolsize=(2, 2),
    )

    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, 60, layer2_w, layer2_h),
        filter_shape=(90, 60, 3, 3),
        poolsize=(2, 2),
    )
    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=90 * layer3_w * layer3_h,
        n_out=500,
        activation=T.tanh,
    )

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    # Compiled for the test set but never called inside this function.
    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batch_size : (index + 1) * batch_size],
            y: test_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size : (index + 1) * batch_size],
            y: valid_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    # create a list of all model parameters to be fit by gradient descent
    params = (
        layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
    )

    # symbolic Theano variable that represents the L1 regularization term
    L1 = (
        T.sum(abs(layer4.params[0]))
        + T.sum(abs(layer3.params[0]))
        + T.sum(abs(layer2.params[0]))
        + T.sum(abs(layer1.params[0]))
        + T.sum(abs(layer0.params[0]))
    )
    # symbolic Theano variable that represents the squared L2 term
    L2_sqr = (
        T.sum(layer4.params[0] ** 2)
        + T.sum(layer3.params[0] ** 2)
        + T.sum(layer2.params[0] ** 2)
        + T.sum(layer1.params[0] ** 2)
        + T.sum(layer0.params[0] ** 2)
    )
    # the loss
    cost = layer4.negative_log_likelihood(y) + L1_reg * L1 + L2_reg * L2_sqr

    # plain SGD: one (param, param - lr * grad) pair per parameter
    grads = T.grad(cost, params)
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size : (index + 1) * batch_size],
            y: train_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")

    # time.clock() no longer exists on Python >= 3.8; wall-clock time is
    # reported instead.
    start_time = time.time()

    epoch = 0
    cost_ij = 0
    train_files_num = 600
    val_files_num = 100
    # Bound up front so the per-epoch summary cannot hit an unbound name
    # when no shard yields a full minibatch.
    minibatch_index = -1
    n_train_batches = 0

    startc = time.time()
    while epoch < n_epochs:
        endc = time.time()
        print(("epoch %i, took %.2f minutes" % (epoch, (endc - startc) / 60.0)))
        startc = time.time()
        epoch = epoch + 1
        for nTrainSet in range(1, train_files_num + 1):
            # load next train data
            if nTrainSet % 50 == 0:
                print("training @ nTrainSet =  ", nTrainSet, ", cost = ", cost_ij)
            filename = train_dir + "rep_train_data_" + str(nTrainSet) + ".gzip.h5"
            ns_train_set_x, ns_train_set_y = load_next_data(filename)
            train_set_x.set_value(ns_train_set_x, borrow=True)
            shared_train_set_y.set_value(
                numpy.asarray(ns_train_set_y, dtype=theano.config.floatX), borrow=True
            )
            # Only full minibatches are used; the count must be an int for range().
            n_train_batches = (
                train_set_x.get_value(borrow=True).shape[0] // batch_size
            )

            for minibatch_index in range(n_train_batches):
                cost_ij = train_model(minibatch_index)

        # at the end of each epoch run validation
        this_validation_loss = 0
        for nValSet in range(1, val_files_num + 1):
            filename = valid_dir + "rep_valid_data_" + str(nValSet) + ".gzip.h5"
            ns_valid_set_x, ns_valid_set_y = load_next_data(filename)
            valid_set_x.set_value(ns_valid_set_x, borrow=True)
            shared_valid_set_y.set_value(
                numpy.asarray(ns_valid_set_y, dtype=theano.config.floatX), borrow=True
            )
            n_valid_batches = (
                valid_set_x.get_value(borrow=True).shape[0] // batch_size
            )

            # compute zero-one loss on validation set
            validation_losses = [validate_model(i) for i in range(n_valid_batches)]
            this_validation_loss += numpy.mean(validation_losses)
        this_validation_loss /= val_files_num
        print((
            "epoch %i, minibatch %i/%i, validation error %f %%"
            % (
                epoch,
                minibatch_index + 1,
                n_train_batches,
                this_validation_loss * 100.0,
            )
        ))

        # save snapshots: one pickle record per layer, in layer order
        print("saving weights state, epoch = ", epoch)
        snapshot_path = weights_dir + "weights_epoch" + str(epoch) + ".save"
        # open() replaces the Python 2-only file() builtin; the context
        # manager closes the handle even if pickling fails.
        with open(snapshot_path, "wb") as f:
            for layer in (layer0, layer1, layer2, layer3, layer4):
                pickle.dump(
                    layer.__getstate__(), f, protocol=pickle.HIGHEST_PROTOCOL
                )

    end_time = time.time()
    print ("Optimization complete.")
    print((
        "The code for file "
        + os.path.split(__file__)[1]
        + " ran for %.2fm" % ((end_time - start_time) / 60.0)
    ), file=sys.stderr)
예제 #10
0
# Half-width of the symmetric uniform range the filter weights are drawn from.
W_bound = numpy.sqrt(6. / (fan_in + fan_out))

# Shared convolution filters, sampled uniformly from [-W_bound, W_bound].
convW = theano.shared(
    numpy.asarray(
        rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
        dtype=theano.config.floatX,
    ),
    name='conv_W',
    borrow=True,
)

# One zero-initialised bias per output feature map.
b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
convB = theano.shared(value=b_values, name='conv_b', borrow=True)

# Convolution + pooling layer built on the externally created W and b.
layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    filter_shape=filter_shape,
    poolsize=poolsize,
    W=convW,
    b=convB,
)

# Width of one flattened example after pooling.
layer0outputsize = nkerns[0] * sizeAfterPooling
layer0flattened = layer0.output.flatten(2).reshape(
    (batch_size_var, nkerns[0] * sizeAfterPooling))

# Optional attention layer over the layer's conv_out_tanh output.
if "internalOnH" in attentionMethod:
    layer1 = AttentionLayer(
        rng,
        thisInput=layer0.conv_out_tanh,
        batchsize=batch_size_var,
        dim1=nkerns[0],
        dim2=sizeAfterConv,
        method=attentionMethod,
        k=kattention,
    )
예제 #11
0
def main(args):
    """Score a test file with a saved model and write per-example predictions.

    Builds the symbolic graph (relation-composed mention convolution, type
    convolution sharing the same filters, cosine max-margin ranking layer),
    loads the pickled parameters from ``args.model_path``, evaluates the test
    set in batches and writes one line per example to ``args.test_result``:
    predicted label, confidence value, then one score per type.

    Args:
        args: Namespace providing ``embedding_size``, ``mention_context_size``,
            ``type_context_size``, ``embedding_path``, ``hidden_units``,
            ``learning_rate``, ``margin``, ``batch_size``, ``num_epochs``,
            ``model_path``, ``test``, ``test_result``, ``ontology_path``,
            ``norm_ontology_path``, ``relation_path`` and ``seen_types``.
    """
    # initial parameters
    embedding_size = args.embedding_size
    mention_context_size = args.mention_context_size
    type_context_size = args.type_context_size
    embedding_file = args.embedding_path
    hidden_units = args.hidden_units
    learning_rate = args.learning_rate
    margin = args.margin
    batch_size = args.batch_size
    n_epochs = args.num_epochs

    relation_size = 82
    nkerns = [500]
    filter_size = [1, 1]
    pool = [1, 1]

    network_file = args.model_path
    test_file = args.test
    test_result_file = args.test_result
    label_file = args.ontology_path
    label_file_norm = args.norm_ontology_path
    relation_file = args.relation_path
    train_type_flag = args.seen_types

    tup_representation_size = embedding_size * 2

    # load word vectors
    word_vectors, vector_size = load_word_vec(embedding_file)

    # read the test file
    print("start loading train and dev file ... ")
    doc_id_list_test, type_list_test, trigger_list_test, left_word_list_test, relation_list_test, \
        right_word_list_test = load_training_data(test_file)

    print("start loading arg and relation files ... ")
    all_type_list, all_type_structures = load_types_1(label_file_norm)
    rel_index, index_rel = read_relation_index(relation_file)
    type_size = len(all_type_list)

    # using a matrix to represent each relation
    relation_matrix = random_init_rel_vec_factor(
        relation_file, tup_representation_size * tup_representation_size)

    train_types = get_types_for_train(train_type_flag, label_file)

    # prepare data structure
    print("start preparing data structures ... ")
    curSeed = 23455
    rng = numpy.random.RandomState(curSeed)
    seed = rng.get_state()[1][0]
    print("seed: ", seed)

    result_index_test_matrix, result_vector_test_matrix, input_context_test_matrix, input_trigger_test_matrix, \
        relation_binary_test_matrix, pos_neg_test_matrix = input_matrix_1_test(
            type_list_test, trigger_list_test, left_word_list_test, relation_list_test, right_word_list_test,
            embedding_size, mention_context_size, relation_size, label_file, word_vectors, rel_index, train_type_flag)

    input_type_matrix, input_type_structure_matrix = type_matrix(
        all_type_list, all_type_structures, embedding_file, type_context_size)

    time1 = time.time()
    dt = theano.config.floatX
    test_set_content = theano.shared(
        numpy.matrix(input_context_test_matrix, dtype=dt))
    test_set_trigger = theano.shared(
        numpy.matrix(input_trigger_test_matrix, dtype=dt))
    test_set_relation_binary = theano.shared(
        numpy.matrix(relation_binary_test_matrix, dtype=dt))
    test_set_posneg = theano.shared(numpy.matrix(pos_neg_test_matrix,
                                                 dtype=dt))
    test_set_y = theano.shared(
        numpy.array(result_index_test_matrix, dtype=numpy.dtype(numpy.int32)))
    test_set_y_vector = theano.shared(
        numpy.matrix(result_vector_test_matrix, dtype=dt))

    train_set_type = theano.shared(numpy.matrix(input_type_matrix, dtype=dt))
    train_set_type_structure = theano.shared(
        numpy.matrix(input_type_structure_matrix, dtype=dt))

    train_types = theano.shared(numpy.matrix(train_types, dtype=dt))

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x_content = T.matrix('x_content')  # mention context, reshaped to 4D below
    x_trigger = T.matrix('x_trigger')  # trigger features, one row per example
    x_relation_binary = T.matrix('x_relation_binary')
    x_pos_neg_flag = T.matrix('x_pos_neg_flag')
    x_type = T.matrix('x_type')
    x_type_structure = T.matrix('x_type_structure')
    y = T.ivector('y')  # integer label indices
    y_vector = T.matrix('y_vector')  # labels in matrix form
    x_train_types = T.matrix('x_train_types')

    # size of one context matrix: (representation size, context length)
    i_shape = [tup_representation_size, mention_context_size]

    time2 = time.time()
    print("time for preparing data structures: ", time2 - time1)

    # build actual model

    print('start building the model ... ')
    time1 = time.time()

    rel_w = theano.shared(value=relation_matrix, borrow=True)

    # Construct the mention structure input Layer
    layer0_input = x_content.reshape((batch_size, 1, i_shape[0], i_shape[1]))
    layer0_input_binary_relation = x_relation_binary.reshape(
        (batch_size, 1, relation_size, i_shape[1]))

    # compose amr relation matrix to each tuple
    compose_layer = ComposeLayerMatrix(
        input=layer0_input,
        input_binary_relation=layer0_input_binary_relation,
        rel_w=rel_w,
        rel_vec_size=tup_representation_size)
    layer1_input = compose_layer.output

    # initialize the convolution weight matrix
    filter_shape = (nkerns[0], 1, tup_representation_size, filter_size[1])
    pool_size = (pool[0], pool[1])

    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(pool_size))

    w_bound = numpy.sqrt(6. / (fan_in + fan_out))

    conv_w = theano.shared(numpy.asarray(rng.uniform(low=-w_bound,
                                                     high=w_bound,
                                                     size=filter_shape),
                                         dtype=theano.config.floatX),
                           borrow=True)

    b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
    conv_b = theano.shared(value=b_values, borrow=True)

    # conv with pool layer
    layer1_conv = LeNetConvPoolLayer(rng,
                                     W=conv_w,
                                     b=conv_b,
                                     input=layer1_input,
                                     image_shape=(batch_size, 1, i_shape[0],
                                                  i_shape[1]),
                                     filter_shape=filter_shape,
                                     poolsize=pool_size)

    layer1_output = layer1_conv.output
    layer1_flattened = layer1_output.flatten(2)

    trigger_features_shaped = x_trigger.reshape((batch_size, embedding_size))

    layer2_input = T.concatenate([layer1_flattened, trigger_features_shaped],
                                 axis=1)

    # Construct the type structure input Layer
    layer_type_input = x_type_structure.reshape(
        (type_size, 1, tup_representation_size, type_context_size))
    filter_shape_type = (nkerns[0], 1, tup_representation_size, filter_size[1])
    pool_size_type = (pool[0], pool[1])

    # initialize the implicit relation tensor
    type_tensor_shape = (tup_representation_size, tup_representation_size,
                         tup_representation_size)
    type_tensor_w = theano.shared(numpy.asarray(rng.uniform(
        low=-w_bound, high=w_bound, size=type_tensor_shape),
                                                dtype=theano.config.floatX),
                                  borrow=True)

    # compose relation tensor to each tuple
    compose_type_layer = ComposeLayerTensor(input=layer_type_input,
                                            tensor=type_tensor_w)
    layer_type_input1 = compose_type_layer.output

    # conv with pool layer; shares conv_w / conv_b with the mention side
    layer1_conv_type = LeNetConvPoolLayer(rng,
                                          W=conv_w,
                                          b=conv_b,
                                          input=layer_type_input1,
                                          image_shape=(type_size, 1,
                                                       tup_representation_size,
                                                       type_context_size),
                                          filter_shape=filter_shape_type,
                                          poolsize=pool_size_type)

    layer1_type_output = layer1_conv_type.output
    layer1_type_flattened = layer1_type_output.flatten(2)

    types_shaped = x_type.reshape((type_size, embedding_size))

    layer2_type_input = T.concatenate([layer1_type_flattened, types_shaped],
                                      axis=1)
    # NOTE(review): `**` looks like it may have been meant as `*`; with the
    # hard-coded pool = [1, 1] both give nkerns[0]. Left unchanged - confirm.
    layer2_type_input_size = nkerns[0]**pool[1] + embedding_size

    # ranking based max margin loss layer
    train_types_signal = x_train_types.reshape((type_size, 1))
    pos_neg_flag = x_pos_neg_flag.reshape((batch_size, 1))

    layer3 = MaxRankingMarginCosine1(rng=rng,
                                     input=layer2_input,
                                     input_label=layer2_type_input,
                                     true_label=y_vector,
                                     n_in=layer2_type_input_size,
                                     margin=margin,
                                     batch_size=batch_size,
                                     type_size=type_size,
                                     train_type_signal=train_types_signal,
                                     pos_neg_flag=pos_neg_flag)
    cost = layer3.loss

    # create a list of all model parameters
    param_list = [
        compose_layer.params, layer1_conv.params, compose_type_layer.params
    ]

    params = []
    for p in param_list:
        params += p

    # NOTE(review): the regularised cost, gradients and updates below are
    # never passed to a compiled function in this routine; they are kept so
    # that graph construction is unchanged.
    lambd1 = T.scalar('lambda1', dt)
    lambd2 = T.scalar('lambda2', dt)

    # L1 and L2 regularization possible
    reg2 = 0
    reg1 = 0
    for p in param_list:
        reg2 += T.sum(p[0]**2)
        reg1 += T.sum(abs(p[0]))

    cost += lambd2 * reg2
    cost += lambd1 * reg1

    lr = T.scalar('lr', dt)

    start = index * batch_size
    end = (index + 1) * batch_size

    # Per-batch substitutions; the type inputs are passed whole.
    testVariables = {}
    testVariables[x_content] = test_set_content[start:end]
    testVariables[x_trigger] = test_set_trigger[start:end]
    testVariables[x_relation_binary] = test_set_relation_binary[start:end]
    testVariables[x_type] = train_set_type
    testVariables[x_type_structure] = train_set_type_structure
    testVariables[y] = test_set_y[start:end]
    testVariables[y_vector] = test_set_y_vector[start:end]
    testVariables[x_train_types] = train_types
    testVariables[x_pos_neg_flag] = test_set_posneg[start:end]

    print("length of train variables ", len(testVariables))

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - lr * grad_i))

    test_model_confidence = theano.function([index],
                                            layer3.results(y),
                                            on_unused_input='ignore',
                                            givens=testVariables)

    time2 = time.time()
    print("time for building the model: ", time2 - time1)

    print("loading saved network")
    # Pickle streams are binary, so the file is opened in 'rb'; the context
    # manager closes it even if a load fails.
    # NOTE: unpickling executes arbitrary code - only load trusted model files.
    with open(network_file, 'rb') as netfile:
        relW = cPickle.load(netfile)
        convolW = cPickle.load(netfile)
        convolB = cPickle.load(netfile)
        typeW = cPickle.load(netfile)

    compose_layer.params[0].set_value(relW, borrow=True)
    layer1_conv.params[0].set_value(convolW, borrow=True)
    layer1_conv.params[1].set_value(convolB, borrow=True)
    layer1_conv_type.params[0].set_value(convolW, borrow=True)
    layer1_conv_type.params[1].set_value(convolB, borrow=True)
    compose_type_layer.params[0].set_value(typeW, borrow=True)

    print("finish loading network")

    # NOTE(review): the compiled function slices with args.batch_size, while
    # the loops below assume 100 rows per batch; the two only agree when
    # batch_size == 100. Left unchanged - confirm against callers.
    test_batch_size = 100
    # Floor division: the batch count must be an int to drive range().
    all_batches = len(result_index_test_matrix) // test_batch_size

    confidence_prob = []
    confidence_value = []
    confidence_list = []
    confidence = [test_model_confidence(i) for i in range(all_batches)]
    for batch_result in confidence:
        for r1 in range(test_batch_size):
            hypo_result = batch_result[0].item(r1)
            confidence_prob.append(batch_result[2][r1])
            confidence_value.append(batch_result[1][r1])
            confidence_list.append(hypo_result)

    y_pred = confidence_list

    # One line per example: label <TAB> confidence <TAB> space-separated scores.
    with open(test_result_file, "w") as f:
        for label, value, probs in zip(y_pred, confidence_value,
                                       confidence_prob):
            f.write(str(label) + "\t" + str(value) + "\t")
            f.write("".join(str(probs[j]) + " " for j in range(type_size)))
            f.write("\n")
예제 #12
0
    def build_model(self, flag_preserve_params=False):
        """Build the symbolic network, its cost, parameter list and gradients.

        The graph is two ``LeNetConvPoolLayer`` stages, one ``HiddenLayer``
        and a ``LogisticRegression`` output.  Sets ``self.index``, ``self.x``,
        ``self.y``, ``self.layer0`` .. ``self.layer3``, ``self.cost``,
        ``self.params``, ``self.grads`` and ``self.errors``.

        Args:
            flag_preserve_params: When true and ``self.params`` already
                exists, the previous parameter values are copied into the
                freshly built layers.  This is only meaningful if the network
                structure has not changed since the previous build.

        Raises:
            ValueError: If ``self.cost_type`` is not one of ``'nll_softmax'``,
                ``'ssd_softmax'``, ``'ssd_hidden'`` or ``'ssd_conv'``.
        """
        logging.info('... building the model')

        # allocate symbolic variables for the data
        self.index = T.lscalar()  # index to a [mini]batch
        self.x = T.matrix('x')    # one flattened image per row

        # Targets as a real-valued matrix; replaced by an integer vector
        # below when the cost is 'nll_softmax'.
        self.y = T.matrix('y')

        # Reshape the flat rows into the 4D tensor the conv layers expect.
        self.layer0_input = self.x.reshape(
            (self.batch_size, self.img_dim, self.img_size, self.img_size))

        # First convolution + pooling stage.
        self.layer0 = LeNetConvPoolLayer(
            self.rng, input=self.layer0_input,
            image_shape=(self.batch_size, self.img_dim,
                         self.img_size, self.img_size),
            filter_shape=(self.nkerns[0], self.img_dim,
                          self.filtersize[0], self.filtersize[0]),
            poolsize=(self.poolsize[0], self.poolsize[0]),
            activation=self.conv_activation)

        # Side length after stage 0: (size - filter + 1) then pooling.
        # Floor division keeps the shape entry an int under Python 3.
        self.img_size1 = (
            (self.img_size - self.filtersize[0] + 1) // self.poolsize[0])

        # Second convolution + pooling stage.
        self.layer1 = LeNetConvPoolLayer(
            self.rng, input=self.layer0.output,
            image_shape=(self.batch_size, self.nkerns[0],
                         self.img_size1, self.img_size1),
            filter_shape=(self.nkerns[1], self.nkerns[0],
                          self.filtersize[1], self.filtersize[1]),
            poolsize=(self.poolsize[1], self.poolsize[1]),
            activation=self.conv_activation)

        # The hidden layer is fully connected, so flatten to 2D.
        self.layer2_input = self.layer1.output.flatten(2)

        self.img_size2 = (
            (self.img_size1 - self.filtersize[1] + 1) // self.poolsize[1])

        # fully-connected hidden layer
        self.layer2 = HiddenLayer(
            self.rng, input=self.layer2_input,
            n_in=self.nkerns[1] * self.img_size2 * self.img_size2,
            n_out=self.num_hidden,
            activation=self.hidden_activation)

        # classify the values of the fully-connected layer
        self.layer3 = LogisticRegression(
            input=self.layer2.output,
            n_in=self.num_hidden,
            n_out=self.num_class,
            activation=self.logreg_activation)

        # L1 + L2 weight decay on the hidden and output weights
        self.decay_hidden = self.alpha_l1 * abs(self.layer2.W).sum() + \
            self.alpha_l2 * (self.layer2.W ** 2).sum()

        self.decay_softmax = self.alpha_l1 * abs(self.layer3.W).sum() + \
            self.alpha_l2 * (self.layer3.W ** 2).sum()

        # select the cost model
        if self.cost_type == 'nll_softmax':
            # labels are used as indices, so they have to be integers
            self.y = T.ivector('y')
            self.cost = self.layer3.negative_log_likelihood(self.y) + \
                self.decay_hidden + self.decay_softmax + \
                self.alpha_entropy * self.layer3.p_y_entropy

        elif self.cost_type == 'ssd_softmax':
            self.cost = T.mean((self.layer3.p_y_given_x - self.y) ** 2) + \
                self.decay_hidden + self.decay_softmax

        elif self.cost_type == 'ssd_hidden':
            self.cost = T.mean((self.layer2.output - self.y) ** 2) + \
                self.decay_hidden

        elif self.cost_type == 'ssd_conv':
            self.cost = T.mean((self.layer2_input - self.y) ** 2)

        else:
            # Without this branch self.cost would stay unset, or silently
            # keep the value from a previous build.
            raise ValueError('unknown cost_type: %r' % (self.cost_type,))

        # Remember the previous parameters before self.params is replaced.
        if flag_preserve_params and hasattr(self, 'params'):
            params_temp = copy.deepcopy(self.params)
        else:
            params_temp = None

        # all model parameters to be fit by gradient descent
        self.params = self.layer3.params + self.layer2.params + \
            self.layer1.params + self.layer0.params

        # if needed, assign old parameters position by position
        if flag_preserve_params and (params_temp is not None):
            for ind, old_param in enumerate(params_temp):
                self.params[ind].set_value(old_param.get_value(), borrow=True)

        # create a list of gradients for all model parameters
        self.grads = T.grad(self.cost, self.params, disconnected_inputs='warn')

        # error function from the last layer logistic regression
        self.errors = self.layer3.errors
예제 #13
0
def build_lenet(config):
    """Compile forward and forward+backward Theano functions for a LeNet.

    The network is two convolution+pooling layers (5x5 filters, 2x2 pooling),
    one 500-unit fully-connected layer and a logistic-regression classifier.
    The compiled functions take no arguments: inputs and labels are bound
    through ``givens`` to randomly generated shared data, so the functions
    exercise the computation rather than train on real data.

    Attributes read from ``config``: ``image_width``, ``batch_size``,
    ``num_kerns`` (indexed at 0 and 1) and ``ydim``.

    Returns:
        A ``(forward_step, forward_backward_step)`` pair. ``forward_step``
        evaluates the cost and stores it in a shared variable;
        ``forward_backward_step`` returns the gradients of the cost with
        respect to all weights and biases.
    """
    rng = np.random.RandomState(23455)

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector

    image_width = config.image_width
    batch_size = config.batch_size
    image_size = image_width**2

    # Random stand-in data; drawn from numpy's global RNG, not from ``rng``.
    x_shared = T.cast(theano.shared(np.random.rand(batch_size, image_size),
                                    borrow=True), theano.config.floatX)
    y_shared = T.cast(theano.shared(np.random.randint(config.ydim,
                                                      size=batch_size),
                                    borrow=True), 'int32')

    # Feature-map widths after each conv+pool stage.  These used to be the
    # literals 12 and 4, which are only right for a 28-pixel-wide input.
    # NOTE(review): assumes LeNetConvPoolLayer does a "valid" convolution
    # followed by non-overlapping pooling that drops the border -- confirm.
    filter_width = 5
    pool_width = 2
    map0_width = (image_width - filter_width + 1) // pool_width
    map1_width = (map0_width - filter_width + 1) // pool_width

    layer0_input = x.reshape((batch_size, 1, image_width, image_width))

    # construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, image_width, image_width),
        filter_shape=(config.num_kerns[0], 1, filter_width, filter_width),
        poolsize=(pool_width, pool_width)
    )

    # construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, config.num_kerns[0], map0_width, map0_width),
        filter_shape=(config.num_kerns[1], config.num_kerns[0],
                      filter_width, filter_width),
        poolsize=(pool_width, pool_width)
    )

    # flatten every example's feature maps into one row
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=config.num_kerns[1] * map1_width * map1_width,
        n_out=500,
        activation=relu
    )

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500,
                                n_out=config.ydim)

    # the cost is the negative log-likelihood of the model
    cost = layer3.negative_log_likelihood(y)

    # all model parameters: weights first, then biases
    params_W = [layer3.W, layer2.W, layer1.W, layer0.W]
    params_b = [layer3.b, layer2.b, layer1.b, layer0.b]
    params = params_W + params_b

    # Writing the cost into a shared variable forces it to be computed even
    # though forward_step has no outputs.
    shared_cost = theano.shared(np.float32(0.0))
    grads_temp = T.grad(cost, params)
    start_compilation = time.time()
    forward_step = theano.function([], [], updates=[(shared_cost, cost)],
                                   givens={x: x_shared, y: y_shared})
    forward_backward_step = theano.function([], grads_temp,
                                            givens={x: x_shared, y: y_shared})
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print('compilation time: %.4f s' % (time.time() - start_compilation))
    return forward_step, forward_backward_step
# Script fragment: builds one set of convolution parameters and feeds the
# same parameters to several LeNetConvPoolLayer instances.
# NOTE(review): rng, W_bound, filter_shape, poolsize, ishape, xa/xb and the
# layer0*_input variables are defined outside this fragment.

# the convolution weight matrix, drawn uniformly from [-W_bound, W_bound]
convW = theano.shared(numpy.asarray(rng.uniform(low=-W_bound,
                                                high=W_bound,
                                                size=filter_shape),
                                    dtype=theano.config.floatX),
                      name='conv_W',
                      borrow=True)

# the bias is a 1D tensor -- one bias per output feature map
b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
convB = theano.shared(value=b_values, name='conv_b', borrow=True)

# layer0a and layer0b receive the very same convW/convB shared variables,
# so both layers use (and any update would modify) one set of filters.
# The batch dimension of each image_shape is the symbolic xa/xb.shape[0].
layer0a = LeNetConvPoolLayer(rng,
                             W=convW,
                             b=convB,
                             input=layer0a_input,
                             image_shape=(xa.shape[0], 1, ishape[0],
                                          ishape[1]),
                             filter_shape=filter_shape,
                             poolsize=poolsize)
layer0b = LeNetConvPoolLayer(rng,
                             W=convW,
                             b=convB,
                             input=layer0b_input,
                             image_shape=(xb.shape[0], 1, ishape[0],
                                          ishape[1]),
                             filter_shape=filter_shape,
                             poolsize=poolsize)
layer0c = LeNetConvPoolLayer(rng,
                             W=convW,
                             b=convB,
                             input=layer0c_input,
def _shared_matrix(data, dtype):
    """Return *data* as a Theano shared variable holding a numpy.matrix."""
    return theano.shared(numpy.matrix(data, dtype=dtype))


def _shared_int32(data):
    """Return *data* as a Theano shared variable holding an int32 array."""
    return theano.shared(numpy.array(data, dtype=numpy.dtype(numpy.int32)))


def _score_predictions(predict_batch, n_batches, batch_size, gold, labels):
    """Run *predict_batch* on every batch; return (micro F1, macro F1).

    The first output of each batch call is read element by element, and the
    gold labels are truncated to the number of examples covered by full
    batches.
    """
    outputs = [predict_batch(i) for i in range(n_batches)]
    y_pred = [output[0].item(k)
              for output in outputs
              for k in range(batch_size)]
    y_true = [int(label) for label in gold[:n_batches * batch_size]]
    micro = f1_score(y_true, y_pred, labels=labels, average='micro')
    macro = f1_score(y_true, y_pred, labels=labels, average='macro')
    return micro, macro


def main(args):
    """Train the argument-role ranking model and pickle its best parameters.

    The function loads word vectors and the train/dev/test argument data,
    builds a Theano graph (relation composition -> shared convolution ->
    cosine max-margin ranking layer), trains it with an RMSProp-style
    update, scores test and dev data once per epoch with micro/macro F1,
    and finally dumps the parameters of the best dev epoch to
    ``args.model_path``.

    Attributes read from ``args``: ``embedding_size``, ``arg_context_size``,
    ``role_context_size``, ``embedding_path``, ``hidden_units``,
    ``learning_rate``, ``margin``, ``batch_size``, ``num_epochs``,
    ``model_path``, ``train``, ``dev``, ``test``, ``ontology_path``,
    ``norm_ontology_path``, ``relation_path``, ``seen_args``,
    ``arg_path_file``, ``arg_path_file_universal`` and
    ``trigger_role_matrix``.
    """
    # initial parameters
    embedding_size = args.embedding_size
    arg_context_size = args.arg_context_size
    role_context_size = args.role_context_size
    embedding_file = args.embedding_path
    hidden_units = args.hidden_units
    learning_rate = args.learning_rate
    margin = args.margin
    batch_size = args.batch_size
    n_epochs = args.num_epochs

    relation_size = 82
    nkerns = [500]
    filter_size = [1, 1]
    pool = [1, 1]
    l1 = 0.001
    l2 = 0.002

    # newbob=True would switch to a dev-score driven learning-rate schedule
    newbob = False
    arg_network_file = args.model_path
    arg_train_file = args.train
    arg_dev_file = args.dev
    arg_test_file = args.test
    arg_label_file = args.ontology_path
    arg_label_file_norm = args.norm_ontology_path
    relation_file = args.relation_path
    train_role_flag = args.seen_args
    arg_path_file_merge = args.arg_path_file
    arg_path_file_universal = args.arg_path_file_universal  # read but unused
    trigger_role_matrix_file = args.trigger_role_matrix
    tup_representation_size = embedding_size * 2

    # load word vectors
    word_vectors, vector_size = load_word_vec(embedding_file)

    # read train, dev and test files
    print("start loading train and dev file ... ")

    arg_trigger_list_train, arg_trigger_type_list_train, arg_list_train, arg_path_left_list_train, \
        arg_path_rel_list_train, arg_path_right_list_train, arg_role_list_train = load_arg_data(arg_train_file)

    arg_trigger_list_dev, arg_trigger_type_list_dev, arg_list_dev, arg_path_left_list_dev, \
        arg_path_rel_list_dev, arg_path_right_list_dev, arg_role_list_dev = load_arg_data(arg_dev_file)

    arg_trigger_list_test, arg_trigger_type_list_test, arg_list_test, arg_path_left_list_test, \
        arg_path_rel_list_test, arg_path_right_list_test, arg_role_list_test = load_arg_data(arg_test_file)

    num_examples_per_epoch = len(arg_trigger_list_train)

    print("start loading arg and relation files ... ")
    all_type_list, all_type_structures = load_types(arg_label_file_norm)
    type_size = len(all_type_list)

    all_arg_role_list, all_type_role_structures, index_2_role, trigger_role_2_index, index_2_norm_role, \
        trigger_norm_role_2_index = load_roles_1(arg_path_file_merge)
    role_size = len(all_arg_role_list)

    trigger_role_matrix = get_trigger_arg_matrix(trigger_role_matrix_file,
                                                 type_size, role_size)
    train_roles = get_roles_for_train_1(train_role_flag, arg_path_file_merge)

    rel_2_index, index_2_rel = read_relation_index(relation_file)
    relation_matrix = random_init_rel_vec_factor(
        relation_file, tup_representation_size * tup_representation_size)

    print("start preparing data structures ... ")
    curSeed = 23455
    rng = numpy.random.RandomState(curSeed)
    seed = rng.get_state()[1][0]
    print("seed: ", seed)

    # arg data matrices (training uses input_arg_matrix, dev/test use the
    # *_test variant)
    role_index_train_matrix, role_vector_train_matrix, input_arg_context_train_matrix, input_arg_train_matrix, \
        arg_relation_binary_train_matrix, pos_neg_role_train_matrix, limited_roles_train_matrix = \
        input_arg_matrix(arg_trigger_list_train, arg_trigger_type_list_train, arg_list_train, arg_path_left_list_train,
                         arg_path_rel_list_train, arg_path_right_list_train, arg_role_list_train, word_vectors,
                         all_arg_role_list, trigger_role_2_index, vector_size, arg_context_size, relation_size,
                         rel_2_index, train_roles, trigger_role_matrix, arg_label_file)

    role_index_dev_matrix, role_vector_dev_matrix, input_arg_context_dev_matrix, input_arg_dev_matrix, \
        arg_relation_binary_dev_matrix, pos_neg_role_dev_matrix, limited_roles_dev_matrix = \
        input_arg_matrix_test(arg_trigger_list_dev, arg_trigger_type_list_dev, arg_list_dev, arg_path_left_list_dev,
                              arg_path_rel_list_dev, arg_path_right_list_dev, arg_role_list_dev, word_vectors,
                              all_arg_role_list, trigger_role_2_index, vector_size, arg_context_size, relation_size,
                              rel_2_index, train_roles, trigger_role_matrix, arg_label_file)

    role_index_test_matrix, role_vector_test_matrix, input_arg_context_test_matrix, input_arg_test_matrix, \
        arg_relation_binary_test_matrix, pos_neg_role_test_matrix, limited_roles_test_matrix = \
        input_arg_matrix_test(arg_trigger_list_test, arg_trigger_type_list_test, arg_list_test, arg_path_left_list_test,
                              arg_path_rel_list_test, arg_path_right_list_test, arg_role_list_test, word_vectors,
                              all_arg_role_list, trigger_role_2_index, vector_size, arg_context_size, relation_size,
                              rel_2_index, train_roles, trigger_role_matrix, arg_label_file)

    input_role_matrix, input_role_structure_matrix = role_matrix_1(
        all_arg_role_list, all_type_role_structures, embedding_file,
        role_context_size)

    time1 = time.time()
    dt = theano.config.floatX

    # move every data matrix into a Theano shared variable
    train_set_content_arg = _shared_matrix(input_arg_context_train_matrix, dt)
    valid_set_content_arg = _shared_matrix(input_arg_context_dev_matrix, dt)
    test_set_content_arg = _shared_matrix(input_arg_context_test_matrix, dt)

    train_set_arg = _shared_matrix(input_arg_train_matrix, dt)
    valid_set_arg = _shared_matrix(input_arg_dev_matrix, dt)
    test_set_arg = _shared_matrix(input_arg_test_matrix, dt)

    train_set_relation_binary_arg = _shared_matrix(
        arg_relation_binary_train_matrix, dt)
    valid_set_relation_binary_arg = _shared_matrix(
        arg_relation_binary_dev_matrix, dt)
    test_set_relation_binary_arg = _shared_matrix(
        arg_relation_binary_test_matrix, dt)

    train_set_posneg_arg = _shared_matrix(pos_neg_role_train_matrix, dt)
    valid_set_posneg_arg = _shared_matrix(pos_neg_role_dev_matrix, dt)
    test_set_posneg_arg = _shared_matrix(pos_neg_role_test_matrix, dt)

    train_set_arg_y = _shared_int32(role_index_train_matrix)
    valid_set_arg_y = _shared_int32(role_index_dev_matrix)
    test_set_arg_y = _shared_int32(role_index_test_matrix)

    train_set_arg_y_vector = _shared_matrix(role_vector_train_matrix, dt)
    valid_set_arg_y_vector = _shared_matrix(role_vector_dev_matrix, dt)
    test_set_arg_y_vector = _shared_matrix(role_vector_test_matrix, dt)

    train_set_arg_limited_role = _shared_matrix(limited_roles_train_matrix, dt)
    valid_set_arg_limited_role = _shared_matrix(limited_roles_dev_matrix, dt)
    test_set_arg_limited_role = _shared_matrix(limited_roles_test_matrix, dt)

    train_set_role = _shared_matrix(input_role_matrix, dt)
    train_set_role_structure = _shared_matrix(input_role_structure_matrix, dt)

    # from here on train_roles is the shared-variable version
    train_roles = _shared_matrix(train_roles, dt)

    # Number of full minibatches for validation and testing.  Floor division
    # keeps these integers on Python 3 as well (they feed range()).
    n_valid_batches = input_arg_dev_matrix.shape[0] // batch_size
    n_test_batches = input_arg_test_matrix.shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x_content_arg = T.matrix('x_content_arg')
    x_arg = T.matrix('x_arg')
    x_relation_binary_arg = T.matrix('x_relation_binary_arg')
    x_pos_neg_flag_arg = T.matrix('x_pos_neg_flag_arg')
    x_role = T.matrix('x_role')
    x_role_structure = T.matrix('x_role_structure')
    x_train_roles = T.matrix('x_train_roles')
    arg_y = T.ivector('arg_y')  # [int] labels
    arg_y_vector = T.matrix('arg_y_vector')
    arg_limited_role = T.matrix('arg_limited_role')

    # size of the context matrices
    ishape = [tup_representation_size, arg_context_size]

    time2 = time.time()
    print("time for preparing data structures: ", time2 - time1)

    # build the actual model
    print('start building the model ... ')
    time1 = time.time()

    # argument representation layer
    layer0_arg_input = x_content_arg.reshape(
        (batch_size, 1, ishape[0], ishape[1]))
    layer0_input_binary_relation = x_relation_binary_arg.reshape(
        (batch_size, 1, relation_size, ishape[1]))

    # compose amr relation matrix to each tuple
    rel_w = theano.shared(value=relation_matrix, borrow=True)
    compose_layer = ComposeLayerMatrix(
        input=layer0_arg_input,
        input_binary_relation=layer0_input_binary_relation,
        rel_w=rel_w,
        rel_vec_size=tup_representation_size)

    layer1_input = compose_layer.output

    filter_shape = (nkerns[0], 1, tup_representation_size, filter_size[1])
    pool_size = (pool[0], pool[1])
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(pool_size))

    # uniform initialisation bound derived from fan-in and fan-out
    w_bound = numpy.sqrt(6. / (fan_in + fan_out))
    conv_w = theano.shared(numpy.asarray(rng.uniform(low=-w_bound,
                                                     high=w_bound,
                                                     size=filter_shape),
                                         dtype=theano.config.floatX),
                           borrow=True)
    b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
    conv_b = theano.shared(value=b_values, borrow=True)

    layer1_arg_conv = LeNetConvPoolLayer(rng,
                                         W=conv_w,
                                         b=conv_b,
                                         input=layer1_input,
                                         image_shape=(batch_size, 1, ishape[0],
                                                      arg_context_size),
                                         filter_shape=filter_shape,
                                         poolsize=pool_size)

    layer1_arg_flattened = layer1_arg_conv.output.flatten(2)
    arg_features_shaped = x_arg.reshape((batch_size, embedding_size))
    layer2_arg_input = T.concatenate(
        [layer1_arg_flattened, arg_features_shaped], axis=1)
    layer2_arg_input_size = nkerns[0] * pool[1] + embedding_size

    # arg role representation layer
    layer_role_input = x_role_structure.reshape(
        (role_size, 1, tup_representation_size, role_context_size))
    filter_shape_role = (nkerns[0], 1, tup_representation_size, filter_size[1])
    pool_size_role = (pool[0], pool[1])

    # initialize the implicit relation tensor
    type_tensor_shape = (tup_representation_size, tup_representation_size,
                         tup_representation_size)
    type_tensor_w = theano.shared(numpy.asarray(rng.uniform(
        low=-w_bound, high=w_bound, size=type_tensor_shape),
                                                dtype=theano.config.floatX),
                                  borrow=True)

    # compose relation tensor to each tuple
    compose_type_layer = ComposeLayerTensor(input=layer_role_input,
                                            tensor=type_tensor_w)
    layer_type_input1 = compose_type_layer.output

    # the role branch reuses conv_w/conv_b from the argument branch
    layer1_conv_role = LeNetConvPoolLayer(rng,
                                          W=conv_w,
                                          b=conv_b,
                                          input=layer_type_input1,
                                          image_shape=(role_size, 1,
                                                       tup_representation_size,
                                                       role_context_size),
                                          filter_shape=filter_shape_role,
                                          poolsize=pool_size_role)

    layer1_role_flattened = layer1_conv_role.output.flatten(2)

    role_shaped = x_role.reshape((role_size, embedding_size))

    layer2_role_input = T.concatenate([layer1_role_flattened, role_shaped],
                                      axis=1)
    # Same width formula as layer2_arg_input_size.  This used to be
    # ``nkerns[0]**pool[1]``, which only matched because pool[1] == 1.
    layer2_role_input_size = nkerns[0] * pool[1] + embedding_size

    # ranking based max margin loss layer
    train_roles_signal = x_train_roles.reshape((role_size, 1))
    pos_neg_flag_arg = x_pos_neg_flag_arg.reshape((batch_size, 1))
    limited_role = arg_limited_role.reshape((batch_size, role_size))

    layer3 = MaxRankingMarginCosine1Arg1(rng=rng,
                                         input=layer2_arg_input,
                                         input_label=layer2_role_input,
                                         true_label=arg_y_vector,
                                         n_in=layer2_arg_input_size,
                                         n_in2=layer2_role_input_size,
                                         margin=margin,
                                         batch_size=batch_size,
                                         type_size=role_size,
                                         train_type_signal=train_roles_signal,
                                         pos_neg_flag=pos_neg_flag_arg,
                                         limited_role=limited_role)

    # cost and parameters update
    cost = layer3.loss
    # parameter groups to be fit by gradient descent; layer1_conv_role is
    # not listed because it was built on the same conv_w/conv_b
    param_list = [
        layer1_arg_conv.params, compose_layer.params, compose_type_layer.params
    ]

    params = []
    for group in param_list:
        params += group

    # regularisation strengths are supplied at call time
    lambd1 = T.scalar('lambda1', dt)
    lambd2 = T.scalar('lambda2', dt)

    # L1 and L2 penalties on the first parameter of every group
    reg2 = 0
    reg1 = 0
    for group in param_list:
        reg2 += T.sum(group[0]**2)
        reg1 += T.sum(abs(group[0]))

    print("reg1 ", reg1)
    print("reg2 ", reg2)

    # training cost = ranking loss + weighted L2 + weighted L1
    cost += lambd2 * reg2
    cost += lambd1 * reg1

    lr = T.scalar('lr', dt)

    start = index * batch_size
    end = (index + 1) * batch_size

    def batch_givens(content, arg, relation_binary, gold, gold_vectors,
                     pos_neg, limited_roles):
        """Map the symbolic inputs to minibatch ``index`` of one data split.

        The role matrices and the train-role signal are not sliced: every
        split uses the same full matrices.
        """
        return {
            x_content_arg: content[start:end],
            x_arg: arg[start:end],
            x_role: train_set_role,
            x_role_structure: train_set_role_structure,
            x_relation_binary_arg: relation_binary[start:end],
            arg_y: gold[start:end],
            arg_y_vector: gold_vectors[start:end],
            x_train_roles: train_roles,
            x_pos_neg_flag_arg: pos_neg[start:end],
            arg_limited_role: limited_roles[start:end],
        }

    valid_givens = batch_givens(
        valid_set_content_arg, valid_set_arg, valid_set_relation_binary_arg,
        valid_set_arg_y, valid_set_arg_y_vector, valid_set_posneg_arg,
        valid_set_arg_limited_role)
    test_givens = batch_givens(
        test_set_content_arg, test_set_arg, test_set_relation_binary_arg,
        test_set_arg_y, test_set_arg_y_vector, test_set_posneg_arg,
        test_set_arg_limited_role)
    train_givens = batch_givens(
        train_set_content_arg, train_set_arg, train_set_relation_binary_arg,
        train_set_arg_y, train_set_arg_y_vector, train_set_posneg_arg,
        train_set_arg_limited_role)

    # report the number of bound inputs (the message always said "length")
    print("length of train variables ", len(train_givens))

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # RMSProp-style update: every gradient is divided by the root of a
    # running average of its square before the learning-rate step.
    updates = []
    rho = 0.9
    epsilon = 1e-6
    for p, g in zip(params, grads):
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g**2
        gradient_scaling = T.sqrt(acc_new + epsilon)
        g = g / gradient_scaling
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g))

    test_model_confidence = theano.function([index],
                                            layer3.results(arg_y),
                                            on_unused_input='ignore',
                                            givens=test_givens)
    eval_model_confidence = theano.function([index],
                                            layer3.results(arg_y),
                                            on_unused_input='ignore',
                                            givens=valid_givens)
    train_model = theano.function([index, lr, lambd1, lambd2],
                                  [cost, layer3.loss],
                                  updates=updates,
                                  on_unused_input='ignore',
                                  givens=train_givens)

    time2 = time.time()
    print("time for building the model: ", time2 - time1)

    # train the model
    print('start training ... ')
    time1 = time.time()

    # Full minibatches per epoch; also the validation period, so test and
    # dev data are scored once at the end of every epoch.
    this_n_train_batches = num_examples_per_epoch // batch_size
    validation_frequency = this_n_train_batches

    best_params = []
    best_fscore = -1
    last_fscore = -1
    best_fscore_m1 = -1
    best_iter = 0
    best_fscoreEval = -1
    best_fscore_m1Eval = -1

    epoch = 0
    done_looping = False

    maxNoImprovement = 5
    noImprovement = 0

    # label sets used for scoring: a fixed subset on test, every role
    # index except 0 on dev
    test_labels = [13, 14, 15, 16, 17]
    dev_labels = list(range(1, role_size))

    while (epoch < n_epochs) and (not done_looping):
        print('epoch = ', epoch)
        epoch = epoch + 1

        for minibatch_index in range(this_n_train_batches):

            iteration = (epoch - 1) * this_n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)

            cost_ij, loss = train_model(minibatch_index, learning_rate, l1, l2)

            print("cost: ", cost_ij)
            print("loss1:   ", loss)

            if (iteration + 1) % validation_frequency == 0:

                # test data
                this_fscore_eval, this_fscore_macro_eval = _score_predictions(
                    test_model_confidence, n_test_batches, batch_size,
                    role_index_test_matrix, test_labels)
                print(
                    'EVAL: ***   epoch %i, best_validation %f, best_validation_m1 %f, learning_rate %f, '
                    'minibatch %i/%i, validation fscore %f %%' %
                    (epoch, best_fscoreEval * 100., best_fscore_m1Eval * 100,
                     learning_rate, minibatch_index + 1, this_n_train_batches,
                     this_fscore_eval * 100.))

                if this_fscore_eval > best_fscoreEval:
                    best_fscoreEval = this_fscore_eval
                    best_fscore_m1Eval = this_fscore_macro_eval

                # dev data
                this_fscore, this_fscore_macro = _score_predictions(
                    eval_model_confidence, n_valid_batches, batch_size,
                    role_index_dev_matrix, dev_labels)
                print(
                    'epoch %i, best_validation %f, best_validation_m1 %f, learning_rate %f, minibatch %i/%i, '
                    'validation fscore %f %%' %
                    (epoch, best_fscore * 100., best_fscore_m1 * 100,
                     learning_rate, minibatch_index + 1, this_n_train_batches,
                     this_fscore * 100.))

                if this_fscore > best_fscore:
                    # best dev score so far: snapshot copies of all
                    # parameter values
                    best_fscore = this_fscore
                    best_fscore_m1 = this_fscore_macro
                    best_iter = iteration
                    best_params = [
                        [part.get_value(borrow=False) for part in group]
                        for group in param_list
                    ]
                    noImprovement = 0
                elif this_fscore > last_fscore:
                    # better than last epoch, though not the best overall
                    noImprovement = max(noImprovement - 1, 0)
                else:
                    noImprovement += 1
                    if newbob:  # learning rate schedule depending on dev result
                        learning_rate /= 1.2
                        print("reducing learning rate to ", learning_rate)
                last_fscore = this_fscore
            if newbob:  # learning rate schedule depending on dev result
                if noImprovement > maxNoImprovement or learning_rate < 0.0000001:
                    done_looping = True
                    break

        if not newbob:
            # fixed schedule: decay from the 10th epoch on, stop after the 50th
            if epoch + 1 > 10:
                learning_rate /= 1.2
                print("reducing learning rate to ", learning_rate)
            if epoch + 1 > 50:
                done_looping = True
                break

    print('Optimization complete.')
    print(
        'Best validation score of %f %% obtained for c=%i, nk=%i, f=%i, h=%i  at iteration %i,'
        % (best_fscore * 100., arg_context_size, nkerns[0], filter_size[1],
           hidden_units, best_iter + 1))

    time2 = time.time()
    print("time for training: ", time2 - time1)

    print('Saving net.')
    # the context manager closes the file even if pickling fails
    with open(arg_network_file, 'wb') as save_file:
        for group in best_params:
            for p_part in group:
                cPickle.dump(p_part, save_file, -1)
예제 #16
0
    def __init__(self,
                 nkerns=(48, 48, 48),
                 miniBatchSize=200,
                 nHidden=200,
                 nClasses=2,
                 nMaxPool=2,
                 nChannels=1):
        """
        Build a three-stage convolutional network with a hidden layer and a
        logistic-regression classifier on 65x65 input tiles.

        nkerns : number of filters in each of the three conv layers
                 (only indexed, so any sequence works)
        miniBatchSize : number of examples per mini-batch
        nHidden : number of units in the fully connected layer
        nClasses : the number of target classes (e.g. 2 for binary classification)
        nMaxPool : number of pixels to max pool
        nChannels : number of input channels (e.g. 1 for single grayscale channel)

        The assertions below pin the intermediate feature-map sizes to
        31, 14 and 5, which is what the default nMaxPool=2 produces.
        """
        rng = numpy.random.RandomState(23455)

        self.p = 65  # input tile width/height in pixels
        self.miniBatchSize = miniBatchSize

        # Note: self.x and self.y will be re-bound to a subset of the
        # training/validation/test data dynamically by the update
        # stage of the appropriate function.
        self.x = T.tensor4('x')  # membrane mini-batch
        self.y = T.ivector('y')  # 1D vector of [int] labels

        # We now assume the input will already be reshaped to the
        # proper size (i.e. we don't need a theano resize op here).
        layer0_input = self.x

        #--------------------------------------------------
        # LAYER 0
        # layer0 convolution+max pool reduces image dimensions by:
        # 65 -> 62 -> 31
        #--------------------------------------------------
        fs0 = 4  # conv. filter size, layer 0
        # Floor division keeps the size an int on Python 3 too; the value
        # is passed on as a shape below.
        os0 = (self.p - fs0 + 1) // nMaxPool  # image out size 0
        assert (os0 == 31)
        layer0 = LeNetConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=(self.miniBatchSize, nChannels,
                                                 self.p, self.p),
                                    filter_shape=(nkerns[0], nChannels, fs0,
                                                  fs0),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 1
        # layer1 convolution+max pool reduces image dimensions by:
        # 31 -> 28 -> 14
        #--------------------------------------------------
        fs1 = 4  # filter size, layer 1
        os1 = (os0 - fs1 + 1) // nMaxPool  # image out size 1
        assert (os1 == 14)
        layer1 = LeNetConvPoolLayer(rng,
                                    input=layer0.output,
                                    image_shape=(self.miniBatchSize, nkerns[0],
                                                 os0, os0),
                                    filter_shape=(nkerns[1], nkerns[0], fs1,
                                                  fs1),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 2
        # layer2 convolution+max pool reduces image dimensions by:
        # 14 -> 10 -> 5
        #--------------------------------------------------
        fs2 = 5  # filter size, layer 2
        os2 = (os1 - fs2 + 1) // nMaxPool  # image out size 2
        assert (os2 == 5)
        layer2 = LeNetConvPoolLayer(rng,
                                    input=layer1.output,
                                    image_shape=(self.miniBatchSize, nkerns[1],
                                                 os1, os1),
                                    filter_shape=(nkerns[2], nkerns[1], fs2,
                                                  fs2),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 3
        # Fully connected tanh layer, goes from
        # 5*5*48  -> 200 (with the default arguments)
        #--------------------------------------------------
        layer3_input = layer2.output.flatten(2)
        layer3 = HiddenLayer(rng,
                             input=layer3_input,
                             n_in=nkerns[2] * os2 * os2,
                             n_out=nHidden,
                             activation=T.tanh)

        #--------------------------------------------------
        # LAYER 4
        # Classification via a logistic regression layer
        # 200 -> 2 (with the default arguments)
        #--------------------------------------------------
        # classify the values of the fully-connected layer
        layer4 = LogisticRegression(input=layer3.output,
                                    n_in=nHidden,
                                    n_out=nClasses)

        self.layers = (layer0, layer1, layer2, layer3, layer4)
예제 #17
0
def start(inputfile):
    """Build the counting network, restore its weights and run it over a video.

    The function assembles three ``LeNetConvPoolLayer`` stages followed by a
    tanh ``HiddenLayer`` and a ``LogisticRegression`` output, compiles the
    module-level ``classify`` Theano function on top of them and restores the
    five layer states pickled in ``weights.save``.  It then opens the video,
    hands the processed frames to three ``RepDetector`` instances (one per
    entry of ``detector_strides``, each at its own stride) and calls
    ``draw_str`` to overlay the current counting state on the frame.

    Args:
        inputfile: Video source passed to ``cv2.VideoCapture``.

    Returns:
        A list with the value of the global ``global_counter`` recorded after
        each processed frame.

    Side effects:
        Rebinds the globals ``classify``, ``test_set_x``, ``test_set_y``,
        ``shared_test_set_y``, ``in_frame_num`` and ``out_time``; opens a
        ``cv2.VideoWriter`` on ``../out/live_out.avi`` (no frame is written
        to it in this function); prints progress messages.
    """
    global in_time, out_time, cooldown_in_time, cooldown_out_time, classify
    global global_counter, winner_stride, cur_state, in_frame_num, actions_counter
    global test_set_x, test_set_y, shared_test_set_y
    rng = numpy.random.RandomState(23455)

    # ####################### build start ########################

    # Zero-filled placeholder data; the shared variables are filled later.
    data_x = numpy.zeros([1, 20 * 50 * 50])
    data_y = numpy.zeros(20)
    (test_set_x, test_set_y, shared_test_set_y) = \
        shared_dataset((data_x, data_y))

    # Single-argument print(...) behaves exactly like the Python 2 print
    # statement the file uses, and keeps the block parseable elsewhere.
    print('building ... ')
    batch_size = 1

    # Symbolic variables for the data.
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # Input block: 20 channels of 50x50 pixels.
    in_channels = 20
    in_width = 50
    in_height = 50

    # First-stage filters: 40 maps of 5x5.
    flt_channels = 40
    flt_width = 5
    flt_height = 5

    # Feature-map sizes after each conv (5x5, 3x3, 3x3) + 2x2 pool stage:
    # 50 -> 23 -> 10 -> 4.  All operands are ints, so `//` gives the same
    # result as the original Python 2 `/`.
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) // 2
    layer1_h = (layer0_h - 4) // 2
    layer2_w = (layer1_w - 2) // 2
    layer2_h = (layer1_h - 2) // 2
    layer3_w = (layer2_w - 2) // 2
    layer3_h = (layer2_h - 2) // 2

    # #####################
    # BUILD ACTUAL MODEL #
    # #####################

    signals_shape = (batch_size, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=signals_shape,
                                filter_shape=filters_shape,
                                poolsize=(2, 2))

    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, flt_channels,
                                             layer1_w, layer1_h),
                                filter_shape=(60, flt_channels, 3, 3),
                                poolsize=(2, 2))

    layer2 = LeNetConvPoolLayer(rng,
                                input=layer1.output,
                                image_shape=(batch_size, 60, layer2_w,
                                             layer2_h),
                                filter_shape=(90, 60, 3, 3),
                                poolsize=(2, 2))

    layer3 = HiddenLayer(rng,
                         input=layer2.output.flatten(2),
                         n_in=90 * layer3_w * layer3_h,
                         n_out=500,
                         activation=T.tanh)

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    # Kept from the original; the value is not used in this function.
    cost = layer4.negative_log_likelihood(y)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # load weights

    print('loading weights state')
    # NOTE(review): unpickling can execute arbitrary code; 'weights.save'
    # must come from a trusted source.
    # The context manager closes the file even when unpickling fails.
    with open('weights.save', 'rb') as weights_file:
        saved_states = [cPickle.load(weights_file) for _ in range(5)]
    network = (layer0, layer1, layer2, layer3, layer4)
    for layer, saved in zip(network, saved_states):
        layer.__setstate__(saved)

    # ####################### build done ########################

    fromCam = False

    if fromCam:
        print('using camera input')
        cap = cv2.VideoCapture(0)
    else:
        # Same text as the two-item print statement (items joined by a space).
        print('%s %s' % ('using input file: ', inputfile))
        cap = cv2.VideoCapture(inputfile)

    # Overlay colours; loop-invariant, so defined once.
    blue_color = (130, 0, 0)
    green_color = (0, 130, 0)
    orange_color = (0, 140, 0xFF)

    video_writer = None
    frame_wise_counts = []
    try:
        frame_rate = 5
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        video_writer = cv2.VideoWriter('../out/live_out.avi', fourcc,
                                       frame_rate, (640, 480))

        frame_counter = 0
        # NOTE(review): the status of this first read is not checked; if the
        # source cannot be opened `frame` is presumably None here - confirm
        # how process_single_frame copes with that.
        (ret, frame) = cap.read()

        proFrame = process_single_frame(frame)

        # init detectors

        st_a_det = RepDetector(proFrame, detector_strides[0])
        st_b_det = RepDetector(proFrame, detector_strides[1])
        st_c_det = RepDetector(proFrame, detector_strides[2])

        while True:

            # The odd tick is skipped *before* a frame is read, so
            # in_frame_num advances by two for every decoded frame.
            in_frame_num += 1
            if in_frame_num % 2 == 1:
                continue

            (ret, frame) = cap.read()
            if ret == 0:
                print('unable to read frame')
                break
            proFrame = process_single_frame(frame)

            # Feed each detector only on the frames matching its stride
            # (order A, B, C as before).
            for detector in (st_a_det, st_b_det, st_c_det):
                if frame_counter % detector.stride_number == 0:
                    detector.count(proFrame)

            # display result on video

            # Left as `/`: integer division under Python 2 when
            # in_frame_num is an int.
            out_time = in_frame_num / 60
            if cur_state == state.IN_REP and (out_time - in_time < 4
                                              or global_counter < 5):
                draw_str(frame, (20, 120),
                         ' new hypothesis (%d) ' % global_counter,
                         orange_color, 1.5)
            if cur_state == state.IN_REP and out_time - in_time >= 4 \
                    and global_counter >= 5:
                draw_str(
                    frame, (20, 120), 'action %d: counting... %d' %
                    (actions_counter, global_counter), green_color, 2)
            if cur_state == state.COOLDOWN and global_counter >= 5:
                draw_str(
                    frame, (20, 120), 'action %d: done. final counting: %d' %
                    (actions_counter, global_counter), blue_color, 2)
            frame_wise_counts.append(global_counter)

            # Fix: the counter used by the stride tests above was never
            # advanced, which made every `% stride_number == 0` test true
            # and fed all three detectors on every frame.
            frame_counter += 1
    finally:
        # Release the OpenCV handles explicitly instead of relying on
        # garbage collection.
        cap.release()
        if video_writer is not None:
            video_writer.release()

    print('%s %s %s' % ("Dhruv", frame_wise_counts, global_counter))
    return frame_wise_counts
  def __init__(self, configfile, train = False):
    """Read the configuration and build the convolutional classification network.

    Three context matrices (left, middle, right) are each passed through a
    ``LeNetConvPoolLayer``; the three layers share one weight tensor and one
    bias vector.  Their flattened outputs are concatenated with two tanh
    hidden layers over the entity-label inputs (also sharing parameters) and
    one additional feature column, then fed to a tanh hidden layer and a
    23-way ``LogisticRegression``.

    Args:
      configfile: passed to ``readConfig``.  The resulting mapping must
        provide "wordvectors", "net", "hidden", "lrate", "filtersize",
        "nkerns", "kmax" and "contextsize", plus "batchsize" when ``train``
        is true.  "hiddenunitsNER" is optional and defaults to 50.
      train: when true, the configured batch size is used and no weights are
        loaded.  Otherwise the batch size is 1 and the parameters are read
        from the "net" file; ``self.gotNetwork`` is then set to 1, or to 0 if
        that file does not exist.
    """
    self.config = readConfig(configfile)

    self.addInputSize = 1
    logger.info("additional mlp input")

    wordvectorfile = self.config["wordvectors"]
    logger.info("wordvectorfile " + str(wordvectorfile))
    networkfile = self.config["net"]
    logger.info("networkfile " + str(networkfile))
    hiddenunits = int(self.config["hidden"])
    logger.info("hidden units " + str(hiddenunits))
    hiddenunitsNER = int(self.config["hiddenunitsNER"]) if "hiddenunitsNER" in self.config else 50
    logger.info("hidden units NER " + str(hiddenunitsNER))
    # Only logged here; the value is not used when building the graph.
    learning_rate = float(self.config["lrate"])
    logger.info("learning rate " + str(learning_rate))
    self.batch_size = int(self.config["batchsize"]) if train else 1
    logger.info("batch size " + str(self.batch_size))
    self.filtersize = [1,int(self.config["filtersize"])]
    nkerns = [int(self.config["nkerns"])]
    logger.info("nkerns " + str(nkerns))
    pool = [1, int(self.config["kmax"])]

    self.contextsize = int(self.config["contextsize"])
    logger.info("contextsize " + str(self.contextsize))

    # A filter cannot be wider than the context it slides over.
    if self.contextsize < self.filtersize[1]:
      logger.info("setting filtersize to " + str(self.contextsize))
      self.filtersize[1] = self.contextsize
    logger.info("filtersize " + str(self.filtersize))

    # Likewise, cap the pooling width at the convolution output width.
    sizeAfterConv = self.contextsize - self.filtersize[1] + 1
    if sizeAfterConv < pool[1]:
      logger.info("setting poolsize to " + str(sizeAfterConv))
      pool[1] = sizeAfterConv
    sizeAfterPooling = pool[1]
    logger.info("kmax pooling: k = " + str(pool[1]))

    # reading word vectors
    self.wordvectors, self.vectorsize = readWordvectors(wordvectorfile)

    # one extra row on top of the word-vector dimensions
    self.representationsize = self.vectorsize + 1

    rng = numpy.random.RandomState(23455)
    if train:
      seed = rng.get_state()[1][0]
      logger.info("seed: " + str(seed))

    # allocate symbolic variables for the data
    self.index = T.lscalar()  # index to a [mini]batch
    self.xa = T.matrix('xa')   # left context
    self.xb = T.matrix('xb')   # middle context
    self.xc = T.matrix('xc')   # right context
    self.y = T.imatrix('y')   # label (only present in training)
    self.yNER1 = T.imatrix('yNER1') # label for first entity
    self.yNER2 = T.imatrix('yNER2') # label for second entity
    ishape = [self.representationsize, self.contextsize]  # size of the context matrices

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    logger.info('... building the model')

    # Reshape the input matrices to the 4D layout LeNetConvPoolLayer is given.
    image_shape = (self.batch_size, 1, ishape[0], ishape[1])
    layer0a_input = self.xa.reshape(image_shape)
    layer0b_input = self.xb.reshape(image_shape)
    layer0c_input = self.xc.reshape(image_shape)

    self.y_reshaped = self.y.reshape((self.batch_size, 1))

    # Convolution parameters, created once and shared by all three contexts.
    filter_shape = (nkerns[0], 1, self.representationsize, self.filtersize[1])
    poolsize = (pool[0], pool[1])
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
              numpy.prod(poolsize))
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    # the convolution weight matrix
    convW = theano.shared(numpy.asarray(
           rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
           dtype=theano.config.floatX),
                               borrow=True)
    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    convB = theano.shared(value=b_values, borrow=True)

    self.layer0a = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0a_input,
            image_shape=image_shape,
            filter_shape=filter_shape, poolsize=poolsize)
    self.layer0b = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0b_input,
            image_shape=image_shape,
            filter_shape=filter_shape, poolsize=poolsize)
    self.layer0c = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0c_input,
            image_shape=image_shape,
            filter_shape=filter_shape, poolsize=poolsize)

    # Flatten each context's feature maps to (batch, nkerns * k) and join them.
    flat_shape = (self.batch_size, nkerns[0] * sizeAfterPooling)
    layer0aflattened = self.layer0a.output.flatten(2).reshape(flat_shape)
    layer0bflattened = self.layer0b.output.flatten(2).reshape(flat_shape)
    layer0cflattened = self.layer0c.output.flatten(2).reshape(flat_shape)
    layer0_output = T.concatenate([layer0aflattened, layer0bflattened, layer0cflattened], axis = 1)

    # Hidden layers over the two entity-label inputs; the second reuses the
    # first one's W and b.
    self.layer1a = HiddenLayer(rng = rng, input = self.yNER1, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh)
    self.layer1b = HiddenLayer(rng = rng, input = self.yNER2, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh, W = self.layer1a.W, b = self.layer1a.b)

    layer2_input = T.concatenate([layer0_output, self.layer1a.output, self.layer1b.output], axis = 1)
    layer2_inputSize = 3 * nkerns[0] * sizeAfterPooling + 2 * hiddenunitsNER

    # One additional feature column per example is appended to the MLP input.
    self.additionalFeatures = T.matrix('additionalFeatures')
    additionalFeatsShaped = self.additionalFeatures.reshape((self.batch_size, 1))
    layer2_input = T.concatenate([layer2_input, additionalFeatsShaped], axis = 1)
    layer2_inputSize += self.addInputSize

    self.layer2 = HiddenLayer(rng, input=layer2_input, n_in=layer2_inputSize,
                         n_out=hiddenunits, activation=T.tanh)

    # classify the values of the fully-connected hidden layer
    self.layer3 = LogisticRegression(input=self.layer2.output, n_in=hiddenunits, n_out=23)

    # Flat list of all model parameters.  Layers built on shared W/b
    # (layer0b, layer0c, layer1b) are left out so nothing is listed twice.
    self.paramList = [self.layer3.params, self.layer2.params, self.layer1a.params, self.layer0a.params]
    self.params = []
    for p in self.paramList:
      self.params += p
      logger.info(p)

    if not train:
      self.gotNetwork = 1
      # load parameters
      if not os.path.isfile(networkfile):
        logger.error("network file does not exist")
        self.gotNetwork = 0
      else:
        # NOTE(review): unpickling can execute arbitrary code; the network
        # file must come from a trusted source.
        # The context manager closes the file even if a load or set_value
        # call raises part-way through.
        with open(networkfile, 'rb') as save_file:
          for p in self.params:
            p.set_value(cPickle.load(save_file), borrow=False)
예제 #19
0
    in_channels   = 20
    in_width      = 50
    in_height     = 50
    #filter sizes
    flt_channels  = 40
    flt_time      = 20
    flt_width     = 5
    flt_height    = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)
        
    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                image_shape=signals_shape,
                filter_shape=filters_shape, poolsize=(2, 2))

    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
            filter_shape=(60, flt_channels, 3, 3), poolsize=(2, 2))


    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                image_shape=(batch_size, 60, layer2_w, layer2_h),
                filter_shape=(90, 60, 3, 3), poolsize=(2, 2))
    layer3_input = layer2.output.flatten(2)


    layer3 = HiddenLayer(rng, input=layer3_input, n_in=90 * layer3_w * layer3_h  ,
                         n_out=500, activation=T.tanh)