Example #1
0
    def addDropoutLayer(self, **kwargs):
        """
        Append a dropout layer to the network.

        The new layer is fed by the most recently added layer, or by
        ``self.input_layer`` when ``self.all_layers`` is still empty. It is
        named ``dropout<N>``, where N is the running dropout-layer counter,
        and every keyword argument is forwarded unchanged to ``DropoutLayer``.
        """
        if self.all_layers:
            previous = self.all_layers[-1]
        else:
            previous = self.input_layer

        # Bump the counter first so the generated names start at "dropout1".
        self.n_dropout_layers += 1
        layer_name = "dropout%i" % self.n_dropout_layers

        dropout = DropoutLayer(previous, name=layer_name, **kwargs)
        self.all_layers += (dropout, )
def main():
    """Train a DNN with one dropout layer on MNIST and print its test results.

    Configurations tried earlier, with the accuracy noted next to each one:
      * [Layer(30, LQ), Layer(10, LCE)], eta=0.05                   -> 96%
      * [Layer(30, LQ), Layer(10, SM)], eta=0.001                   -> 68%
      * [Layer(100, LQ), Layer(10, LCE)], eta=0.05, lmbda=5         -> 98%
      * [DropoutLayer(100, LQ), Layer(10, LCE)], eta=0.05           -> 97.5%
    """
    training_set, test_set, validation_set = load_mnist_simple()

    # Network construction, initialisation and training are all timed together.
    with timing(""):
        layers = [DropoutLayer(160, LQ), Layer(10, LCE)]
        dnn = DNN(input=28 * 28, layers=layers, eta=0.05, lmbda=3)
        dnn.initialize_rand()
        dnn.learn(training_set, epochs=30, test=validation_set, batch_size=29)

    print('test:', dnn.test(test_set))
    print(dnn.stats())
Example #3
0
    def __init__(self, num_input=256, num_hidden=[512,512], num_output=256, clip_at=0.0, scale_norm=0.0):
        """Build the network graph and compile the training / prediction functions.

        Args:
            num_input: input width handed to the first layer after the inputs.
            num_hidden: hidden width. Only the int case is handled in this
                method (one LSTMLayer followed by a DropoutLayer); for any
                other type, including the default list, no hidden layer is
                added here.
            num_output: output width of the final FullyConnectedLayer.
            clip_at: stored on the instance; not otherwise used in this method.
            scale_norm: stored on the instance; not otherwise used in this method.
        """
        # NOTE(review): the default for num_hidden is a mutable list that is
        # stored on self.num_hidden, so instances built with the default share it.

        # Symbolic inputs of the compiled functions.
        X = T.fmatrix()
        Y = T.imatrix()
        lr = T.fscalar()
        alpha = T.fscalar()
        reg = T.fscalar()
        dropout_prob = T.fscalar()

        self.num_input = num_input
        self.num_hidden = num_hidden
        self.num_output = num_output
        self.clip_at = clip_at
        self.scale_norm = scale_norm

        inputs = InputLayer(X, name='inputs')
        # num_prev / prev_layer track the width and identity of the layer that
        # feeds the next one being added.
        num_prev = num_input
        prev_layer = inputs

        self.layers = [inputs]
        # NOTE(review): types.IntType exists only on Python 2, and the exact
        # type comparison rejects int subclasses.
        if type(num_hidden) is types.IntType:
            lstm = LSTMLayer(num_prev, num_hidden, input_layers=[prev_layer], name="lstm", go_backwards=False)
            num_prev = num_hidden
            prev_layer = lstm
            self.layers.append(prev_layer)
            # Dropout follows the LSTM; its rate is a symbolic input supplied per call.
            prev_layer = DropoutLayer(prev_layer, dropout_prob=dropout_prob)
            self.layers.append(prev_layer)

        FC = FullyConnectedLayer(num_prev, num_output, input_layers=[prev_layer], name="yhat")
        self.layers.append(FC)
        Y_hat = FC.output()

        # Convert the layer output to probabilities.
        Y_hat = T.nnet.softmax(Y_hat)

        params = get_params(self.layers)
        # NOTE(review): `caches` is not read again in this method.
        caches = make_caches(params)

        # NOTE(review): `loss` is never assigned in this method; unless it is a
        # module-level name, the next line raises NameError. `Y` and `alpha` are
        # accepted as inputs below but are not referenced by any expression
        # visible here - presumably they belong to the missing loss definition.
        updates, grads = momentum(loss, params, lr, reg)

        # Training step: returns the loss and applies the momentum updates.
        self.train_func = theano.function([X, Y, lr, reg, dropout_prob, alpha], loss, updates=updates, allow_input_downcast=True)

        # Prediction: returns the softmax output wrapped in a one-element list.
        self.predict_sequence_func = theano.function([X, dropout_prob], [Y_hat], allow_input_downcast=True)
def main2():
    """Train the dropout network on the expanded-MNIST archives, one at a time.

    The archive order is shuffled. For each archive the samples are loaded and
    shuffled, the first 100000 are reshaped into (784, 1) arrays and passed to
    ``dnn.learn``, and the score on the plain MNIST test split is printed.
    """
    dnn = DNN(input=28 * 28, layers=[DropoutLayer(160, LQ), Layer(10, LCE)], eta=0.05, lmbda=1)  # 98%
    dnn.initialize_rand()
    # Only the test split is used below.
    _train, test_set, _validation = load_mnist_simple()

    archives = [f'mnist_expaned_k0{i}.pkl.gz' for i in range(50)]
    shuffle(archives)
    for archive in archives:
        print(archive)
        with timing("load"):
            samples = load_data(archive)
        with timing("shuffle"):
            shuffle(samples)
        with timing("reshape"):
            batch = [(image.reshape((784, 1)), label)
                     for image, label in islice(samples, 100000)]
            # Drop the raw archive before training to keep peak memory down.
            del samples
        with timing("learn"):
            dnn.learn(batch)
        del batch
        print('TEST:', dnn.test(test_set))
Example #5
0
    def __init__(self, config):

        self.config = config

        batch_size = config['batch_size']
        flag_datalayer = config['use_data_layer']
        lib_conv = config['lib_conv']

        # ##################### BUILD NETWORK ##########################
        # allocate symbolic variables for the data
        # 'rand' is a random array used for random cropping/mirroring of data
        x = T.ftensor4('x')
        y = T.ivector('y')
        rand = T.fvector('rand')

        print '... building the model'
        self.layers = []
        params = []
        weight_types = []

        if flag_datalayer:
            data_layer = DataLayer(input=x, image_shape=(3, 256, 256,
                                                         batch_size),
                                   cropsize=227, rand=rand, mirror=True,
                                   flag_rand=config['rand_crop'])

            layer1_input = data_layer.output
        else:
            layer1_input = x

        convpool_layer1 = ConvPoolLayer(input=layer1_input,
                                        image_shape=(3, 227, 227, batch_size), 
                                        filter_shape=(3, 11, 11, 96), 
                                        convstride=4, padsize=0, group=1, 
                                        poolsize=3, poolstride=2, 
                                        bias_init=0.0, lrn=True,
                                        lib_conv=lib_conv,
                                        )
        self.layers.append(convpool_layer1)
        params += convpool_layer1.params
        weight_types += convpool_layer1.weight_type

        convpool_layer2 = ConvPoolLayer(input=convpool_layer1.output,
                                        image_shape=(96, 27, 27, batch_size),
                                        filter_shape=(96, 5, 5, 256), 
                                        convstride=1, padsize=2, group=2, 
                                        poolsize=3, poolstride=2, 
                                        bias_init=0.1, lrn=True,
                                        lib_conv=lib_conv,
                                        )
        self.layers.append(convpool_layer2)
        params += convpool_layer2.params
        weight_types += convpool_layer2.weight_type

        convpool_layer3 = ConvPoolLayer(input=convpool_layer2.output,
                                        image_shape=(256, 13, 13, batch_size),
                                        filter_shape=(256, 3, 3, 384), 
                                        convstride=1, padsize=1, group=1, 
                                        poolsize=1, poolstride=0, 
                                        bias_init=0.0, lrn=False,
                                        lib_conv=lib_conv,
                                        )
        self.layers.append(convpool_layer3)
        params += convpool_layer3.params
        weight_types += convpool_layer3.weight_type

        convpool_layer4 = ConvPoolLayer(input=convpool_layer3.output,
                                        image_shape=(384, 13, 13, batch_size),
                                        filter_shape=(384, 3, 3, 384), 
                                        convstride=1, padsize=1, group=2, 
                                        poolsize=1, poolstride=0, 
                                        bias_init=0.1, lrn=False,
                                        lib_conv=lib_conv,
                                        )
        self.layers.append(convpool_layer4)
        params += convpool_layer4.params
        weight_types += convpool_layer4.weight_type

        convpool_layer5 = ConvPoolLayer(input=convpool_layer4.output,
                                        image_shape=(384, 13, 13, batch_size),
                                        filter_shape=(384, 3, 3, 256), 
                                        convstride=1, padsize=1, group=2, 
                                        poolsize=3, poolstride=2, 
                                        bias_init=0.0, lrn=False,
                                        lib_conv=lib_conv,
                                        )
        self.layers.append(convpool_layer5)
        params += convpool_layer5.params
        weight_types += convpool_layer5.weight_type

        fc_layer6_input = T.flatten(
            convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
        fc_layer6 = FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096)
        self.layers.append(fc_layer6)
        params += fc_layer6.params
        weight_types += fc_layer6.weight_type

        dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096, n_out=4096)

        fc_layer7 = FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096)
        self.layers.append(fc_layer7)
        params += fc_layer7.params
        weight_types += fc_layer7.weight_type

        dropout_layer7 = DropoutLayer(fc_layer7.output, n_in=4096, n_out=4096)

        softmax_layer8 = SoftmaxLayer(
            input=dropout_layer7.output, n_in=4096, n_out=1000)
        self.layers.append(softmax_layer8)
        params += softmax_layer8.params
        weight_types += softmax_layer8.weight_type

        # #################### NETWORK BUILT #######################

        self.cost = softmax_layer8.negative_log_likelihood(y)
        self.errors = softmax_layer8.errors(y)
        self.errors_top_5 = softmax_layer8.errors_top_x(y, 5)
        self.params = params
        self.x = x
        self.y = y
        self.rand = rand
        self.weight_types = weight_types
        self.batch_size = batch_size
Example #6
0
    def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
        """Build a four-stage CNN with two dense layers and compile its functions.

        Args:
            rng: random generator handed to the convolution, dropout, hidden
                and output layer constructors.
            params: dict read for the keys ``"lr"``, ``"batch_size"``,
                ``"seq_length"`` and ``"n_output"``.
            cost_function: name forwarded to ``get_err_fn`` to select the
                error term.
            optimizer: callable invoked as ``optimizer(cost, params, lr=lr)``;
                the returned object must provide ``getUpdates()``.

        After construction the instance exposes ``output`` (symbolic
        prediction), ``params`` (all trainable parameters), ``train`` and
        ``predictions`` (compiled Theano functions) and ``n_param``.
        """

        lr = params["lr"]
        batch_size = params["batch_size"]
        sequence_length = params["seq_length"]  # looked up but not used below

        # Symbolic inputs (one minibatch each).
        X = T.matrix(name="input", dtype=dtype)  # batch of sequence of vector
        Y = T.matrix(name="output", dtype=dtype)  # batch of sequence of vector
        is_train = T.iscalar(
            'is_train'
        )  # pseudo boolean for switching between training and prediction

        #CNN global parameters.
        subsample = (1, 1)
        p_1 = 0.5
        border_mode = "same"
        cnn_batch_size = batch_size
        pool_size = (2, 2)

        #Layer1: conv2+pool+drop
        filter_shape = (128, 1, 10, 10)
        input_shape = (cnn_batch_size, 1, 144, 176
                       )  #input_shape= (samples, channels, rows, cols)
        # Named cnn_input so the builtin `input` is not shadowed.
        cnn_input = X.reshape(input_shape)
        c1 = ConvLayer(rng,
                       cnn_input,
                       filter_shape,
                       input_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p1 = PoolLayer(c1.output,
                       pool_size=pool_size,
                       input_shape=c1.output_shape)
        dl1 = DropoutLayer(rng, input=p1.output, prob=p_1, is_train=is_train)

        #Layer2: conv2+pool
        subsample = (1, 1)
        filter_shape = (256, p1.output_shape[1], 3, 3)
        c2 = ConvLayer(rng,
                       dl1.output,
                       filter_shape,
                       p1.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p2 = PoolLayer(c2.output,
                       pool_size=pool_size,
                       input_shape=c2.output_shape)

        #Layer3: conv2+pool
        filter_shape = (256, p2.output_shape[1], 3, 3)
        c3 = ConvLayer(rng,
                       p2.output,
                       filter_shape,
                       p2.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p3 = PoolLayer(c3.output,
                       pool_size=pool_size,
                       input_shape=c3.output_shape)

        #Layer4: conv2+pool
        filter_shape = (128, p3.output_shape[1], 3, 3)
        c4 = ConvLayer(rng,
                       p3.output,
                       filter_shape,
                       p3.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p4 = PoolLayer(c4.output,
                       pool_size=pool_size,
                       input_shape=c4.output_shape)

        #Layer5: hidden
        # Flattened feature width = product of every non-batch dimension.
        n_in = reduce(lambda x, y: x * y, p4.output_shape[1:])
        x_flat = p4.output.flatten(2)

        h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)

        #Layer6: output layer
        lreg = LogisticRegression(rng, h1.output, 1024, params['n_output'])
        self.output = lreg.y_pred

        self.params = c1.params + c2.params + c3.params + c4.params + h1.params + lreg.params

        cost = get_err_fn(self, cost_function, Y)
        L2_reg = 0.0001
        L2_sqr = theano.shared(0.)
        # self.params is a flat list of parameter tensors, so penalise every
        # element of each one. Indexing param[0] / param[1] would only cover
        # the first two slices along the leading axis and fails when that
        # axis has length 1.
        for param in self.params:
            L2_sqr += T.sum(param ** 2)

        cost += L2_reg * L2_sqr

        _optimizer = optimizer(cost, self.params, lr=lr)
        # Training step: returns the regularised cost and applies the updates.
        self.train = theano.function(inputs=[X, Y, is_train],
                                     outputs=cost,
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X, is_train],
                                           outputs=self.output,
                                           allow_input_downcast=True)
        self.n_param = count_params(self.params)
Example #7
0
    def __init__(self, config):

        self.config = config

        batch_size = config.batch_size
        lib_conv = config.lib_conv
        group = (2 if config.grouping else 1)
        LRN = (True if config.LRN else False)
        print 'LRN, group', LRN, group

        # ##################### BUILD NETWORK ##########################
        # allocate symbolic variables for the data
        x = T.ftensor4('x')
        y = T.lvector('y')


        print '... building the model with ConvLib %s, LRN %s, grouping %i ' \
              % (lib_conv, LRN, group)
        self.layers = []
        params = []
        weight_types = []

        layer1_input = x

        convpool_layer1 = ConvPoolLayer(
            input=layer1_input,
            image_shape=((3, 224, 224,
                          batch_size) if lib_conv == 'cudaconvnet' else
                         (batch_size, 3, 227, 227)),
            filter_shape=((3, 11, 11, 96) if lib_conv == 'cudaconvnet' else
                          (96, 3, 11, 11)),
            convstride=4,
            padsize=(0 if lib_conv == 'cudaconvnet' else 3),
            group=1,
            poolsize=3,
            poolstride=2,
            bias_init=0.0,
            lrn=LRN,
            lib_conv=lib_conv)
        self.layers.append(convpool_layer1)
        params += convpool_layer1.params
        weight_types += convpool_layer1.weight_type

        convpool_layer2 = ConvPoolLayer(
            input=convpool_layer1.output,
            image_shape=((96, 27, 27,
                          batch_size) if lib_conv == 'cudaconvnet' else
                         (batch_size, 96, 27, 27)),
            filter_shape=((96, 5, 5, 256) if lib_conv == 'cudaconvnet' else
                          (256, 96, 5, 5)),
            convstride=1,
            padsize=2,
            group=group,
            poolsize=3,
            poolstride=2,
            bias_init=0.1,
            lrn=LRN,
            lib_conv=lib_conv,
        )
        self.layers.append(convpool_layer2)
        params += convpool_layer2.params
        weight_types += convpool_layer2.weight_type

        convpool_layer3 = ConvPoolLayer(
            input=convpool_layer2.output,
            image_shape=((256, 13, 13,
                          batch_size) if lib_conv == 'cudaconvnet' else
                         (batch_size, 256, 13, 13)),
            filter_shape=((256, 3, 3, 384) if lib_conv == 'cudaconvnet' else
                          (384, 256, 3, 3)),
            convstride=1,
            padsize=1,
            group=1,
            poolsize=1,
            poolstride=0,
            bias_init=0.0,
            lrn=False,
            lib_conv=lib_conv,
        )
        self.layers.append(convpool_layer3)
        params += convpool_layer3.params
        weight_types += convpool_layer3.weight_type

        convpool_layer4 = ConvPoolLayer(
            input=convpool_layer3.output,
            image_shape=((384, 13, 13,
                          batch_size) if lib_conv == 'cudaconvnet' else
                         (batch_size, 384, 13, 13)),
            filter_shape=((384, 3, 3, 384) if lib_conv == 'cudaconvnet' else
                          (384, 384, 3, 3)),
            convstride=1,
            padsize=1,
            group=group,
            poolsize=1,
            poolstride=0,
            bias_init=0.1,
            lrn=False,
            lib_conv=lib_conv,
        )
        self.layers.append(convpool_layer4)
        params += convpool_layer4.params
        weight_types += convpool_layer4.weight_type

        convpool_layer5 = ConvPoolLayer(
            input=convpool_layer4.output,
            image_shape=((384, 13, 13,
                          batch_size) if lib_conv == 'cudaconvnet' else
                         (batch_size, 384, 13, 13)),
            filter_shape=((384, 3, 3, 256) if lib_conv == 'cudaconvnet' else
                          (256, 384, 3, 3)),
            convstride=1,
            padsize=1,
            group=group,
            poolsize=3,
            poolstride=2,
            bias_init=0.0,
            lrn=False,
            lib_conv=lib_conv,
        )
        self.layers.append(convpool_layer5)
        params += convpool_layer5.params
        weight_types += convpool_layer5.weight_type

        if lib_conv == 'cudaconvnet':
            fc_layer6_input = T.flatten(
                convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
        else:
            fc_layer6_input = convpool_layer5.output.flatten(2)

        fc_layer6 = FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096)
        self.layers.append(fc_layer6)
        params += fc_layer6.params
        weight_types += fc_layer6.weight_type

        dropout_layer6 = DropoutLayer(fc_layer6.output)

        fc_layer7 = FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096)
        self.layers.append(fc_layer7)
        params += fc_layer7.params
        weight_types += fc_layer7.weight_type

        dropout_layer7 = DropoutLayer(fc_layer7.output)

        softmax_layer8 = SoftmaxLayer(input=dropout_layer7.output,
                                      n_in=4096,
                                      n_out=1000)
        self.layers.append(softmax_layer8)
        params += softmax_layer8.params
        weight_types += softmax_layer8.weight_type

        # #################### NETWORK BUILT #######################

        self.cost = softmax_layer8.negative_log_likelihood(y)
        self.errors = softmax_layer8.errors(y)
        self.errors_top_5 = softmax_layer8.errors_top_x(y, 5)
        self.params = params
        self.x = x
        self.y = y
        # self.rand = rand
        self.weight_types = weight_types
        self.batch_size = batch_size
Example #8
0
    def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
        """Build a three-stage CNN feeding one LSTM layer and compile it.

        Args:
            rng: random generator handed to the layer constructors and to the
                weight / bias initialisers.
            params: dict read for the keys ``"lr"``, ``'n_hidden'``,
                ``'n_output'``, ``"batch_size"`` and ``"seq_length"``.
            cost_function: name forwarded to ``get_err_fn``.
            optimizer: callable invoked as ``optimizer(cost, params, lr=lr)``;
                the returned object must provide ``getUpdates()``.

        The compiled ``train`` and ``predictions`` functions take the initial
        hidden and cell states (``H``, ``C``) as inputs and return the states
        of the last time step alongside the cost / output.
        """
        lr = params["lr"]
        n_lstm = params['n_hidden']
        n_out = params['n_output']
        batch_size = params["batch_size"]
        sequence_length = params["seq_length"]

        # Symbolic inputs: batch of sequence of vector for both X and Y.
        X = T.tensor3()
        Y = T.tensor3()
        # Pseudo boolean for switching between training and prediction.
        is_train = T.iscalar('is_train')

        # Settings shared by every convolution stage.
        subsample = (1, 1)
        p_1 = 0.5
        border_mode = "valid"
        cnn_batch_size = batch_size * sequence_length
        pool_size = (2, 2)

        # Stage 1: conv + pool + dropout.
        # Shape order is (samples, channels, rows, cols).
        frames_shape = (cnn_batch_size, 1, 120, 60)
        frames = X.reshape(frames_shape)
        c1 = ConvLayer(rng, frames, (64, 1, 9, 9), frames_shape,
                       border_mode, subsample, activation=nn.relu)
        p1 = PoolLayer(c1.output, pool_size=pool_size,
                       input_shape=c1.output_shape)
        dl1 = DropoutLayer(rng, input=p1.output, prob=p_1, is_train=is_train)

        # Stage 2: conv + pool.
        c2 = ConvLayer(rng, dl1.output, (128, p1.output_shape[1], 3, 3),
                       p1.output_shape, border_mode, subsample,
                       activation=nn.relu)
        p2 = PoolLayer(c2.output, pool_size=pool_size,
                       input_shape=c2.output_shape)

        # Stage 3: conv + pool.
        c3 = ConvLayer(rng, p2.output, (128, p2.output_shape[1], 3, 3),
                       p2.output_shape, border_mode, subsample,
                       activation=nn.relu)
        p3 = PoolLayer(c3.output, pool_size=pool_size,
                       input_shape=c3.output_shape)

        # Stage 4: dense layer over the flattened feature maps.
        flat_width = reduce(lambda a, b: a * b, p3.output_shape[1:])
        flat_features = p3.output.flatten(2)
        h1 = HiddenLayer(rng, flat_features, flat_width, 1024,
                         activation=nn.relu)
        rnn_width = 1024
        # Split the merged batch*time axis back into (batch, time, features).
        rnn_input = h1.output.reshape((batch_size, sequence_length, rnn_width))

        # Stage 5: LSTM followed by a linear read-out.
        self.n_in = rnn_width
        self.n_lstm = n_lstm
        self.n_out = n_out
        self.W_hy = init_weight((self.n_lstm, self.n_out), rng=rng,
                                name='W_hy', sample='glorot')
        self.b_y = init_bias(self.n_out, rng=rng, sample='zero')

        layer1 = LSTMLayer(rng, 0, self.n_in, self.n_lstm)

        # self.params refers to the same list object as layer1.params, so the
        # read-out weights are added to that list in place.
        self.params = layer1.params
        self.params.extend([self.W_hy, self.b_y])

        def step_lstm(x_t, h_prev, c_prev):
            # One recurrence step: LSTM update, then the linear read-out.
            h_t, c_t, y_t = layer1.run(x_t, h_prev, c_prev)
            return [h_t, c_t, T.dot(y_t, self.W_hy) + self.b_y]

        H = T.matrix(name="H", dtype=dtype)  # initial hidden state
        C = T.matrix(name="C", dtype=dtype)  # initial cell state

        # scan iterates over the leading axis, so time is moved to the front.
        scan_outputs, _ = theano.scan(
            fn=step_lstm,
            sequences=[rnn_input.dimshuffle(1, 0, 2)],
            outputs_info=[H, C, None])
        h_seq, c_seq, y_seq = scan_outputs

        # Back to (batch, time, output).
        self.output = y_seq.dimshuffle(1, 0, 2)

        self.params = (c1.params + c2.params + c3.params + h1.params
                       + self.params)

        cost = get_err_fn(self, cost_function, Y)
        L2_reg = 0.0001
        L2_sqr = theano.shared(0.)
        for weights in self.params:
            L2_sqr += T.sum(weights ** 2)

        cost += L2_reg * L2_sqr
        _optimizer = optimizer(cost, self.params, lr=lr)
        self.train = theano.function(
            inputs=[X, Y, is_train, H, C],
            outputs=[cost, h_seq[-1], c_seq[-1]],
            updates=_optimizer.getUpdates(),
            allow_input_downcast=True)
        self.predictions = theano.function(
            inputs=[X, is_train, H, C],
            outputs=[self.output, h_seq[-1], c_seq[-1]],
            allow_input_downcast=True)
        self.n_param = count_params(self.params)
Example #9
0
    def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
        """Build a three-stage CNN feeding three stacked LSTM layers and compile it.

        Args:
            rng: random generator handed to the layer constructors and the
                weight / bias initialisers, and also used to sample the
                dropout mask via ``rng.binomial(size=..., p=..., dtype=...)``.
            params: dict read for the keys ``"lr"``, ``'n_hidden'``,
                ``'n_output'``, ``"batch_size"`` and ``"seq_length"``.
            cost_function: name forwarded to ``get_err_fn``.
            optimizer: callable invoked as ``optimizer(cost, params, lr=lr)``;
                the returned object must provide ``getUpdates()``.

        After construction the instance exposes ``output``, ``params``,
        ``train``, ``predictions`` and ``n_param``.
        """

        lr = params["lr"]
        n_lstm = params['n_hidden']
        n_out = params['n_output']
        batch_size = params["batch_size"]
        sequence_length = params["seq_length"]

        # Symbolic inputs (one minibatch each).
        X = T.tensor3()  # batch of sequence of vector
        Y = T.tensor3()  # batch of sequence of vector
        is_train = T.iscalar(
            'is_train'
        )  # pseudo boolean for switching between training and prediction

        #CNN global parameters.
        subsample = (1, 1)
        p_1 = 0.5
        border_mode = "valid"
        # Batch and time are merged into one axis for the convolution stages.
        cnn_batch_size = batch_size * sequence_length
        pool_size = (2, 2)

        #Layer1: conv2+pool+drop
        filter_shape = (64, 1, 9, 9)
        input_shape = (cnn_batch_size, 1, 120, 60
                       )  #input_shape= (samples, channels, rows, cols)
        input = X.reshape(input_shape)
        c1 = ConvLayer(rng,
                       input,
                       filter_shape,
                       input_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p1 = PoolLayer(c1.output,
                       pool_size=pool_size,
                       input_shape=c1.output_shape)
        dl1 = DropoutLayer(rng, input=p1.output, prob=p_1, is_train=is_train)
        # When is_train is non-zero use the dropout output; otherwise use the
        # pooled activations scaled by the retain probability.
        retain_prob = 1. - p_1
        test_output = p1.output * retain_prob
        d1_output = T.switch(T.neq(is_train, 0), dl1.output, test_output)

        #Layer2: conv2+pool
        filter_shape = (128, p1.output_shape[1], 3, 3)
        c2 = ConvLayer(rng,
                       d1_output,
                       filter_shape,
                       p1.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p2 = PoolLayer(c2.output,
                       pool_size=pool_size,
                       input_shape=c2.output_shape)

        #Layer3: conv2+pool
        filter_shape = (128, p2.output_shape[1], 3, 3)
        c3 = ConvLayer(rng,
                       p2.output,
                       filter_shape,
                       p2.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p3 = PoolLayer(c3.output,
                       pool_size=pool_size,
                       input_shape=c3.output_shape)

        #Layer4: hidden
        # Flattened feature width = product of every non-batch dimension.
        n_in = reduce(lambda x, y: x * y, p3.output_shape[1:])
        x_flat = p3.output.flatten(2)
        h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)
        n_in = 1024
        # Split the merged batch*time axis back into (batch, time, features).
        rnn_input = h1.output.reshape((batch_size, sequence_length, n_in))

        #Layer5: three stacked LSTM layers followed by a linear read-out
        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out
        self.W_hy = init_weight((self.n_lstm, self.n_out),
                                rng=rng,
                                name='W_hy',
                                sample='glorot')
        self.b_y = init_bias(self.n_out, rng=rng, sample='zero')

        layer1 = LSTMLayer(rng, 0, self.n_in, self.n_lstm)
        layer2 = LSTMLayer(rng, 1, self.n_lstm, self.n_lstm)
        layer3 = LSTMLayer(rng, 2, self.n_lstm, self.n_lstm)

        self.params = layer1.params + layer2.params + layer3.params
        self.params.append(self.W_hy)
        self.params.append(self.b_y)

        def step_lstm(x_t, mask, h_tm1_1, c_tm1_1, h_tm1_2, c_tm1_2, h_tm1_3,
                      c_tm1_3):
            # One time step: layer1 -> dropout (using this step's mask slice)
            # -> layer2 -> layer3 -> linear read-out.
            [h_t_1, c_t_1, y_t_1] = layer1.run(x_t, h_tm1_1, c_tm1_1)
            # This local dl1 is distinct from the dl1 of the convolution stage.
            dl1 = DropoutLayer(rng,
                               input=y_t_1,
                               prob=0.5,
                               is_train=is_train,
                               mask=mask)
            [h_t_2, c_t_2, y_t_2] = layer2.run(dl1.output, h_tm1_2, c_tm1_2)
            [h_t_3, c_t_3, y_t_3] = layer3.run(y_t_2, h_tm1_3, c_tm1_3)
            y = T.dot(y_t_3, self.W_hy) + self.b_y
            return [h_t_1, c_t_1, h_t_2, c_t_2, h_t_3, c_t_3, y]

        # Zero initial states for each LSTM layer, held in shared variables
        # (they are not inputs of the compiled functions).
        h0_1 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial hidden state
        c0_1 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial cell state
        h0_2 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial hidden state
        c0_2 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial cell state
        h0_3 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial hidden state
        c0_3 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial cell state

        # One mask slice of shape (batch_size, n_lstm) per time step; it is
        # passed to scan as a sequence alongside the inputs.
        # NOTE(review): this call requires rng.binomial to accept size/p/dtype
        # keywords, while the same rng is also given to the layer constructors
        # - confirm which generator type callers pass.
        mask_shape = (sequence_length, batch_size, self.n_lstm)
        p_1 = 0.5
        mask = rng.binomial(size=mask_shape, p=p_1, dtype=X.dtype)

        #(1, 0, 2) -> AxBxC to BxAxC
        #(batch_size,sequence_length, n_in) >> (sequence_length, batch_size ,n_in)
        #T.dot(x_t, self.W_xi)x_t=(sequence_length, batch_size ,n_in), W_xi=  [self.n_in, self.n_lstm]

        # Only y_vals is used after the scan; the per-layer state sequences
        # are not read again in this method.
        [h_t_1, c_t_1, h_t_2, c_t_2, h_t_3, c_t_3, y_vals], _ = theano.scan(
            fn=step_lstm,
            sequences=[rnn_input.dimshuffle(1, 0, 2), mask],
            outputs_info=[h0_1, c0_1, h0_2, c0_2, h0_3, c0_3, None])

        # Back to (batch, time, output).
        self.output = y_vals.dimshuffle(1, 0, 2)

        self.params = c1.params + c2.params + c3.params + h1.params + self.params

        cost = get_err_fn(self, cost_function, Y)
        _optimizer = optimizer(cost, self.params, lr=lr)
        # Training step: returns the cost and applies the optimizer updates.
        self.train = theano.function(inputs=[X, Y, is_train],
                                     outputs=cost,
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X, is_train],
                                           outputs=self.output,
                                           allow_input_downcast=True)
        self.n_param = count_params(self.params)
Example #10
0
    def __init__(self, config, testMode):

        self.config = config

        batch_size = config['batch_size']
        lib_conv = config['lib_conv']
        useLayers = config['useLayers']
        #imgWidth = config['imgWidth']
        #imgHeight = config['imgHeight']
        initWeights = config['initWeights']  #if we wish to initialize alexnet with some weights. #need to make changes in layers.py to accept initilizing weights
        if initWeights:
            weightsDir = config['weightsDir']
            weightFileTag = config['weightFileTag']
        prob_drop = config['prob_drop']

        # ##################### BUILD NETWORK ##########################
        x = T.ftensor4('x')
        mean = T.ftensor4('mean')
        #y = T.lvector('y')

        print '... building the model'
        self.layers = []
        params = []
        weight_types = []

        if useLayers >= 1:
            convpool_layer1 = ConvPoolLayer(input=x-mean,
                                        image_shape=(3, None, None, batch_size),
                                        filter_shape=(3, 11, 11, 96),
                                        convstride=4, padsize=0, group=1, 
                                        poolsize=3, poolstride=2, 
                                        bias_init=0.0, lrn=True,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W_0'+weightFileTag, 'b_0'+weightFileTag]
                                        )
            self.layers.append(convpool_layer1)
            params += convpool_layer1.params
            weight_types += convpool_layer1.weight_type

        if useLayers >= 2:
            convpool_layer2 = ConvPoolLayer(input=convpool_layer1.output,
                                        image_shape=(96, None, None, batch_size),    #change from 27 to appropriate value sbased on conv1's output
                                        filter_shape=(96, 5, 5, 256), 
                                        convstride=1, padsize=2, group=2, 
                                        poolsize=3, poolstride=2, 
                                        bias_init=0.1, lrn=True,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W0_1'+weightFileTag, 'W1_1'+weightFileTag, 'b0_1'+weightFileTag, 'b1_1'+weightFileTag]
                                        )
            self.layers.append(convpool_layer2)
            params += convpool_layer2.params
            weight_types += convpool_layer2.weight_type

        if useLayers >= 3:
            convpool_layer3 = ConvPoolLayer(input=convpool_layer2.output,
                                        image_shape=(256, None, None, batch_size),
                                        filter_shape=(256, 3, 3, 384), 
                                        convstride=1, padsize=1, group=1, 
                                        poolsize=1, poolstride=0, 
                                        bias_init=0.0, lrn=False,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W_2'+weightFileTag, 'b_2'+weightFileTag]
                                        )
            self.layers.append(convpool_layer3)
            params += convpool_layer3.params
            weight_types += convpool_layer3.weight_type

        if useLayers >= 4:
            convpool_layer4 = ConvPoolLayer(input=convpool_layer3.output,
                                        image_shape=(384, None, None, batch_size),
                                        filter_shape=(384, 3, 3, 384), 
                                        convstride=1, padsize=1, group=2, 
                                        poolsize=1, poolstride=0, 
                                        bias_init=0.1, lrn=False,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W0_3'+weightFileTag, 'W1_3'+weightFileTag, 'b0_3'+weightFileTag, 'b1_3'+weightFileTag]
                                        )
            self.layers.append(convpool_layer4)
            params += convpool_layer4.params
            weight_types += convpool_layer4.weight_type

        if useLayers >= 5:
            convpool_layer5 = ConvPoolLayer(input=convpool_layer4.output,
                                        image_shape=(384, None, None, batch_size),
                                        filter_shape=(384, 3, 3, 256), 
                                        convstride=1, padsize=1, group=2, 
                                        poolsize=3, poolstride=2, 
                                        bias_init=0.0, lrn=False,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W0_4'+weightFileTag, 'W1_4'+weightFileTag, 'b0_4'+weightFileTag, 'b1_4'+weightFileTag]
                                        )
            self.layers.append(convpool_layer5)
            params += convpool_layer5.params
            weight_types += convpool_layer5.weight_type

        if useLayers >= 6:
            fc_layer6_input = T.flatten(convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
            fc_layer6 = FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096, initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W_5'+weightFileTag, 'b_5'+weightFileTag])
            self.layers.append(fc_layer6)
            params += fc_layer6.params
            weight_types += fc_layer6.weight_type
            if testMode:
                dropout_layer6 = fc_layer6
            else:
                dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096, n_out=4096, prob_drop=prob_drop)

        if useLayers >= 7:
            fc_layer7 = FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096, initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W_6'+weightFileTag, 'b_6'+weightFileTag])
            self.layers.append(fc_layer7)
            params += fc_layer7.params
            weight_types += fc_layer7.weight_type
            if testMode:
                dropout_layer6 = fc_layer7
            else:
                dropout_layer7 = DropoutLayer(fc_layer7.output, n_in=4096, n_out=4096, prob_drop=prob_drop)

        if useLayers >= 8:
            softmax_layer8 = SoftmaxLayer(input=dropout_layer7.output, n_in=4096, n_out=1000, initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W_7'+weightFileTag, 'b_7'+weightFileTag])
            self.layers.append(softmax_layer8)
            params += softmax_layer8.params
            weight_types += softmax_layer8.weight_type

        # #################### NETWORK BUILT #######################

        self.output = self.layers[useLayers-1]
        self.params = params
        self.x = x
        self.mean = mean
        self.weight_types = weight_types
        self.batch_size = batch_size
        self.useLayers = useLayers
        self.outLayer = self.layers[useLayers-1]

        meanVal = np.load(config['mean_file'])
        meanVal = meanVal[:, :, :, np.newaxis].astype('float32')   #x is 4d, with 'batch' number of images. meanVal has only '1' in the 'batch' dimension. subtraction wont work.
        meanVal = np.tile(meanVal,(1,1,1,batch_size))
        self.meanVal = meanVal
        #meanVal = np.zeros([3,imgHeight,imgWidth,2], dtype='float32')

        if useLayers >= 8:  #if last layer is softmax, then its output is y_pred
            finalOut = self.outLayer.y_pred
        else:
            finalOut = self.outLayer.output
        self.forwardFunction = theano.function([self.x, In(self.mean, value=meanVal)], [finalOut])
        BranchedLayer([None, ConvLayer(64, [1, 7])]),
        BranchedLayer([None, ConvLayer(96, 3, padding='valid')]),
        MergeLayer(axis=3),
        BranchedLayer([
            ConvLayer(192, 3, strides=2, padding='valid'),
            MaxPoolLayer(3, strides=2, padding='valid')
        ]),
        MergeLayer(axis=3),
        *([inception_a] * args.na),  # x4
        ConvLayer(1024, 3, strides=2),  # reduction_a
        *([inception_b] * args.nb),  # x7
        ConvLayer(1536, 3, strides=2),  # reduction_b
        *([inception_c] * args.nc),  # x3
        GlobalAvgPoolLayer(),
        FlattenLayer(),
        DropoutLayer(rate=args.drop_prob)
    ]

    data_params = {
        'na': args.na,
        'nb': args.nb,
        'nc': args.nc,
        'batch_norm': batch_norm,
        'drop_prob': args.drop_prob,
        'augmentation': True
    }

    cnn = CNN(layers,
              n_classes=n_classes,
              batch_size=128,
              l2_lambda=args.l2_lambda,
Example #12
0
    def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num,
                   drop_out=0.5, rad_dim=30, emb=None, ng_embs=None,
                   pixels=None, con_width=None, filters=None,
                   pooling_size=None):
        """Build the tagging graph for every bucket and set up the saver.

        When ``trained_model`` is given, the hyper-parameters are pickled to
        that path first.  Shared layers (embeddings, optional glyph CNN,
        RNN cells, output layer) are created once; then, for each sentence
        length in ``self.buckets_char``, placeholders are created and wired
        through the shared layers.  Results are appended to ``self.input_v``,
        ``self.input_p`` (graphic mode only), ``self.output`` and
        ``self.output_``.

        :param trained_model: path the hyper-parameter dict is pickled to,
            or None to skip saving.
        :param scope: variable scope object; ``reuse_variables()`` is called
            on it when the second bucket is reached.
        :param emb_dim: embedding size for the character and n-gram layers.
        :param gru: if truthy use GRU cells, otherwise LSTM cells.
        :param rnn_dim: number of units of each RNN cell.
        :param rnn_num: number of stacked cells (stacking happens if > 1).
        :param drop_out: drop-out value, stored as ``self.drop_out_v``.
        :param rad_dim: embedding size of the radical layer.
        :param emb: initial weights handed to the character embedding layer.
        :param ng_embs: per-n-gram initial weights; when given, its length
            must equal ``len(self.ngram)``.
        :param pixels: glyph pixel rows; required when ``self.graphic``.
        :param con_width: convolution width; required when ``self.graphic``.
        :param filters: number of filters; required when ``self.graphic``.
        :param pooling_size: pooling size; required when ``self.graphic``.
        :raises AssertionError: if the graphic arguments are missing in
            graphic mode, if ``ng_embs`` has the wrong length, or if the
            per-bucket lists end up with inconsistent lengths.
        """
        if trained_model is not None:
            param_dic = {
                'nums_chars': self.nums_chars,
                'nums_tags': self.nums_tags,
                'tag_scheme': self.tag_scheme,
                'graphic': self.graphic,
                'pic_size': self.pic_size,
                'word_vec': self.word_vec,
                'radical': self.radical,
                'crf': self.crf,
                'emb_dim': emb_dim,
                'gru': gru,
                'rnn_dim': rnn_dim,
                'rnn_num': rnn_num,
                'drop_out': drop_out,
                'filter_size': con_width,
                'filters': filters,
                'pooling_size': pooling_size,
                'font': self.font,
                'buckets_char': self.buckets_char,
                'ngram': self.ngram,
            }
            # Pickle streams are binary data; `with` guarantees the handle is
            # closed even if pickling raises.
            with open(trained_model, 'wb') as f_model:
                pickle.dump(param_dic, f_model)

        # define shared weights and variables

        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.drop_out = dr
        self.drop_out_v = drop_out

        if self.word_vec:
            self.emb_layer = EmbeddingLayer(self.nums_chars + 500, emb_dim, weights=emb, name='emb_layer')

        if self.radical:
            self.radical_layer = EmbeddingLayer(216, rad_dim, name='radical_layer')

        if self.ngram is not None:
            if ng_embs is not None:
                assert len(ng_embs) == len(self.ngram)
            else:
                ng_embs = [None for _ in range(len(self.ngram))]
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(EmbeddingLayer(n_gram + 1000 * (i + 2), emb_dim, weights=ng_embs[i], name= str(i + 2) + 'gram_layer'))

        wrapper_conv_1, wrapper_mp_1, wrapper_conv_2, wrapper_mp_2, wrapper_dense, wrapper_dr = None, None, None, None, None, None

        if self.graphic:
            self.input_p = []
            assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

            self.pixels = pixels
            # Each pixel row is treated as a flattened square image.
            pixel_dim = int(math.sqrt(len(pixels[0])))

            wrapper_conv_1 = TimeDistributed(Convolution(con_width, 1, filters, name='conv_1'), name='wrapper_c1')
            wrapper_mp_1 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_1'), name='wrapper_p1')

            p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

            wrapper_conv_2 = TimeDistributed(Convolution(con_width, filters, filters, name='conv_2'), name='wrapper_c2')
            wrapper_mp_2 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_2'), name='wrapper_p2')

            p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

            wrapper_dense = TimeDistributed(HiddenLayer(p_size_2 * p_size_2 * filters, 100, activation='tanh', name='conv_dense'), name='wrapper_3')
            wrapper_dr = TimeDistributed(DropoutLayer(self.drop_out), name='wrapper_dr')

        with tf.variable_scope('BiRNN'):

            if gru:
                fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            else:
                fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)

            if rnn_num > 1:
                fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell]*rnn_num, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell]*rnn_num, state_is_tuple=True)

        # The output layer takes rnn_dim * 2 inputs and emits one score per
        # tag (self.nums_tags[0]).
        output_wrapper = TimeDistributed(HiddenLayer(rnn_dim * 2, self.nums_tags[0], activation='linear', name='hidden'), name='wrapper')

        # define model for each bucket
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                # From the second bucket on, variables are reused.
                scope.reuse_variables()
            t1 = time()

            input_v = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))

            # One list of input placeholders per bucket; the optional radical
            # and n-gram placeholders are appended to the same list below.
            self.input_v.append([input_v])

            emb_set = []

            if self.word_vec:
                word_out = self.emb_layer(input_v)
                emb_set.append(word_out)

            if self.radical:
                input_r = tf.placeholder(tf.int32, [None, bucket], name='input_r' + str(bucket))

                self.input_v[-1].append(input_r)
                radical_out = self.radical_layer(input_r)
                emb_set.append(radical_out)

            if self.ngram is not None:
                for i in range(len(self.ngram)):
                    input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                    self.input_v[-1].append(input_g)
                    gram_out = self.gram_layers[i](input_g)
                    emb_set.append(gram_out)

            if self.graphic:
                input_p = tf.placeholder(tf.float32, [None, bucket, pixel_dim*pixel_dim])
                self.input_p.append(input_p)

                # Restore the square image with one channel, then split
                # along the sentence axis for the time-distributed wrappers.
                pix_out = tf.reshape(input_p, [-1, bucket, pixel_dim, pixel_dim, 1])
                pix_out = tf.unpack(pix_out, axis=1)

                conv_out_1 = wrapper_conv_1(pix_out)
                pooling_out_1 = wrapper_mp_1(conv_out_1)

                conv_out_2 = wrapper_conv_2(pooling_out_1)
                pooling_out_2 = wrapper_mp_2(conv_out_2)

                assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]
                pooling_out = tf.reshape(pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])
                pooling_out = tf.unpack(pooling_out, axis=1)

                graphic_out = wrapper_dense(pooling_out)
                graphic_out = wrapper_dr(graphic_out)

                emb_set.append(graphic_out)


            if len(emb_set) > 1:
                # Several feature sources: concatenate along axis 2.
                emb_out = tf.concat(2, emb_set)
                emb_out = tf.unpack(emb_out)

            else:
                emb_out = emb_set[0]

            rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr, name='BiLSTM' + str(bucket), scope='BiRNN')(emb_out, input_v)

            output = output_wrapper(rnn_out)

            output_c = tf.pack(output, axis=1)

            self.output.append([output_c])

            self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])

            self.bucket_dit[bucket] = idx

            # Parenthesised single-argument form prints the same text under
            # both Python 2 and Python 3.
            print('Bucket %d, %f seconds' % (idx + 1, time() - t1))

        assert len(self.input_v) == len(self.output) and len(self.output) == len(self.output_) and len(self.output) == len(self.counts)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
Example #13
0
    def __init__(self, config):
        """Build a two-input fully connected network with a 2-way softmax.

        Two 4-D float inputs (``x1``, ``x2``) are optionally passed through
        a ``DataLayer`` each, flattened, concatenated and fed through two
        fully connected + dropout stages followed by a softmax layer.

        :param config: mapping read for the keys ``'batch_size'``,
            ``'use_data_layer'``, ``'lib_conv'`` and, when the data layer is
            enabled, ``'rand_crop'``.

        Attributes set: ``config``, ``cost``, ``errors``, ``errors_top_5``,
        ``x1``, ``x2``, ``y``, ``rand1``, ``rand2``, ``layers``, ``params``,
        ``weight_types`` and ``batch_size``.
        """

        self.config = config

        batch_size = config['batch_size']
        flag_datalayer = config['use_data_layer']
        # Not used by this network; still read so that a config lacking the
        # key fails here exactly as it did before.
        lib_conv = config['lib_conv']

        layers = []
        params = []
        weight_types = []

        # ##################### BUILD NETWORK ##########################
        # allocate symbolic variables for the data
        # 'rand1'/'rand2' are random arrays used for random cropping/mirroring
        # of the first and second input respectively
        x1 = T.ftensor4('x1')
        x2 = T.ftensor4('x2')
        y = T.lvector('y')  # The ground truth to be compared with will go here
        rand1 = T.fvector('rand1')
        rand2 = T.fvector('rand2')

        print('... building the model')

        if flag_datalayer:
            # Each input gets its own random vector.  The previous code
            # passed an undefined name `rand` to both layers.
            data_layerA = DataLayer(input=x1,
                                    image_shape=(3, 256, 256, batch_size),
                                    cropsize=227,
                                    rand=rand1,
                                    mirror=True,
                                    flag_rand=config['rand_crop'])
            data_layerB = DataLayer(input=x2,
                                    image_shape=(3, 256, 256, batch_size),
                                    cropsize=227,
                                    rand=rand2,
                                    mirror=True,
                                    flag_rand=config['rand_crop'])

            layer1A_input = data_layerA.output
            layer1B_input = data_layerB.output
        else:
            layer1A_input = x1
            layer1B_input = x2

        # Move the last axis to the front, flatten each input to 2-D and
        # join the two inputs side by side.
        flat_a = T.flatten(layer1A_input.dimshuffle(3, 0, 1, 2), 2)
        flat_b = T.flatten(layer1B_input.dimshuffle(3, 0, 1, 2), 2)
        fc_layer2_input = T.concatenate((flat_a, flat_b), axis=1)

        # 154587 == 3 * 227 * 227 (presumably one cropped image per input).
        fc_layer2 = FCLayer(input=fc_layer2_input, n_in=154587 * 2, n_out=4096)
        layers.append(fc_layer2)
        params += fc_layer2.params
        weight_types += fc_layer2.weight_type

        dropout_layer2 = DropoutLayer(fc_layer2.output, n_in=4096, n_out=4096)

        fc_layer3 = FCLayer(input=dropout_layer2.output, n_in=4096, n_out=4096)
        layers.append(fc_layer3)
        params += fc_layer3.params
        weight_types += fc_layer3.weight_type

        dropout_layer3 = DropoutLayer(fc_layer3.output, n_in=4096, n_out=4096)

        # Final softmax layer
        softmax_layer3 = SoftmaxLayer(
            input=dropout_layer3.output, n_in=4096,
            n_out=2)  # Only a single binary output is required!
        layers.append(softmax_layer3)
        params += softmax_layer3.params
        weight_types += softmax_layer3.weight_type

        # #################### NETWORK BUILT #######################

        self.cost = softmax_layer3.negative_log_likelihood(y)
        self.errors = softmax_layer3.errors(y)
        self.errors_top_5 = softmax_layer3.errors_top_x(y, 5)
        self.x1 = x1
        self.x2 = x2
        self.y = y
        self.rand1 = rand1
        self.rand2 = rand2
        self.layers = layers
        self.params = params
        self.weight_types = weight_types
        self.batch_size = batch_size
Example #14
0
    def __init__(self,
                 input_size=(1, 28, 28),
                 activation_type=ActivationType.ReLU,
                 hidden_size=50,
                 output_size=10):
        """Create the layer stack: six convolution layers and two hidden layers.

        Layers 1-6 are convolution layers, each followed by an activation
        layer; a 2x2 pooling layer with stride 2 follows layers 2, 4 and 6.
        Layers 7 and 8 are hidden layers, each followed by a dropout layer
        (ratio 0.5); only layer 7 gets an activation layer.  The weights and
        biases of all eight layers are stored in ``self.params`` under the
        keys ``'W1'..'W8'`` and ``'b1'..'b8'``.

        :param input_size: input shape; only element 0 (the channel count)
            is read here.
        :param activation_type: activation passed to every weight layer and
            to ``activationLayerFromType``.
        :param hidden_size: number of output nodes of layer 7 (and input
            nodes of layer 8).
        :param output_size: number of output nodes of layer 8.
        """

        # basic parameters
        self.__activation_type__ = activation_type
        self.params = {}
        self.layers = []

        # One entry per weight layer, in network order.
        layer_specs = [
            {'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
            {'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
            {'filter_num': 32, 'filter_size': 3, 'pad': 1, 'stride': 1},
            {'filter_num': 32, 'filter_size': 3, 'pad': 2, 'stride': 1},
            {'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
            {'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
            {'pre_node_num': 64 * 4 * 4, 'next_node_num': hidden_size},
            {'pre_node_num': hidden_size, 'next_node_num': output_size},
        ]

        # set layers
        channel_num = input_size[0]
        for index, spec in enumerate(layer_specs, 1):

            if index <= 6:
                # layer 1 ~ 6 Convolution Layer & activation Layer
                weight_layer = ConvolutionLayer(
                    index=index,
                    activation_type=self.__activation_type__,
                    filter_num=spec['filter_num'],
                    channel_num=channel_num,
                    filter_size=spec['filter_size'],
                    stride=spec['stride'],
                    padding=spec['pad'])
                self.layers.append(weight_layer)
                self.layers.append(
                    self.activationLayerFromType(activation_type, index=index))
                # layer 2, 4, 6 Pooling Layer
                if index in (2, 4, 6):
                    self.layers.append(
                        PoolingLayer(index=index, pool_h=2, pool_w=2,
                                     stride=2))
                # the next convolution consumes this layer's filters
                channel_num = weight_layer.filter_num
            else:
                # layer 7, 8 Hidden Layer (& activation for 7) & Dropout Layer
                weight_layer = HiddenLayer(
                    index=index,
                    activation_type=self.__activation_type__,
                    pre_node_num=spec['pre_node_num'],
                    next_node_num=spec['next_node_num'])
                self.layers.append(weight_layer)
                if index == 7:
                    self.layers.append(
                        self.activationLayerFromType(activation_type,
                                                     index=index))
                self.layers.append(DropoutLayer(index=index,
                                                dropout_ratio=0.5))

            # set W,b
            w_key = 'W{}'.format(index)
            b_key = 'b{}'.format(index)
            self.params[w_key] = weight_layer.W
            self.params[b_key] = weight_layer.b

            print('layer {} created'.format(index))

            if Config.IS_DEBUG:
                print('W{} shape : {}'.format(index, self.params[w_key].shape))
                # Report the bias shape; this line used to print the weight
                # shape a second time.
                print('b{} shape : {}'.format(index, self.params[b_key].shape))

        # output created layer structures
        for layer in self.layers:
            print(layer.name)

        # keep weight required layer indexes
        self.weight_layer_indexes = [
            position for position, layer in enumerate(self.layers)
            if isinstance(layer, (ConvolutionLayer, HiddenLayer))
        ]
        self.debug('weight_layer_indexes {}'.format(self.weight_layer_indexes))

        print('{} layers created'.format(len(self.layers)))
        # last layer SoftmaxWithLoss Layer
        self.lastLayer = SoftmaxWithLossLayer()
Example #15
0
    def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, drop_out=0.5, rad_dim=30, emb=None,
                   ngram_embedding=None, pixels=None, con_width=None, filters=None, pooling_size=None):
        """Build the tagging and language-model graphs for every bucket.

        When ``trained_model`` is given, the hyper-parameters are pickled
        first (one file per metric when ``self.metric == 'All'``).  Shared
        layers are then created once and, for each sentence length in
        ``self.buckets_char``, placeholders are created and wired through a
        tagging BiLSTM and a language-model BiLSTM.  Results are appended to
        ``self.input_v``, ``self.input_p`` (graphic mode only),
        ``self.output``, ``self.output_``, ``self.lm_predictions`` and
        ``self.lm_groundtruthes``.

        :param trained_model: path where the model hyper-parameters are
            stored, or None to skip saving.
        :param scope: variable scope object; ``reuse_variables()`` is called
            on it when the second bucket is reached.
        :param emb_dim: embedding size for the character and n-gram layers.
        :param gru: if truthy use GRU cells, otherwise LSTM cells.
        :param rnn_dim: number of units of each RNN cell.
        :param rnn_num: number of stacked cells (stacking happens if > 1).
        :param drop_out: drop-out value, stored as ``self.drop_out_v``.
        :param rad_dim: embedding size of the radical layer.
        :param emb: pre-trained character embedding weights (may be None).
        :param ngram_embedding: pre-trained n-gram embeddings, one entry per
            element of ``self.ngram`` (may be None).
        :param pixels: glyph pixel rows; required when ``self.graphic``.
        :param con_width: convolution width; required when ``self.graphic``.
        :param filters: number of filters; required when ``self.graphic``.
        :param pooling_size: pooling size; required when ``self.graphic``.
        :return: None
        :raises AssertionError: if the graphic arguments are missing in
            graphic mode, if ``ngram_embedding`` has the wrong length, or if
            the per-bucket lists end up with inconsistent lengths.
        """
        # trained_model: path where the model is stored
        if trained_model is not None:
            param_dic = {'nums_chars': self.nums_chars, 'nums_tags': self.nums_tags, 'tag_scheme': self.tag_scheme,
                         'graphic': self.graphic, 'pic_size': self.pic_size, 'word_vec': self.word_vec,
                         'radical': self.radical, 'crf': self.crf, 'emb_dim': emb_dim, 'gru': gru, 'rnn_dim': rnn_dim,
                         'rnn_num': rnn_num, 'drop_out': drop_out, 'filter_size': con_width, 'filters': filters,
                         'pooling_size': pooling_size, 'font': self.font, 'buckets_char': self.buckets_char,
                         'ngram': self.ngram}
            print("RNN dimension is %d" % rnn_dim)
            print("RNN number is %d" % rnn_num)
            print("Character embedding size is %d" % emb_dim)
            print("Ngram embedding dimension is %d" % emb_dim)
            # Save the model hyper-parameters.
            if self.metric == 'All':
                # Split off the file name so the metric can be prefixed to
                # it.  rfind() returns -1 when there is no '/', which makes
                # pindex 0 and keeps a bare file name working (rindex()
                # raised ValueError in that case).
                pindex = trained_model.rfind('/') + 1
                model_paths = [trained_model[:pindex] + m + '_' + trained_model[pindex:]
                               for m in self.all_metrics]
            else:
                model_paths = [trained_model]
            for model_path in model_paths:
                # Pickle streams are binary data; `with` guarantees the
                # handle is closed even if pickling raises.
                with open(model_path, 'wb') as f_model:
                    pickle.dump(param_dic, f_model)

        # define shared weights and variables

        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.drop_out = dr
        self.drop_out_v = drop_out

        # Character embedding layer.
        # Open question from the original author: why is 500 added to the
        # number of characters?
        # emb_dim is the embedding size of each character.
        # weights carries the pre-trained character embeddings.
        if self.word_vec:
            self.emb_layer = EmbeddingLayer(self.nums_chars + 500, emb_dim, weights=emb, name='emb_layer')

        # Radical embeddings.
        # According to the Kangxi Dictionary there are 214 radicals.  Only
        # the radicals of common Chinese characters are used; uncommon
        # characters and non-Chinese characters are mapped to two extra
        # special symbols, giving 216 entries in total.
        if self.radical:
            self.radical_layer = EmbeddingLayer(216, rad_dim, name='radical_layer')

        if self.ngram is not None:
            if ngram_embedding is not None:
                assert len(ngram_embedding) == len(self.ngram)
            else:
                ngram_embedding = [None for _ in range(len(self.ngram))]
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(EmbeddingLayer(n_gram + 1000 * (i + 2), emb_dim, weights=ngram_embedding[i],
                                                       name=str(i + 2) + 'gram_layer'))

        wrapper_conv_1, wrapper_mp_1, wrapper_conv_2, wrapper_mp_2, wrapper_dense, wrapper_dr = \
            None, None, None, None, None, None

        if self.graphic:
            # Glyph image information is used, which requires a CNN.
            self.input_p = []
            assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

            self.pixels = pixels
            # Each pixel row is treated as a flattened square image.
            pixel_dim = int(math.sqrt(len(pixels[0])))

            wrapper_conv_1 = TimeDistributed(Convolution(con_width, 1, filters, name='conv_1'), name='wrapper_c1')
            wrapper_mp_1 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_1'), name='wrapper_p1')

            p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

            wrapper_conv_2 = TimeDistributed(Convolution(con_width, filters, filters, name='conv_2'), name='wrapper_c2')
            wrapper_mp_2 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_2'), name='wrapper_p2')

            p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

            wrapper_dense = TimeDistributed(
                HiddenLayer(p_size_2 * p_size_2 * filters, 100, activation='tanh', name='conv_dense'), name='wrapper_3')
            wrapper_dr = TimeDistributed(DropoutLayer(self.drop_out), name='wrapper_dr')

        with tf.variable_scope('BiRNN'):

            if gru:
                fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            else:
                fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)

            if rnn_num > 1:
                fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell] * rnn_num, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell] * rnn_num, state_is_tuple=True)

        # Hidden layer: its input is the forward RNN output together with
        # the backward RNN output, hence the input size rnn_dim * 2.
        # The output size is the number of tags.
        output_wrapper = TimeDistributed(
            HiddenLayer(rnn_dim * 2, self.nums_tags[0], activation='linear', name='hidden'),
            name='wrapper')

        # define model for each bucket
        # Sentence length differs between buckets, so a separate model is
        # defined for each one.
        # bucket: the sentence length of that bucket
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                # Per the original note, scope comes from the caller as
                # tf.variable_scope("tagger", reuse=None, initializer=...).
                # Reuse only has to be switched on once; it stays on for all
                # later buckets.
                scope.reuse_variables()
            t1 = time()

            # Input sentences: int32 placeholder,
            # shape = (batch_size, sentence length)
            input_sentences = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))

            self.input_v.append([input_sentences])

            emb_set = []

            if self.word_vec:
                # Look up the character embedding for every input position.
                # word_out: presumably (batch_size, sentence length,
                # embedding size) - TODO confirm against EmbeddingLayer
                word_out = self.emb_layer(input_sentences)
                emb_set.append(word_out)

            if self.radical:
                # Radical information, shape = (batch_size, sentence length)
                input_radicals = tf.placeholder(tf.int32, [None, bucket], name='input_r' + str(bucket))

                self.input_v[-1].append(input_radicals)
                radical_out = self.radical_layer(input_radicals)
                emb_set.append(radical_out)

            if self.ngram is not None:
                for i in range(len(self.ngram)):
                    input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                    self.input_v[-1].append(input_g)
                    gram_out = self.gram_layers[i](input_g)
                    emb_set.append(gram_out)

            if self.graphic:
                input_p = tf.placeholder(tf.float32, [None, bucket, pixel_dim * pixel_dim])
                self.input_p.append(input_p)

                pix_out = tf.reshape(input_p, [-1, bucket, pixel_dim, pixel_dim, 1])

                conv_out_1 = wrapper_conv_1(pix_out)
                pooling_out_1 = wrapper_mp_1(conv_out_1)

                conv_out_2 = wrapper_conv_2(pooling_out_1)
                pooling_out_2 = wrapper_mp_2(conv_out_2)

                assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]
                pooling_out = tf.reshape(pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])
                pooling_out = tf.unstack(pooling_out, axis=1)

                graphic_out = wrapper_dense(pooling_out)
                graphic_out = wrapper_dr(graphic_out)

                emb_set.append(graphic_out)

            if self.window_size > 1:

                # Pad both ends of the sentence axis so every position has a
                # full window.  Integer arithmetic keeps the bounds identical
                # under Python 2 and Python 3 division rules.
                padding_size = self.window_size // 2
                word_padded = tf.pad(word_out, [[0, 0], [padding_size, padding_size], [0, 0]], 'CONSTANT')

                # One weight matrix per window width q, with a shared bias.
                Ws = []
                for q in range(1, self.window_size + 1):
                    Ws.append(tf.get_variable("W_%d" % q, shape=[q * emb_dim, self.filters_number]))
                b = tf.get_variable("b", shape=[self.filters_number])

                z = [None for _ in range(0, bucket)]

                for q in range(1, self.window_size + 1):
                    for i in range(padding_size, bucket + padding_size):
                        # floor((q - 1) / 2) and ceil((q + 1) / 2) in exact
                        # integer form; high - low == q, matching the
                        # q * emb_dim rows of Ws[q - 1].
                        low = i - (q - 1) // 2
                        high = i + (q + 2) // 2
                        x = word_padded[:, low, :]
                        for j in range(low + 1, high):
                            x = tf.concat(values=[x, word_padded[:, j, :]], axis=1)
                        z_iq = tf.tanh(tf.nn.xw_plus_b(x, Ws[q - 1], b))
                        if z[i - padding_size] is None:
                            z[i - padding_size] = z_iq
                        else:
                            z[i - padding_size] = tf.concat([z[i - padding_size], z_iq], axis=1)

                z = tf.stack(z, axis=1)
                # Keep the emb_dim largest responses per position.
                values, indices = tf.nn.top_k(z, sorted=False, k=emb_dim)

                # highway layer
                X = tf.unstack(word_out, axis=1)
                Conv_X = tf.unstack(values, axis=1)
                X_hat = []
                W_t = tf.get_variable("W_t", shape=[emb_dim, emb_dim])
                b_t = tf.get_variable("b_t", shape=[emb_dim])
                for x, conv_x in zip(X, Conv_X):
                    # T_x gates between the convolved and the raw embedding.
                    T_x = tf.sigmoid(tf.nn.xw_plus_b(x, W_t, b_t))
                    X_hat.append(tf.multiply(conv_x, T_x) + tf.multiply(x, 1 - T_x))
                X_hat = tf.stack(X_hat, axis=1)
                emb_set.append(X_hat)
            if len(emb_set) > 1:
                # The different embeddings (characters, radicals, n-grams,
                # glyph images, ...) are simply concatenated.
                emb_out = tf.concat(axis=2, values=emb_set)

            else:
                emb_out = emb_set[0]

            # Per the original note, rnn_out is the forward and backward RNN
            # outputs concatenated.
            rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr,
                             name='BiLSTM' + str(bucket), scope='BiRNN')(self.highway(emb_out, "tag"), input_sentences)

            # Apply the fully connected layer (Wx + b) to get the final output.
            output = output_wrapper(rnn_out)
            # Open question from the original author: why [output] rather
            # than output?
            self.output.append([output])

            self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])

            self.bucket_dit[bucket] = idx

            # language model
            lm_rnn_dim = rnn_dim
            with tf.variable_scope('LM-BiRNN'):
                if gru:
                    lm_fw_rnn_cell = tf.nn.rnn_cell.GRUCell(lm_rnn_dim)
                    lm_bw_rnn_cell = tf.nn.rnn_cell.GRUCell(lm_rnn_dim)
                else:
                    lm_fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(lm_rnn_dim, state_is_tuple=True)
                    lm_bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(lm_rnn_dim, state_is_tuple=True)

                if rnn_num > 1:
                    lm_fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([lm_fw_rnn_cell] * rnn_num, state_is_tuple=True)
                    lm_bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([lm_bw_rnn_cell] * rnn_num, state_is_tuple=True)
            # The language model only sees the first embedding in emb_set.
            lm_rnn_output = BiLSTM(lm_rnn_dim, fw_cell=lm_fw_rnn_cell,
                                   bw_cell=lm_bw_rnn_cell, p=dr,
                                   name='LM-BiLSTM' + str(bucket),
                                   scope='LM-BiRNN')(self.highway(emb_set[0]), input_sentences)

            lm_output_wrapper = TimeDistributed(
                HiddenLayer(lm_rnn_dim * 2, self.nums_chars + 2, activation='linear', name='lm_hidden'),
                name='lm_wrapper')
            lm_final_output = lm_output_wrapper(lm_rnn_output)
            self.lm_predictions.append([lm_final_output])
            self.lm_groundtruthes.append([tf.placeholder(tf.int32, [None, bucket], name='lm_targets' + str(bucket))])

            print('Bucket %d, %f seconds' % (idx + 1, time() - t1))

        assert \
            len(self.input_v) == len(self.output) and \
            len(self.output) == len(self.output_) and \
            len(self.lm_predictions) == len(self.lm_groundtruthes) and \
            len(self.output) == len(self.counts)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
Example #16
0
    def image_repr(self, x, rand, config):
        """Build the five conv/pool stages and two maxout layers over ``x``.

        ``config`` must provide 'batch_size', 'use_data_layer' and
        'lib_conv', plus 'rand_crop' when the data layer is enabled.

        Returns ``(fc_layer7, layers, params, weight_types)``: the last
        maxout layer, all parameterised layers in construction order, their
        concatenated ``params`` and the weight-type tags renumbered as
        'W0', 'W1', ... and 'b0', 'b1', ...  The dropout layer between the
        two maxout layers is not added to ``layers``.
        """
        batch_size = config['batch_size']
        use_data_layer = config['use_data_layer']
        lib_conv = config['lib_conv']

        layers, params, weight_types = [], [], []

        def track(layer):
            # Record a parameterised layer in all three bookkeeping lists.
            layers.append(layer)
            params.extend(layer.params)
            weight_types.extend(layer.weight_type)
            return layer

        # Optional cropping / mirroring stage in front of the first conv.
        if use_data_layer:
            stage_input = DataLayer(input=x,
                                    image_shape=(3, 256, 256, batch_size),
                                    cropsize=227,
                                    rand=rand,
                                    mirror=True,
                                    flag_rand=config['rand_crop']).output
        else:
            stage_input = x

        # One entry per conv/pool stage, in the order they are chained.
        conv_specs = [
            dict(image_shape=(3, 227, 227, batch_size),
                 filter_shape=(3, 11, 11, 96),
                 convstride=4, padsize=0, group=1,
                 poolsize=3, poolstride=2, bias_init=0.0, lrn=True),
            dict(image_shape=(96, 27, 27, batch_size),
                 filter_shape=(96, 5, 5, 256),
                 convstride=1, padsize=2, group=2,
                 poolsize=3, poolstride=2, bias_init=0.1, lrn=True),
            dict(image_shape=(256, 13, 13, batch_size),
                 filter_shape=(256, 3, 3, 384),
                 convstride=1, padsize=1, group=1,
                 poolsize=1, poolstride=0, bias_init=0.0, lrn=False),
            dict(image_shape=(384, 13, 13, batch_size),
                 filter_shape=(384, 3, 3, 384),
                 convstride=1, padsize=1, group=2,
                 poolsize=1, poolstride=0, bias_init=0.1, lrn=False),
            dict(image_shape=(384, 13, 13, batch_size),
                 filter_shape=(384, 3, 3, 256),
                 convstride=1, padsize=1, group=2,
                 poolsize=3, poolstride=2, bias_init=0.0, lrn=False),
        ]
        for spec in conv_specs:
            conv_stage = track(
                ConvPoolLayer(input=stage_input, lib_conv=lib_conv, **spec))
            stage_input = conv_stage.output

        # Move the last axis to the front, then flatten to two dimensions.
        flat_features = T.flatten(stage_input.dimshuffle(3, 0, 1, 2), 2)
        fc_layer6 = track(
            MaxoutLayer(input=flat_features, n_in=9216, n_out=4096))

        dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096, n_out=4096)

        fc_layer7 = track(
            MaxoutLayer(input=dropout_layer6.output, n_in=4096, n_out=4096))

        # Give every weight/bias tag a running index so that weights can be
        # shared; tags other than 'W' and 'b' are left out of the result.
        renamed_types = []
        n_weights = 0
        n_biases = 0
        for tag in weight_types:
            if tag == 'W':
                renamed_types.append('W%d' % n_weights)
                n_weights += 1
            elif tag == 'b':
                renamed_types.append('b%d' % n_biases)
                n_biases += 1

        return fc_layer7, layers, params, renamed_types
Example #17
0
    def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
        """Assemble three conv/pool stages, a dense layer and a GRU, then compile.

        ``params`` must provide 'lr', 'n_hidden', 'n_output', 'batch_size'
        and 'seq_length'.  ``rng`` is handed to every layer and weight
        initialiser.  ``cost_function`` is forwarded to ``get_err_fn`` and
        ``optimizer`` is called as ``optimizer(cost, params, lr=lr)``.

        After construction the instance exposes ``train(X, Y, is_train)``,
        ``predictions(X, is_train)``, ``output``, ``params`` and ``n_param``.
        """
        lr = params["lr"]
        n_lstm = params['n_hidden']
        n_out = params['n_output']
        batch_size = params["batch_size"]
        sequence_length = params["seq_length"]

        # Symbolic mini-batch inputs: each is a batch of vector sequences.
        X = T.tensor3()
        Y = T.tensor3()
        # Pseudo boolean switching between training and prediction.
        is_train = T.iscalar('is_train')

        # Settings shared by all convolution stages.
        subsample = (1, 1)
        p_1 = 0.5
        border_mode = "valid"
        cnn_batch_size = batch_size * sequence_length
        pool_size = (2, 2)

        def conv_then_pool(stage_input, filter_shape, in_shape):
            # One ReLU convolution followed by one pooling layer.
            conv = ConvLayer(rng,
                             stage_input,
                             filter_shape,
                             in_shape,
                             border_mode,
                             subsample,
                             activation=nn.relu)
            pool = PoolLayer(conv.output,
                             pool_size=pool_size,
                             input_shape=conv.output_shape)
            return conv, pool

        # Stage 1: conv + pool + dropout.
        # The shape is (samples, channels, rows, cols); every time step of
        # every sequence becomes one CNN sample.
        first_shape = (cnn_batch_size, 1, 120, 60)
        frames = X.reshape(first_shape)
        c1, p1 = conv_then_pool(frames, (64, 1, 9, 9), first_shape)
        dl1 = DropoutLayer(rng, input=p1.output, prob=p_1)
        retain_prob = 1. - p_1
        # When is_train is zero the pooled output is rescaled instead of
        # being passed through the dropout layer.
        test_output = p1.output * retain_prob
        d1_output = T.switch(T.neq(is_train, 0), dl1.output, test_output)

        # Stage 2: conv + pool.
        c2, p2 = conv_then_pool(d1_output,
                                (128, p1.output_shape[1], 3, 3),
                                p1.output_shape)

        # Stage 3: conv + pool.
        c3, p3 = conv_then_pool(p2.output,
                                (128, p2.output_shape[1], 3, 3),
                                p2.output_shape)

        # Stage 4: dense layer over the flattened feature maps.
        flat_dim = reduce(lambda a, b: a * b, p3.output_shape[1:])
        h1 = HiddenLayer(rng,
                         p3.output.flatten(2),
                         flat_dim,
                         1024,
                         activation=nn.relu)
        n_in = 1024
        rnn_input = h1.output.reshape((batch_size, sequence_length, n_in))

        # Stage 5: GRU parameters (reset gate, update gate, candidate, readout).
        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out

        def glorot(shape, name):
            return init_weight(shape, rng=rng, name=name, sample='glorot')

        def zero_bias(size):
            return init_bias(size, rng=rng, sample='zero')

        in_to_hidden = (self.n_in, self.n_lstm)
        hidden_to_hidden = (self.n_lstm, self.n_lstm)

        # NOTE(review): the reset-gate input weight is registered under the
        # name 'W_xi' rather than 'W_xr'; kept as-is, confirm whether anything
        # looks parameters up by name before renaming.
        self.W_xr = glorot(in_to_hidden, 'W_xi')
        self.W_hr = glorot(hidden_to_hidden, 'W_hr')
        self.b_r = zero_bias(self.n_lstm)
        self.W_xz = glorot(in_to_hidden, 'W_xz')
        self.W_hz = glorot(hidden_to_hidden, 'W_hz')
        self.b_z = zero_bias(self.n_lstm)
        self.W_xh = glorot(in_to_hidden, 'W_xh')
        self.W_hh = glorot(hidden_to_hidden, 'W_hh')
        self.b_h = zero_bias(self.n_lstm)
        self.W_hy = glorot((self.n_lstm, self.n_out), 'W_hy')
        self.b_y = zero_bias(self.n_out)

        self.params = [
            self.W_xr, self.W_hr, self.b_r,
            self.W_xz, self.W_hz, self.b_z,
            self.W_xh, self.W_hh, self.b_h,
            self.W_hy, self.b_y,
        ]

        def gru_step(x_t, h_prev):
            # One recurrence step: returns the new hidden state and the output.
            reset = T.nnet.sigmoid(
                T.dot(x_t, self.W_xr) + T.dot(h_prev, self.W_hr) + self.b_r)
            update = T.nnet.sigmoid(
                T.dot(x_t, self.W_xz) + T.dot(h_prev, self.W_hz) + self.b_z)
            candidate = T.tanh(
                T.dot(x_t, self.W_xh) + T.dot((reset * h_prev), self.W_hh) +
                self.b_h)
            h_new = update * candidate + (1 - update) * h_prev
            y_t = T.dot(h_new, self.W_hy) + self.b_y
            return [h_new, y_t]

        # Initial hidden state: all zeros, one row per batch element.
        h0 = shared(np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype))

        # scan iterates over the leading axis, so swap the first two axes:
        # (batch_size, sequence_length, n_in) -> (sequence_length, batch_size, n_in)
        time_major_input = rnn_input.dimshuffle(1, 0, 2)
        [h_vals, y_vals], _ = theano.scan(fn=gru_step,
                                          sequences=time_major_input,
                                          outputs_info=[h0, None])

        # Swap the first two axes back so the batch axis leads again.
        self.output = y_vals.dimshuffle(1, 0, 2)

        # Trainable parameters: conv stages, dense layer, then the GRU.
        self.params = c1.params + c2.params + c3.params + h1.params + self.params

        cost = get_err_fn(self, cost_function, Y)
        _optimizer = optimizer(cost, self.params, lr=lr)
        self.train = theano.function(inputs=[X, Y, is_train],
                                     outputs=cost,
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X, is_train],
                                           outputs=self.output,
                                           allow_input_downcast=True)
        self.n_param = count_params(self.params)
Example #18
0
    def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, drop_out=0.5, emb=None):
        """Build one BiRNN tagging sub-graph per character bucket.

        Args:
            trained_model: path the hyper-parameter dict is pickled to, or
                None to skip writing it.
            scope: variable scope; ``reuse_variables()`` is called on it once
                the second bucket is reached.
            emb_dim: embedding size of the character and n-gram layers.
            gru: truthy to use GRU cells, otherwise LSTM cells.
            rnn_dim: hidden size of each RNN direction.
            rnn_num: number of stacked RNN cells (stacked only when > 1).
            drop_out: value stored in ``self.drop_out_v``; the graph itself
                is wired to the ``drop_out_holder`` placeholder.
            emb: optional initial weights for the character embedding layer.

        Side effects: appends to ``self.input_v``, ``self.output``,
        ``self.output_`` and ``self.gram_layers``, fills ``self.bucket_dit``
        and sets ``self.drop_out``, ``self.drop_out_v``, ``self.emb_layer``,
        ``self.params`` and ``self.saver``.

        NOTE(review): ``tf.concat(2, ...)``, ``tf.unpack`` and ``tf.pack``
        suggest this targets a pre-1.0 TensorFlow API - confirm before
        upgrading.
        """
        if trained_model is not None:
            param_dic = {'nums_chars': self.nums_chars, 'nums_tags': self.nums_tags, 'crf': self.crf, 'emb_dim': emb_dim,
                         'gru': gru, 'rnn_dim': rnn_dim, 'rnn_num': rnn_num, 'drop_out': drop_out, 'buckets_char': self.buckets_char,
                         'ngram': self.ngram, 'is_space': self.is_space, 'sent_seg': self.sent_seg, 'emb_path': self.emb_path,
                         'tag_scheme': self.tag_scheme}
            # The context manager closes the file even if pickling raises.
            with open(trained_model, 'w') as f_model:
                pickle.dump(param_dic, f_model)

        # define shared weights and variables

        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.drop_out = dr
        self.drop_out_v = drop_out

        self.emb_layer = EmbeddingLayer(self.nums_chars + 20, emb_dim, weights=emb, name='emb_layer')

        if self.ngram is not None:
            # No pre-trained n-gram weights are supplied here.
            ng_embs = [None] * len(self.ngram)
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(EmbeddingLayer(n_gram + 5000 * (i + 2), emb_dim, weights=ng_embs[i], name= str(i + 2) + 'gram_layer'))

        with tf.variable_scope('BiRNN'):

            if gru:
                fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            else:
                fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)

            if rnn_num > 1:
                fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell]*rnn_num, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell]*rnn_num, state_is_tuple=True)

        # Linear projection from the concatenated forward/backward states
        # (rnn_dim * 2) to tag scores; the same wrapper is used for every bucket.
        output_wrapper = TimeDistributed(HiddenLayer(rnn_dim * 2, self.nums_tags, activation='linear', name='hidden'), name='wrapper')

        # define model for each bucket
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                # From the second bucket on, variables are reused.
                scope.reuse_variables()
            t1 = time()

            input_v = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))

            # Each bucket keeps a list of its input placeholders: the
            # character ids first, then one entry per n-gram feature.
            self.input_v.append([input_v])

            emb_set = []

            word_out = self.emb_layer(input_v)
            emb_set.append(word_out)

            if self.ngram is not None:
                for i in range(len(self.ngram)):
                    input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                    self.input_v[-1].append(input_g)
                    gram_out = self.gram_layers[i](input_g)
                    emb_set.append(gram_out)

            if len(emb_set) > 1:
                # Join character and n-gram embeddings along axis 2.
                emb_out = tf.concat(2, emb_set)

            else:
                emb_out = emb_set[0]

            emb_out = DropoutLayer(dr)(emb_out)
            emb_out = tf.unpack(emb_out)

            rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr, name='BiLSTM' + str(bucket), scope='BiRNN')(emb_out, input_v)

            output = output_wrapper(rnn_out)
            output_c = tf.pack(output, axis=1)

            self.output.append([output_c])

            # Placeholder for the gold tags of this bucket.
            self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])
            self.bucket_dit[bucket] = idx

            # Parenthesised single-argument form prints the same text under
            # the Python 2 print statement.
            print('Bucket %d, %f seconds' % (idx + 1, time() - t1))

        assert len(self.input_v) == len(self.output) and len(self.output) == len(self.output_) and len(self.output) == len(self.counts)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
Example #19
0
    # Fragment of a model-building routine; `model`, `batch_size`, `X_tr`,
    # `y_tr`, `std`, `reg` and `n_classes` are defined outside this excerpt.

    # First convolution; its input shape is the batch size followed by the
    # three trailing dimensions of X_tr.
    model.add_layer(
        Convolution(32, (3, 3),
                    input_shape=(batch_size, X_tr.shape[1], X_tr.shape[2],
                                 X_tr.shape[3]),
                    weight_initializer=NormalInitializer(std)))
    model.add_layer(ReLuActivation())
    model.add_layer(BatchNormalization())
    # Second convolution, this time with padding='same'.
    model.add_layer(
        Convolution(32, (3, 3),
                    weight_initializer=NormalInitializer(std),
                    padding='same'))

    model.add_layer(ReLuActivation())
    model.add_layer(MaxPool((2, 2)))
    model.add_layer(Flatten())

    # Dense head: Affine(100) -> ReLU -> dropout -> Affine(n_classes).
    model.add_layer(
        Affine(100, weight_initializer=NormalInitializer(std), reg=reg))
    model.add_layer(ReLuActivation())
    model.add_layer(DropoutLayer(drop_rate=0.3))
    model.add_layer(
        Affine(n_classes, weight_initializer=NormalInitializer(std), reg=reg))

    # Attach the loss and the Adam optimizer before training.
    model.initialize(loss=CrossEntropyLoss(),
                     optimizer=Adam(learning_rate=0.001,
                                    decay_fst_mom=0.9,
                                    decay_sec_mom=0.999))
    # with open('model_90_49.14262959724404', 'rb') as file:
    #     model = pickle.load(file)
    # Train for 100 epochs, reporting accuracy_metric.
    model.fit(batch_size, X_tr, y_tr, n_epochs=100, metric=accuracy_metric)
Example #20
0
    def __init__(self, config):
        """Assemble the AlexNet graph and the shared buffers used for training.

        Keys read from ``config``: 'verbose', 'batch_size', 'use_data_layer',
        'lib_conv', 'n_softmax_out', 'rand_crop' (only when the data layer is
        enabled), 'input_width', 'input_height', 'file_batch_size',
        'learning_rate', 'momentum' and 'weight_decay'.

        The constructor stores the symbolic inputs, the cost / error
        expressions, the parameter list with its gradients, and symbolic
        slices of the shared data buffers selected by a sub-batch index.
        """
        ModelBase.__init__(self)

        self.config = config
        self.verbose = self.config['verbose']
        self.name = 'alexnet'
        batch_size = config['batch_size']
        use_data_layer = config['use_data_layer']
        lib_conv = config['lib_conv']
        n_softmax_out = config['n_softmax_out']

        # ##################### BUILD NETWORK ##########################
        # Symbolic variables for the data; 'rand' is a random array used for
        # random cropping/mirroring of data.
        x = T.ftensor4('x')
        y = T.lvector('y')
        rand = T.fvector('rand')
        lr = T.scalar('lr')

        if self.verbose:
            print('AlexNet 2/16')
        self.layers = []
        params = []
        weight_types = []

        def register(layer):
            # Record a parameterised layer in all three bookkeeping lists.
            self.layers.append(layer)
            params.extend(layer.params)
            weight_types.extend(layer.weight_type)
            return layer

        # Optional cropping / mirroring stage in front of the first conv.
        if use_data_layer:
            stage_input = DataLayer(input=x,
                                    image_shape=(3, 256, 256, batch_size),
                                    cropsize=227,
                                    rand=rand,
                                    mirror=True,
                                    flag_rand=config['rand_crop']).output
        else:
            stage_input = x

        # One entry per conv/pool stage, in the order they are chained.
        conv_specs = [
            dict(image_shape=(3, 227, 227, batch_size),
                 filter_shape=(3, 11, 11, 96),
                 convstride=4, padsize=0, group=1,
                 poolsize=3, poolstride=2, bias_init=0.0, lrn=True),
            dict(image_shape=(96, 27, 27, batch_size),
                 filter_shape=(96, 5, 5, 256),
                 convstride=1, padsize=2, group=2,
                 poolsize=3, poolstride=2, bias_init=0.1, lrn=True),
            dict(image_shape=(256, 13, 13, batch_size),
                 filter_shape=(256, 3, 3, 384),
                 convstride=1, padsize=1, group=1,
                 poolsize=1, poolstride=0, bias_init=0.0, lrn=False),
            dict(image_shape=(384, 13, 13, batch_size),
                 filter_shape=(384, 3, 3, 384),
                 convstride=1, padsize=1, group=2,
                 poolsize=1, poolstride=0, bias_init=0.1, lrn=False),
            dict(image_shape=(384, 13, 13, batch_size),
                 filter_shape=(384, 3, 3, 256),
                 convstride=1, padsize=1, group=2,
                 poolsize=3, poolstride=2, bias_init=0.0, lrn=False),
        ]
        for spec in conv_specs:
            conv_stage = register(
                ConvPoolLayer(input=stage_input,
                              lib_conv=lib_conv,
                              verbose=self.verbose,
                              **spec))
            stage_input = conv_stage.output

        # Move the last axis to the front, then flatten to two dimensions.
        flat_features = T.flatten(stage_input.dimshuffle(3, 0, 1, 2), 2)
        fc6 = register(
            FCLayer(input=flat_features,
                    n_in=9216,
                    n_out=4096,
                    verbose=self.verbose))
        drop6 = DropoutLayer(fc6.output,
                             n_in=4096,
                             n_out=4096,
                             verbose=self.verbose)

        fc7 = register(
            FCLayer(input=drop6.output,
                    n_in=4096,
                    n_out=4096,
                    verbose=self.verbose))
        drop7 = DropoutLayer(fc7.output,
                             n_in=4096,
                             n_out=4096,
                             verbose=self.verbose)

        softmax = register(
            SoftmaxLayer(input=drop7.output,
                         n_in=4096,
                         n_out=n_softmax_out,
                         verbose=self.verbose))

        # #################### NETWORK BUILT #######################
        self.p_y_given_x = softmax.p_y_given_x
        self.y_pred = softmax.y_pred

        self.output = self.p_y_given_x

        self.cost = softmax.negative_log_likelihood(y)
        self.error = softmax.errors(y)
        # With fewer than five classes the "top 5" error uses all of them.
        top_k = n_softmax_out if n_softmax_out < 5 else 5
        self.error_top_5 = softmax.errors_top_x(y, top_k)
        self.params = params

        # Symbolic inputs.
        self.x = x
        self.y = y
        self.rand = rand
        self.lr = lr

        # Shared buffers sized for a whole file batch (for loading large batch).
        x_buffer_shape = (3, config['input_width'], config['input_height'],
                          config['file_batch_size'])
        self.shared_x = theano.shared(
            np.zeros(x_buffer_shape, dtype=theano.config.floatX),
            borrow=True)

        self.shared_y = theano.shared(
            np.zeros((config['file_batch_size'], ), dtype=int),
            borrow=True)
        self.shared_lr = theano.shared(np.float32(config['learning_rate']))

        # Training related state.
        self.base_lr = np.float32(config['learning_rate'])
        self.step_idx = 0
        self.mu = config['momentum']  # def: 0.9 # momentum
        self.eta = config['weight_decay']  # 0.0002 # weight decay
        self.weight_types = weight_types
        self.batch_size = batch_size

        self.grads = T.grad(self.cost, self.params)

        # Sub-batch index selecting one batch_size-wide window of the buffers.
        subb_ind = T.iscalar('subb')
        self.subb_ind = subb_ind
        window_start = subb_ind * self.batch_size
        window_stop = (subb_ind + 1) * self.batch_size
        self.shared_x_slice = self.shared_x[:, :, :, window_start:window_stop]
        self.shared_y_slice = self.shared_y[window_start:window_stop]
Example #21
0
    def main_graph(self,
                   trained_model,
                   scope,
                   emb_dim,
                   gru,
                   rnn_dim,
                   rnn_num,
                   fnn_dim,
                   window_size,
                   drop_out=0.5,
                   rad_dim=30,
                   emb=None,
                   ng_embs=None,
                   pixels=None,
                   con_width=None,
                   filters=None,
                   pooling_size=None):
        if trained_model is not None:
            param_dic = {}
            param_dic['nums_chars'] = self.nums_chars
            param_dic['nums_tags'] = self.nums_tags
            param_dic['tag_scheme'] = self.tag_scheme
            param_dic['graphic'] = self.graphic
            param_dic['pic_size'] = self.pic_size
            param_dic['word_vec'] = self.word_vec
            param_dic['radical'] = self.radical
            param_dic['crf'] = self.crf
            param_dic['emb_dim'] = emb_dim
            param_dic['gru'] = gru
            param_dic['rnn_dim'] = rnn_dim
            param_dic['rnn_num'] = rnn_num
            param_dic['fnn_dim'] = fnn_dim
            param_dic['window_size'] = window_size
            param_dic['drop_out'] = drop_out
            param_dic['filter_size'] = con_width
            param_dic['filters'] = filters
            param_dic['pooling_size'] = pooling_size
            param_dic['font'] = self.font
            param_dic['buckets_char'] = self.buckets_char
            param_dic['ngram'] = self.ngram
            param_dic['mode'] = self.mode
            #print param_dic
            if self.metric == 'All':
                pindex = trained_model.rindex('/') + 1
                for m in self.all_metrics:
                    f_model = open(
                        trained_model[:pindex] + m + '_' +
                        trained_model[pindex:], 'w')
                    pickle.dump(param_dic, f_model)
                    f_model.close()
            else:
                f_model = open(trained_model, 'w')
                pickle.dump(param_dic, f_model)
                f_model.close()

        # define shared weights and variables

        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.drop_out = dr
        self.drop_out_v = drop_out

        #concat_emb_dim = emb_dim * 2
        concat_emb_dim = 0

        if self.word_vec:
            self.emb_layer = EmbeddingLayer(self.nums_chars + 500,
                                            emb_dim,
                                            weights=emb,
                                            name='emb_layer')
            concat_emb_dim += emb_dim

        if self.radical:
            self.radical_layer = EmbeddingLayer(216,
                                                rad_dim,
                                                name='radical_layer')
            concat_emb_dim += rad_dim

        if self.ngram is not None:
            if ng_embs is not None:
                assert len(ng_embs) == len(self.ngram)
            else:
                ng_embs = [None for _ in range(len(self.ngram))]
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(
                    EmbeddingLayer(n_gram + 1000 * (i + 2),
                                   emb_dim,
                                   weights=ng_embs[i],
                                   name=str(i + 2) + 'gram_layer'))
                concat_emb_dim += emb_dim

        wrapper_conv_1, wrapper_mp_1, wrapper_conv_2 = None, None, None
        wrapper_mp_2, wrapper_dense, wrapper_dr = None, None, None

        if self.graphic:
            self.input_p = []
            assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

            self.pixels = pixels
            pixel_dim = int(math.sqrt(len(pixels[0])))

            wrapper_conv_1 = Convolution(con_width, 1, filters, name='conv_1')
            wrapper_mp_1 = Maxpooling(pooling_size,
                                      pooling_size,
                                      name='pooling_1')

            p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

            wrapper_conv_2 = Convolution(con_width,
                                         filters,
                                         filters,
                                         name='conv_2')
            wrapper_mp_2 = Maxpooling(pooling_size,
                                      pooling_size,
                                      name='pooling_2')
            p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

            wrapper_dense = HiddenLayer(p_size_2 * p_size_2 * filters,
                                        100,
                                        activation='tanh',
                                        name='conv_dense')
            wrapper_dr = DropoutLayer(self.drop_out)

            concat_emb_dim += 100

        fw_rnn_cell, bw_rnn_cell = None, None

        if self.mode == 'RNN':
            with tf.variable_scope('BiRNN'):

                if gru:
                    fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                    bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                else:
                    fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim,
                                                          state_is_tuple=True)
                    bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim,
                                                          state_is_tuple=True)

                if rnn_num > 1:
                    fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(
                        [fw_rnn_cell] * rnn_num, state_is_tuple=True)
                    bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(
                        [bw_rnn_cell] * rnn_num, state_is_tuple=True)

            output_wrapper = HiddenLayer(rnn_dim * 2,
                                         self.nums_tags[0],
                                         activation='linear',
                                         name='out_wrapper')
            fnn_weights, fnn_bias = None, None

        else:

            with tf.variable_scope('FNN'):
                fnn_weights = tf.get_variable(
                    'conv_w',
                    [2 * window_size + 1, concat_emb_dim, 1, fnn_dim])
                fnn_bias = tf.get_variable(
                    'conv_b', [fnn_dim],
                    initializer=tf.constant_initializer(0.1))

            output_wrapper = HiddenLayer(fnn_dim,
                                         self.nums_tags[0],
                                         activation='linear',
                                         name='out_wrapper')

        #define model for each bucket
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                scope.reuse_variables()
            t1 = time()

            input_v = tf.placeholder(tf.int32, [None, bucket],
                                     name='input_' + str(bucket))

            self.input_v.append([input_v])

            emb_set = []

            if self.word_vec:
                word_out = self.emb_layer(input_v)
                emb_set.append(word_out)

            if self.radical:
                input_r = tf.placeholder(tf.int32, [None, bucket],
                                         name='input_r' + str(bucket))

                self.input_v[-1].append(input_r)
                radical_out = self.radical_layer(input_r)
                emb_set.append(radical_out)

            if self.ngram is not None:
                for i in range(len(self.ngram)):
                    input_g = tf.placeholder(tf.int32, [None, bucket],
                                             name='input_g' + str(i) +
                                             str(bucket))
                    self.input_v[-1].append(input_g)
                    gram_out = self.gram_layers[i](input_g)
                    emb_set.append(gram_out)

            if self.graphic:
                input_p = tf.placeholder(tf.float32,
                                         [None, bucket, pixel_dim * pixel_dim])
                self.input_p.append(input_p)
                pix_out = tf.reshape(input_p, [-1, pixel_dim, pixel_dim, 1])

                conv_out_1 = wrapper_conv_1(pix_out)
                pooling_out_1 = wrapper_mp_1(conv_out_1)

                conv_out_2 = wrapper_conv_2(pooling_out_1)
                pooling_out_2 = wrapper_mp_2(conv_out_2)

                assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]

                pooling_out = tf.reshape(
                    pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])

                graphic_out = wrapper_dense(pooling_out)
                graphic_out = wrapper_dr(graphic_out)

                emb_set.append(graphic_out)

            if len(emb_set) > 1:
                emb_out = tf.concat(axis=2, values=emb_set)

            else:
                emb_out = emb_set[0]

            if self.mode == 'RNN':
                rnn_out = BiLSTM(rnn_dim,
                                 fw_cell=fw_rnn_cell,
                                 bw_cell=bw_rnn_cell,
                                 p=dr,
                                 name='BiLSTM' + str(bucket),
                                 scope='BiRNN')(emb_out, input_v)

                output = output_wrapper(rnn_out)

            else:
                emb_out = tf.pad(emb_out,
                                 [[0, 0], [window_size, window_size], [0, 0]])
                emb_out = tf.reshape(
                    emb_out, [-1, bucket + 2 * window_size, concat_emb_dim, 1])
                conv_out = tf.nn.conv2d(emb_out,
                                        fnn_weights, [1, 1, 1, 1],
                                        padding='VALID') + fnn_bias
                fnn_out = tf.nn.tanh(conv_out)
                fnn_out = tf.reshape(fnn_out, [-1, bucket, fnn_dim])

                output = output_wrapper(fnn_out)

            self.output.append([output])

            self.output_.append([
                tf.placeholder(tf.int32, [None, bucket],
                               name='tags' + str(bucket))
            ])

            self.bucket_dit[bucket] = idx

            print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

        assert len(self.input_v) == len(self.output) and len(self.output) == len(self.output_) \
               and len(self.output) == len(self.counts)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
Example #22
0
    def main_graph(self,
                   trained_model,
                   scope,
                   emb_dim,
                   cell,
                   rnn_dim,
                   rnn_num,
                   drop_out=0.5,
                   emb=None):
        """Build the shared layers and one two-input sub-graph per bucket.

        When ``trained_model`` is given, the hyper-parameters needed to
        rebuild the model are pickled to that path first. The method then
        creates the shared placeholders, embedding layers, recurrent cells
        and output layer, and finally, for every length in
        ``self.buckets_char``, a pair of input placeholders, one BiLSTM
        tower per GPU and a tag placeholder.

        Args:
            trained_model: Path the hyper-parameter dictionary is pickled
                to; ``None`` skips writing it.
            scope: Object whose ``reuse_variables()`` is called once the
                second bucket is reached (presumably the enclosing
                ``tf.VariableScope`` -- confirm against the caller).
            emb_dim: Embedding size handed to every ``EmbeddingLayer``.
            cell: ``'gru'`` selects ``GRUCell``; any other value selects
                ``LSTMCell``.
            rnn_dim: Number of units of each recurrent cell. The output
                layer takes ``rnn_dim * 2`` inputs.
            rnn_num: Number of stacked recurrent layers; values above 1
                wrap the cells in a ``MultiRNNCell``.
            drop_out: Value stored in ``self.drop_out_v`` and in the
                pickled parameters. The graph itself reads the
                ``drop_out_holder`` placeholder.
            emb: Passed as ``weights`` to the character embedding layer.

        Side effects:
            Sets ``self.batch_size_h``, ``self.drop_out``,
            ``self.drop_out_v``, ``self.emb_layer``, ``self.params`` and
            ``self.saver``; appends to ``self.gram_layers``,
            ``self.input_v1``, ``self.input_v2``, ``self.output`` and
            ``self.output_``; fills ``self.bucket_dit``.

        Note:
            With ``rnn_num > 1`` every stacked layer now gets its own cell
            object. Checkpoints written by a TensorFlow version that tied
            the layer weights when one cell object was repeated will not
            match the resulting variables.
        """
        if trained_model is not None:
            param_dic = {
                'nums_chars': self.nums_chars,
                'nums_tags': self.nums_tags,
                'crf': self.crf,
                'emb_dim': emb_dim,
                'cell': cell,
                'rnn_dim': rnn_dim,
                'rnn_num': rnn_num,
                'drop_out': drop_out,
                'buckets_char': self.buckets_char,
                'ngram': self.ngram,
                'is_space': self.is_space,
                'sent_seg': self.sent_seg,
                'emb_path': self.emb_path,
                'tag_scheme': self.tag_scheme
            }
            # The context manager closes the handle even if pickling fails;
            # binary mode is what pickle expects for its output file.
            with open(trained_model, 'wb') as f_model:
                pickle.dump(param_dic, f_model)

        # define shared weights and variables
        batch_size_h = tf.placeholder(tf.int32, [], name='batch_size_holder')
        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.batch_size_h = batch_size_h
        self.drop_out = dr
        self.drop_out_v = drop_out
        self.emb_layer = EmbeddingLayer(self.nums_chars + 20,
                                        emb_dim,
                                        weights=emb,
                                        name='emb_layer')

        # The n-gram embedding layers are created (without initial weights)
        # but are not applied anywhere in the per-bucket graph below.
        if self.ngram is not None:
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(
                    EmbeddingLayer(n_gram + 5000 * (i + 2),
                                   emb_dim,
                                   weights=None,
                                   name=str(i + 2) + 'gram_layer'))

        def _new_cell():
            # One fresh cell per call so stacked layers never share an object.
            if cell == 'gru':
                return tf.contrib.rnn.GRUCell(rnn_dim)
            return tf.contrib.rnn.LSTMCell(rnn_dim, state_is_tuple=True)

        with tf.variable_scope('BiRNN'):
            if rnn_num > 1:
                # Repeating a single cell object ([cell] * n) makes every
                # layer the same instance, which newer TF 1.x releases either
                # reject or turn into tied weights.
                fw_rnn_cell = tf.contrib.rnn.MultiRNNCell(
                    [_new_cell() for _ in range(rnn_num)],
                    state_is_tuple=True)
                bw_rnn_cell = tf.contrib.rnn.MultiRNNCell(
                    [_new_cell() for _ in range(rnn_num)],
                    state_is_tuple=True)
            else:
                fw_rnn_cell = _new_cell()  # forward
                bw_rnn_cell = _new_cell()  # backward

        output_wrapper = HiddenLayer(rnn_dim * 2,
                                     self.nums_tags,
                                     activation='linear',
                                     name='hidden')

        # define model for each bucket
        for idx, bucket in enumerate(self.buckets_char):
            # Variables are created by the first bucket and reused afterwards.
            if idx == 1:
                scope.reuse_variables()
            t1 = time()

            input_v1 = tf.placeholder(tf.int32, [None, bucket],
                                      name='input_1' + str(bucket))
            input_v2 = tf.placeholder(tf.int32, [None, bucket],
                                      name='input_2' + str(bucket))
            self.input_v1.append([input_v1])
            self.input_v2.append([input_v2])

            output = []
            for i in range(self.num_gpus):
                with tf.device('/gpu:{}'.format(i)):
                    # Each GPU works on its own batch_size_h-sized slice.
                    input_1 = input_v1[i * batch_size_h:(i + 1) * batch_size_h]
                    input_2 = input_v2[i * batch_size_h:(i + 1) * batch_size_h]

                    # Only the character embedding feeds the model here, so
                    # there is nothing to concatenate.
                    emb_out1 = self.emb_layer(input_1)
                    emb_out2 = self.emb_layer(input_2)

                    emb_out1 = DropoutLayer(dr)(emb_out1)
                    emb_out2 = DropoutLayer(dr)(emb_out2)

                    # NOTE(review): the full placeholder input_v1 is passed
                    # here rather than this GPU's slice input_1 -- confirm
                    # that BiLSTM expects the unsliced tensor.
                    rnn_out = BiLSTM(rnn_dim,
                                     fw_cell=fw_rnn_cell,
                                     bw_cell=bw_rnn_cell,
                                     p=dr,
                                     name='BiLSTM' + str(bucket),
                                     scope='BiRNN')(emb_out1, emb_out2,
                                                    input_v1)

                    output.append(output_wrapper(rnn_out))
            # One entry per bucket: a single-element list holding the list
            # of per-GPU outputs.
            self.output.append([output])

            self.output_.append([
                tf.placeholder(tf.int32, [None, bucket - 1],
                               name='tags' + str(bucket))
            ])
            self.bucket_dit[bucket] = idx

            # Single-argument form prints identically on Python 2 and 3.
            print('Bucket %d, %f seconds' % (idx + 1, time() - t1))

        assert len(self.input_v1) == len(self.output)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
Example #23
0
    def __init__(self,
                 input,
                 n_in=28**2,
                 n_hidden_1=1024,
                 n_hidden_2=1024,
                 n_hidden_3=1024,
                 n_hidden_4=1024,
                 n_out=10,
                 W_hidden_1=None,
                 W_hidden_2=None,
                 W_hidden_3=None,
                 W_hidden_4=None,
                 W_out=None,
                 dropout=0.0,
                 seed=None):
        """Assemble four hidden layers, a linear layer and a softmax.

        A ``DropoutLayer`` sits in front of every ``HiddenLayer``, including
        the final linear one, and the linear output feeds a ``SoftmaxLayer``.

        Args:
            input: Symbolic input handed to the first dropout layer and kept
                as ``self.input``.
            n_in: Input size of the first hidden layer.
            n_hidden_1, n_hidden_2, n_hidden_3, n_hidden_4: Output sizes of
                the four hidden layers.
            n_out: Output size of the linear layer.
            W_hidden_1, W_hidden_2, W_hidden_3, W_hidden_4, W_out: Forwarded
                as ``W`` to the matching ``HiddenLayer`` (presumably initial
                weights, with ``None`` leaving the choice to the layer --
                confirm against ``HiddenLayer``).
            dropout: Forwarded unchanged to every ``DropoutLayer``.
            seed: Base seed; layer ``k`` (counting dropout and hidden layers
                in construction order from 0) receives ``seed + k``. When
                ``None`` a base seed is drawn with ``np.random.randint``.
        """
        # Activation shared by the four hidden layers: T.nnet.relu called
        # with 0.1 as its second argument.
        relu_activation = lambda x: T.nnet.relu(x, 0.1)

        if seed is None:
            seed = np.random.randint(int(1e5))

        # --- layer 1 ---
        self.dropout_layer_1 = DropoutLayer(
            input=input, dropout=dropout, seed=seed)
        self.hidden_1 = HiddenLayer(
            input=self.dropout_layer_1.output,
            n_in=n_in,
            n_out=n_hidden_1,
            W=W_hidden_1,
            activation=relu_activation,
            seed=seed + 1)

        # --- layer 2 ---
        self.dropout_layer_2 = DropoutLayer(
            input=self.hidden_1.output, dropout=dropout, seed=seed + 2)
        self.hidden_2 = HiddenLayer(
            input=self.dropout_layer_2.output,
            n_in=n_hidden_1,
            n_out=n_hidden_2,
            W=W_hidden_2,
            activation=relu_activation,
            seed=seed + 3)

        # --- layer 3 ---
        self.dropout_layer_3 = DropoutLayer(
            input=self.hidden_2.output, dropout=dropout, seed=seed + 4)
        self.hidden_3 = HiddenLayer(
            input=self.dropout_layer_3.output,
            n_in=n_hidden_2,
            n_out=n_hidden_3,
            W=W_hidden_3,
            activation=relu_activation,
            seed=seed + 5)

        # --- layer 4 ---
        self.dropout_layer_4 = DropoutLayer(
            input=self.hidden_3.output, dropout=dropout, seed=seed + 6)
        self.hidden_4 = HiddenLayer(
            input=self.dropout_layer_4.output,
            n_in=n_hidden_3,
            n_out=n_hidden_4,
            W=W_hidden_4,
            activation=relu_activation,
            seed=seed + 7)

        # --- linear read-out followed by softmax ---
        self.dropout_layer_5 = DropoutLayer(
            input=self.hidden_4.output, dropout=dropout, seed=seed + 8)
        self.linear_layer = HiddenLayer(
            input=self.dropout_layer_5.output,
            n_in=n_hidden_4,
            n_out=n_out,
            W=W_out,
            activation=identity_map,
            seed=seed + 9)
        self.softmax_layer = SoftmaxLayer(input=self.linear_layer.output)

        # keep track of model input and expose the softmax predictions
        self.input = input
        self.p_y_given_x = self.softmax_layer.p_y_given_x
        self.y_pred = self.softmax_layer.y_pred

        # Layers that carry weights, in network order.
        weighted_layers = (self.hidden_1, self.hidden_2, self.hidden_3,
                           self.hidden_4, self.linear_layer)

        # Accumulate left to right, starting from the first layer's terms.
        # Plain `x = x + y` is used so the first layer's own `params`
        # object is never modified in place.
        first_layer = weighted_layers[0]
        l1_total = abs(first_layer.W).sum()
        l2_total = T.sum(first_layer.W**2)
        all_params = first_layer.params
        for layer in weighted_layers[1:]:
            l1_total = l1_total + abs(layer.W).sum()
            l2_total = l2_total + T.sum(layer.W**2)
            all_params = all_params + layer.params

        # Sum of absolute weights and sum of squared weights over all
        # weighted layers.
        self.L1 = l1_total
        self.L2_sqr = l2_total

        # Loss and error measures are taken straight from the softmax layer.
        self.mean_log_likelihood = self.softmax_layer.mean_log_likelihood
        self.errors = self.softmax_layer.errors

        self.params = all_params