    Words = theano.shared(value=U, name="Words")
    zero_vec_tensor = T.vector()
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))
    conv_layers = []
    layer1_inputs = []
    for i in xrange(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng,
                                        input=layer0_input,
                                        image_shape=(batch_size, 1, img_h, img_w),
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_hs)
    classifier = MLPDropout(rng, input=layer1_input, layer_sizes=hidden_units,
                            activations=activations, dropout_rates=dropout_rate)
    classifier.params[0].set_value(savedparams[0])
    classifier.params[1].set_value(savedparams[1])
    k = 2
    for conv_layer in conv_layers:
        conv_layer.params[0].set_value(savedparams[k])
        conv_layer.params[1].set_value(savedparams[k + 1])
        k = k + 2

    test_set_x = datasets[0][:, :img_h]
    test_set_y = np.asarray(datasets[0][:, -1], "int32")

    test_pred_layers = []
    test_size = 1
Example #2
def build_model(U,
                img_h,
                img_w=300,
                filter_hs=[1, 2, 3],
                hidden_units=[100, 10],
                dropout_rate=0.5,
                batch_size=50,
                conv_non_linear="relu",
                activation=Iden,
                sqr_norm_lim=9,
                non_static=True):
    """
    Train a simple conv net
    img_h = sentence length (padded where necessary)
    img_w = token vector length (300 for token2vec)
    filter_hs = filter window sizes    
    hidden_units = [x,y] x is the number of feature maps (per filter window), and y is the penultimate layer
    sqr_norm_lim = s^2 in the paper
    lr_decay = adadelta decay parameter
    """
    rng = np.random.RandomState(3435)
    filter_w = img_w
    feature_maps = hidden_units[0]
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))
    parameters = [("image shape", img_h, img_w),
                  ("filter shape", filter_shapes), ("pool size", pool_sizes),
                  ("hidden_units", hidden_units), ("dropout", dropout_rate),
                  ("batch_size", batch_size), ("non_static", non_static),
                  ("conv_non_linear", conv_non_linear),
                  ("sqr_norm_lim", sqr_norm_lim)]
    print(parameters)
    logging.info("start")
    logging.info('Records: %s', parameters)

    #define model architecture
    x = T.imatrix('x')
    y = T.ivector('y')
    Words = theano.shared(value=U, name="Words")
    layer0_input = Words[x.flatten()].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))

    conv_layers = []
    layer1_inputs = []
    for i in range(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng,
                                        input=layer0_input,
                                        image_shape=(batch_size, 1, img_h,
                                                     img_w),
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_hs)
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=[activation],
                            dropout_rates=[dropout_rate])

    return x, y, Words, conv_layers, classifier
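
build_model above only assembles the graph and hands back its pieces; nothing is compiled. A minimal, hypothetical driver is sketched below, mirroring the single-example prediction pattern used by the later snippets in this section (U_demo, predict_fn and the batch size of 1 are illustrative assumptions, not part of the original code).

U_demo = np.random.randn(5000, 300).astype("float32")   # fake embedding matrix, one row per word index
x_sym, y_sym, Words, conv_layers, classifier = build_model(U_demo, img_h=64)
test_input = Words[x_sym.flatten()].reshape(
    (x_sym.shape[0], 1, x_sym.shape[1], Words.shape[1]))
pred_layers = [conv_layer.predict(test_input, 1).flatten(2)   # recompute conv/pool for batch size 1
               for conv_layer in conv_layers]
y_pred = classifier.predict(T.concatenate(pred_layers, 1))
predict_fn = theano.function([x_sym], y_pred, allow_input_downcast=True)  # maps a (1, 64) index matrix to a label
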
def train_conv_net(datasets,
                   U,
                   img_w=300,
                   filter_hs=[3, 4, 5],
                   hidden_units=[100, 2],
                   dropout_rate=[0.5],
                   shuffle_batch=True,
                   n_epochs=25,
                   batch_size=50,
                   lr_decay=0.95,
                   conv_non_linear="relu",
                   activations=[Iden],
                   sqr_norm_lim=9,
                   non_static=True):
    """
    Train a simple conv net
    img_h = sentence length (padded where necessary)
    img_w = word vector length (300 for word2vec)
    filter_hs = filter window sizes    
    hidden_units = [x,y] x is the number of feature maps (per filter window), and y is the penultimate layer
    sqr_norm_lim = s^2 in the paper
    lr_decay = adadelta decay parameter
    """
    rng = np.random.RandomState(3435)
    img_h = len(datasets[0][0]) - 1
    filter_w = img_w
    feature_maps = hidden_units[0]
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))
    parameters = [("image shape", img_h, img_w),
                  ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units), ("dropout", dropout_rate),
                  ("batch_size", batch_size), ("non_static", non_static),
                  ("learn_decay", lr_decay),
                  ("conv_non_linear", conv_non_linear),
                  ("non_static", non_static), ("sqr_norm_lim", sqr_norm_lim),
                  ("shuffle_batch", shuffle_batch)]
    print parameters

    #define model architecture
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    Words = theano.shared(value=U, name="Words")
    zero_vec_tensor = T.vector()
    zero_vec = np.zeros(img_w)
    set_zero = theano.function([zero_vec_tensor],
                               updates=[
                                   (Words,
                                    T.set_subtensor(Words[0, :],
                                                    zero_vec_tensor))
                               ],
                               allow_input_downcast=True)
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))
    conv_layers = []
    layer1_inputs = []
    for i in xrange(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng,
                                        input=layer0_input,
                                        image_shape=(batch_size, 1, img_h,
                                                     img_w),
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_hs)
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)

    #define parameters of the model and update functions using adadelta
    params = classifier.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        #if word vectors are allowed to change, add them as model parameters
        params += [Words]
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6,
                                        sqr_norm_lim)

    #shuffle dataset and assign to mini batches. if dataset size is not a multiple of mini batches, replicate
    #extra data (at random)
    np.random.seed(3435)
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        train_set = np.random.permutation(datasets[0])
        extra_data = train_set[:extra_data_num]
        new_data = np.append(datasets[0], extra_data, axis=0)
    else:
        new_data = datasets[0]
    new_data = np.random.permutation(new_data)
    n_batches = new_data.shape[0] / batch_size
    n_train_batches = int(np.round(n_batches * 0.9))
    #divide train set into train/val sets
    test_set_x = datasets[1][:, :img_h]
    test_set_y = np.asarray(datasets[1][:, -1], "int32")
    train_set = new_data[:n_train_batches * batch_size, :]
    val_set = new_data[n_train_batches * batch_size:, :]
    train_set_x, train_set_y = shared_dataset(
        (train_set[:, :img_h], train_set[:, -1]))
    val_set_x, val_set_y = shared_dataset((val_set[:, :img_h], val_set[:, -1]))
    n_val_batches = n_batches - n_train_batches
    val_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: val_set_x[index * batch_size:(index + 1) * batch_size],
            y: val_set_y[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)

    #compile theano functions to get train/val/test errors
    test_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)
    train_model = theano.function(
        [index],
        cost,
        updates=grad_updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)
    test_pred_layers = []
    test_size = test_set_x.shape[0]
    test_layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (test_size, 1, img_h, Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_y_pred = classifier.predict(test_layer1_input)
    test_error = T.mean(T.neq(test_y_pred, y))
    test_model_all = theano.function([x, y],
                                     test_error,
                                     allow_input_downcast=True)

    #start training over mini-batches
    print '... training'
    epoch = 0
    best_val_perf = 0
    val_perf = 0
    test_perf = 0
    cost_epoch = 0
    while (epoch < n_epochs):
        epoch = epoch + 1
        if shuffle_batch:
            for minibatch_index in np.random.permutation(
                    range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        else:
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        train_losses = [test_model(i) for i in xrange(n_train_batches)]
        train_perf = 1 - np.mean(train_losses)
        val_losses = [val_model(i) for i in xrange(n_val_batches)]
        val_perf = 1 - np.mean(val_losses)
        print('epoch %i, train perf %f %%, val perf %f' %
              (epoch, train_perf * 100., val_perf * 100.))
        if val_perf >= best_val_perf:
            best_val_perf = val_perf
            test_loss = test_model_all(test_set_x, test_set_y)
            test_perf = 1 - test_loss
    return test_perf, params
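
train_conv_net (and the other training snippets below) depends on a shared_dataset helper that never appears in this section. A plausible minimal version, following the standard Theano pattern of keeping the data in shared variables and handing the labels back as int32, is sketched here; treat it as an assumption rather than the original helper (the variants that pass three arrays would need a third shared variable).

def shared_dataset(data_xy, borrow=True):
    """Store an (x, y) pair in shared variables so minibatches can be sliced on the GPU."""
    data_x, data_y = data_xy
    shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
    # the labels are used as indices, so return an int32 view of shared_y
    return shared_x, T.cast(shared_y, 'int32')
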
Example #4
def build_model(U,
                img_h1,
                img_h2,
                img_w=100,
                x1_filter_hs=[1, 2, 3],
                x2_filter_hs=[1, 2, 3],
                hidden_units=[100, 2],
                dropout_rate=0.5,
                batch_size=50,
                conv_non_linear="relu",
                activation=Iden,
                sqr_norm_lim=9,
                non_static=True):

    rng = np.random.RandomState(3435)
    filter_w = img_w
    feature_maps = hidden_units[0]
    x1_filter_shapes = []
    x2_filter_shapes = []
    pool_x1_sizes = []
    pool_x2_sizes = []
    '''For each filter height, set the filter shapes and pool sizes for both CNNs'''
    ''' (note - this code creates the structures in a way that is easy to handle
        in the lenet code) '''

    for filter_h in x1_filter_hs:
        x1_filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_x1_sizes.append((img_h1 - filter_h + 1, img_w - filter_w + 1))

    for filter_h in x2_filter_hs:
        x2_filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_x2_sizes.append((img_h2 - filter_h + 1, img_w - filter_w + 1))

    parameters = [("image x1 shape", img_h1, img_w),
                  ("image x2 shape", img_h2, img_w),
                  ("x1 filter shape", x1_filter_shapes),
                  ("x2 filter shape", x2_filter_shapes),
                  ("pool x1 size", pool_x1_sizes),
                  ("pool x2 size", pool_x2_sizes),
                  ("hidden_units", hidden_units), ("dropout", dropout_rate),
                  ("batch_size", batch_size), ("non_static", non_static),
                  ("conv_non_linear", conv_non_linear),
                  ("sqr_norm_lim", sqr_norm_lim)]

    print parameters

    logger.error("start")
    logger.error('Records: %s', parameters)

    #define model architecture
    x1 = T.imatrix('x1')
    x2 = T.imatrix('x2')
    y = T.ivector('y')

    Words = theano.shared(value=U, name="Words")
    '''for the first layer input'''

    layer0_x1_input = Words[x1.flatten()].reshape(
        (x1.shape[0], 1, x1.shape[1], Words.shape[1]))
    layer0_x2_input = Words[x2.flatten()].reshape(
        (x2.shape[0], 1, x2.shape[1], Words.shape[1]))

    conv_layers = []
    conv_layers1 = []
    conv_layers2 = []

    x1_layer1_inputs = []
    x2_layer1_inputs = []
    '''create a stack of LeNetConvPoolLayers for each of the two CNNs'''

    for i in xrange(len(x1_filter_hs)):
        x1_conv_layer = LeNetConvPoolLayer(rng,
                                           input=layer0_x1_input,
                                           image_shape=(batch_size, 1, img_h1,
                                                        img_w),
                                           filter_shape=x1_filter_shapes[i],
                                           poolsize=pool_x1_sizes[i],
                                           non_linear=conv_non_linear)
        x1_layer1_input = x1_conv_layer.output.flatten(2)
        conv_layers1.append(x1_conv_layer)
        x1_layer1_inputs.append(x1_layer1_input)

    for i in xrange(len(x2_filter_hs)):
        x2_conv_layer = LeNetConvPoolLayer(rng,
                                           input=layer0_x2_input,
                                           image_shape=(batch_size, 1, img_h2,
                                                        img_w),
                                           filter_shape=x2_filter_shapes[i],
                                           poolsize=pool_x2_sizes[i],
                                           non_linear=conv_non_linear)
        x2_layer1_input = x2_conv_layer.output.flatten(2)
        conv_layers2.append(x2_conv_layer)
        x2_layer1_inputs.append(x2_layer1_input)
    '''concatenate the outputs of the two CNNs for softmax classification'''

    x2_layer1_inputs += x1_layer1_inputs
    layer1_input = T.concatenate(x2_layer1_inputs, 1)
    hidden_units[0] = feature_maps * (len(x2_filter_hs) + len(x1_filter_hs))
    #  conv_layers = conv_layers1 + conv_layers2

    #TODO - instead of concat, try another function to combine the layers?

    #x1_layer1_input = T.concatenate(x1_layer1_inputs,1)
    #x2_layer1_input = T.concatenate(x2_layer1_inputs,1)
    #outer_prod = x1_layer1_input.dimshuffle(0,1,'x') * x2_layer1_input.dimshuffle(0,'x',1)
    #layer1_input = outer_prod.flatten(2)
    #hidden_units[0] = feature_maps*len(x1_filter_hs) * feature_maps*len(x2_filter_hs)

    #layer1_input = x1_layer1_input * x2_layer1_input
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=[activation],
                            dropout_rates=[dropout_rate])

    return x1, x2, y, Words, conv_layers1, conv_layers2, classifier
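
As with the single-input variant, this build_model only wires up the two convolutional stacks and the shared classifier. A hypothetical call might look like the following; U_demo and the sentence lengths are made-up values, and img_w must match the width of the embedding matrix (100 here).

U_demo = np.random.randn(5000, 100).astype("float32")   # fake 100-d embeddings
x1, x2, y, Words, conv_layers1, conv_layers2, classifier = build_model(
    U_demo, img_h1=30, img_h2=30, hidden_units=[100, 2])
# conv_layers1 / conv_layers2 hold the per-filter-size layers for each input;
# their concatenated, pooled features feed the shared MLPDropout classifier.
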
Example #5
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng,
                                        input=layer0_input,
                                        image_shape=(batch_size, 1, img_h,
                                                     img_w),
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_hs)
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)
    classifier.params[0].set_value(savedparams[0])
    classifier.params[1].set_value(savedparams[1])
    k = 2
    for conv_layer in conv_layers:
        conv_layer.params[0].set_value(savedparams[k])
        conv_layer.params[1].set_value(savedparams[k + 1])
        k = k + 2

    test_set_x = datasets[0][:, :img_h]
    test_set_y = np.asarray(datasets[0][:, -1], "int32")

    test_pred_layers = []
    test_size = 1
Example #6
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng,
                                        input=layer0_input,
                                        image_shape=(batch_size, 1, img_h,
                                                     img_w),
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_hs)
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)

    # define parameters of the model and update functions using adadelta
    params = classifier.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        # if word vectors are allowed to change, add them as model parameters
        params += [Words]
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6,
                                        sqr_norm_lim)
def train_conv_net(datasets,
                   U,
                   img_w=300,
                   filter_hs=[3, 4, 5],
                   hidden_units=[100, 2],
                   dropout_rate=[0.5],
                   shuffle_batch=True,
                   n_epochs=25,
                   batch_size=50,
                   lr_decay=0.95,
                   conv_non_linear="relu",
                   activations=[Iden],
                   sqr_norm_lim=9,
                   non_static=True):
    """
    Train a simple conv net
    img_h = sentence length (padded where necessary)
    img_w = word vector length (300 for word2vec)
    filter_hs = filter window sizes
    hidden_units = [x,y] x is the number of feature maps (per filter window), and y is the penultimate layer
    sqr_norm_lim = s^2 in the paper
    lr_decay = adadelta decay parameter
    """
    rng = np.random.RandomState(3435)
    img_h = len(datasets[0][0]) - 1
    filter_w = img_w
    feature_maps = hidden_units[0]
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))
    parameters = [("image shape", img_h, img_w),
                  ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units), ("dropout", dropout_rate),
                  ("batch_size", batch_size), ("non_static", non_static),
                  ("learn_decay", lr_decay),
                  ("conv_non_linear", conv_non_linear),
                  ("non_static", non_static), ("sqr_norm_lim", sqr_norm_lim),
                  ("shuffle_batch", shuffle_batch)]
    print parameters

    #define model architecture
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    Words = theano.shared(value=U, name="Words")
    zero_vec_tensor = T.vector()
    zero_vec = np.zeros(img_w)
    set_zero = theano.function([zero_vec_tensor],
                               updates=[(Words,
                                         T.set_subtensor(Words[0, :],
                                                         zero_vec_tensor))],
                               allow_input_downcast=True)
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))
    conv_layers = []
    layer1_inputs = []
    for i in xrange(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng,
                                        input=layer0_input,
                                        image_shape=(batch_size, 1, img_h, img_w),
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_hs)
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)

    #define parameters of the model and update functions using adadelta
    params = classifier.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        #if word vectors are allowed to change, add them as model parameters
        params += [Words]
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6,
                                        sqr_norm_lim)

    #shuffle dataset and assign to mini batches. if dataset size is not a multiple of mini batches, replicate
    #extra data (at random)
    np.random.seed(3435)
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        train_set = np.random.permutation(datasets[0])
        extra_data = train_set[:extra_data_num]
        new_data = np.append(datasets[0], extra_data, axis=0)
    else:
        new_data = datasets[0]
    new_data = np.random.permutation(new_data)
    n_batches = new_data.shape[0] / batch_size
    n_train_batches = int(np.round(n_batches * 0.9))
    #divide train set into train/val sets
    test_set_x = datasets[1][:, :img_h]
    test_set_y = np.asarray(datasets[1][:, -1], "int32")
    train_set = new_data[:n_train_batches * batch_size, :]
    val_set = new_data[n_train_batches * batch_size:, :]
    train_set_x, train_set_y = shared_dataset(
        (train_set[:, :img_h], train_set[:, -1]))
    val_set_x, val_set_y = shared_dataset((val_set[:, :img_h], val_set[:, -1]))
    n_val_batches = n_batches - n_train_batches
    val_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: val_set_x[index * batch_size:(index + 1) * batch_size],
            y: val_set_y[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)

    #compile theano functions to get train/val/test errors
    test_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)
    train_model = theano.function(
        [index],
        cost,
        updates=grad_updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)
    test_pred_layers = []
    test_size = test_set_x.shape[0]
    test_layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (test_size, 1, img_h, Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_y_pred = classifier.predict(test_layer1_input)
    test_error = T.mean(T.neq(test_y_pred, y))
    test_model_all = theano.function([x, y],
                                     test_error,
                                     allow_input_downcast=True)

    #start training over mini-batches
    print '... training'
    epoch = 0
    best_val_perf = 0
    val_perf = 0
    test_perf = 0
    cost_epoch = 0
    while (epoch < n_epochs):
        epoch = epoch + 1
        if shuffle_batch:
            for minibatch_index in np.random.permutation(range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        else:
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        train_losses = [test_model(i) for i in xrange(n_train_batches)]
        train_perf = 1 - np.mean(train_losses)
        val_losses = [val_model(i) for i in xrange(n_val_batches)]
        val_perf = 1 - np.mean(val_losses)
        print('epoch %i, train perf %f %%, val perf %f' %
              (epoch, train_perf * 100., val_perf * 100.))
        if val_perf >= best_val_perf:
            best_val_perf = val_perf
            test_loss = test_model_all(test_set_x, test_set_y)
            test_perf = 1 - test_loss
    return test_perf, params
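
Every train_conv_net variant in this section calls sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6, sqr_norm_lim), but the helper itself is not included. The sketch below is a reconstruction under the usual assumptions for this code base (Adadelta accumulators per parameter, plus a column-wise max-norm constraint with s^2 = sqr_norm_lim applied to 2-D weights other than the embedding matrix); it is illustrative, not the original implementation.

from collections import OrderedDict

def sgd_updates_adadelta(params, cost, rho=0.95, epsilon=1e-6, norm_lim=9):
    """Adadelta updates plus a column-wise max-norm constraint (norm_lim = s^2)."""
    updates = OrderedDict()
    for param in params:
        zeros = np.zeros_like(param.get_value(), dtype=theano.config.floatX)
        acc_grad = theano.shared(zeros, name="acc_grad_%s" % param.name)          # E[g^2]
        acc_step = theano.shared(zeros.copy(), name="acc_step_%s" % param.name)   # E[dx^2]
        grad = T.grad(cost, param)
        new_acc_grad = rho * acc_grad + (1. - rho) * T.sqr(grad)
        step = -(T.sqrt(acc_step + epsilon) / T.sqrt(new_acc_grad + epsilon)) * grad
        updates[acc_grad] = new_acc_grad
        updates[acc_step] = rho * acc_step + (1. - rho) * T.sqr(step)
        stepped = param + step
        if param.get_value(borrow=True).ndim == 2 and param.name != 'Words':
            # rescale any column whose L2 norm exceeds sqrt(norm_lim)
            col_norms = T.sqrt(T.sum(T.sqr(stepped), axis=0))
            desired = T.clip(col_norms, 0, T.sqrt(norm_lim))
            stepped = stepped * (desired / (1e-7 + col_norms))
        updates[param] = stepped
    return updates
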
Example #8
def train_pos_cnn(datasets,
                  W,
                  P,
                  filter_hs,
                  hidden_units,
                  dropout_rates,
                  n_epochs,
                  batch_size,
                  lr_decay,
                  conv_non_linear,
                  activations,
                  sqr_norm_lim,
                  model):

    # print params
    parameters = [("num_filters", hidden_units[0]),
                  ("num_classes", hidden_units[1]),
                  ("filter_types", filter_hs),
                  ("dropout", dropout_rates),
                  ("num_epochs", n_epochs),
                  ("batch_size", batch_size),
                  ("learn_decay", lr_decay),
                  ("conv_non_linear", conv_non_linear),
                  ("sqr_norm_lim", sqr_norm_lim),
                  ("model", model)]
    print parameters

    ##########################
    #   model architecture   #
    ##########################

    print 'building the model architecture...'
    index = T.lscalar()
    x = T.matrix('x')  # words
    y = T.ivector('y')  # labels
    z = T.matrix('z')  # tags
    curr_batch_size = T.lscalar()
    is_train = T.iscalar('is_train')  # 1=train, 0=test

    # set necessary variables
    rng = np.random.RandomState(3435)
    img_h = (len(datasets[0][0]) - 1) / 2  # input height = seq len
    feature_maps = hidden_units[0]  # num filters

    # EMBEDDING LAYER
    embedding_layer = EmbeddingLayer(rng, is_train, x, z, curr_batch_size, img_h, W, P, model, dropout_rates[0])
    layer0_input = embedding_layer.output
    img_w = embedding_layer.final_token_dim  # img w = filter width = input matrix width

    # set more variables
    filter_w = img_w  # filter width = input matrix width

    # construct filter shapes and pool sizes
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h-filter_h+1, img_w-filter_w+1))

    # CONV-POOL LAYER
    conv_layers = []
    layer1_inputs = []
    for i in xrange(len(filter_shapes)):
        conv_layer = LeNetConvPoolLayer(rng,
                                        input=layer0_input,
                                        image_shape=(None, 1, img_h, img_w),
                                        filter_shape=filter_shapes[i],
                                        poolsize=pool_sizes[i],
                                        non_linear=conv_non_linear)
        layer1_inputs.append(conv_layer.output.flatten(2))
        conv_layers.append(conv_layer)
    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_shapes)  # update the hidden units

    # OUTPUT LAYER (Dropout, Fully-Connected, Soft-Max)
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rate=dropout_rates[1])

    # UPDATE
    params = classifier.params + embedding_layer.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)  # use this to update
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6, sqr_norm_lim)

    ##########################
    #    dataset handling    #
    ##########################

    print 'handling dataset...'
    # train
    # if len(datasets[0]) % batch_size != 0:
    #     datasets[0] = np.random.permutation(datasets[0])
    #     to_add = batch_size - len(datasets[0]) % batch_size
    #     datasets[0] = np.concatenate((datasets[0], datasets[0][:to_add]))
    train_set_x, train_set_y, train_set_z = \
        shared_dataset((datasets[0][:, :img_h], datasets[0][:, -1], datasets[0][:, img_h:2*img_h]))
    n_train_batches = int(len(datasets[0]) / batch_size)
    if len(datasets[0]) % batch_size > 0:
        n_train_batches += 1

    # val
    # if len(datasets[1]) % batch_size != 0:
    #     datasets[1] = np.random.permutation(datasets[1])
    #     to_add = batch_size - len(datasets[1]) % batch_size
    #     datasets[1] = np.concatenate((datasets[1], datasets[1][:to_add]))
    val_set_x, val_set_y, val_set_z = \
        shared_dataset((datasets[1][:, :img_h], datasets[1][:, -1], datasets[1][:, img_h:2*img_h]))
    n_val_batches = int(len(datasets[1]) / batch_size)
    if len(datasets[1]) % batch_size > 0:
        n_val_batches += 1

    # test
    test_set_x, test_set_y, test_set_z = \
        shared_dataset((datasets[2][:, :img_h], datasets[2][:, -1], datasets[2][:, img_h:2*img_h]))
    n_test_batches = int(len(datasets[2]) / batch_size)
    if len(datasets[2]) % batch_size > 0:
        n_test_batches += 1

    ##########################
    #    theano functions    #
    ##########################

    print 'preparing theano functions...'
    zero_vec_tensor = T.vector()
    set_zero_word = theano.function([zero_vec_tensor],
                                    updates=[(embedding_layer.Words, T.set_subtensor(embedding_layer.Words[0, :], zero_vec_tensor))],
                                    allow_input_downcast=True)
    if model != 'notag':
        set_zero_pos = theano.function([zero_vec_tensor],
                                       updates=[(embedding_layer.Tags, T.set_subtensor(embedding_layer.Tags[0, :], zero_vec_tensor))],
                                       allow_input_downcast=True)
    val_model = theano.function([index, curr_batch_size], classifier.errors(y),
                                givens={
                                    x: val_set_x[index * batch_size: (index + 1) * batch_size],
                                    y: val_set_y[index * batch_size: (index + 1) * batch_size],
                                    z: val_set_z[index * batch_size: (index + 1) * batch_size],
                                    is_train: np.cast['int32'](0)},
                                allow_input_downcast=True, on_unused_input='ignore')
    train_eval_model = theano.function([index, curr_batch_size], classifier.errors(y),
                                       givens={
                                           x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                           y: train_set_y[index * batch_size: (index + 1) * batch_size],
                                           z: train_set_z[index * batch_size: (index + 1) * batch_size],
                                           is_train: np.cast['int32'](0)},
                                       allow_input_downcast=True, on_unused_input='ignore')
    train_model = theano.function([index, curr_batch_size], cost, updates=grad_updates,
                                  givens={
                                      x: train_set_x[index*batch_size:(index+1)*batch_size],
                                      y: train_set_y[index*batch_size:(index+1)*batch_size],
                                      z: train_set_z[index*batch_size:(index+1)*batch_size],
                                      is_train: np.cast['int32'](1)},
                                  allow_input_downcast=True, on_unused_input='ignore')
    test_model = theano.function([index, curr_batch_size], classifier.errors(y),
                                 givens={
                                     x: test_set_x[index * batch_size: (index + 1) * batch_size],
                                     y: test_set_y[index * batch_size: (index + 1) * batch_size],
                                     z: test_set_z[index * batch_size: (index + 1) * batch_size],
                                     is_train: np.cast['int32'](0)},
                                 allow_input_downcast=True, on_unused_input='ignore')

    ##########################
    #        training        #
    ##########################

    print 'training...'
    epoch = 0
    best_val_perf = 0
    best_test_perf = 0
    best_epoch = 0
    num_epochs_decrease = 0
    prev_val_perf = 0

    while epoch < n_epochs:
        start_time = time.time()
        epoch += 1
        step = 1
        for minibatch_index in np.random.permutation(range(n_train_batches)):
            cost = train_model(minibatch_index, min(batch_size, len(datasets[0])-minibatch_index*batch_size))
            set_zero_word(np.zeros(W.shape[1]))
            if model != 'notag':
                set_zero_pos(np.zeros(P.shape[1]))
            step += 1
        train_losses = [train_eval_model(i, min(batch_size, len(datasets[0])-i*batch_size)) for i in xrange(n_train_batches)]
        train_perf = 1 - np.mean(train_losses)
        val_losses = [val_model(i, min(batch_size, len(datasets[1])-i*batch_size)) for i in xrange(n_val_batches)]
        val_perf = 1 - np.mean(val_losses)
        test_losses = [test_model(i, min(batch_size, len(datasets[2])-i*batch_size)) for i in xrange(n_test_batches)]
        test_loss = np.mean(test_losses)
        test_perf = 1 - test_loss

        print 'epoch: {}, time: {} secs, train: {}, val: {}, test: {}'\
            .format(epoch, time.time() - start_time, train_perf * 100., val_perf * 100., test_perf * 100.)

        if val_perf > best_val_perf or (val_perf == best_val_perf and test_perf > best_test_perf):
            best_val_perf = val_perf
            best_test_perf = test_perf
            best_epoch = epoch

        # early stop
        if val_perf < prev_val_perf:
            num_epochs_decrease += 1
        else:
            num_epochs_decrease = 0
        if num_epochs_decrease >= 3:
            break
        prev_val_perf = val_perf

    return best_test_perf, best_val_perf, best_epoch
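
train_pos_cnn expects each row of a dataset split to hold the word indices, then the POS-tag indices, then the label (so row length = 2*img_h + 1), plus EmbeddingLayer and shared_dataset helpers that are not shown here. A purely hypothetical invocation is sketched below; train_mat, val_mat, test_mat, W_demo and P_demo are placeholders, and only the 'notag' value of model is taken from the code above.

# train_mat, val_mat, test_mat: np.ndarrays with rows [word idxs | tag idxs | label]
W_demo = np.random.randn(5000, 300).astype("float32")   # fake word embeddings
P_demo = np.random.randn(50, 20).astype("float32")      # fake POS-tag embeddings
best_test, best_val, best_epoch = train_pos_cnn(
    datasets=[train_mat, val_mat, test_mat],
    W=W_demo, P=P_demo,
    filter_hs=[3, 4, 5], hidden_units=[100, 2], dropout_rates=[0.5, 0.5],
    n_epochs=25, batch_size=50, lr_decay=0.95, conv_non_linear="relu",
    activations=[Iden], sqr_norm_lim=9, model="notag")
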
Example #9
def train_conv_net(datasets,
                   U,
                   ofile,
                   cv=0,
                   attr=0,
                   img_w=300,
                   filter_hs=[3, 4, 5],
                   hidden_units=[100, 2],
                   dropout_rate=[0.5],
                   shuffle_batch=True,
                   n_epochs=25,
                   batch_size=50,
                   lr_decay=0.95,
                   conv_non_linear="relu",
                   activations=[Iden],
                   sqr_norm_lim=9,
                   non_static=True):
    """
    Train a simple conv net
    img_h = sentence length (padded where necessary)
    img_w = word vector length (300 for word2vec)
    filter_hs = filter window sizes
    hidden_units = [x,y] x is the number of feature maps (per filter window), and y is the penultimate layer
    sqr_norm_lim = s^2 in the paper
    lr_decay = adadelta decay parameter
    """
    rng = np.random.RandomState(3435)
    img_h = len(datasets[0][0][0])
    filter_w = img_w
    feature_maps = hidden_units[0]
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))
    parameters = [("image shape", img_h, img_w),
                  ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units), ("dropout", dropout_rate),
                  ("batch_size", batch_size), ("non_static", non_static),
                  ("learn_decay", lr_decay),
                  ("conv_non_linear", conv_non_linear),
                  ("non_static", non_static), ("sqr_norm_lim", sqr_norm_lim),
                  ("shuffle_batch", shuffle_batch)]
    print(parameters)

    # define model architecture
    index = T.iscalar()
    x = T.tensor3('x', dtype=theano.config.floatX)
    y = T.ivector('y')
    mair = T.matrix('mair')
    Words = theano.shared(value=U, name="Words")
    zero_vec_tensor = T.vector(dtype=theano.config.floatX)
    zero_vec = np.zeros(img_w, dtype=theano.config.floatX)
    set_zero = theano.function([zero_vec_tensor],
                               updates=[
                                   (Words,
                                    T.set_subtensor(Words[0, :],
                                                    zero_vec_tensor))
                               ],
                               allow_input_downcast=True)

    conv_layers = []

    for i in range(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng,
                                        image_shape=None,
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        conv_layers.append(conv_layer)

    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], x.shape[1], x.shape[2], Words.shape[1]))

    def convolve_user_statuses(statuses):
        layer1_inputs = []

        def sum_mat(mat, out):
            z = ifelse(
                T.neq(T.sum(mat, dtype=theano.config.floatX),
                      T.constant(0, dtype=theano.config.floatX)),
                T.constant(1, dtype=theano.config.floatX),
                T.constant(0, dtype=theano.config.floatX))
            return out + z, theano.scan_module.until(
                T.eq(z, T.constant(0, dtype=theano.config.floatX)))

        status_count, _ = theano.scan(fn=sum_mat,
                                      sequences=statuses,
                                      outputs_info=T.constant(
                                          0, dtype=theano.config.floatX))

        # Slice-out dummy (zeroed) sentences
        relv_input = statuses[:T.cast(status_count[-1], dtype='int32'
                                      )].dimshuffle(0, 'x', 1, 2)

        for conv_layer in conv_layers:
            layer1_inputs.append(
                conv_layer.set_input(input=relv_input).flatten(2))

        features = T.concatenate(layer1_inputs, axis=1)

        # note: despite the name, this is a max (not an average) over the user's statuses
        avg_feat = T.max(features, axis=0)

        return avg_feat

    conv_feats, _ = theano.scan(fn=convolve_user_statuses,
                                sequences=layer0_input)

    # Add Mairesse features
    layer1_input = T.concatenate([conv_feats, mair], axis=1)  ##mairesse_change
    hidden_units[0] = feature_maps * len(filter_hs) + datasets[4].shape[1]  ##mairesse_change
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)

    svm_data = T.concatenate(
        [classifier.layers[0].output,
         y.dimshuffle(0, 'x')], axis=1)
    # define parameters of the model and update functions using adadelta
    params = classifier.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        # if word vectors are allowed to change, add them as model parameters
        params += [Words]
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6,
                                        sqr_norm_lim)

    # shuffle dataset and assign to mini batches. if dataset size is not a multiple of mini batches, replicate
    # extra data (at random)
    np.random.seed(3435)
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        rand_perm = np.random.permutation(range(len(datasets[0])))
        train_set_x = datasets[0][rand_perm]
        train_set_y = datasets[1][rand_perm]
        train_set_m = datasets[4][rand_perm]
        extra_data_x = train_set_x[:extra_data_num]
        extra_data_y = train_set_y[:extra_data_num]
        extra_data_m = train_set_m[:extra_data_num]
        new_data_x = np.append(datasets[0], extra_data_x, axis=0)
        new_data_y = np.append(datasets[1], extra_data_y, axis=0)
        new_data_m = np.append(datasets[4], extra_data_m, axis=0)
    else:
        new_data_x = datasets[0]
        new_data_y = datasets[1]
        new_data_m = datasets[4]
    rand_perm = np.random.permutation(range(len(new_data_x)))
    new_data_x = new_data_x[rand_perm]
    new_data_y = new_data_y[rand_perm]
    new_data_m = new_data_m[rand_perm]
    n_batches = new_data_x.shape[0] / batch_size
    n_train_batches = int(np.round(n_batches * 0.9))
    # divide train set into train/val sets
    test_set_x = datasets[2]
    test_set_y = np.asarray(datasets[3], "int32")
    test_set_m = datasets[5]
    train_set_x, train_set_y, train_set_m = shared_dataset(
        (new_data_x[:n_train_batches * batch_size],
         new_data_y[:n_train_batches * batch_size],
         new_data_m[:n_train_batches * batch_size]))
    val_set_x, val_set_y, val_set_m = shared_dataset(
        (new_data_x[n_train_batches * batch_size:],
         new_data_y[n_train_batches * batch_size:],
         new_data_m[n_train_batches * batch_size:]))
    n_val_batches = n_batches - n_train_batches
    val_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: val_set_x[index * batch_size:(index + 1) * batch_size],
            y: val_set_y[index * batch_size:(index + 1) * batch_size],
            mair: val_set_m[index * batch_size:(index + 1) * batch_size]
        },  ##mairesse_change
        allow_input_downcast=False)

    # compile theano functions to get train/val/test errors
    test_model = theano.function(
        [index],
        [classifier.errors(y), svm_data],
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            mair: train_set_m[index * batch_size:(index + 1) * batch_size]
        },
        ##mairesse_change
        allow_input_downcast=True)
    train_model = theano.function(
        [index],
        cost,
        updates=grad_updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            mair: train_set_m[index * batch_size:(index + 1) * batch_size]
        },
        ##mairesse_change
        allow_input_downcast=True)

    test_y_pred = classifier.predict(layer1_input)
    test_error = T.sum(T.neq(test_y_pred, y), dtype=theano.config.floatX)
    true_p = T.sum(test_y_pred * y, dtype=theano.config.floatX)
    false_p = T.sum(test_y_pred *
                    T.mod(y + T.ones_like(y, dtype=theano.config.floatX),
                          T.constant(2, dtype='int32')))
    false_n = T.sum(
        y * T.mod(test_y_pred + T.ones_like(y, dtype=theano.config.floatX),
                  T.constant(2, dtype='int32')))
    test_model_all = theano.function(
        [
            x,
            y,
            mair  ##mairesse_change
        ],
        [test_error, true_p, false_p, false_n, svm_data],
        allow_input_downcast=True)

    test_batches = test_set_x.shape[0] / batch_size

    # start training over mini-batches
    print('... training')
    epoch = 0
    best_val_perf = 0
    val_perf = 0
    test_perf = 0
    fscore = 0
    cost_epoch = 0
    while (epoch < n_epochs):
        start_time = time.time()
        epoch = epoch + 1
        if shuffle_batch:
            for minibatch_index in np.random.permutation(
                    range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        else:
            for minibatch_index in range(int(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        train_losses = [test_model(i) for i in range(int(n_train_batches))]
        train_perf = 1 - np.mean([loss[0] for loss in train_losses])
        val_losses = [val_model(i) for i in range(int(n_val_batches))]
        val_perf = 1 - np.mean(val_losses)
        epoch_perf = 'epoch: %i, training time: %.2f secs, train perf: %.2f %%, val perf: %.2f %%' % (
            epoch, time.time() - start_time, train_perf * 100.,
            val_perf * 100.)
        print(epoch_perf)
        ofile.write(epoch_perf + "\n")
        ofile.flush()
        if val_perf >= best_val_perf:
            best_val_perf = val_perf
            test_loss_list = [
                test_model_all(
                    test_set_x[idx * batch_size:(idx + 1) * batch_size],
                    test_set_y[idx * batch_size:(idx + 1) * batch_size],
                    test_set_m[idx * batch_size:(idx + 1) *
                               batch_size]  ##mairesse_change
                ) for idx in range(int(test_batches))
            ]
            if test_set_x.shape[0] > test_batches * batch_size:
                test_loss_list.append(
                    test_model_all(
                        test_set_x[int(test_batches * batch_size):],
                        test_set_y[int(test_batches * batch_size):],
                        test_set_m[int(test_batches *
                                       batch_size):]  ##mairesse_change
                    ))
            test_loss_list_temp = test_loss_list
            test_loss_list = np.asarray([t[:-1] for t in test_loss_list])
            test_loss = np.sum(test_loss_list[:, 0]) / float(
                test_set_x.shape[0])
            test_perf = 1 - test_loss
            tp = np.sum(test_loss_list[:, 1])
            fp = np.sum(test_loss_list[:, 2])
            fn = np.sum(test_loss_list[:, 3])
            tn = test_set_x.shape[0] - (tp + fp + fn)
            fscore = np.mean([
                2 * tp / float(2 * tp + fp + fn),
                2 * tn / float(2 * tn + fp + fn)
            ])
            svm_test = np.concatenate([t[-1] for t in test_loss_list_temp],
                                      axis=0)
            svm_train = np.concatenate([t[1] for t in train_losses], axis=0)
            output = "Test result: accu: " + str(
                test_perf) + ", macro_fscore: " + str(fscore) + "\ntp: " + str(
                    tp) + " tn:" + str(tn) + " fp: " + str(fp) + " fn: " + str(
                        fn)
            print(output)
            ofile.write(output + "\n")
            ofile.flush()
            # dump train and test features
            pickle.dump(svm_test,
                        open("cvte" + str(attr) + str(cv) + ".p", "wb"))
            pickle.dump(svm_train,
                        open("cvtr" + str(attr) + str(cv) + ".p", "wb"))
        updated_epochs = refresh_epochs()
        if updated_epochs is not None and n_epochs != updated_epochs:
            n_epochs = updated_epochs
            print('Epochs updated to ' + str(n_epochs))
    return test_perf, fscore
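
The macro F-score computed above averages the F1 of the positive class, 2*tp/(2*tp+fp+fn), with the F1 of the negative class, 2*tn/(2*tn+fp+fn) (fp and fn swap roles when the negative class is scored as if it were positive). A quick check of that arithmetic with made-up counts:

tp, fp, fn, tn = 40., 10., 5., 45.      # hypothetical confusion-matrix counts
f1_pos = 2 * tp / (2 * tp + fp + fn)    # 80 / 95  ~ 0.842
f1_neg = 2 * tn / (2 * tn + fp + fn)    # 90 / 105 ~ 0.857
macro_f = np.mean([f1_pos, f1_neg])     # ~ 0.850, matching the np.mean([...]) used in the code above
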
Example #10
  def __init__(self):
    mrppath = os.path.join(this_dir, "mr.p")
    x = cPickle.load(open(mrppath, "rb"))
    revs, W, W2, word_idx_map, vocab = x[0], x[1], x[2], x[3], x[4]
    self.word_idx_map = word_idx_map

    U = W
    classifierpath = os.path.join(this_dir, "classifier.save")
    savedparams = cPickle.load(open(classifierpath, 'rb'))

    filter_hs = [3, 4, 5]
    conv_non_linear = "relu"
    hidden_units = [100, 2]
    dropout_rate = [0.5]
    activations = [Iden]
    img_h = 56 + 4 + 4
    img_w = 300
    rng = np.random.RandomState(3435)
    batch_size = 50
    filter_w = img_w
    feature_maps = hidden_units[0]
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))

    # define model architecture
    x = T.matrix('x')
    Words = theano.shared(value=U, name="Words")
    zero_vec_tensor = T.vector()
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))
    conv_layers = []
    layer1_inputs = []
    for i in xrange(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng,
                                        input=layer0_input,
                                        image_shape=(batch_size, 1, img_h, img_w),
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_hs)
    classifier = MLPDropout(rng, input=layer1_input, layer_sizes=hidden_units,
                            activations=activations, dropout_rates=dropout_rate)
    classifier.params[0].set_value(savedparams[0])
    classifier.params[1].set_value(savedparams[1])
    k = 2
    for conv_layer in conv_layers:
        conv_layer.params[0].set_value(savedparams[k])
        conv_layer.params[1].set_value(savedparams[k + 1])
        k = k + 2

    test_pred_layers = []
    test_size = 1
    test_layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (test_size, 1, img_h, Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_y_pred = classifier.predict_p(test_layer1_input)
    # test_error = T.mean(T.neq(test_y_pred, y))
    self.model = theano.function([x], test_y_pred, allow_input_downcast=True)
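
  # The compiled self.model maps a (1, img_h) matrix of word indices to class
  # probabilities via predict_p. A hypothetical convenience wrapper is sketched
  # below; classify(), the 4 leading padding zeros, and the use of index 0 for
  # unknown words are assumptions, not part of the original class.
  def classify(self, sentence):
    words = sentence.lower().split()
    # index 0 is the all-zero "padding" vector; pad 4 positions at the front
    # (widest filter height minus one) and out to img_h = 64 overall
    idxs = [0] * 4 + [self.word_idx_map.get(w, 0) for w in words]
    idxs = (idxs + [0] * 64)[:64]
    probs = self.model(np.asarray([idxs], dtype="float32"))
    return probs[0]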