import os
import cPickle

import numpy
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.sandbox.neighbours import images2neibs

# Project-local dependencies (import paths assumed -- adjust to this repo's
# layout): Preprocess loads the Movie Review data, AE is the fully-connected
# auto-encoder layer, plot saves a training curve.


def SCAE_preTrain(batch_size=1, nkerns=[3, 4], dataset=None, n_epochs=70,
                  k_Top=5, learning_rate=1e-1, binary=True):
    """
    Pre-train Stacked Convolutional AutoEncoders on the (binary) Movie
    Review data by minimising the reconstruction error of the full
    encode/decode stack; returns the learned parameters.
    """

    # Warm start: if parameters from a previous run were serialised (see the
    # cPickle.dump at the end of this function), load and return them.
    param_file = 'SCAE_MR_1e-1K%i%i' % (nkerns[0], nkerns[1])
    if os.path.exists(param_file):
        with open(param_file, 'rb') as f:
            return cPickle.load(f)

    if dataset is None:
        dataset = Preprocess().load_data(binary)
    train_set_x = dataset[0][0]
    train_set_z = dataset[0][2]
    n_train_batch = train_set_x.get_value(borrow=True).shape[0] // batch_size

    print '... Building Stacked Conv. AutoEncoders'

    rng = numpy.random.RandomState(96813)
    index = T.lscalar('index')
    
    x = T.dmatrix('Input Sentence')
    z = T.iscalar('Sentence length')
    # Each row of x is a zero-padded sentence matrix of 50-dimensional word
    # embeddings, stored flat; slice off the padding and restore the
    # (batch, 1, 50, sentence_length) layout.
    layer0_input = x[:, :z*50].reshape((batch_size, 1, 50, -1))
    
    layer0 = SCAE(rng, input=layer0_input, image_shape=None, filter_shape=(nkerns[0], 1, 1, 8), \
                                        factor=.5, s=z, k_Top=k_Top, do_fold=False)

    layer1_input = layer0.get_hidden_values(layer0_input)
    layer1 = SCAE(rng, input=layer1_input, filter_shape=(nkerns[1], nkerns[0], 1, 5), \
                        image_shape=None, factor=.0, s=z, k_Top=k_Top, do_fold=False)

    layer1_output = layer1.get_hidden_values(layer1_input)
    
    hidden_input = layer1_output.flatten(2)
    layer2 = AE(rng, input=hidden_input, n_visible=layer1.kshp[0]*50*k_Top, n_hidden=100)
    
    Y = layer2.get_hidden_values(hidden_input)
    
    ################
    #   DECODING   #
    ################
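
    # Decoding mirrors the encoder: AE code -> pooled feature maps ->
    # un-pooled maps -> deconvolved reconstruction Z of the input.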

    decode_hidden_layer = layer2.get_reconstructed_input(Y)
    decode_input = decode_hidden_layer.reshape(layer1.shape)
    
    decode_layer1 = layer1.get_reconstructed_input(decode_input, layer1.conv_out)
    Z = layer0.get_reconstructed_input(decode_layer1, layer0.conv_out)

    # Note: layer0.params and layer1.params were extended with their internal
    # fully-connected AE parameters inside get_hidden_values.
    params = layer2.params + layer1.params + layer0.params
    
    def get_cost_updates(X, Z, params, learning_rate):
        ''' Update the Stacked Convolutional Auto-Encoders. '''
        
        # Squared-error reconstruction loss, summed per example and
        # averaged over the batch.
        L = T.sum((X-Z) ** 2, axis=(1,2,3))
        cost = T.mean(L)
        
        gparams = T.grad(cost, params)
        
        # rho keeps the AdaGrad denominator away from zero.
        rho = 1e-7
        G = [theano.shared(value=numpy.zeros_like(param.get_value()),
                           name="AdaGrad_" + param.name, borrow=True)
             for param in params]
        G_update = [T.add(g_adag, T.sqr(grad_i)) for g_adag, grad_i in zip(G, gparams)]

        updates = []
        for param_i, g_update, grad_i, g in zip(params, G_update, gparams, G):
            updates.append((param_i, param_i - learning_rate * grad_i / (T.sqrt(g_update) + rho)))
            updates.append((g, g_update))
        
        return (cost, updates)
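    # The update above implements the standard AdaGrad rule, per parameter
    # theta with gradient g_t and accumulator G_t:
    #     G_t     = G_{t-1} + g_t ** 2
    #     theta_t = theta_{t-1} - learning_rate * g_t / (sqrt(G_t) + rho)
    # so coordinates with a history of large gradients take smaller steps.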
    
    cost, updates = get_cost_updates(layer0_input, Z, params, learning_rate)
    
    train_model = theano.function([index], cost, updates=updates, \
                        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                z: train_set_z[index]})
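    # 'givens' substitutes one minibatch of the shared training matrix for x
    # and the corresponding sentence length for z (batch_size defaults to 1).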

    print '... Pretraining model'

    plot_SCAE = []
    epoch = 0
    while epoch < n_epochs:
        epoch += 1
        for minibatch in xrange(n_train_batch):
            cost_ij = train_model(minibatch)
        print '\tepoch %i,\tcost  %f' % (epoch, cost_ij)
        plot_SCAE.append(cost_ij)
    plot('SCAE_Movie Results.', numpy.asarray(plot_SCAE), 74e-2)

    # Serialise the learned parameters so later runs can warm-start.
    with open(param_file, 'wb') as f:
        cPickle.dump(params, f)
    return params


class SCAE(object):
    """One Convolutional AutoEncoder layer: wide ('full') convolution,
    optional folding and dynamic k-max pooling with tanh on the way up;
    unpooling and deconvolution on the way down."""

    def __init__(self, rng, input, filter_shape, image_shape, factor, s, k_Top=5, do_fold=True):

        self.kshp = filter_shape  # (n_filters, n_input_maps, height, width)
        self.imshp = None
        # Filter shape for the decoding (transposed) convolution: input and
        # output channels swapped.
        self.i_kshp = (self.kshp[1], self.kshp[0], self.kshp[2], self.kshp[3])
        self.i_imshp = None
        self.do_fold = do_fold
        self.k_Top = k_Top    # minimum number of columns kept by k-max pooling
        self.factor = factor  # fraction of the sentence length to keep
        self.s = s            # (symbolic) sentence length
        self.rng = rng

        fan_in = numpy.prod(filter_shape[1:])
        fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])

        # Glorot-style uniform initialisation: W ~ U[-b, b] with
        # b = sqrt(6 / (fan_in + fan_out)).
        W_bound = numpy.sqrt(6.0 / (fan_in + fan_out))
        self.W = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=theano.config.floatX
            ),
            name="conv_W",
            borrow=True,
        )

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, name="conv_b", borrow=True)

        # Scalar bias for the reconstruction.
        self.c = theano.shared(value=0.0, name="deconv_c")

        # Decoder filters: the encoder kernels flipped in both spatial
        # dimensions, with input and output channels swapped.
        self.W_tilde = self.W[:, :, ::-1, ::-1].dimshuffle(1, 0, 2, 3)

        if input is None:
            self.x = T.dmatrix(name="input")
        else:
            self.x = input

        self.params = [self.W, self.b, self.c]

    def Fold(self, conv_out, ds=(2, 1)):
        """Fold the feature maps in two: sum each pair of vertically
        adjacent rows, halving the number of rows."""
        imgs = images2neibs(conv_out, T.as_tensor_variable(ds), mode="ignore_borders")
        orig = conv_out.shape
        shp = (orig[0], orig[1], T.cast(orig[2] // 2, "int32"), orig[3])
        return T.reshape(T.sum(imgs, axis=-1), shp)
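    # E.g. one 4x3 feature map folds to 2x3 by summing row pairs (0,1), (2,3):
    #     [[1, 2, 3],
    #      [4, 5, 6],     -->     [[ 5,  7,  9],
    #      [7, 8, 9],              [ 7,  9, 11]]
    #      [0, 1, 2]]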

    def kmaxPool(self, conv_out, pool_shape, k):
        """
        k-max pooling: keep the k largest values in each row, preserving
        their original left-to-right order.
        """
        n0, n1, d, size = pool_shape
        imgs = images2neibs(conv_out, T.as_tensor_variable((1, size)))

        # argsort of the negated rows gives positions in descending order of
        # value; keeping the first k and re-sorting them restores the
        # original order of the surviving entries.
        indices = T.argsort(T.mul(imgs, -1))
        self.k_max_indices = T.sort(indices[:, :k])

        S = T.arange(d * n1 * n0).reshape((d * n1 * n0, 1))
        return imgs[S, self.k_max_indices].reshape((n0, n1, d, k))
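    # E.g. with k = 3 the row [3, 1, 5, 2, 4] pools to [3, 5, 4]: the three
    # largest values survive in their original order, and their positions
    # ([0, 2, 4]) are cached in self.k_max_indices for unpooling.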

    def unpooling(self, Y_4D, Z, X_4D):
        """Reverse k-max pooling: scatter the rows of Y_4D back to the
        column positions recorded in Z (the k-max indices), zeros elsewhere;
        X_4D supplies the pre-pooling shape."""
        Y = images2neibs(Y_4D, T.as_tensor_variable((1, Y_4D.shape[3])))
        X = images2neibs(X_4D, T.as_tensor_variable((1, X_4D.shape[3])))
        X_z = T.zeros_like(X)
        X_ = T.set_subtensor(X_z[T.arange(X.shape[0]).reshape((X.shape[0], 1)), Z], Y)

        return X_.reshape(X_4D.shape)
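    # E.g. unpooling [3, 5, 4] with recorded indices [0, 2, 4] into a
    # length-5 row yields [3, 0, 5, 0, 4].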

    def Output(self):

        #  Convolve input with trained parameters.
        conv_out = conv.conv2d(
            input=self.x, filters=self.W, border_mode="full", filter_shape=self.kshp, image_shape=self.imshp
        )
        # Fold conv result into two.
        if self.do_fold:
            fold = self.Fold(conv_out)

        # Dynamic k-max pooling: keep k = max(k_Top, ceil(factor * s)) columns.
        k = T.cast(T.max((self.k_Top, T.ceil(self.factor * self.s))), "int32")
        if self.do_fold:
            pool_shape = fold.shape
            pooled_out = self.kmaxPool(fold, pool_shape, k)
        else:
            pool_shape = conv_out.shape
            pooled_out = self.kmaxPool(conv_out, pool_shape, k)

        return T.tanh(pooled_out + self.b.dimshuffle("x", 0, "x", "x"))
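    # E.g. with k_Top = 5 and factor = .5, a sentence of length s = 18 keeps
    # k = max(5, ceil(9.0)) = 9 columns, while one of length 6 keeps k = 5.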

    def get_hidden_values(self, input):

        # convolve input feature maps with filters
        self.conv_out = conv.conv2d(
            input=input, filters=self.W, border_mode="full", filter_shape=self.kshp, image_shape=self.imshp
        )

        # Dynamic k-max pooling (same rule as in Output above).
        k = T.cast(T.max((self.k_Top, T.ceil(self.factor * self.s))), "int32")
        pool_shape = self.conv_out.shape
        pool = self.kmaxPool(self.conv_out, pool_shape, k)

        output = T.tanh(pool + self.b.dimshuffle("x", 0, "x", "x"))
        self.shape = output.shape

        hidden_input = output.flatten(2)
        self.fully_connected = AE(
            self.rng, input=hidden_input, n_visible=self.kshp[0] * 25 * self.k_Top, n_hidden=60
        )
        self.params.extend(self.fully_connected.params)

        return self.fully_connected.get_hidden_values(hidden_input)

    def get_reconstructed_input(self, hidden, start):
        """Decode: reconstruct the AE code, reshape it to the pooled layout,
        un-pool at the stored k-max indices (start supplies the pre-pooling
        tensor), then deconvolve with the flipped filters."""
        reconstruct_AE = self.fully_connected.get_reconstructed_input(hidden)
        hidden_NN = reconstruct_AE.reshape(self.shape)

        unpool = self.unpooling(hidden_NN, self.k_max_indices, start)
        deconv = conv.conv2d(input=unpool, filters=self.W_tilde, filter_shape=self.i_kshp, image_shape=None)

        return T.tanh(deconv + self.c.dimshuffle("x", "x", "x", "x"))

    def get_cost_updates(self, learning_rate):
        """Per-layer pre-training: mean squared reconstruction error and the
        corresponding AdaGrad updates."""
        y = self.get_hidden_values(self.x)
        z = self.get_reconstructed_input(y, self.conv_out)

        L = T.sum((self.x - z) ** 2, axis=(1, 2, 3))
        cost = T.mean(L)

        gparams = T.grad(cost, self.params)

        # AdaGrad updates; rho keeps the denominator away from zero.
        rho = 1e-7
        G = [
            theano.shared(value=numpy.zeros_like(param.get_value()), name="AdaGrad_" + param.name, borrow=True)
            for param in self.params
        ]
        G_update = [T.add(g_adag, T.sqr(grad_i)) for g_adag, grad_i in zip(G, gparams)]

        updates = []
        for param_i, g_update, grad_i, g in zip(self.params, G_update, gparams, G):
            updates.append((param_i, param_i - learning_rate * grad_i / (T.sqrt(g_update) + rho)))
            updates.append((g, g_update))

        return (cost, updates)
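

# Minimal driver sketch (hypothetical: assumes the pickled Movie Review data
# expected by Preprocess().load_data is available in the working directory):
if __name__ == '__main__':
    pretrained = SCAE_preTrain(batch_size=1, nkerns=[3, 4], n_epochs=70,
                               k_Top=5, learning_rate=1e-1, binary=True)
    print '... Pre-training done: %i parameter tensors learned' % len(pretrained)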