Example 1
    def __init__(self,
                 rng,
                 n_in,
                 n_out,
                 n_h,
                 f_act=leaky_relu,
                 f_out=softmax,
                 orth_init=True,
                 dropout_rate=0,
                 obj='c'):
        '''
        :param rng: Numpy RandomState
        :param n_in: Input dimension (int)
        :param n_out: Output dimension (int)
        :param n_h: Hidden dimension (int)
        :param f_act: Hidden-to-hidden activation function
        :param f_out: Output activation function
        :param orth_init: if True, initialize the transition matrix to be orthogonal (bool)
        :param dropout_rate: dropout rate (float)
        :param obj: objective type, 'c' for classification with cross entropy loss, 'r' for regression with MSE loss. (['c','r'])
        '''
        if orth_init:
            # rvs (defined elsewhere) returns a random orthogonal n_h x n_h matrix
            Whh_ = rvs(rng, n_h)
        else:
            Whh_ = rng.uniform(-np.sqrt(6. / (n_h + n_h)),
                               np.sqrt(6. / (n_h + n_h)), (n_h, n_h))

        Whi_ = rng.uniform(-np.sqrt(6. / (n_in + n_h)),
                           np.sqrt(6. / (n_in + n_h)), (n_h, n_in))
        bh_ = np.zeros(n_h)
        Woh_ = rng.uniform(-np.sqrt(6. / (n_h + n_out)),
                           np.sqrt(6. / (n_h + n_out)), (n_out, n_h))
        bo_ = np.zeros(n_out)
        h0_ = rng.uniform(-np.sqrt(3. / (2. * n_h)), np.sqrt(3. / (2. * n_h)),
                          n_h)

        # Theano: create shared variables
        Whh = theano.shared(name='Whh',
                            value=Whh_.astype(theano.config.floatX))
        Whi = theano.shared(name='Whi',
                            value=Whi_.astype(theano.config.floatX))
        bh = theano.shared(name='bh', value=bh_.astype(theano.config.floatX))
        Woh = theano.shared(name='Woh',
                            value=Woh_.astype(theano.config.floatX))
        bo = theano.shared(name='bo', value=bo_.astype(theano.config.floatX))
        h0 = theano.shared(name='h0', value=h0_.astype(theano.config.floatX))

        self.p = [Whh, Whi, Woh, bh, bo, h0]

        seq_len = T.iscalar('seq_len')
        self.seq_len = seq_len
        self.dropout_rate = dropout_rate
        self.x = T.vector()
        x_scan = T.reshape(self.x, [seq_len, n_in], ndim=2)

        if dropout_rate > 0:
            # Use Theano's RandomStreams so a fresh dropout mask is sampled
            # at every time step; a numpy mask would be evaluated once at
            # graph-construction time and baked in as a constant.
            from theano.tensor.shared_randomstreams import RandomStreams
            srng = RandomStreams(rng.randint(999999))

            # for training
            def masked_forward_prop_step(x_t, h_t_prev):
                h_t = f_act(Whi.dot(x_t) + Whh.dot(h_t_prev) + bh)
                o_t = Woh.dot(h_t) + bo
                mask = srng.binomial(size=(n_h,), p=1.0 - dropout_rate,
                                     dtype=theano.config.floatX)
                masked_h_t = h_t * mask

                return [o_t, masked_h_t]

            # for testing
            def forward_prop_step(x_t, h_t_prev):
                h_t = f_act(Whi.dot(x_t) + Whh.dot(h_t_prev) + bh)
                o_t = Woh.dot(h_t) + bo
                h_t = (1.0 - dropout_rate) * h_t
                return [o_t, h_t]

            [o_train, _], scan_updates = theano.scan(
                masked_forward_prop_step,
                sequences=[x_scan],
                outputs_info=[None, h0],
                n_steps=seq_len)
            # These random-stream updates must be passed to the compiled
            # training function so a new mask is drawn on every call.
            self.scan_updates = scan_updates

            [o_test, _], _ = theano.scan(forward_prop_step,
                                         sequences=[x_scan],
                                         outputs_info=[None, h0],
                                         n_steps=seq_len)

        else:

            def forward_prop_step(x_t, h_t_prev):
                h_t = f_act(Whi.dot(x_t) + Whh.dot(h_t_prev) + bh)
                o_t = Woh.dot(h_t) + bo
                return [o_t, h_t]

            [o_train, _], _ = theano.scan(forward_prop_step,
                                          sequences=[x_scan],
                                          outputs_info=[None, h0],
                                          n_steps=seq_len)
            o_test = o_train

        if obj == 'c':  # classification task
            self.y = T.bscalar('y')
            self.o_train = f_out(o_train[-1])
            self.o_test = f_out(o_test[-1])
            # objective used to compute gradients (dropout-masked output)
            self.cost = T.nnet.categorical_crossentropy(
                self.o_train,
                T.eye(n_out)[self.y])
            # accuracy and prediction use the rescaled (test-time) output
            self.accuracy = T.switch(T.eq(T.argmax(self.o_test), self.y), 1.,
                                     0.)
            self.prediction = T.argmax(self.o_test)
        elif obj == 'r':  # regression task
            self.y = T.dscalar('y')
            self.o_train = o_train[-1]
            self.o_test = o_test[-1]
            # objective used to compute gradients (dropout-masked output)
            self.cost = (self.o_train[0] - self.y)**2
            # for regression, "accuracy" is the squared error on the
            # rescaled (test-time) output
            self.accuracy = (self.o_test[0] - self.y)**2
            self.prediction = self.o_test[0]

        # Track the extreme singular values of the transition matrix Whh
        # (self.p[0]) to monitor how far it drifts from orthogonality.
        _, self.Sigma, _ = T.nlinalg.SVD(full_matrices=1,
                                         compute_uv=1)(self.p[0])
        self.max_singular = T.max(self.Sigma)
        self.min_singular = T.min(self.Sigma)

        self.optimiser = sgd_optimizer(self, 'RNN')
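
For context, here is a minimal usage sketch. The class name `RNN` is an assumption (the constructor above carries no class statement), and the imports mirror the ones the code already relies on; treat this as illustrative, not as the project's actual entry point.

# Hypothetical usage sketch; `RNN` as a class name is an assumption.
import numpy as np
import theano

rng = np.random.RandomState(42)
model = RNN(rng, n_in=1, n_out=10, n_h=64, dropout_rate=0.1, obj='c')

# Compile the test-time forward pass from the symbolic attributes built
# in __init__: a flat input vector plus its sequence length.
predict = theano.function([model.x, model.seq_len], model.prediction)

x = rng.uniform(size=100).astype(theano.config.floatX)  # 100 steps, 1-d input
print(predict(x, 100))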
Example 2
    def __init__(self,
                 rng,
                 n_in,
                 n_out,
                 n_h,
                 n_layers,
                 f_act=leaky_relu,
                 obj='single',
                 dropout_rate=0):
        '''
        :param rng: Numpy RandomState
        :param n_in: Input dimension (int)
        :param n_out: Output dimension (int)
        :param n_h: Hidden dimension (int)
        :param n_layers: Number of hidden layers (int)
        :param f_act: Hidden-to-hidden activation function
        :param obj: objective type, 'single' for single-label classification (softmax output), 'multi' for multi-label classification (sigmoid output). (['single','multi'])
        :param dropout_rate: dropout rate (float)
        '''
        if obj == 'single':
            f_out = softmax
        elif obj == 'multi':
            f_out = sigmoid
        self.x = T.vector()

        # construct hidden layers
        assert n_layers >= 1
        first_hiddenLayer = HiddenLayer(
            rng=rng,
            input=self.x,
            predict_input=self.x,
            n_in=n_in,
            n_out=n_h,
            activation=f_act,
            dropout_rate=dropout_rate,
            nametag='0'
        )

        self.hidden_layers = [first_hiddenLayer]
        self.p = first_hiddenLayer.params[:]

        for i in range(n_layers - 1):
            cur_hiddenLayer = ResNetLayer(
                rng=rng,
                input=self.hidden_layers[-1].output,
                predict_input=self.hidden_layers[-1].predict_output,
                n_h=n_h,
                activation=f_act,
                dropout_rate=dropout_rate,
                nametag=str(i + 1)
            )
            self.hidden_layers.append(cur_hiddenLayer)
            self.p.extend(cur_hiddenLayer.params[:])

        # construct the output layer

        self.outputLayer = HiddenLayer(
            rng=rng,
            input=self.hidden_layers[-1].output,
            predict_input=self.hidden_layers[-1].predict_output,
            n_in=n_h,
            n_out=n_out,
            activation=f_out,
            dropout_rate=0,
            nametag='o'
        )
        self.p.extend(self.outputLayer.params[:])

        self.n_layers = n_layers + 1
        self.obj = obj
        if obj == 'single':
            self.y = T.bscalar('y')
            self.o = self.outputLayer.output
            self.cost = T.nnet.categorical_crossentropy(self.o, T.eye(n_out)[self.y])
            self.accuracy = T.switch(T.eq(T.argmax(self.o), self.y), 1., 0.)
            self.prediction = T.argmax(self.o)
        elif obj == 'multi':
            self.y = T.bvector('y')
            self.o = self.outputLayer.output
            self.cost = T.nnet.binary_crossentropy(self.o, self.y).mean()
            self.prediction = T.argsort(self.o)
            # accuracy@k: fraction of the k highest-scoring labels that are true
            self.accuracy = self.y[T.argmax(self.o)]
            self.accuracy3 = (1.0 / 3.0) * (self.y[self.prediction[-3]] +
                                            self.y[self.prediction[-2]] +
                                            self.y[self.prediction[-1]])
            self.accuracy5 = (1.0 / 5.0) * (self.y[self.prediction[-5]] +
                                            self.y[self.prediction[-4]] +
                                            self.y[self.prediction[-3]] +
                                            self.y[self.prediction[-2]] +
                                            self.y[self.prediction[-1]])

        self.optimiser = sgd_optimizer(self, 'ResNet')
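
`HiddenLayer` and `ResNetLayer` are defined elsewhere in the project. As a rough sketch of the interface this constructor relies on (a `params` list, a dropout-masked `output` for training, and a rescaled `predict_output` for testing), a `HiddenLayer` might look like the following; the details are assumptions, not the project's actual code.

# Hypothetical sketch of the HiddenLayer interface used above.
import numpy as np
import theano
from theano.tensor.shared_randomstreams import RandomStreams

class HiddenLayer(object):
    def __init__(self, rng, input, predict_input, n_in, n_out,
                 activation, dropout_rate, nametag):
        W_ = rng.uniform(-np.sqrt(6. / (n_in + n_out)),
                         np.sqrt(6. / (n_in + n_out)), (n_out, n_in))
        W = theano.shared(name='W' + nametag,
                          value=W_.astype(theano.config.floatX))
        b = theano.shared(name='b' + nametag,
                          value=np.zeros(n_out).astype(theano.config.floatX))
        self.params = [W, b]
        out = activation(W.dot(input) + b)
        if dropout_rate > 0:
            srng = RandomStreams(rng.randint(999999))
            mask = srng.binomial(size=(n_out,), p=1.0 - dropout_rate,
                                 dtype=theano.config.floatX)
            out = out * mask  # training path: random dropout mask
        self.output = out
        # test path: deterministic rescaling instead of masking
        self.predict_output = (1.0 - dropout_rate) * activation(
            W.dot(predict_input) + b)

A `ResNetLayer` would presumably wrap a similar transform with an identity skip connection (output = input + f(W.dot(input) + b)), which is what lets the loop above stack layers of constant width n_h.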
Example 3
    def __init__(self,
                 rng,
                 n_in,
                 n_out,
                 n_h,
                 n_r,
                 f_act=leaky_relu,
                 f_out=softmax,
                 obj='c'):
        '''
        :param rng: Numpy RandomState
        :param n_in: Input dimension (int)
        :param n_out: Output dimension (int)
        :param n_h: Hidden dimension (int)
        :param n_r: Number of reflection vectors (int)
        :param f_act: Hidden-to-hidden activation function
        :param f_out: Output activation function
        :param obj: objective type, 'c' for classification with cross entropy loss, 'r' for regression with MSE loss. (['c','r'])
        '''
        # Reflection vectors: lower-triangular with unit-norm columns.
        U_ = np.tril(rng.normal(0, 0.01, (n_h, n_r)))
        U_ = U_ / np.linalg.norm(U_, axis=0)

        Whi_ = rng.uniform(-np.sqrt(6. / (n_in + n_h)),
                           np.sqrt(6. / (n_in + n_h)), (n_h, n_in))
        bh_ = np.zeros(n_h)
        Woh_ = rng.uniform(-np.sqrt(6. / (n_h + n_out)),
                           np.sqrt(6. / (n_h + n_out)), (n_out, n_h))
        bo_ = np.zeros(n_out)
        h0_ = rng.uniform(-np.sqrt(3. / (2. * n_h)), np.sqrt(3. / (2. * n_h)),
                          n_h)

        # Theano: create shared variables
        Whi = theano.shared(name='Whi',
                            value=Whi_.astype(theano.config.floatX))
        U = theano.shared(name='U', value=U_.astype(theano.config.floatX))
        bh = theano.shared(name='bh', value=bh_.astype(theano.config.floatX))
        Woh = theano.shared(name='Woh',
                            value=Woh_.astype(theano.config.floatX))
        bo = theano.shared(name='bo', value=bo_.astype(theano.config.floatX))
        h0 = theano.shared(name='h0', value=h0_.astype(theano.config.floatX))

        self.p = [U, Whi, Woh, bh, bo, h0]

        seq_len = T.iscalar('seq_len')
        self.seq_len = seq_len

        self.x = T.vector()
        x_scan = T.reshape(self.x, [seq_len, n_in], ndim=2)
        if n_h != n_r:  # Number of reflection vectors is less than the hidden dimension

            def forward_prop_step(x_t, h_t_prev):
                h_t = f_act(Whi.dot(x_t) + H_wy(U, h_t_prev) + bh)
                o_t = Woh.dot(h_t) + bo
                return [o_t, h_t]
        else:
            # n_r == n_h: the last column of the lower-triangular U has a
            # single entry (unit-norm at initialization), so it is applied
            # as a scale on the last coordinate; the remaining columns act
            # as Householder reflections.
            def forward_prop_step(x_t, h_t_prev):
                h_t_prev = T.set_subtensor(h_t_prev[-1],
                                           h_t_prev[-1] * U[-1, -1])
                h_t = f_act(Whi.dot(x_t) + H_wy(U[:, :-1], h_t_prev) + bh)
                o_t = Woh.dot(h_t) + bo
                return [o_t, h_t]

        ## For loop version below (when n_r < n_h)
        # def forward_prop_step(x_t, h_t_prev):
        #     Wh = h_t_prev
        #     for i in range(n_r):
        #         Wh -= 2. * U[:, n_r - i - 1] * T.dot(U[:, n_r - i - 1], Wh)
        #     h_t = f_act(Whi.dot(x_t) + Wh + bh)
        #     o_t = Woh.dot(h_t) + bo
        #     return [o_t, h_t]

        [o_scan, _], _ = theano.scan(forward_prop_step,
                                     sequences=[x_scan],
                                     outputs_info=[None, h0],
                                     n_steps=seq_len)

        if obj == 'c':  # classification task
            self.y = T.bscalar('y')
            self.o = f_out(o_scan[-1])
            # objective used to compute gradients
            self.cost = T.nnet.categorical_crossentropy(
                self.o,
                T.eye(n_out)[self.y])
            self.accuracy = T.switch(T.eq(T.argmax(self.o), self.y), 1., 0.)
            self.prediction = T.argmax(self.o)
        elif obj == 'r':  # regression task
            self.y = T.dscalar('y')
            self.o = o_scan[-1]
            # objective used to compute gradients
            self.cost = (self.o[0] - self.y)**2
            # for regression, "accuracy" is the squared error
            self.accuracy = (self.o[0] - self.y)**2
            self.prediction = self.o[0]

        self.optimiser = sgd_optimizer(self, 'oRNN')
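
The `H_wy` helper is defined elsewhere. Judging from the commented-out loop above, it applies one Householder reflection per (unit-norm) column of U. A naive sketch of that operation follows; the name and the explicit `n_r` argument are assumptions, and the real helper likely uses the compact WY representation for efficiency, as the name suggests.

import theano.tensor as T

def H_wy_naive(U, h, n_r):
    # h <- (I - 2 u_1 u_1^T) ... (I - 2 u_{n_r} u_{n_r}^T) h,
    # applying the rightmost (highest-index) reflection first.
    for i in range(n_r):
        u = U[:, n_r - i - 1]
        h = h - 2. * u * T.dot(u, h)
    return h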
Example 4
    def __init__(self,
                 rng,
                 n_in,
                 n_out,
                 n_h,
                 n_r,
                 margin=1.0,
                 sig_mean=1.0,
                 f_act=leaky_relu,
                 f_out=softmax,
                 obj='c'):
        '''
        :param rng: Numpy RandomState
        :param n_in: Input dimension (int)
        :param n_out: Output dimension (int)
        :param n_h: Hidden dimension (int)
        :param n_r: Number of reflection vectors (int)
        :param margin: Half-width of the admissible singular-value interval (float)
        :param sig_mean: Center of the admissible singular-value interval (float)
        :param f_act: Hidden-to-hidden activation function
        :param f_out: Output activation function
        :param obj: objective type, 'c' for classification with cross entropy loss, 'r' for regression with MSE loss. (['c','r'])
        '''
        # Two sets of reflection vectors (for the left and right singular
        # bases): lower-triangular with unit-norm columns.
        U_ = np.tril(rng.normal(0, 0.01, (n_h, n_r)))
        U_ = U_ / np.linalg.norm(U_, axis=0)

        V_ = np.tril(rng.normal(0, 0.01, (n_h, n_r)))
        V_ = V_ / np.linalg.norm(V_, axis=0)

        # P parametrizes the singular values: Sig = 2*margin*(sigmoid(P) - 0.5) + sig_mean,
        # which keeps each singular value inside (sig_mean - margin, sig_mean + margin).
        P_ = np.zeros(n_h)

        Whi_ = rng.uniform(-np.sqrt(6. / (n_in + n_h)),
                           np.sqrt(6. / (n_in + n_h)), (n_h, n_in))
        bh_ = np.zeros(n_h)
        Woh_ = rng.uniform(-np.sqrt(6. / (n_h + n_out)),
                           np.sqrt(6. / (n_h + n_out)), (n_out, n_h))
        bo_ = np.zeros(n_out)
        h0_ = rng.uniform(-np.sqrt(3. / (2. * n_h)), np.sqrt(3. / (2. * n_h)),
                          n_h)

        # Theano: create shared variables
        Whi = theano.shared(name='Whi',
                            value=Whi_.astype(theano.config.floatX))
        U = theano.shared(name='U', value=U_.astype(theano.config.floatX))
        V = theano.shared(name='V', value=V_.astype(theano.config.floatX))
        P = theano.shared(name='P', value=P_.astype(theano.config.floatX))
        bh = theano.shared(name='bh', value=bh_.astype(theano.config.floatX))
        Woh = theano.shared(name='Woh',
                            value=Woh_.astype(theano.config.floatX))
        bo = theano.shared(name='bo', value=bo_.astype(theano.config.floatX))
        h0 = theano.shared(name='h0', value=h0_.astype(theano.config.floatX))

        self.p = [U, V, P, Whi, Woh, bh, bo, h0]
        seq_len = T.iscalar('seq_len')
        self.seq_len = seq_len

        self.x = T.vector()
        x_scan = T.reshape(self.x, [seq_len, n_in], ndim=2)
        if n_h != n_r:  # Number of reflection vectors is less than the hidden dimension

            def forward_prop_step(x_t, h_t_prev):
                Sig = 2 * margin * (sigmoid(P) - 0.5) + sig_mean
                h_t = f_act(Whi.dot(x_t) + svd_H_wy(U, V, Sig, h_t_prev) + bh)
                o_t = Woh.dot(h_t) + bo
                return [o_t, h_t]
        else:
            # n_r == n_h: fold the single-entry last columns of U and V into
            # the last singular value; the remaining columns act as
            # Householder reflections.
            def forward_prop_step(x_t, h_t_prev):
                Sig = 2 * margin * (sigmoid(P) - 0.5) + sig_mean
                Hu1SigHv1 = T.set_subtensor(Sig[-1],
                                            Sig[-1] * U[-1, -1] * V[-1, -1])
                h_t = f_act(
                    Whi.dot(x_t) +
                    svd_H_wy(U[:, :-1], V[:, :-1], Hu1SigHv1, h_t_prev) + bh)
                o_t = Woh.dot(h_t) + bo
                return [o_t, h_t]

        [o_scan, _], _ = theano.scan(forward_prop_step,
                                     sequences=[x_scan],
                                     outputs_info=[None, h0],
                                     n_steps=seq_len)

        if obj == 'c':  # classification task
            self.y = T.bscalar('y')
            self.o = f_out(o_scan[-1])
            # objective used to compute gradients
            self.cost = T.nnet.categorical_crossentropy(
                self.o,
                T.eye(n_out)[self.y])
            self.accuracy = T.switch(T.eq(T.argmax(self.o), self.y), 1., 0.)
            self.prediction = T.argmax(self.o)
        elif obj == 'r':  # regression task
            self.y = T.dscalar('y')
            self.o = o_scan[-1]
            # objective used to compute gradients
            self.cost = (self.o[0] - self.y)**2
            # for regression, "accuracy" is the squared error
            self.accuracy = (self.o[0] - self.y)**2
            self.prediction = self.o[0]

        # The squashing map is monotonic, so the extreme entries of P give
        # the extreme singular values of the transition matrix.
        self.max_singular = 2 * margin * (sigmoid(T.max(self.p[2])) -
                                          0.5) + sig_mean
        self.min_singular = 2 * margin * (sigmoid(T.min(self.p[2])) -
                                          0.5) + sig_mean

        self.optimiser = sgd_optimizer(self, 'svdRNN')
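
`svd_H_wy` is also defined elsewhere. The surrounding construction suggests it applies a transition matrix parametrized by its SVD, W = H(U) * diag(Sig) * H(V)^T, with both orthogonal factors expressed as Householder products. A naive sketch under that assumption (the name and the explicit `n_r` argument are hypothetical):

import theano.tensor as T

def svd_H_wy_naive(U, V, Sig, h, n_r):
    # Householder reflections are symmetric, so H(V)^T reverses the
    # order in which the columns of V are applied.
    for i in range(n_r):                 # h <- H(V)^T h
        v = V[:, i]
        h = h - 2. * v * T.dot(v, h)
    h = Sig * h                          # h <- diag(Sig) h
    for i in range(n_r):                 # h <- H(U) h
        u = U[:, n_r - i - 1]
        h = h - 2. * u * T.dot(u, h)
    return h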
Example 5
    def __init__(self,
                 rng,
                 n_in,
                 n_out,
                 n_h,
                 sigma_g=sigmoid,
                 sigma_c=hyperbolic_tangent,
                 sigma_h=hyperbolic_tangent,
                 sigma_y=softmax,
                 dropout_rate=0,
                 obj='c'):
        '''
        :param rng: Numpy RandomState
        :param n_in: Input dimension (int)
        :param n_out: Output dimension (int)
        :param n_h: Hidden dimension (int)
        :param sigma_g: gate activation function
        :param sigma_c: cell-input activation function
        :param sigma_h: cell-output activation function
        :param sigma_y: output activation function
        :param dropout_rate: dropout rate (float)
        :param obj: objective type, 'c' for classification with cross entropy loss, 'r' for regression with MSE loss. (['c','r'])
        '''

        Wf_ = rng.uniform(-np.sqrt(6. / (n_in + n_h)),
                          np.sqrt(6. / (n_in + n_h)), (n_h, n_in))
        Uf_ = rng.uniform(-np.sqrt(6. / (n_h + n_h)),
                          np.sqrt(6. / (n_h + n_h)), (n_h, n_h))
        bf_ = np.zeros(n_h)

        Wi_ = rng.uniform(-np.sqrt(6. / (n_in + n_h)),
                          np.sqrt(6. / (n_in + n_h)), (n_h, n_in))
        Ui_ = rng.uniform(-np.sqrt(6. / (n_h + n_h)),
                          np.sqrt(6. / (n_h + n_h)), (n_h, n_h))
        bi_ = np.zeros(n_h)

        Wo_ = rng.uniform(-np.sqrt(6. / (n_in + n_h)),
                          np.sqrt(6. / (n_in + n_h)), (n_h, n_in))
        Uo_ = rng.uniform(-np.sqrt(6. / (n_h + n_h)),
                          np.sqrt(6. / (n_h + n_h)), (n_h, n_h))
        bo_ = np.zeros(n_h)

        Wc_ = rng.uniform(-np.sqrt(6. / (n_in + n_h)),
                          np.sqrt(6. / (n_in + n_h)), (n_h, n_in))
        Uc_ = rng.uniform(-np.sqrt(6. / (n_h + n_h)),
                          np.sqrt(6. / (n_h + n_h)), (n_h, n_h))
        bc_ = np.zeros(n_h)

        Wy_ = rng.uniform(-np.sqrt(6. / (n_out + n_h)),
                          np.sqrt(6. / (n_out + n_h)), (n_out, n_h))
        by_ = np.zeros(n_out)

        h0_ = rng.uniform(-np.sqrt(3. / (2. * n_h)), np.sqrt(3. / (2. * n_h)),
                          n_h)
        c0_ = rng.uniform(-np.sqrt(3. / (2. * n_h)), np.sqrt(3. / (2. * n_h)),
                          n_h)

        # Theano: create shared variables
        Wf = theano.shared(name='Wf', value=Wf_.astype(theano.config.floatX))
        Uf = theano.shared(name='Uf', value=Uf_.astype(theano.config.floatX))
        bf = theano.shared(name='bf', value=bf_.astype(theano.config.floatX))

        Wi = theano.shared(name='Wi', value=Wi_.astype(theano.config.floatX))
        Ui = theano.shared(name='Ui', value=Ui_.astype(theano.config.floatX))
        bi = theano.shared(name='bi', value=bi_.astype(theano.config.floatX))

        Wo = theano.shared(name='Wo', value=Wo_.astype(theano.config.floatX))
        Uo = theano.shared(name='Uo', value=Uo_.astype(theano.config.floatX))
        bo = theano.shared(name='bo', value=bo_.astype(theano.config.floatX))

        Wc = theano.shared(name='Wc', value=Wc_.astype(theano.config.floatX))
        Uc = theano.shared(name='Uc', value=Uc_.astype(theano.config.floatX))
        bc = theano.shared(name='bc', value=bc_.astype(theano.config.floatX))

        Wy = theano.shared(name='Wy', value=Wy_.astype(theano.config.floatX))
        by = theano.shared(name='by', value=by_.astype(theano.config.floatX))

        h0 = theano.shared(name='h0', value=h0_.astype(theano.config.floatX))
        c0 = theano.shared(name='c0', value=c0_.astype(theano.config.floatX))

        self.p = [
            Wf, Uf, bf, Wi, Ui, bi, Wo, Uo, bo, Wc, Uc, bc, Wy, by, c0, h0
        ]

        seq_len = T.iscalar('seq_len')
        self.seq_len = seq_len

        self.x = T.vector()
        x_scan = T.reshape(self.x, [seq_len, n_in], ndim=2)

        if dropout_rate > 0:
            # Use Theano's RandomStreams so a fresh dropout mask is sampled
            # at every time step; a numpy mask would be evaluated once at
            # graph-construction time and baked in as a constant.
            from theano.tensor.shared_randomstreams import RandomStreams
            srng = RandomStreams(rng.randint(999999))

            # for training
            def masked_forward_prop_step(x_t, h_t_prev, c_t_prev):
                f_t = sigma_g(Wf.dot(x_t) + Uf.dot(h_t_prev) + bf)
                i_t = sigma_g(Wi.dot(x_t) + Ui.dot(h_t_prev) + bi)
                o_t = sigma_g(Wo.dot(x_t) + Uo.dot(h_t_prev) + bo)
                c_t = i_t * sigma_c(Wc.dot(x_t) + Uc.dot(h_t_prev) + bc)
                c_t += c_t_prev * f_t
                h_t = o_t * sigma_h(c_t)
                y_t = Wy.dot(h_t) + by
                mask = srng.binomial(size=(n_h,), p=1.0 - dropout_rate,
                                     dtype=theano.config.floatX)
                masked_h_t = h_t * mask

                return [y_t, masked_h_t, c_t]

            # for testing
            def forward_prop_step(x_t, h_t_prev, c_t_prev):
                f_t = sigma_g(Wf.dot(x_t) + Uf.dot(h_t_prev) + bf)
                i_t = sigma_g(Wi.dot(x_t) + Ui.dot(h_t_prev) + bi)
                o_t = sigma_g(Wo.dot(x_t) + Uo.dot(h_t_prev) + bo)
                c_t = i_t * sigma_c(Wc.dot(x_t) + Uc.dot(h_t_prev) + bc)
                c_t += c_t_prev * f_t
                h_t = o_t * sigma_h(c_t)
                h_t = (1.0 - dropout_rate) * h_t
                y_t = Wy.dot(h_t) + by

                return [y_t, h_t, c_t]

            [o_train, _, _], scan_updates = theano.scan(
                masked_forward_prop_step,
                sequences=[x_scan],
                outputs_info=[None, h0, c0],
                n_steps=seq_len)
            # These random-stream updates must be passed to the compiled
            # training function so a new mask is drawn on every call.
            self.scan_updates = scan_updates

            [o_test, _, _], _ = theano.scan(forward_prop_step,
                                            sequences=[x_scan],
                                            outputs_info=[None, h0, c0],
                                            n_steps=seq_len)

        else:

            def forward_prop_step(x_t, h_t_prev, c_t_prev):
                f_t = sigma_g(Wf.dot(x_t) + Uf.dot(h_t_prev) + bf)
                i_t = sigma_g(Wi.dot(x_t) + Ui.dot(h_t_prev) + bi)
                o_t = sigma_g(Wo.dot(x_t) + Uo.dot(h_t_prev) + bo)
                c_t = i_t * sigma_c(Wc.dot(x_t) + Uc.dot(h_t_prev) + bc)
                c_t += c_t_prev * f_t
                h_t = o_t * sigma_h(c_t)
                y_t = Wy.dot(h_t) + by

                return [y_t, h_t, c_t]

            [o_train, _, _], _ = theano.scan(forward_prop_step,
                                             sequences=[x_scan],
                                             outputs_info=[None, h0, c0],
                                             n_steps=seq_len)
            o_test = o_train

        if obj == 'c':  # classification task
            self.y = T.bscalar('y')
            self.o_train = sigma_y(o_train[-1])
            self.o_test = sigma_y(o_test[-1])
            # objective used to compute gradients (dropout-masked output)
            self.cost = T.nnet.categorical_crossentropy(
                self.o_train,
                T.eye(n_out)[self.y])
            # accuracy and prediction use the rescaled (test-time) output
            self.accuracy = T.switch(T.eq(T.argmax(self.o_test), self.y), 1.,
                                     0.)
            self.prediction = T.argmax(self.o_test)
        elif obj == 'r':  # regression task
            self.y = T.dscalar('y')
            self.o_train = o_train[-1]
            self.o_test = o_test[-1]
            # objective used to compute gradients (dropout-masked output)
            self.cost = (self.o_train[0] - self.y)**2
            # for regression, "accuracy" is the squared error on the
            # rescaled (test-time) output
            self.accuracy = (self.o_test[0] - self.y)**2
            self.prediction = self.o_test[0]

        self.optimiser = sgd_optimizer(self, 'LSTM')
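
`sgd_optimizer` is defined elsewhere. As a minimal sketch of the kind of compiled training step it presumably builds from `self.cost`, `self.p`, and the scan updates (all names here are assumptions, and plain SGD is simply the most basic choice):

import theano
import theano.tensor as T

def make_sgd_step(model, lr=0.01):
    # One plain-SGD step on the scalar cost w.r.t. every shared parameter.
    grads = T.grad(model.cost, model.p)
    updates = [(p, p - lr * g) for p, g in zip(model.p, grads)]
    # Include the scan's random-stream updates (if any) so dropout masks
    # are resampled on each call.
    updates += list(getattr(model, 'scan_updates', {}).items())
    return theano.function([model.x, model.seq_len, model.y],
                           model.cost, updates=updates)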