Example #1
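Each example below is an `__init__` method excerpted from a larger module, so several names (`theano`, `T`, `np`, `shared`, `dtype`, `OrderedDict`, `init_weight`) come from a shared preamble that is not shown. A minimal sketch of what that preamble likely looks like, with `init_weight` reconstructed from its call sites (this helper is an assumption, not the original code):

    from collections import OrderedDict

    import numpy as np
    import theano
    import theano.tensor as T
    from theano import shared

    dtype = theano.config.floatX  # typically 'float32'

    def init_weight(shape, name, scheme='uniform'):
        # hypothetical helper reconstructed from the call sites: uniform noise
        # by default, or an orthonormal matrix (via SVD) when scheme='svd';
        # 'svd' is only requested for the square recurrent matrices below
        W = np.random.uniform(-0.1, 0.1, size=shape)
        if scheme == 'svd':
            u, _, _ = np.linalg.svd(W)
            W = u
        return shared(np.cast[dtype](W), name=name)
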
    def __init__(self, n_in, n_hid, n_out, lr=0.05, batch_size=64, single_output=True, output_activation=T.nnet.softmax, cost_function='nll'):   
        self.n_in = n_in
        self.n_hid = n_hid
        self.n_out = n_out
        self.W_in = init_weight((self.n_in, self.n_hid),'W_in')
        self.W_out = init_weight((self.n_hid, self.n_out),'W_out')
        self.W_rec = init_weight((self.n_hid, self.n_hid),'W_rec', 'svd')
        self.b_hid = shared(np.zeros(shape = n_hid, dtype=dtype))
        self.b_out = shared(np.zeros(shape = n_out, dtype=dtype))

        self.params = [self.W_in,self.W_out,self.W_rec,self.b_out,self.b_hid]

        self.activation = output_activation

        def step(x_t, h_tm1):
            h_t = T.tanh(T.dot(x_t, self.W_in) + T.dot(h_tm1, self.W_rec) + self.b_hid)
            y_t = self.activation(T.dot(h_t, self.W_out) + self.b_out)  # use the configured output activation
            return [h_t, y_t]

        X = T.tensor3()  # batch of sequences of input vectors: (batch, time, n_in)
        if single_output:
            Y = T.matrix()   # one target vector per sequence: (batch, n_out)
        else:
            Y = T.tensor3()  # one target vector per timestep: (batch, time, n_out)
        h0 = shared(np.zeros(shape=(batch_size,self.n_hid), dtype=dtype)) # initial hidden state                 
        lr = shared(np.cast[dtype](lr))
        
        [h_vals, y_vals], _ = theano.scan(fn=step,        
                                          sequences=X.dimshuffle(1,0,2),
                                          outputs_info=[h0, None])

        if single_output:
            self.output = y_vals[-1]            
        else:
            self.output = y_vals.dimshuffle(1,0,2)
        
        cxe = T.mean(T.nnet.binary_crossentropy(self.output, Y))
        nll = -T.mean(Y * T.log(self.output) + (1. - Y) * T.log(1. - self.output))  # same binary cross-entropy as cxe, written out
        mse = T.mean((self.output - Y) ** 2)

        cost = 0
        if cost_function == 'mse':
            cost = mse
        elif cost_function == 'cxe':
            cost = cxe
        else:
            cost = nll        

        gparams = T.grad(cost, self.params)
        updates = OrderedDict()
        for param, gparam in zip(self.params, gparams):
            updates[param] = param - gparam * lr
        
        self.loss = theano.function(inputs=[X, Y], outputs=cost)
        self.train = theano.function(inputs=[X, Y], outputs=cost, updates=updates)
        self.predictions = theano.function(inputs=[X], outputs=self.output)
        self.debug = theano.function(inputs=[X, Y], outputs=[X.shape, Y.shape, y_vals.shape, self.output.shape])
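
A minimal usage sketch for this constructor, assuming the class is named `RNN` (the name, shapes, and toy data are illustrative, not from the original):

    # toy run: 64 sequences of length 10 with 8-dim inputs, 3 output classes;
    # the batch must have exactly batch_size rows because h0 is a fixed shared variable
    rnn = RNN(n_in=8, n_hid=16, n_out=3, batch_size=64, single_output=True)
    X = np.random.randn(64, 10, 8).astype(dtype)                  # (batch, time, n_in)
    Y = np.eye(3, dtype=dtype)[np.random.randint(0, 3, size=64)]  # one-hot, (batch, n_out)
    for epoch in range(10):
        print(rnn.train(X, Y))  # cost should trend downward
    probs = rnn.predictions(X)  # (batch, n_out) class probabilities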
Example #2
    def __init__(self, n_in, n_hid, n_out, lr=0.05, batch_size=64):   
        self.n_in = n_in
        self.n_hid = n_hid
        self.n_out = n_out
        self.W_in = init_weight((self.n_in, self.n_hid),'W_in')
        self.W_out = init_weight((self.n_hid, self.n_out),'W_out')
        self.W_rec = init_weight((self.n_hid, self.n_hid),'W_rec', 'svd')
                
        self.params = [self.W_in,self.W_out,self.W_rec]
        
        def step(x_t, h_tm1):
            h_t = T.nnet.sigmoid(T.dot(x_t, self.W_in) + T.dot(h_tm1, self.W_rec))
            y_t = T.nnet.softmax(T.dot(h_t, self.W_out))
            return [h_t, y_t]


        X = T.tensor3()  # batch of sequences of input vectors: (batch, time, n_in)
        Y = T.tensor3()  # per-step targets; only read where the input step is all zeros
        h0 = shared(np.zeros(shape=(batch_size,self.n_hid), dtype=dtype)) # initial hidden state
        mask = 1. - X.sum(axis = 2)  # 1 at all-zero input steps, 0 at one-hot steps (assumes one-hot inputs)
        lr = shared(np.cast[dtype](lr))
        
        [h_vals, y_vals], _ = theano.scan(fn=step,        
                                          sequences=X.dimshuffle(1,0,2),
                                          outputs_info=[h0, None])

        cxe = T.nnet.categorical_crossentropy(y_vals.dimshuffle(1,0,2), Y)
        cost = (cxe * mask).sum()        
        gparams = T.grad(cost, self.params)
        updates = OrderedDict()
        for param, gparam in zip(self.params, gparams):
            updates[param] = param - gparam * lr
        
        self.train = theano.function(inputs=[X, Y], outputs=cost, updates=updates)
        self.predictions = theano.function(inputs=[X], outputs=y_vals[-1, 0, :])  # last timestep of the first sequence in the batch
        self.debug = theano.function(inputs=[X, Y], outputs=[X.shape, Y.shape, y_vals.shape, mask.shape, cxe.shape])
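
A usage sketch, assuming the class is named `MaskedRNN` (illustrative name and data). The masking convention is the one the code implies: input steps are one-hot, and the loss is read only at steps where the input is all zeros:

    # 64 sequences: steps 0-8 carry one-hot symbols, step 9 is blank (all zeros)
    rnn = MaskedRNN(n_in=5, n_hid=16, n_out=5, batch_size=64)
    X = np.zeros((64, 10, 5), dtype=dtype)
    symbols = np.random.randint(0, 5, size=(64, 9))
    for b in range(64):
        X[b, np.arange(9), symbols[b]] = 1.
    Y = np.zeros((64, 10, 5), dtype=dtype)
    Y[:, 9, :] = np.eye(5, dtype=dtype)[np.random.randint(0, 5, size=64)]  # target at the blank step
    print(rnn.train(X, Y))     # summed cross-entropy over the masked (blank) steps
    print(rnn.predictions(X))  # output distribution at the last step of the first sequence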
Example #3
    def __init__(self,
                 n_in,
                 n_lstm,
                 n_out,
                 lr=0.05,
                 single_output=True,
                 output_activation=T.nnet.softmax,
                 cost_function='nll'):
        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out
        self.W_xi = init_weight((self.n_in, self.n_lstm), 'W_xi')
        self.W_hi = init_weight((self.n_lstm, self.n_lstm), 'W_hi', 'svd')
        self.W_ci = init_weight((self.n_lstm, self.n_lstm), 'W_ci', 'svd')
        self.b_i = shared(np.cast[dtype](np.random.uniform(-0.5,
                                                           .5,
                                                           size=n_lstm)))
        self.W_xf = init_weight((self.n_in, self.n_lstm), 'W_xf')
        self.W_hf = init_weight((self.n_lstm, self.n_lstm), 'W_hf', 'svd')
        self.W_cf = init_weight((self.n_lstm, self.n_lstm), 'W_cf', 'svd')
        self.b_f = shared(np.cast[dtype](np.random.uniform(0, 1.,
                                                           size=n_lstm)))
        self.W_xc = init_weight((self.n_in, self.n_lstm), 'W_xc')
        self.W_hc = init_weight((self.n_lstm, self.n_lstm), 'W_hc', 'svd')
        self.b_c = shared(np.zeros(n_lstm, dtype=dtype))
        self.W_xo = init_weight((self.n_in, self.n_lstm), 'W_xo')
        self.W_ho = init_weight((self.n_lstm, self.n_lstm), 'W_ho', 'svd')
        self.W_co = init_weight((self.n_lstm, self.n_lstm), 'W_co', 'svd')
        self.b_o = shared(np.cast[dtype](np.random.uniform(-0.5,
                                                           .5,
                                                           size=n_lstm)))
        self.W_hy = init_weight((self.n_lstm, self.n_out), 'W_hy')
        self.b_y = shared(np.zeros(n_out, dtype=dtype))
        self.params = [
            self.W_xi, self.W_hi, self.W_ci, self.b_i, self.W_xf, self.W_hf,
            self.W_cf, self.b_f, self.W_xc, self.W_hc, self.b_c, self.W_xo,
            self.W_ho, self.W_co, self.b_o, self.W_hy, self.b_y
        ]

        def step_lstm(x_t, h_tm1, c_tm1):
            i_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xi) + T.dot(h_tm1, self.W_hi) +
                T.dot(c_tm1, self.W_ci) + self.b_i)
            f_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xf) + T.dot(h_tm1, self.W_hf) +
                T.dot(c_tm1, self.W_cf) + self.b_f)
            c_t = f_t * c_tm1 + i_t * T.tanh(
                T.dot(x_t, self.W_xc) + T.dot(h_tm1, self.W_hc) + self.b_c)
            o_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xo) + T.dot(h_tm1, self.W_ho) +
                T.dot(c_t, self.W_co) + self.b_o)
            h_t = o_t * T.tanh(c_t)
            y_t = output_activation(T.dot(h_t, self.W_hy) + self.b_y)
            return [h_t, c_t, y_t]

        X = T.matrix()  # a single sequence of input vectors: (time, n_in)
        if single_output:
            Y = T.vector()  # one target vector for the whole sequence
        else:
            Y = T.matrix()  # one target vector per timestep: (time, n_out)
        h0 = shared(np.zeros(shape=self.n_lstm,
                             dtype=dtype))  # initial hidden state
        c0 = shared(np.zeros(shape=self.n_lstm,
                             dtype=dtype))  # initial cell state
        lr = shared(np.cast[dtype](lr))

        [h_vals, c_vals, y_vals], _ = theano.scan(fn=step_lstm,
                                                  sequences=X,
                                                  outputs_info=[h0, c0, None])

        if single_output:
            self.output = y_vals[-1]
        else:
            self.output = y_vals

        cxe = T.mean(T.nnet.binary_crossentropy(self.output, Y))
        # nll is the same binary cross-entropy as cxe, written out
        nll = -T.mean(Y * T.log(self.output) +
                      (1. - Y) * T.log(1. - self.output))
        mse = T.mean((self.output - Y)**2)

        cost = 0
        if cost_function == 'mse':
            cost = mse
        elif cost_function == 'cxe':
            cost = cxe
        else:
            cost = nll

        gparams = T.grad(cost, self.params)
        updates = OrderedDict()
        for param, gparam in zip(self.params, gparams):
            updates[param] = param - gparam * lr

        self.loss = theano.function(inputs=[X, Y], outputs=cost)
        self.train = theano.function(inputs=[X, Y],
                                     outputs=cost,
                                     updates=updates)
        self.predictions = theano.function(inputs=[X], outputs=self.output)
        self.debug = theano.function(
            inputs=[X, Y],
            outputs=[X.shape, Y.shape, y_vals.shape, cost.shape])
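
A usage sketch, assuming the class is named `LSTM` (illustrative). This variant is unbatched: X is a single sequence, and with single_output=True the target is one vector for the whole sequence:

    # one sequence of length 12 with 8-dim inputs, 4 output classes
    lstm = LSTM(n_in=8, n_lstm=16, n_out=4, single_output=True)
    X = np.random.randn(12, 8).astype(dtype)          # (time, n_in)
    Y = np.eye(4, dtype=dtype)[np.random.randint(4)]  # one-hot target, (n_out,)
    for _ in range(20):
        print(lstm.train(X, Y))
    print(lstm.predictions(X))  # class probabilities after the final timestep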
Example #4
    def __init__(self, n_in, n_lstm, n_out, lr=0.05, batch_size=64, single_output=True, output_activation=T.nnet.softmax, cost_function='nll'):        
        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out
        self.W_xi = init_weight((self.n_in, self.n_lstm),'W_xi') 
        self.W_hi = init_weight((self.n_lstm, self.n_lstm),'W_hi', 'svd') 
        self.W_ci = init_weight((self.n_lstm, self.n_lstm),'W_ci', 'svd') 
        self.b_i = shared(np.cast[dtype](np.random.uniform(-0.5,.5,size = n_lstm)))
        self.W_xf = init_weight((self.n_in, self.n_lstm),'W_xf') 
        self.W_hf = init_weight((self.n_lstm, self.n_lstm),'W_hf', 'svd') 
        self.W_cf = init_weight((self.n_lstm, self.n_lstm),'W_cf', 'svd') 
        self.b_f = shared(np.cast[dtype](np.random.uniform(0, 1.,size = n_lstm)))
        self.W_xc = init_weight((self.n_in, self.n_lstm),'W_xc') 
        self.W_hc = init_weight((self.n_lstm, self.n_lstm),'W_hc', 'svd') 
        self.b_c = shared(np.zeros(n_lstm, dtype=dtype))
        self.W_xo = init_weight((self.n_in, self.n_lstm),'W_xo') 
        self.W_ho = init_weight((self.n_lstm, self.n_lstm),'W_ho', 'svd') 
        self.W_co = init_weight((self.n_lstm, self.n_lstm),'W_co', 'svd') 
        self.b_o = shared(np.cast[dtype](np.random.uniform(-0.5,.5,size = n_lstm)))
        self.W_hy = init_weight((self.n_lstm, self.n_out),'W_hy') 
        self.b_y = shared(np.zeros(n_out, dtype=dtype))
        self.params = [self.W_xi, self.W_hi, self.W_ci, self.b_i,
                       self.W_xf, self.W_hf, self.W_cf, self.b_f,
                       self.W_xc, self.W_hc, self.b_c,
                       self.W_xo, self.W_ho, self.W_co, self.b_o,
                       self.W_hy, self.b_y]
                

        def step_lstm(x_t, h_tm1, c_tm1):
            i_t = T.nnet.sigmoid(T.dot(x_t, self.W_xi) + T.dot(h_tm1, self.W_hi) + T.dot(c_tm1, self.W_ci) + self.b_i)
            f_t = T.nnet.sigmoid(T.dot(x_t, self.W_xf) + T.dot(h_tm1, self.W_hf) + T.dot(c_tm1, self.W_cf) + self.b_f)
            c_t = f_t * c_tm1 + i_t * T.tanh(T.dot(x_t, self.W_xc) + T.dot(h_tm1, self.W_hc) + self.b_c) 
            o_t = T.nnet.sigmoid(T.dot(x_t, self.W_xo)+ T.dot(h_tm1, self.W_ho) + T.dot(c_t, self.W_co)  + self.b_o)
            h_t = o_t * T.tanh(c_t)
            y_t = output_activation(T.dot(h_t, self.W_hy) + self.b_y)
            return [h_t, c_t, y_t]

        X = T.tensor3()  # batch of sequences of input vectors: (batch, time, n_in)
        if single_output:
            Y = T.matrix()   # one target vector per sequence: (batch, n_out)
        else:
            Y = T.tensor3()  # one target vector per timestep: (batch, time, n_out)
        h0 = shared(np.zeros(shape=(batch_size,self.n_lstm), dtype=dtype)) # initial hidden state
        c0 = shared(np.zeros(shape=(batch_size,self.n_lstm), dtype=dtype)) # initial cell state
        lr = shared(np.cast[dtype](lr))
        
        [h_vals, c_vals, y_vals], _ = theano.scan(fn=step_lstm,        
                                          sequences=X.dimshuffle(1,0,2),
                                          outputs_info=[h0, c0, None])

        if single_output:
            self.output = y_vals[-1]            
        else:
            self.output = y_vals.dimshuffle(1,0,2)
        
        cxe = T.mean(T.nnet.binary_crossentropy(self.output, Y))
        nll = -T.mean(Y * T.log(self.output) + (1. - Y) * T.log(1. - self.output))  # same binary cross-entropy as cxe, written out
        mse = T.mean((self.output - Y) ** 2)

        cost = 0
        if cost_function == 'mse':
            cost = mse
        elif cost_function == 'cxe':
            cost = cxe
        else:
            cost = nll 
        
        gparams = T.grad(cost, self.params)
        updates = OrderedDict()
        for param, gparam in zip(self.params, gparams):
            updates[param] = param - gparam * lr
        
        self.loss = theano.function(inputs=[X, Y], outputs=[cxe, mse, cost])
        self.train = theano.function(inputs=[X, Y], outputs=cost, updates=updates)
        self.predictions = theano.function(inputs=[X], outputs=self.output)  # respects single_output, matching Example #1
        self.debug = theano.function(inputs=[X, Y], outputs=[X.shape, Y.shape, y_vals.shape, cxe.shape])
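
A usage sketch, assuming the class is named `BatchedLSTM` (illustrative), here with per-step targets (single_output=False) and the cross-entropy cost:

    # 64 sequences of length 10, 8-dim inputs, 3 classes, one target per step
    lstm = BatchedLSTM(n_in=8, n_lstm=32, n_out=3, batch_size=64,
                       single_output=False, cost_function='cxe')
    X = np.random.randn(64, 10, 8).astype(dtype)                        # (batch, time, n_in)
    Y = np.eye(3, dtype=dtype)[np.random.randint(0, 3, size=(64, 10))]  # (batch, time, n_out)
    print(lstm.train(X, Y))
    cxe_val, mse_val, cost_val = lstm.loss(X, Y)  # with cost_function='cxe', cost_val == cxe_val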