Esempio n. 1
0
   def __init__(self,rng, n_in, n_lstm, n_out, lr=0.00001, batch_size=64, output_activation=theano.tensor.nnet.relu,cost_function='mse',optimizer = RMSprop):
       """Build a single-layer GRU sequence regressor and compile it.

       Args:
           rng: random source forwarded to ``init_weight``/``init_bias``.
           n_in: size of each input vector.
           n_lstm: number of recurrent units.
           n_out: size of each output vector.
           lr: learning rate handed to ``optimizer``.
           batch_size: number of rows of the zero initial hidden state
               (inputs are expected to be batched accordingly).
           output_activation: accepted for interface compatibility but not
               used; the read-out always applies ``tanh``.
           cost_function: identifier forwarded to ``get_err_fn``.
           optimizer: callable used as ``optimizer(cost, params, lr=lr)``
               whose result exposes ``getUpdates()``.

       Sets ``self.train`` (inputs ``X, Y`` -> cost, applies updates),
       ``self.predictions`` (input ``X`` -> output sequence) and
       ``self.n_param`` (number of scalar parameters).
       """
       self.n_in = n_in
       self.n_lstm = n_lstm
       self.n_out = n_out

       # Reset gate.
       # NOTE(review): W_xr is registered under the name 'W_xi'; left
       # untouched in case stored parameters are looked up by name.
       self.W_xr = init_weight((self.n_in, self.n_lstm), rng=rng, name='W_xi', sample='glorot')
       self.W_hr = init_weight((self.n_lstm, self.n_lstm), rng=rng, name='W_hr', sample='glorot')
       self.b_r = init_bias(self.n_lstm, rng=rng, sample='zero')
       # Update gate.
       self.W_xz = init_weight((self.n_in, self.n_lstm), rng=rng, name='W_xz', sample='glorot')
       self.W_hz = init_weight((self.n_lstm, self.n_lstm), rng=rng, name='W_hz', sample='glorot')
       self.b_z = init_bias(self.n_lstm, rng=rng, sample='zero')
       # Candidate hidden state.
       self.W_xh = init_weight((self.n_in, self.n_lstm), rng=rng, name='W_xh', sample='glorot')
       self.W_hh = init_weight((self.n_lstm, self.n_lstm), rng=rng, name='W_hh', sample='glorot')
       self.b_h = init_bias(self.n_lstm, rng=rng, sample='zero')
       # Read-out.
       self.W_hy = init_weight((self.n_lstm, self.n_out), rng=rng, name='W_hy', sample='glorot')
       self.b_y = init_bias(self.n_out, rng=rng, sample='zero')
       self.one_mat = T.ones((batch_size, n_lstm), dtype=dtype)

       self.params = [self.W_xr, self.W_hr, self.b_r,
                      self.W_xz, self.W_hz, self.b_z,
                      self.W_xh, self.W_hh, self.b_h,
                      self.W_hy, self.b_y]

       def step_lstm(x_t, h_tm1):
           # One GRU step: gates, candidate state, interpolation with the
           # previous state, then a tanh read-out.
           r_t = T.nnet.sigmoid(T.dot(x_t, self.W_xr) + T.dot(h_tm1, self.W_hr) + self.b_r)
           z_t = T.nnet.sigmoid(T.dot(x_t, self.W_xz) + T.dot(h_tm1, self.W_hz) + self.b_z)
           h_t = T.tanh(T.dot(x_t, self.W_xh) + T.dot((r_t * h_tm1), self.W_hh) + self.b_h)
           hh_t = z_t * h_t + (1 - z_t) * h_tm1
           y_t = T.tanh(T.dot(hh_t, self.W_hy) + self.b_y)
           return [hh_t, y_t]

       X = T.tensor3()  # batch of sequences of vectors
       Y = T.tensor3()  # batch of sequences of vectors
       h0 = shared(np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype))  # initial hidden state

       # scan iterates over the leading axis, so the first two axes of X
       # are swapped before the loop and swapped back afterwards.
       [h_vals, y_vals], _ = theano.scan(fn=step_lstm,
                                         sequences=X.dimshuffle(1, 0, 2),
                                         outputs_info=[h0, None])

       self.output = y_vals.dimshuffle(1, 0, 2)

       cost = get_err_fn(self, cost_function, Y)

       _optimizer = optimizer(cost, self.params, lr=lr)

       self.train = theano.function(inputs=[X, Y], outputs=cost, updates=_optimizer.getUpdates(), allow_input_downcast=True)
       self.predictions = theano.function(inputs=[X], outputs=y_vals.dimshuffle(1, 0, 2), allow_input_downcast=True)

       # Three gates (input weights, recurrent weights, bias each) plus the
       # read-out matrix and its bias. The previous formula left out the
       # n_out entries of b_y (assuming init_bias(n) holds n values).
       self.n_param = (3 * (n_in * n_lstm + n_lstm * n_lstm + n_lstm)
                       + n_lstm * n_out + n_out)
Esempio n. 2
0
    def __init__(self,rng,params,cost_function='mse',optimizer = RMSprop):
        """Build a tied-weight denoising autoencoder and compile it.

        Args:
            rng: random stream providing ``binomial``/``normal`` and passed
                on to the weight initialisers and layers.
            params: dict with keys ``"lr"``, ``"batch_size"``,
                ``'n_output'`` and ``"corruption_level"``.
            cost_function: identifier forwarded to ``get_err_fn``.
            optimizer: callable used as ``optimizer(cost, params, lr=lr)``
                whose result exposes ``getUpdates()``.

        Sets ``self.train`` (inputs ``X, Y, is_train`` -> cost),
        ``self.predictions`` and ``self.mid_layer`` (inputs
        ``X, is_train``), ``self.output``, ``self.params`` and
        ``self.n_param``.
        """
        lr = params["lr"]
        batch_size = params["batch_size"]
        n_output = params['n_output']
        corruption_level = params["corruption_level"]

        # Size and reshape tuples must hold integers; `/` would produce a
        # float on Python 3, `//` is integer division on both 2 and 3.
        n_triplets = n_output // 3

        X = T.matrix(name="input", dtype=dtype)
        Y = T.matrix(name="output", dtype=dtype)
        is_train = T.iscalar('is_train')  # pseudo boolean: non-zero enables input noise

        # One keep/drop draw per group of three consecutive features,
        # repeated along the last axis so the whole group shares it.
        bin_noise = rng.binomial(size=(batch_size, n_triplets, 1), n=1, p=1 - corruption_level, dtype=theano.config.floatX)
        bin_noise_3d = T.concatenate((bin_noise, bin_noise, bin_noise), axis=2)

        # Gaussian noise, zeroed wherever the binomial draw is 0.
        noise = rng.normal(size=(batch_size, n_output), std=0.03, avg=0.0, dtype=theano.config.floatX)
        noise_bin = T.reshape(noise, (batch_size, n_triplets, 3)) * bin_noise_3d
        X_train = T.reshape(noise_bin, (batch_size, n_output)) + X

        X_tilde = T.switch(T.neq(is_train, 0), X_train, X)

        W_1_e = u.init_weight(shape=(n_output, 1024), rng=rng, name="w_hid", sample="glorot")
        b_1_e = u.init_bias(1024, rng)

        W_2_e = u.init_weight(shape=(1024, 2048), rng=rng, name="w_hid", sample="glorot")
        b_2_e = u.init_bias(2048, rng)

        # Decoder weights are the transposed encoder weights; only the
        # decoder biases are new parameters.
        W_2_d = W_2_e.T
        b_2_d = u.init_bias(1024, rng)

        W_1_d = W_1_e.T
        b_1_d = u.init_bias(n_output, rng)

        h_1_e = HiddenLayer(rng, X_tilde, 0, 0, W=W_1_e, b=b_1_e, activation=nn.relu)
        h_2_e = HiddenLayer(rng, h_1_e.output, 0, 0, W=W_2_e, b=b_2_e, activation=nn.relu)
        h_2_d = HiddenLayer(rng, h_2_e.output, 0, 0, W=W_2_d, b=b_2_d, activation=u.do_nothing)
        h_1_d = LogisticRegression(rng, h_2_d.output, 0, 0, W=W_1_d, b=b_1_d)

        self.output = h_1_d.y_pred

        self.params = h_1_e.params + h_2_e.params
        self.params.append(b_2_d)
        self.params.append(b_1_d)

        cost = get_err_fn(self, cost_function, Y)

        # L2 penalty over every trainable parameter. self.params is a flat
        # list, so indexing param[0]/param[1] (as before) only penalised the
        # first two rows or elements of each parameter.
        L2_reg = 0.0001
        L2_sqr = theano.shared(0.)
        for param in self.params:
            L2_sqr += T.sum(param ** 2)

        cost += L2_reg * L2_sqr

        _optimizer = optimizer(cost, self.params, lr=lr)
        self.train = theano.function(inputs=[X, Y, is_train], outputs=cost, updates=_optimizer.getUpdates(), allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X, is_train], outputs=self.output, allow_input_downcast=True)
        self.mid_layer = theano.function(inputs=[X, is_train], outputs=h_2_e.output, allow_input_downcast=True)
        self.n_param = count_params(self.params)
Esempio n. 3
0
   def __init__(self,rng, params,cost_function='mse',optimizer = RMSprop):
       """Assemble a three-layer stacked LSTM with dropout after layer one.

       ``params`` must provide 'batch_size', 'seq_length', 'lr', 'n_hidden'
       and 'n_output'; the input width equals 'n_output'. Compiles
       ``self.train`` (inputs X, Y, is_train) and ``self.predictions``
       (inputs X, is_train) and records ``self.n_param``.
       """
       n_batch = params['batch_size']
       n_steps = params["seq_length"]

       learning_rate = params['lr']
       self.n_in = params['n_output']
       self.n_lstm = params['n_hidden']
       self.n_out = params['n_output']

       # Linear read-out applied to the top layer at every time step.
       self.W_hy = init_weight((self.n_lstm, self.n_out), rng=rng, name='W_hy', sample='glorot')
       self.b_y = init_bias(self.n_out, rng=rng, sample='zero')

       bottom = LSTMLayer(rng, 0, self.n_in, self.n_lstm)
       middle = LSTMLayer(rng, 1, self.n_lstm, self.n_lstm)
       top = LSTMLayer(rng, 2, self.n_lstm, self.n_lstm)

       self.params = bottom.params + middle.params + top.params + [self.W_hy, self.b_y]

       X = T.tensor3()  # batch of sequences of vectors
       Y = T.tensor3()  # batch of sequences of vectors
       is_train = T.iscalar('is_train')  # pseudo boolean: training vs prediction

       def advance(x_t, drop_mask, h1_prev, c1_prev, h2_prev, c2_prev, h3_prev, c3_prev):
           # One time step through the stack; dropout sits between the
           # first and the second layer only.
           h1, c1, out1 = bottom.run(x_t, h1_prev, c1_prev)
           dropped = DropoutLayer(rng, input=out1, prob=0.5, is_train=is_train, mask=drop_mask)
           h2, c2, out2 = middle.run(dropped.output, h2_prev, c2_prev)
           h3, c3, out3 = top.run(out2, h3_prev, c3_prev)
           readout = T.dot(out3, self.W_hy) + self.b_y
           return [h1, c1, h2, c2, h3, c3, readout]

       # Zero initial (hidden, cell) state for each of the three layers.
       state_shape = (n_batch, self.n_lstm)
       initial_states = [shared(np.zeros(shape=state_shape, dtype=dtype)) for _ in range(6)]

       # One mask entry per (time step, sample, unit); scan hands the step
       # function one time slice at a time.
       p_1 = 0.5
       mask = rng.binomial(size=(n_steps, n_batch, self.n_lstm), p=p_1, dtype=X.dtype)

       scan_outputs, _ = theano.scan(fn=advance,
                                     sequences=[X.dimshuffle(1, 0, 2), mask],
                                     outputs_info=initial_states + [None])
       y_vals = scan_outputs[-1]

       self.output = y_vals.dimshuffle(1, 0, 2)
       cost = get_err_fn(self, cost_function, Y)

       _optimizer = optimizer(cost, self.params, lr=learning_rate)

       self.train = theano.function(inputs=[X, Y, is_train],
                                    outputs=cost,
                                    updates=_optimizer.getUpdates(),
                                    allow_input_downcast=True)
       self.predictions = theano.function(inputs=[X, is_train],
                                          outputs=y_vals.dimshuffle(1, 0, 2),
                                          allow_input_downcast=True)
       self.n_param = count_params(self.params)
Esempio n. 4
0
   def __init__(self,rng, params,cost_function='mse',optimizer = RMSprop):
       """Assemble a three-layer stacked LSTM over 1024-wide inputs.

       ``params`` must provide 'batch_size', 'seq_length', 'lr', 'n_hidden'
       and 'n_output'; the input width is fixed at 1024. Compiles
       ``self.train`` (inputs X, Y, is_train) and ``self.predictions``
       (inputs X, is_train) and records ``self.n_param``.
       """
       n_batch = params['batch_size']
       n_steps = params["seq_length"]

       learning_rate = params['lr']
       self.n_in = 1024
       self.n_lstm = params['n_hidden']
       self.n_out = params['n_output']

       # Linear read-out applied to the top layer at every time step.
       self.W_hy = init_weight((self.n_lstm, self.n_out), rng=rng, name='W_hy', sample='glorot')
       self.b_y = init_bias(self.n_out, rng=rng, sample='zero')

       bottom = LSTMLayer(rng, 0, self.n_in, self.n_lstm)
       middle = LSTMLayer(rng, 1, self.n_lstm, self.n_lstm)
       top = LSTMLayer(rng, 2, self.n_lstm, self.n_lstm)

       self.params = bottom.params + middle.params + top.params + [self.W_hy, self.b_y]

       X = T.tensor3()  # batch of sequences of vectors
       Y = T.tensor3()  # batch of sequences of vectors
       is_train = T.iscalar('is_train')  # pseudo boolean: training vs prediction

       def advance(x_t, drop_mask, h1_prev, c1_prev, h2_prev, c2_prev, h3_prev, c3_prev):
           # One time step through the stack; dropout sits between the
           # first and the second layer only.
           h1, c1, out1 = bottom.run(x_t, h1_prev, c1_prev)
           dropped = DropoutLayer(rng, input=out1, prob=0.5, is_train=is_train, mask=drop_mask)
           h2, c2, out2 = middle.run(dropped.output, h2_prev, c2_prev)
           h3, c3, out3 = top.run(out2, h3_prev, c3_prev)
           readout = T.dot(out3, self.W_hy) + self.b_y
           return [h1, c1, h2, c2, h3, c3, readout]

       # Zero initial (hidden, cell) state for each of the three layers.
       state_shape = (n_batch, self.n_lstm)
       initial_states = [shared(np.zeros(shape=state_shape, dtype=dtype)) for _ in range(6)]

       # One mask entry per (time step, sample, unit); scan hands the step
       # function one time slice at a time.
       p_1 = 0.5
       mask = rng.binomial(size=(n_steps, n_batch, self.n_lstm), p=p_1, dtype=X.dtype)

       scan_outputs, _ = theano.scan(fn=advance,
                                     sequences=[X.dimshuffle(1, 0, 2), mask],
                                     outputs_info=initial_states + [None])
       y_vals = scan_outputs[-1]

       self.output = y_vals.dimshuffle(1, 0, 2)
       cost = get_err_fn(self, cost_function, Y)

       _optimizer = optimizer(cost, self.params, lr=learning_rate)

       self.train = theano.function(inputs=[X, Y, is_train],
                                    outputs=cost,
                                    updates=_optimizer.getUpdates(),
                                    allow_input_downcast=True)
       self.predictions = theano.function(inputs=[X, is_train],
                                          outputs=y_vals.dimshuffle(1, 0, 2),
                                          allow_input_downcast=True)
       self.n_param = count_params(self.params)
Esempio n. 5
0
 def __init__(self,  rng,input, n_in, n_out,W=None,b=None):
     """Linear output layer computing ``dot(input, W) + b``.

     Args:
         rng: random source forwarded to the initialisers.
         input: symbolic input of the layer.
         n_in: input width, used only when ``W`` has to be created.
         n_out: output width, used only when ``W`` or ``b`` is created.
         W: optional pre-built weight matrix; created with Glorot
             sampling when omitted.
         b: optional pre-built bias; created with ``u.init_bias`` when
             omitted.

     Sets ``self.W``, ``self.b``, ``self.y_pred``, ``self.params`` and
     ``self.input``.
     """
     # Identity tests instead of `== None`: equality on array-like or
     # symbolic objects is not a reliable "was it provided" check. W and b
     # are handled separately so passing only one of them no longer leaves
     # the other as None.
     if W is None:
         W = u.init_weight(shape=(n_in, n_out), rng=rng, name='W_xreg', sample='glorot')
     if b is None:
         b = u.init_bias(n_out, rng=rng)
     self.W = W
     self.b = b
     self.y_pred = T.dot(input, self.W) + self.b
     self.params = [self.W, self.b]
     self.input = input
Esempio n. 6
0
   def __init__(self,rng, params,cost_function='mse',optimizer = RMSprop):
       """Build a single-layer LSTM whose initial state is fed by the caller.

       Args:
           rng: random stream providing ``normal`` and passed on to the
               initialisers and to ``LSTMLayer``.
           params: dict with keys 'batch_size', 'seq_length', 'lr',
               'n_hidden' and 'n_output'. The input width is fixed at 48.
           cost_function: identifier forwarded to ``get_err_fn``.
           optimizer: callable used as ``optimizer(cost, params, lr=lr)``
               whose result exposes ``getUpdates()``.

       Sets ``self.train`` (inputs ``X, Y, is_train, H, C`` -> cost plus the
       last-step hidden and cell states) and ``self.predictions`` (inputs
       ``X, is_train, H, C`` -> output plus the last-step states), as well
       as ``self.output``, ``self.params`` and ``self.n_param``.
       """
       batch_size = params['batch_size']
       sequence_length = params["seq_length"]

       lr = params['lr']
       self.n_in = 48
       self.n_lstm = params['n_hidden']
       self.n_out = params['n_output']

       self.W_hy = init_weight((self.n_lstm, self.n_out), rng=rng, name='W_hy', sample='glorot')
       self.b_y = init_bias(self.n_out, rng=rng, sample='zero')

       layer1 = LSTMLayer(rng, 0, self.n_in, self.n_lstm)

       # Build a new list: appending to layer1.params itself would also
       # push the read-out parameters into the layer's own list.
       self.params = list(layer1.params) + [self.W_hy, self.b_y]

       def step_lstm(x_t, h_tm1_1, c_tm1_1):
           # One LSTM step followed by the linear read-out.
           [h_t_1, c_t_1, y_t_1] = layer1.run(x_t, h_tm1_1, c_tm1_1)
           y = T.dot(y_t_1, self.W_hy) + self.b_y
           return [h_t_1, c_t_1, y]

       X = T.tensor3()  # batch of sequences of vectors
       Y = T.tensor3()  # batch of sequences of vectors
       is_train = T.iscalar('is_train')  # pseudo boolean: non-zero enables input noise
       H = T.matrix(name="H", dtype=dtype)  # initial hidden state
       C = T.matrix(name="C", dtype=dtype)  # initial cell state

       # Small Gaussian jitter added to the input during training only.
       noise = rng.normal(size=(batch_size, sequence_length, self.n_in), std=0.0002, avg=0.0, dtype=theano.config.floatX)
       X_train = noise + X

       X_tilde = T.switch(T.neq(is_train, 0), X_train, X)

       [h_t_1, c_t_1, y_vals], _ = theano.scan(fn=step_lstm,
                                         sequences=[X_tilde.dimshuffle(1, 0, 2)],
                                         outputs_info=[H, C, None])

       self.output = y_vals.dimshuffle(1, 0, 2)
       cost = get_err_fn(self, cost_function, Y)

       _optimizer = optimizer(cost, self.params, lr=lr)

       # Both functions also return the states of the final time step,
       # presumably so the caller can pass them back in as H and C.
       self.train = theano.function(inputs=[X, Y, is_train, H, C], outputs=[cost, h_t_1[-1], c_t_1[-1]], updates=_optimizer.getUpdates(), allow_input_downcast=True)
       self.predictions = theano.function(inputs=[X, is_train, H, C], outputs=[self.output, h_t_1[-1], c_t_1[-1]], allow_input_downcast=True)
       self.n_param = count_params(self.params)
Esempio n. 7
0
    def __init__(self, rng, input, n_in, n_out,W=None,b=None,activation=T.tanh):
        """Fully connected layer: ``activation(dot(input, W) + b)``.

        Args:
            rng: random source forwarded to the initialisers.
            input: symbolic input of the layer.
            n_in: input width, used only when ``W`` has to be created.
            n_out: output width, used only when ``W`` or ``b`` is created.
            W: optional pre-built weight matrix; created with Glorot
                sampling when omitted.
            b: optional pre-built bias; created with ``u.init_bias`` when
                omitted.
            activation: callable applied to the affine output.

        Sets ``self.input``, ``self.W``, ``self.b``, ``self.output`` and
        ``self.params``.
        """
        self.input = input
        # Identity tests instead of `== None`: equality on array-like or
        # symbolic objects is not a reliable "was it provided" check. W and
        # b are handled separately so passing only one of them no longer
        # leaves the other as None.
        if W is None:
            W = u.init_weight(shape=[n_in, n_out], rng=rng, name="w_hid", sample="glorot")
        if b is None:
            b = u.init_bias(n_out, rng)
        self.W = W
        self.b = b

        lin_output = T.dot(input, self.W) + self.b
        self.output = activation(lin_output)
        # parameters of the model
        self.params = [self.W, self.b]
Esempio n. 8
0
    def __init__(self, rng, input,filter_shape,input_shape,border_mode,subsample, activation=nn.relu,W=None,b=None,only_conv=0):
        """2-D convolution layer with optional bias and activation.

        Args:
            rng: random source forwarded to the initialisers.
            input: symbolic 4-D input.
            filter_shape: 4-tuple; element 0 is the number of filters and
                elements 2 and 3 are the kernel rows and columns.
            input_shape: (samples, channels, rows, cols).
            border_mode: forwarded to ``conv2d``; ``'same'`` is translated
                to ``'half'`` and the output is cropped for even kernels.
            subsample: (row stride, column stride).
            activation: called as ``activation(output, 0)``, so it must
                accept a second positional argument.
            W: optional pre-built filter bank; created when omitted.
            b: optional pre-built bias; created when omitted.
            only_conv: when non-zero the raw convolution is exposed as
                ``self.output`` (no bias, no activation).

        Sets ``self.input``, ``self.W``, ``self.b``, ``self.output``,
        ``self.params`` and ``self.output_shape``.
        """
        self.input = input
        nb_filter = filter_shape[0]

        # Identity tests instead of `== None`: equality on array-like or
        # symbolic objects is not a reliable "was it provided" check. W and
        # b are handled separately so passing only one of them no longer
        # leaves the other as None.
        if W is None:
            W = u.init_weight(filter_shape, rng=rng, name="w_conv", sample='glorot')
        if b is None:
            b = u.init_bias(nb_filter, rng=rng)
        self.W = W
        self.b = b

        b_mode = border_mode
        if border_mode == 'same':
            b_mode = 'half'

        conv_out = conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            input_shape=input_shape,
            border_mode=b_mode, subsample=subsample
        )

        if border_mode == 'same':
            # For an even kernel size the 'half' result is trimmed to
            # ceil(input_size / stride) along that axis.
            if filter_shape[2] % 2 == 0:
                conv_out = conv_out[:, :, :(input.shape[2] + subsample[0] - 1) // subsample[0], :]
            if filter_shape[3] % 2 == 0:
                conv_out = conv_out[:, :, :, :(input.shape[3] + subsample[1] - 1) // subsample[1]]

        if only_conv == 0:
            # Broadcast the per-filter bias over batch, rows and columns.
            output = conv_out + b.dimshuffle('x', 0, 'x', 'x')
            self.output = activation(output, 0)
        else:
            self.output = conv_out

        # parameters of the model (b is listed even when only_conv is set)
        self.params = [self.W, self.b]

        rows = input_shape[2]
        cols = input_shape[3]

        rows = u.conv_output_length(rows, filter_shape[2], border_mode, subsample[0])
        cols = u.conv_output_length(cols, filter_shape[3], border_mode, subsample[1])

        self.output_shape = (input_shape[0], nb_filter, rows, cols)
Esempio n. 9
0
    def __init__(self, rng, layer_id, n_in, n_lstm):
        """Create the weights and biases of one LSTM layer.

        Every parameter is stored as an attribute (``W_xi``, ``b_f``, ...)
        and named ``<attribute>_<layer_id>``. ``self.params`` lists them in
        creation order.
        """
        suffix = str(layer_id)
        self.n_in = n_in
        self.n_lstm = n_lstm

        input_shape = (self.n_in, self.n_lstm)
        square_shape = (self.n_lstm, self.n_lstm)

        # (attribute, weight shape or None for a bias, sampling scheme),
        # listed in the order the parameters must be created.
        layout = [
            ('W_xi', input_shape, 'glorot'),
            ('W_hi', square_shape, 'glorot'),
            ('W_ci', square_shape, 'glorot'),
            ('b_i', None, 'zero'),
            ('W_xf', input_shape, 'glorot'),
            ('W_hf', square_shape, 'glorot'),
            ('W_cf', square_shape, 'glorot'),
            ('b_f', None, 'one'),
            ('W_xc', input_shape, 'glorot'),
            ('W_hc', square_shape, 'ortho'),
            ('b_c', None, 'zero'),
            ('W_xo', input_shape, 'glorot'),
            ('W_ho', square_shape, 'glorot'),
            ('W_co', square_shape, 'glorot'),
            ('b_o', None, 'zero'),
        ]

        created = []
        for attr, shape, scheme in layout:
            full_name = attr + '_' + suffix
            if shape is None:
                value = init_bias(self.n_lstm, rng=rng, sample=scheme, name=full_name)
            else:
                value = init_weight(shape, rng=rng, name=full_name, sample=scheme)
            setattr(self, attr, value)
            created.append(value)

        self.params = created
Esempio n. 10
0
    def __init__(self,rng,params,cost_function='mse',optimizer = RMSprop):
        """Build a per-frame CNN followed by a GRU and compile it.

        Args:
            rng: random source passed to every layer and initialiser.
            params: dict with keys ``"lr"``, ``'n_hidden'``,
                ``'n_output'``, ``"batch_size"`` and ``"seq_length"``.
            cost_function: identifier forwarded to ``get_err_fn``.
            optimizer: callable used as ``optimizer(cost, params, lr=lr)``
                whose result exposes ``getUpdates()``.

        The input ``X`` is reshaped to
        ``(batch_size * seq_length, 1, 120, 60)``, run through three
        conv+pool stages and a 1024-unit hidden layer, reshaped back to
        ``(batch_size, seq_length, 1024)`` and fed to the GRU.

        Sets ``self.train`` (inputs ``X, Y, is_train`` -> cost),
        ``self.predictions`` (inputs ``X, is_train``), ``self.output``,
        ``self.params`` and ``self.n_param``.
        """
        # `reduce` is a builtin only on Python 2; functools offers it on
        # both major versions.
        from functools import reduce

        lr = params["lr"]
        n_lstm = params['n_hidden']
        n_out = params['n_output']
        batch_size = params["batch_size"]
        sequence_length = params["seq_length"]

        X = T.tensor3()  # batch of sequences of vectors
        Y = T.tensor3()  # batch of sequences of vectors
        is_train = T.iscalar('is_train')  # pseudo boolean: training vs prediction

        # CNN global parameters.
        subsample = (1, 1)
        p_1 = 0.5
        border_mode = "valid"
        cnn_batch_size = batch_size * sequence_length
        pool_size = (2, 2)

        # Layer 1: conv + pool + dropout.
        filter_shape = (64, 1, 9, 9)
        input_shape = (cnn_batch_size, 1, 120, 60)  # (samples, channels, rows, cols)
        frames = X.reshape(input_shape)  # renamed from `input` to avoid shadowing the builtin
        c1 = ConvLayer(rng, frames, filter_shape, input_shape, border_mode, subsample, activation=nn.relu)
        p1 = PoolLayer(c1.output, pool_size=pool_size, input_shape=c1.output_shape)
        dl1 = DropoutLayer(rng, input=p1.output, prob=p_1)
        # At prediction time the pooled activations are scaled by the
        # retain probability instead of being masked.
        retain_prob = 1. - p_1
        test_output = p1.output * retain_prob
        d1_output = T.switch(T.neq(is_train, 0), dl1.output, test_output)

        # Layer 2: conv + pool.
        filter_shape = (128, p1.output_shape[1], 3, 3)
        c2 = ConvLayer(rng, d1_output, filter_shape, p1.output_shape, border_mode, subsample, activation=nn.relu)
        p2 = PoolLayer(c2.output, pool_size=pool_size, input_shape=c2.output_shape)

        # Layer 3: conv + pool.
        filter_shape = (128, p2.output_shape[1], 3, 3)
        c3 = ConvLayer(rng, p2.output, filter_shape, p2.output_shape, border_mode, subsample, activation=nn.relu)
        p3 = PoolLayer(c3.output, pool_size=pool_size, input_shape=c3.output_shape)

        # Layer 4: hidden layer on the flattened feature maps.
        n_in = reduce(lambda x, y: x * y, p3.output_shape[1:])
        x_flat = p3.output.flatten(2)
        h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)
        n_in = 1024
        rnn_input = h1.output.reshape((batch_size, sequence_length, n_in))

        # Layer 5: GRU.
        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out
        # NOTE(review): W_xr is registered under the name 'W_xi'; left
        # untouched in case stored parameters are looked up by name.
        self.W_xr = init_weight((self.n_in, self.n_lstm), rng=rng, name='W_xi', sample='glorot')
        self.W_hr = init_weight((self.n_lstm, self.n_lstm), rng=rng, name='W_hr', sample='glorot')
        self.b_r = init_bias(self.n_lstm, rng=rng, sample='zero')
        self.W_xz = init_weight((self.n_in, self.n_lstm), rng=rng, name='W_xz', sample='glorot')
        self.W_hz = init_weight((self.n_lstm, self.n_lstm), rng=rng, name='W_hz', sample='glorot')
        self.b_z = init_bias(self.n_lstm, rng=rng, sample='zero')
        self.W_xh = init_weight((self.n_in, self.n_lstm), rng=rng, name='W_xh', sample='glorot')
        self.W_hh = init_weight((self.n_lstm, self.n_lstm), rng=rng, name='W_hh', sample='glorot')
        self.b_h = init_bias(self.n_lstm, rng=rng, sample='zero')
        self.W_hy = init_weight((self.n_lstm, self.n_out), rng=rng, name='W_hy', sample='glorot')
        self.b_y = init_bias(self.n_out, rng=rng, sample='zero')

        self.params = [self.W_xr, self.W_hr, self.b_r,
                       self.W_xz, self.W_hz, self.b_z,
                       self.W_xh, self.W_hh, self.b_h,
                       self.W_hy, self.b_y]

        def step_lstm(x_t, h_tm1):
            # One GRU step with a linear read-out.
            r_t = T.nnet.sigmoid(T.dot(x_t, self.W_xr) + T.dot(h_tm1, self.W_hr) + self.b_r)
            z_t = T.nnet.sigmoid(T.dot(x_t, self.W_xz) + T.dot(h_tm1, self.W_hz) + self.b_z)
            h_t = T.tanh(T.dot(x_t, self.W_xh) + T.dot((r_t * h_tm1), self.W_hh) + self.b_h)
            hh_t = z_t * h_t + (1 - z_t) * h_tm1
            y_t = T.dot(hh_t, self.W_hy) + self.b_y
            return [hh_t, y_t]

        h0 = shared(np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype))  # initial hidden state

        # scan iterates over the leading axis:
        # (batch_size, sequence_length, n_in) -> (sequence_length, batch_size, n_in)
        [h_vals, y_vals], _ = theano.scan(fn=step_lstm,
                                         sequences=rnn_input.dimshuffle(1, 0, 2),
                                         outputs_info=[h0, None])

        self.output = y_vals.dimshuffle(1, 0, 2)

        # Train the CNN, the hidden layer and the GRU jointly.
        self.params = c1.params + c2.params + c3.params + h1.params + self.params

        cost = get_err_fn(self, cost_function, Y)
        _optimizer = optimizer(cost, self.params, lr=lr)
        self.train = theano.function(inputs=[X, Y, is_train], outputs=cost, updates=_optimizer.getUpdates(), allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X, is_train], outputs=self.output, allow_input_downcast=True)
        self.n_param = count_params(self.params)
Esempio n. 11
0
   def __init__(self, n_in, n_lstm, n_out, lr=0.05, batch_size=64, single_output=True, output_activation=theano.tensor.tanh,cost_function='nll',optimizer = RMSprop):
       """Build an LSTM wrapped by dense layers on both sides and compile it.

       Data flow: two tanh dense layers (128 units each) -> LSTM whose
       gates also receive the cell state through ``W_ci``/``W_cf``/``W_co``
       -> tanh read-out (512 units) -> two tanh dense layers (256,
       ``n_out``).

       Args:
           n_in: size of each input vector.
           n_lstm: number of LSTM units.
           n_out: size of each output vector.
           lr: learning rate handed to ``optimizer``.
           batch_size: number of rows of the zero initial hidden and cell
               states.
           single_output: accepted for interface compatibility; unused.
           output_activation: accepted for interface compatibility;
               unused (every dense layer applies ``tanh``).
           cost_function: identifier forwarded to ``get_err_fn``.
           optimizer: callable used as ``optimizer(cost, params, lr=lr)``
               whose result exposes ``getUpdates()``.

       Sets ``self.train`` (inputs ``X, Y`` -> cost), ``self.predictions``
       (input ``X``), ``self.output``, ``self.params`` and ``self.n_param``.
       """
       self.n_in = n_in
       self.n_lstm = n_lstm
       self.n_out = n_out
       self.n_fc1 = 512
       self.n_fc2 = 256
       self.n_prefc1 = 128
       self.n_prefc2 = 128

       # Dense layers applied before the recurrence.
       self.W_prefc1 = init_weight((self.n_in, self.n_prefc1), 'W_prefc1', 'glorot')
       self.b_prefc1 = init_bias(self.n_prefc1, sample='zero')
       self.W_prefc2 = init_weight((self.n_prefc1, self.n_prefc2), 'W_prefc2', 'glorot')
       self.b_prefc2 = init_bias(self.n_prefc2, sample='zero')

       # Dense layers applied after the recurrence.
       self.W_fc1 = init_weight((self.n_fc1, self.n_fc2), 'W_fc1', 'glorot')
       self.b_fc1 = init_bias(self.n_fc2, sample='zero')
       self.W_fc2 = init_weight((self.n_fc2, self.n_out), 'W_fc2', 'glorot')
       self.b_fc2 = init_bias(self.n_out, sample='zero')

       # LSTM gates and read-out.
       self.W_xi = init_weight((self.n_prefc2, self.n_lstm), 'W_xi', 'glorot')
       self.W_hi = init_weight((self.n_lstm, self.n_lstm), 'W_hi', 'glorot')
       self.W_ci = init_weight((self.n_lstm, self.n_lstm), 'W_ci', 'glorot')
       self.b_i = init_bias(self.n_lstm, sample='zero')
       self.W_xf = init_weight((self.n_prefc2, self.n_lstm), 'W_xf', 'glorot')
       self.W_hf = init_weight((self.n_lstm, self.n_lstm), 'W_hf', 'glorot')
       self.W_cf = init_weight((self.n_lstm, self.n_lstm), 'W_cf', 'glorot')
       self.b_f = init_bias(self.n_lstm, sample='zero')
       self.W_xc = init_weight((self.n_prefc2, self.n_lstm), 'W_xc', 'glorot')
       self.W_hc = init_weight((self.n_lstm, self.n_lstm), 'W_hc', 'glorot')
       self.b_c = shared(np.zeros(n_lstm, dtype=dtype))
       self.W_xo = init_weight((self.n_prefc2, self.n_lstm), 'W_xo', 'glorot')
       self.W_ho = init_weight((self.n_lstm, self.n_lstm), 'W_ho', 'glorot')
       self.W_co = init_weight((self.n_lstm, self.n_lstm), 'W_co', 'glorot')
       self.b_o = init_bias(self.n_lstm, sample='zero')
       self.W_hy = init_weight((self.n_lstm, self.n_fc1), 'W_hy', 'glorot')
       self.b_y = init_bias(self.n_fc1, sample='zero')

       self.params = [self.W_xi, self.W_hi, self.W_ci, self.b_i,
                      self.W_xf, self.W_hf, self.W_cf, self.b_f,
                      self.W_xc, self.W_hc, self.b_c, self.W_xo,
                      self.W_ho, self.W_co, self.b_o,
                      self.W_hy, self.b_y, self.W_fc1, self.b_fc1, self.W_fc2, self.b_fc2,
                      self.W_prefc1, self.b_prefc1, self.W_prefc2, self.b_prefc2]

       def step_lstm(x_t, h_tm1, c_tm1):
           # Input and forget gates see the previous cell state, the output
           # gate sees the updated one.
           i_t = T.nnet.sigmoid(T.dot(x_t, self.W_xi) + T.dot(h_tm1, self.W_hi) + T.dot(c_tm1, self.W_ci) + self.b_i)
           f_t = T.nnet.sigmoid(T.dot(x_t, self.W_xf) + T.dot(h_tm1, self.W_hf) + T.dot(c_tm1, self.W_cf) + self.b_f)
           c_t = f_t * c_tm1 + i_t * T.tanh(T.dot(x_t, self.W_xc) + T.dot(h_tm1, self.W_hc) + self.b_c)
           o_t = T.nnet.sigmoid(T.dot(x_t, self.W_xo) + T.dot(h_tm1, self.W_ho) + T.dot(c_t, self.W_co) + self.b_o)
           h_t = o_t * T.tanh(c_t)
           y_t = T.tanh(T.dot(h_t, self.W_hy) + self.b_y)
           return [h_t, c_t, y_t]

       X = T.tensor3()  # batch of sequences of vectors
       Y = T.tensor3()  # batch of sequences of vectors
       h0 = shared(np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype))  # initial hidden state
       c0 = shared(np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype))  # initial cell state

       # Dense layers before the recurrence; the first two axes of X are
       # swapped so scan can iterate over the leading axis.
       prefc1_out = T.tanh(T.dot(X.dimshuffle(1, 0, 2), self.W_prefc1) + self.b_prefc1)
       prefc2_out = T.tanh(T.dot(prefc1_out, self.W_prefc2) + self.b_prefc2)

       [h_vals, c_vals, y_vals], _ = theano.scan(fn=step_lstm,
                                         sequences=prefc2_out,
                                         outputs_info=[h0, c0, None])

       # Dense layers after the recurrence.
       fc1_out = T.tanh(T.dot(y_vals, self.W_fc1) + self.b_fc1)
       fc2_out = T.tanh(T.dot(fc1_out, self.W_fc2) + self.b_fc2)

       self.output = fc2_out.dimshuffle(1, 0, 2)

       cost = get_err_fn(self, cost_function, Y)
       _optimizer = optimizer(cost, self.params, lr=lr)

       self.train = theano.function(inputs=[X, Y], outputs=cost, updates=_optimizer.getUpdates(), allow_input_downcast=True)
       self.predictions = theano.function(inputs=[X], outputs=self.output, allow_input_downcast=True)

       # Count the entries actually stored in the trainable shared
       # variables; the old closed-form expression ignored the dense
       # layers, the cell-state matrices and several biases.
       self.n_param = sum(p.get_value(borrow=True).size for p in self.params)
Esempio n. 12
0
   def __init__(self, n_in, n_lstm, n_out, lr=0.05, batch_size=64, single_output=True, output_activation=theano.tensor.nnet.relu,cost_function='mse',optimizer = RMSprop):
       """Build a GRU followed by three tanh dense layers and compile it.

       Args:
           n_in: size of each input vector.
           n_lstm: number of recurrent units.
           n_out: size of each output vector.
           lr: learning rate handed to ``optimizer``.
           batch_size: number of rows of the zero initial hidden state.
           single_output: accepted for interface compatibility; unused.
           output_activation: accepted for interface compatibility;
               unused (every dense layer applies ``tanh``).
           cost_function: identifier forwarded to ``get_err_fn``.
           optimizer: callable used as ``optimizer(cost, params, lr=lr)``
               whose result exposes ``getUpdates()``.

       Sets ``self.train`` (inputs ``X, Y`` -> cost), ``self.predictions``
       (input ``X``), ``self.output``, ``self.params`` and ``self.n_param``.
       """
       self.n_in = n_in
       self.n_lstm = n_lstm
       self.n_out = n_out
       self.n_fc1 = 256
       self.n_fc2 = 256
       self.n_fc3 = 256

       # Dense layers applied after the recurrence.
       self.W_fc1 = init_weight((self.n_fc1, self.n_fc2), 'W_fc1', 'glorot')
       self.b_fc1 = init_bias(self.n_fc2, sample='zero')

       self.W_fc2 = init_weight((self.n_fc2, self.n_fc3), 'W_fc2', 'glorot')
       self.b_fc2 = init_bias(self.n_fc3, sample='zero')

       self.W_fc3 = init_weight((self.n_fc3, self.n_out), 'w_fc3', 'glorot')
       self.b_fc3 = init_bias(self.n_out, sample='zero')

       # GRU gates (reset, update, candidate) and read-out.
       self.W_xr = init_weight((self.n_in, self.n_lstm), 'W_xr', 'glorot')
       self.W_hr = init_weight((self.n_lstm, self.n_lstm), 'W_hr', 'ortho')
       self.b_r = init_bias(self.n_lstm, sample='zero')
       self.W_xz = init_weight((self.n_in, self.n_lstm), 'W_xz', 'glorot')
       self.W_hz = init_weight((self.n_lstm, self.n_lstm), 'W_hz', 'ortho')
       self.b_z = init_bias(self.n_lstm, sample='zero')
       self.W_xh = init_weight((self.n_in, self.n_lstm), 'W_xh', 'glorot')
       self.W_hh = init_weight((self.n_lstm, self.n_lstm), 'W_hh', 'ortho')
       self.b_h = init_bias(self.n_lstm, sample='zero')
       self.W_hy = init_weight((self.n_lstm, self.n_fc1), 'W_hy', 'glorot')
       self.b_y = init_bias(self.n_fc1, sample='zero')

       self.params = [self.W_xr, self.W_hr, self.b_r,
                      self.W_xz, self.W_hz, self.b_z,
                      self.W_xh, self.W_hh, self.b_h,
                      self.W_hy, self.b_y, self.W_fc1, self.b_fc1, self.W_fc2, self.b_fc2, self.W_fc3, self.b_fc3]

       def step_lstm(x_t, h_tm1):
           # One GRU step followed by a tanh read-out.
           r_t = T.nnet.sigmoid(T.dot(x_t, self.W_xr) + T.dot(h_tm1, self.W_hr) + self.b_r)
           z_t = T.nnet.sigmoid(T.dot(x_t, self.W_xz) + T.dot(h_tm1, self.W_hz) + self.b_z)
           h_t = T.tanh(T.dot(x_t, self.W_xh) + T.dot((r_t * h_tm1), self.W_hh) + self.b_h)
           hh_t = z_t * h_t + (1 - z_t) * h_tm1
           y_t = T.tanh(T.dot(hh_t, self.W_hy) + self.b_y)
           return [hh_t, y_t]

       X = T.tensor3()  # batch of sequences of vectors
       Y = T.tensor3()  # batch of sequences of vectors
       h0 = shared(np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype))  # initial hidden state

       # scan iterates over the leading axis, so the first two axes of X
       # are swapped before the loop and swapped back at the end.
       [h_vals, y_vals], _ = theano.scan(fn=step_lstm,
                                         sequences=X.dimshuffle(1, 0, 2),
                                         outputs_info=[h0, None])

       # Dense layers after the recurrence.
       fc1_out = T.tanh(T.dot(y_vals, self.W_fc1) + self.b_fc1)
       fc2_out = T.tanh(T.dot(fc1_out, self.W_fc2) + self.b_fc2)
       fc3_out = T.tanh(T.dot(fc2_out, self.W_fc3) + self.b_fc3)

       self.output = fc3_out.dimshuffle(1, 0, 2)

       cost = get_err_fn(self, cost_function, Y)
       _optimizer = optimizer(cost, self.params, lr=lr)

       self.train = theano.function(inputs=[X, Y], outputs=cost, updates=_optimizer.getUpdates(), allow_input_downcast=True)
       self.predictions = theano.function(inputs=[X], outputs=self.output, allow_input_downcast=True)

       # Count the entries actually stored in the trainable shared
       # variables; the old closed-form expression assumed four gates and
       # ignored the dense layers.
       self.n_param = sum(p.get_value(borrow=True).size for p in self.params)
Esempio n. 13
0
    def __init__(self,
                 n_in,
                 n_lstm,
                 n_out,
                 lr=0.00001,
                 batch_size=64,
                 output_activation=theano.tensor.nnet.relu,
                 cost_function='mse',
                 optimizer=RMSprop):
        """Build a two-direction GRU network and compile its Theano functions.

        Two independent GRU recurrences (reset gate ``r``, update gate ``z``)
        are run over ``X_f`` and ``X_b``; their hidden sequences are projected
        by ``W_hy_f`` / ``W_hy_b``, summed with ``b_y`` and squashed by tanh.

        Args:
            n_in: width of each input vector (rows of the ``W_x*`` matrices).
            n_lstm: number of hidden units in each direction.
            n_out: width of each output vector.
            lr: learning rate forwarded to ``optimizer``.
            batch_size: number of rows of the initial hidden state ``h0``.
            output_activation: accepted for signature compatibility; unused.
            cost_function: forwarded to ``get_err_fn``.
            optimizer: called as ``optimizer(cost, params, lr=lr)``; the
                result must provide ``getUpdates()``.

        Sets ``self.train(X_f, X_b, Y)``, ``self.predictions(X_f, X_b)``,
        ``self.output``, ``self.params`` and ``self.n_param``.
        """
        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out

        # Forward-direction GRU weights.
        self.W_xr_1 = init_weight((self.n_in, self.n_lstm), 'W_xr_1', 'glorot')
        self.W_hr_1 = init_weight((self.n_lstm, self.n_lstm), 'W_hr_1',
                                  'ortho')
        self.b_r_1 = init_bias(self.n_lstm, sample='zero')
        self.W_xz_1 = init_weight((self.n_in, self.n_lstm), 'W_xz_1', 'glorot')
        self.W_hz_1 = init_weight((self.n_lstm, self.n_lstm), 'W_hz_1',
                                  'ortho')
        self.b_z_1 = init_bias(self.n_lstm, sample='zero')
        self.W_xh_1 = init_weight((self.n_in, self.n_lstm), 'W_xh_1', 'glorot')
        self.W_hh_1 = init_weight((self.n_lstm, self.n_lstm), 'W_hh_1',
                                  'ortho')
        self.b_h_1 = init_bias(self.n_lstm, sample='zero')

        # Backward-direction GRU weights.
        self.W_xr_2 = init_weight((self.n_in, self.n_lstm), 'W_xr', 'glorot')
        self.W_hr_2 = init_weight((self.n_lstm, self.n_lstm), 'W_hr', 'ortho')
        self.b_r_2 = init_bias(self.n_lstm, sample='zero')
        self.W_xz_2 = init_weight((self.n_in, self.n_lstm), 'W_xz', 'glorot')
        self.W_hz_2 = init_weight((self.n_lstm, self.n_lstm), 'W_hz', 'ortho')
        self.b_z_2 = init_bias(self.n_lstm, sample='zero')
        self.W_xh_2 = init_weight((self.n_in, self.n_lstm), 'W_xh', 'glorot')
        self.W_hh_2 = init_weight((self.n_lstm, self.n_lstm), 'W_hh', 'ortho')
        self.b_h_2 = init_bias(self.n_lstm, sample='zero')

        # Output projection: one matrix per direction and a shared bias.
        # These are the attributes the parameter list and the output
        # expression below rely on; previously they were never created.
        self.W_hy_f = init_weight((self.n_lstm, self.n_out), 'W_hy_f',
                                  'glorot')
        self.W_hy_b = init_weight((self.n_lstm, self.n_out), 'W_hy_b',
                                  'glorot')
        self.b_y = init_bias(self.n_out, sample='zero')

        self.params = [
            self.W_xr_1, self.W_hr_1, self.b_r_1, self.W_xz_1, self.W_hz_1,
            self.b_z_1, self.W_xh_1, self.W_hh_1, self.b_h_1, self.W_xr_2,
            self.W_hr_2, self.b_r_2, self.W_xz_2, self.W_hz_2, self.b_z_2,
            self.W_xh_2, self.W_hh_2, self.b_h_2, self.W_hy_f, self.W_hy_b,
            self.b_y
        ]

        def f_step_lstm(x_t, h_tm1_1):
            # One GRU step of the forward direction.
            r_t_1 = T.nnet.sigmoid(
                T.dot(x_t, self.W_xr_1) + T.dot(h_tm1_1, self.W_hr_1) +
                self.b_r_1)
            z_t_1 = T.nnet.sigmoid(
                T.dot(x_t, self.W_xz_1) + T.dot(h_tm1_1, self.W_hz_1) +
                self.b_z_1)
            h_t_1 = T.tanh(
                T.dot(x_t, self.W_xh_1) +
                T.dot((r_t_1 * h_tm1_1), self.W_hh_1) + self.b_h_1)
            hh_t_1 = z_t_1 * h_t_1 + (1 - z_t_1) * h_tm1_1
            return hh_t_1

        def b_step_lstm(x_t, h_tm1_2):
            # One GRU step of the backward direction.
            r_t_2 = T.nnet.sigmoid(
                T.dot(x_t, self.W_xr_2) + T.dot(h_tm1_2, self.W_hr_2) +
                self.b_r_2)
            z_t_2 = T.nnet.sigmoid(
                T.dot(x_t, self.W_xz_2) + T.dot(h_tm1_2, self.W_hz_2) +
                self.b_z_2)
            h_t_2 = T.tanh(
                T.dot(x_t, self.W_xh_2) +
                T.dot((r_t_2 * h_tm1_2), self.W_hh_2) + self.b_h_2)
            hh_t_2 = z_t_2 * h_t_2 + (1 - z_t_2) * h_tm1_2
            return hh_t_2

        X_f = T.tensor3()  # forward input, indexed (batch, time, feature)
        X_b = T.tensor3()  # backward input, indexed (batch, time, feature)
        Y = T.tensor3()  # target sequences
        h0 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                             dtype=dtype))  # initial hidden state

        # A GRU carries a single recurrent state, so exactly one entry is
        # passed in outputs_info; scan then returns one stacked tensor whose
        # leading axis is time.
        h_f, _ = theano.scan(fn=f_step_lstm,
                             sequences=X_f.dimshuffle(1, 0, 2),
                             outputs_info=[h0])

        h_b, _ = theano.scan(fn=b_step_lstm,
                             sequences=X_b.dimshuffle(1, 0, 2),
                             outputs_info=[h0])
        # Flip along the time axis (axis 0 of the scan output) so both
        # directions line up step by step.
        # NOTE(review): presumes callers feed X_b in reversed time order.
        h_b = h_b[::-1]
        y_vals = T.tanh(
            T.dot(h_f, self.W_hy_f) + T.dot(h_b, self.W_hy_b) + self.b_y)

        self.output = y_vals.dimshuffle(1, 0, 2)

        cost = get_err_fn(self, cost_function, Y)

        _optimizer = optimizer(cost, self.params, lr=lr)

        self.train = theano.function(inputs=[X_f, X_b, Y],
                                     outputs=cost,
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X_f, X_b],
                                           outputs=self.output,
                                           allow_input_downcast=True)
        # Exact size of self.params: two GRUs (3 gates each), two output
        # projections and one output bias.
        gru_size = 3 * (n_in * n_lstm + n_lstm * n_lstm + n_lstm)
        self.n_param = 2 * gru_size + 2 * n_lstm * n_out + n_out
Esempio n. 14
0
    def __init__(self,
                 rng,
                 n_in,
                 n_lstm,
                 n_out,
                 lr=0.00001,
                 batch_size=64,
                 output_activation=theano.tensor.nnet.relu,
                 cost_function='mse',
                 optimizer=RMSprop):
        """Create the GRU parameters and compile the train/predict functions.

        Args:
            rng: random generator handed to ``init_weight`` / ``init_bias``.
            n_in: width of each input vector.
            n_lstm: number of hidden units.
            n_out: width of each output vector.
            lr: learning rate forwarded to ``optimizer``.
            batch_size: number of rows of the initial hidden state.
            output_activation: not used by this constructor.
            cost_function: forwarded to ``get_err_fn``.
            optimizer: called as ``optimizer(cost, params, lr=lr)``; its
                result must provide ``getUpdates()``.
        """
        self.n_in, self.n_lstm, self.n_out = n_in, n_lstm, n_out

        def glorot(shape, label):
            # Every weight matrix here uses the same initialisation scheme.
            return init_weight(shape, rng=rng, name=label, sample='glorot')

        def zero_bias(size):
            return init_bias(size, rng=rng, sample='zero')

        in_to_hid = (self.n_in, self.n_lstm)
        hid_to_hid = (self.n_lstm, self.n_lstm)

        # Reset gate.
        # NOTE(review): the label 'W_xi' does not match the attribute name
        # W_xr; kept unchanged here.
        self.W_xr = glorot(in_to_hid, 'W_xi')
        self.W_hr = glorot(hid_to_hid, 'W_hr')
        self.b_r = zero_bias(self.n_lstm)
        # Update gate.
        self.W_xz = glorot(in_to_hid, 'W_xz')
        self.W_hz = glorot(hid_to_hid, 'W_hz')
        self.b_z = zero_bias(self.n_lstm)
        # Candidate state.
        self.W_xh = glorot(in_to_hid, 'W_xh')
        self.W_hh = glorot(hid_to_hid, 'W_hh')
        self.b_h = zero_bias(self.n_lstm)
        # Output projection.
        self.W_hy = glorot((self.n_lstm, self.n_out), 'W_hy')
        self.b_y = zero_bias(self.n_out)
        self.one_mat = T.ones((batch_size, n_lstm), dtype=dtype)

        self.params = [
            self.W_xr, self.W_hr, self.b_r,
            self.W_xz, self.W_hz, self.b_z,
            self.W_xh, self.W_hh, self.b_h,
            self.W_hy, self.b_y,
        ]

        def gru_step(x_t, h_prev):
            # Returns the new hidden state and the tanh-squashed output.
            reset_gate = T.nnet.sigmoid(
                T.dot(x_t, self.W_xr) + T.dot(h_prev, self.W_hr) + self.b_r)
            update_gate = T.nnet.sigmoid(
                T.dot(x_t, self.W_xz) + T.dot(h_prev, self.W_hz) + self.b_z)
            candidate = T.tanh(
                T.dot(x_t, self.W_xh) +
                T.dot((reset_gate * h_prev), self.W_hh) + self.b_h)
            h_new = update_gate * candidate + (1 - update_gate) * h_prev
            y_new = T.tanh(T.dot(h_new, self.W_hy) + self.b_y)
            return [h_new, y_new]

        X = T.tensor3()  # inputs, indexed (batch, time, feature)
        Y = T.tensor3()  # targets
        zero_state = np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype)
        h0 = shared(zero_state)  # initial hidden state

        # scan iterates over the leading axis, so time is moved to the front.
        time_major = X.dimshuffle(1, 0, 2)
        [h_vals, y_vals], _ = theano.scan(fn=gru_step,
                                          sequences=time_major,
                                          outputs_info=[h0, None])

        self.output = y_vals.dimshuffle(1, 0, 2)

        cost = get_err_fn(self, cost_function, Y)
        _optimizer = optimizer(cost, self.params, lr=lr)
        updates = _optimizer.getUpdates()

        self.train = theano.function(inputs=[X, Y],
                                     outputs=cost,
                                     updates=updates,
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X],
                                           outputs=y_vals.dimshuffle(1, 0, 2),
                                           allow_input_downcast=True)
        self.n_param = (3 * (n_lstm * n_lstm + n_in * n_lstm + n_lstm) +
                        n_lstm * n_out)
Esempio n. 15
0
    def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
        """Build a three-stage CNN feeding a GRU and compile Theano functions.

        Args:
            rng: random generator handed to the layer constructors and to
                ``init_weight`` / ``init_bias``.
            params: mapping read for the keys ``"lr"``, ``'n_hidden'``,
                ``'n_output'``, ``"batch_size"`` and ``"seq_length"``.
            cost_function: forwarded to ``get_err_fn``.
            optimizer: called as ``optimizer(cost, params, lr=lr)``; its
                result must provide ``getUpdates()``.

        Sets ``self.train(X, Y, is_train)``, ``self.predictions(X, is_train)``,
        ``self.output``, ``self.params`` and ``self.n_param``.
        """
        lr = params["lr"]
        n_lstm = params['n_hidden']
        n_out = params['n_output']
        batch_size = params["batch_size"]
        sequence_length = params["seq_length"]

        X = T.tensor3()  # batch of sequence of vector
        Y = T.tensor3()  # batch of sequence of vector
        is_train = T.iscalar(
            'is_train'
        )  # pseudo boolean for switching between training and prediction

        # CNN global parameters.
        subsample = (1, 1)
        p_1 = 0.5
        border_mode = "valid"
        # Every frame of every sequence is one CNN sample.
        cnn_batch_size = batch_size * sequence_length
        pool_size = (2, 2)

        # Layer1: conv2+pool+drop
        filter_shape = (64, 1, 9, 9)
        input_shape = (cnn_batch_size, 1, 120, 60
                       )  # input_shape= (samples, channels, rows, cols)
        # Named cnn_input so the builtin input() is not shadowed.
        cnn_input = X.reshape(input_shape)
        c1 = ConvLayer(rng,
                       cnn_input,
                       filter_shape,
                       input_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p1 = PoolLayer(c1.output,
                       pool_size=pool_size,
                       input_shape=c1.output_shape)
        dl1 = DropoutLayer(rng, input=p1.output, prob=p_1)
        # At prediction time the dropout output is replaced by the pooled
        # activations scaled by the retain probability.
        retain_prob = 1. - p_1
        test_output = p1.output * retain_prob
        d1_output = T.switch(T.neq(is_train, 0), dl1.output, test_output)

        # Layer2: conv2+pool
        filter_shape = (128, p1.output_shape[1], 3, 3)
        c2 = ConvLayer(rng,
                       d1_output,
                       filter_shape,
                       p1.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p2 = PoolLayer(c2.output,
                       pool_size=pool_size,
                       input_shape=c2.output_shape)

        # Layer3: conv2+pool
        filter_shape = (128, p2.output_shape[1], 3, 3)
        c3 = ConvLayer(rng,
                       p2.output,
                       filter_shape,
                       p2.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p3 = PoolLayer(c3.output,
                       pool_size=pool_size,
                       input_shape=c3.output_shape)

        # Layer4: hidden
        # Flattened feature count per sample; np.prod is used instead of
        # reduce(), which is not a builtin on Python 3.
        n_in = int(np.prod(p3.output_shape[1:]))
        x_flat = p3.output.flatten(2)
        h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)
        n_in = 1024
        rnn_input = h1.output.reshape((batch_size, sequence_length, n_in))

        # Layer5: gru
        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out
        self.W_xr = init_weight((self.n_in, self.n_lstm),
                                rng=rng,
                                name='W_xi',
                                sample='glorot')
        self.W_hr = init_weight((self.n_lstm, self.n_lstm),
                                rng=rng,
                                name='W_hr',
                                sample='glorot')
        self.b_r = init_bias(self.n_lstm, rng=rng, sample='zero')
        self.W_xz = init_weight((self.n_in, self.n_lstm),
                                rng=rng,
                                name='W_xz',
                                sample='glorot')
        self.W_hz = init_weight((self.n_lstm, self.n_lstm),
                                rng=rng,
                                name='W_hz',
                                sample='glorot')
        self.b_z = init_bias(self.n_lstm, rng=rng, sample='zero')
        self.W_xh = init_weight((self.n_in, self.n_lstm),
                                rng=rng,
                                name='W_xh',
                                sample='glorot')
        self.W_hh = init_weight((self.n_lstm, self.n_lstm),
                                rng=rng,
                                name='W_hh',
                                sample='glorot')
        self.b_h = init_bias(self.n_lstm, rng=rng, sample='zero')
        self.W_hy = init_weight((self.n_lstm, self.n_out),
                                rng=rng,
                                name='W_hy',
                                sample='glorot')
        self.b_y = init_bias(self.n_out, rng=rng, sample='zero')

        self.params = [
            self.W_xr, self.W_hr, self.b_r, self.W_xz, self.W_hz, self.b_z,
            self.W_xh, self.W_hh, self.b_h, self.W_hy, self.b_y
        ]

        def step_lstm(x_t, h_tm1):
            # One GRU step; the output projection is linear (no activation).
            r_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xr) + T.dot(h_tm1, self.W_hr) + self.b_r)
            z_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xz) + T.dot(h_tm1, self.W_hz) + self.b_z)
            h_t = T.tanh(
                T.dot(x_t, self.W_xh) + T.dot((r_t * h_tm1), self.W_hh) +
                self.b_h)
            hh_t = z_t * h_t + (1 - z_t) * h_tm1
            y_t = T.dot(hh_t, self.W_hy) + self.b_y
            return [hh_t, y_t]

        h0 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                             dtype=dtype))  # initial hidden state

        # dimshuffle(1, 0, 2) turns (batch_size, sequence_length, n_in) into
        # (sequence_length, batch_size, n_in) so scan iterates over time.
        [h_vals,
         y_vals], _ = theano.scan(fn=step_lstm,
                                  sequences=rnn_input.dimshuffle(1, 0, 2),
                                  outputs_info=[h0, None])

        self.output = y_vals.dimshuffle(1, 0, 2)

        # Train the CNN and hidden-layer parameters together with the GRU.
        self.params = c1.params + c2.params + c3.params + h1.params + self.params

        cost = get_err_fn(self, cost_function, Y)
        _optimizer = optimizer(cost, self.params, lr=lr)
        self.train = theano.function(inputs=[X, Y, is_train],
                                     outputs=cost,
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X, is_train],
                                           outputs=self.output,
                                           allow_input_downcast=True)
        self.n_param = count_params(self.params)
Esempio n. 16
0
   def __init__(self, n_in, n_lstm, n_out, lr=0.05, batch_size=64, output_activation=theano.tensor.nnet.relu,cost_function='mse',optimizer = RMSprop):
       """Create the LSTM parameters and compile the train/predict functions.

       Args:
           n_in: width of each input vector.
           n_lstm: number of hidden units.
           n_out: width of each output vector.
           lr: learning rate forwarded to ``optimizer``.
           batch_size: number of rows of the initial hidden/cell state.
           output_activation: accepted for signature compatibility; unused.
           cost_function: ``'mse'`` for mean squared error, ``'cxe'`` for
               binary cross-entropy; any other value selects the negative
               log-likelihood.
           optimizer: called as ``optimizer(cost, params, lr=lr)``; its
               result must provide ``getUpdates()``.
       """
       self.n_in = n_in
       self.n_lstm = n_lstm
       self.n_out = n_out
       # Input gate.
       self.W_xi = init_weight((self.n_in, self.n_lstm),'W_xi', 'glorot')
       self.W_hi = init_weight((self.n_lstm, self.n_lstm),'W_hi', 'ortho')
       self.b_i  = init_bias(self.n_lstm, sample='zero')
       # Forget gate (bias initialised with the 'one' scheme).
       self.W_xf = init_weight((self.n_in, self.n_lstm),'W_xf', 'glorot')
       self.W_hf = init_weight((self.n_lstm, self.n_lstm),'W_hf', 'ortho')
       self.b_f = init_bias(self.n_lstm, sample='one')
       # Cell candidate.
       self.W_xc = init_weight((self.n_in, self.n_lstm),'W_xc', 'glorot')
       self.W_hc = init_weight((self.n_lstm, self.n_lstm),'W_hc', 'ortho')
       self.b_c = init_bias(self.n_lstm, sample='zero')
       # Output gate.
       self.W_xo = init_weight((self.n_in, self.n_lstm),'W_xo', 'glorot')
       self.W_ho = init_weight((self.n_lstm, self.n_lstm),'W_ho', 'ortho')
       self.b_o = init_bias(self.n_lstm, sample='zero')
       # Output projection.
       self.W_hy = init_weight((self.n_lstm, self.n_out),'W_hy', 'glorot')
       self.b_y = init_bias(self.n_out, sample='zero')

       self.params = [self.W_xi, self.W_hi, self.b_i,
                      self.W_xf, self.W_hf,  self.b_f,
                      self.W_xc, self.W_hc, self.b_c,  self.W_xo,
                      self.W_ho, self.b_o,
                      self.W_hy, self.b_y]

       def step_lstm(x_t, h_tm1, c_tm1):
           # One LSTM step: returns new hidden state, new cell state, output.
           i_t = T.nnet.sigmoid(T.dot(x_t, self.W_xi) + T.dot(h_tm1, self.W_hi) + self.b_i)
           f_t = T.nnet.sigmoid(T.dot(x_t, self.W_xf) + T.dot(h_tm1, self.W_hf) + self.b_f)
           c_t = f_t * c_tm1 + i_t * T.tanh(T.dot(x_t, self.W_xc) + T.dot(h_tm1, self.W_hc) + self.b_c)
           o_t = T.nnet.sigmoid(T.dot(x_t, self.W_xo)+ T.dot(h_tm1, self.W_ho) + self.b_o)
           h_t = o_t * T.tanh(c_t)
           y_t = T.tanh(T.dot(h_t, self.W_hy) + self.b_y)
           return [h_t, c_t, y_t]

       X = T.tensor3() # batch of sequence of vector
       Y = T.tensor3() # batch of sequence of vector (targets)
       h0 = shared(np.zeros(shape=(batch_size,self.n_lstm), dtype=dtype)) # initial hidden state
       c0 = shared(np.zeros(shape=(batch_size,self.n_lstm), dtype=dtype)) # initial cell state

       # scan iterates over the leading axis, so time is moved to the front.
       [h_vals, c_vals, y_vals], _ = theano.scan(fn=step_lstm,
                                         sequences=X.dimshuffle(1,0,2),
                                         outputs_info=[h0, c0, None])

       self.output = y_vals.dimshuffle(1,0,2)

       # Build only the cost expression that was actually requested.
       if cost_function == 'mse':
           cost = T.mean((self.output - Y) ** 2)
       elif cost_function == 'cxe':
           cost = T.mean(T.nnet.binary_crossentropy(self.output, Y))
       else:
           cost = -T.mean(Y * T.log(self.output)+ (1.- Y) * T.log(1. - self.output))

       _optimizer = optimizer(
            cost,
            self.params,
            lr=lr
        )

       self.train = theano.function(inputs=[X, Y],outputs=cost,updates=_optimizer.getUpdates(),allow_input_downcast=True)
       self.predictions = theano.function(inputs = [X], outputs = self.output,allow_input_downcast=True)
       # Exact size of self.params: four gates (input weights, recurrent
       # weights and bias each) plus the output projection and its bias.
       self.n_param=4*(n_in*n_lstm+n_lstm*n_lstm+n_lstm)+n_lstm*n_out+n_out
Esempio n. 17
0
    def __init__(self,
                 n_in,
                 n_lstm,
                 n_out,
                 lr=0.05,
                 batch_size=64,
                 output_activation=theano.tensor.nnet.relu,
                 cost_function='mse',
                 optimizer=RMSprop):
        """Build a two-layer peephole LSTM followed by three tanh FC layers.

        Args:
            n_in: width of each input vector.
            n_lstm: number of hidden units in each LSTM layer.
            n_out: width of each output vector (output of the last FC layer).
            lr: learning rate forwarded to ``optimizer``.
            batch_size: number of rows of the initial hidden/cell states.
            output_activation: accepted for signature compatibility; unused.
            cost_function: forwarded to ``get_err_fn``.
            optimizer: called as ``optimizer(cost, params, lr=lr)``; its
                result must provide ``getUpdates()``.

        Sets ``self.train(X, Y)``, ``self.predictions(X)``, ``self.output``,
        ``self.params`` and ``self.n_param``.
        """
        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out

        # Widths of the fully connected stack: fc1 maps n_fc1 -> n_fc2,
        # fc2 maps n_fc2 -> n_fc3 and fc3 maps n_fc3 -> n_out.
        self.n_fc1 = 256
        self.n_fc2 = 256
        self.n_fc3 = 256

        self.W_fc1 = init_weight((self.n_fc1, self.n_fc2), 'W_fc1', 'glorot')
        self.b_fc1 = init_bias(self.n_fc2, sample='zero')

        self.W_fc2 = init_weight((self.n_fc2, self.n_fc3), 'W_fc2', 'glorot')
        self.b_fc2 = init_bias(self.n_fc3, sample='zero')

        self.W_fc3 = init_weight((self.n_fc3, self.n_out), 'w_fc3', 'glorot')
        self.b_fc3 = init_bias(self.n_out, sample='zero')

        # First LSTM layer
        self.W_xi_1 = init_weight((self.n_in, self.n_lstm), 'W_xi_1', 'glorot')
        self.W_hi_1 = init_weight((self.n_lstm, self.n_lstm), 'W_hi_1',
                                  'glorot')
        self.W_ci_1 = init_weight((self.n_lstm, self.n_lstm), 'W_ci_1',
                                  'glorot')
        self.b_i_1 = init_bias(self.n_lstm, sample='zero')
        self.W_xf_1 = init_weight((self.n_in, self.n_lstm), 'W_xf_1', 'glorot')
        self.W_hf_1 = init_weight((self.n_lstm, self.n_lstm), 'W_hf_1',
                                  'glorot')
        self.W_cf_1 = init_weight((self.n_lstm, self.n_lstm), 'W_cf_1',
                                  'glorot')
        self.b_f_1 = init_bias(self.n_lstm, sample='one')
        self.W_xc_1 = init_weight((self.n_in, self.n_lstm), 'W_xc_1', 'glorot')
        self.W_hc_1 = init_weight((self.n_lstm, self.n_lstm), 'W_hc_1',
                                  'glorot')
        self.b_c_1 = init_bias(self.n_lstm, sample='zero')
        self.W_xo_1 = init_weight((self.n_in, self.n_lstm), 'W_xo_1', 'glorot')
        self.W_ho_1 = init_weight((self.n_lstm, self.n_lstm), 'W_ho_1',
                                  'glorot')
        self.W_co_1 = init_weight((self.n_lstm, self.n_lstm), 'W_co_1',
                                  'glorot')
        self.b_o_1 = init_bias(self.n_lstm, sample='zero')

        # Second LSTM layer (its input is the first layer's hidden state)
        self.W_xi_2 = init_weight((self.n_lstm, self.n_lstm), 'W_xi_2',
                                  'glorot')
        self.W_hi_2 = init_weight((self.n_lstm, self.n_lstm), 'W_hi_2',
                                  'glorot')
        self.W_ci_2 = init_weight((self.n_lstm, self.n_lstm), 'W_ci_2',
                                  'glorot')
        self.b_i_2 = init_bias(self.n_lstm, sample='zero')
        self.W_xf_2 = init_weight((self.n_lstm, self.n_lstm), 'W_xf_2',
                                  'glorot')
        self.W_hf_2 = init_weight((self.n_lstm, self.n_lstm), 'W_hf_2',
                                  'glorot')
        self.W_cf_2 = init_weight((self.n_lstm, self.n_lstm), 'W_cf_2',
                                  'glorot')
        self.b_f_2 = init_bias(self.n_lstm, sample='one')
        self.W_xc_2 = init_weight((self.n_lstm, self.n_lstm), 'W_xc_2',
                                  'glorot')
        self.W_hc_2 = init_weight((self.n_lstm, self.n_lstm), 'W_hc_2',
                                  'glorot')
        self.b_c_2 = init_bias(self.n_lstm, sample='zero')
        self.W_xo_2 = init_weight((self.n_lstm, self.n_lstm), 'W_xo_2',
                                  'glorot')
        self.W_ho_2 = init_weight((self.n_lstm, self.n_lstm), 'W_ho_2',
                                  'glorot')
        self.W_co_2 = init_weight((self.n_lstm, self.n_lstm), 'W_co_2',
                                  'glorot')
        self.b_o_2 = init_bias(self.n_lstm, sample='zero')
        # The recurrent projection feeds W_fc1, whose input width is n_fc1,
        # so it must produce n_fc1 features. Sizing it with n_out only
        # worked when n_out happened to equal n_fc1.
        self.W_hy_2 = init_weight((self.n_lstm, self.n_fc1), 'W_hy_2',
                                  'glorot')
        self.b_y_2 = init_bias(self.n_fc1, sample='zero')

        self.params = [
            self.W_xi_1,
            self.W_hi_1,
            self.W_ci_1,
            self.b_i_1,
            self.W_xf_1,
            self.W_hf_1,
            self.W_cf_1,
            self.b_f_1,
            self.W_xc_1,
            self.W_hc_1,
            self.b_c_1,
            self.W_xo_1,
            self.W_ho_1,
            self.W_co_1,
            self.b_o_1,
            self.W_xi_2,
            self.W_hi_2,
            self.W_ci_2,
            self.b_i_2,
            self.W_xf_2,
            self.W_hf_2,
            self.W_cf_2,
            self.b_f_2,
            self.W_xc_2,
            self.W_hc_2,
            self.b_c_2,
            self.W_xo_2,
            self.W_ho_2,
            self.W_co_2,
            self.b_o_2,
            self.W_hy_2,
            self.b_y_2,
            self.W_fc1,
            self.b_fc1,
            self.W_fc2,
            self.b_fc2,
            self.W_fc3,
            self.b_fc3
        ]

        def step_lstm(x_t, h_tm1, c_tm1, h_tm2, c_tm2):
            # Layer 1: input and forget gates see the previous cell state,
            # the output gate sees the freshly computed one.
            i_t_1 = T.nnet.sigmoid(
                T.dot(x_t, self.W_xi_1) + T.dot(h_tm1, self.W_hi_1) +
                T.dot(c_tm1, self.W_ci_1) + self.b_i_1)
            f_t_1 = T.nnet.sigmoid(
                T.dot(x_t, self.W_xf_1) + T.dot(h_tm1, self.W_hf_1) +
                T.dot(c_tm1, self.W_cf_1) + self.b_f_1)
            c_t_1 = f_t_1 * c_tm1 + i_t_1 * T.tanh(
                T.dot(x_t, self.W_xc_1) + T.dot(h_tm1, self.W_hc_1) +
                self.b_c_1)
            o_t_1 = T.nnet.sigmoid(
                T.dot(x_t, self.W_xo_1) + T.dot(h_tm1, self.W_ho_1) +
                T.dot(c_t_1, self.W_co_1) + self.b_o_1)
            h_t_1 = o_t_1 * T.tanh(c_t_1)

            # Layer 2: same equations, driven by layer 1's hidden state.
            i_t_2 = T.nnet.sigmoid(
                T.dot(h_t_1, self.W_xi_2) + T.dot(h_tm2, self.W_hi_2) +
                T.dot(c_tm2, self.W_ci_2) + self.b_i_2)
            f_t_2 = T.nnet.sigmoid(
                T.dot(h_t_1, self.W_xf_2) + T.dot(h_tm2, self.W_hf_2) +
                T.dot(c_tm2, self.W_cf_2) + self.b_f_2)
            c_t_2 = f_t_2 * c_tm2 + i_t_2 * T.tanh(
                T.dot(h_t_1, self.W_xc_2) + T.dot(h_tm2, self.W_hc_2) +
                self.b_c_2)
            o_t_2 = T.nnet.sigmoid(
                T.dot(h_t_1, self.W_xo_2) + T.dot(h_tm2, self.W_ho_2) +
                T.dot(c_t_2, self.W_co_2) + self.b_o_2)
            h_t_2 = o_t_2 * T.tanh(c_t_2)
            y_t_2 = T.tanh(T.dot(h_t_2, self.W_hy_2) + self.b_y_2)

            return [h_t_1, c_t_1, h_t_2, c_t_2, y_t_2]

        X = T.tensor3()  # batch of sequence of vector
        Y = T.tensor3()  # batch of sequence of vector (targets)
        h0_1 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # layer 1 initial hidden state
        c0_1 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # layer 1 initial cell state
        h0_2 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # layer 2 initial hidden state
        c0_2 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # layer 2 initial cell state

        [h_vals_1, c_vals_1, h_vals_2, c_vals_2,
         y_vals], _ = theano.scan(fn=step_lstm,
                                  sequences=X.dimshuffle(1, 0, 2),
                                  outputs_info=[h0_1, c0_1, h0_2, c0_2, None])
        # Fully connected stack applied to every time step.
        fc1_out = T.tanh(T.dot(y_vals, self.W_fc1) + self.b_fc1)
        fc2_out = T.tanh(T.dot(fc1_out, self.W_fc2) + self.b_fc2)
        fc3_out = T.tanh(T.dot(fc2_out, self.W_fc3) + self.b_fc3)

        self.output = fc3_out.dimshuffle(1, 0, 2)

        cost = get_err_fn(self, cost_function, Y)
        _optimizer = optimizer(cost, self.params, lr=lr)

        self.train = theano.function(inputs=[X, Y],
                                     outputs=cost,
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        # Predict with the same tensor the cost is computed on (the FC
        # output), not the intermediate recurrent projection.
        self.predictions = theano.function(inputs=[X],
                                           outputs=self.output,
                                           allow_input_downcast=True)
        # NOTE(review): rough estimate only; it does not account for the
        # peephole matrices or the fully connected layers.
        self.n_param = (n_lstm * n_lstm * 4 + n_in * n_lstm * 4 +
                        n_lstm * n_out + n_lstm * 3) * 2
Esempio n. 18
0
   def __init__(self,rng, params,cost_function='mse',optimizer = RMSprop):
       """Build an LSTM with a mixture-density output and compile functions.

       Args:
           rng: random stream used for weight initialisation, input noise
               and sampling from the mixture.
           params: mapping read for the keys ``'batch_size'``,
               ``"seq_length"``, ``'lr'``, ``'n_hidden'`` and ``'n_output'``.
           cost_function: accepted for signature compatibility; the cost is
               always ``nll`` of the mixture plus an L2 penalty.
           optimizer: called as ``optimizer(cost, params, lr=lr)``; its
               result must provide ``getUpdates()``.

       Sets ``self.train(X, Y, is_train, H, C)`` returning
       ``[cost, last hidden state, last cell state]`` and
       ``self.predictions(X, is_train, H, C)`` returning
       ``[samples, last hidden state, last cell state]``.
       """
       batch_size=params['batch_size']
       sequence_length=params["seq_length"]

       lr=params['lr']
       self.n_in = 48
       self.n_lstm = params['n_hidden']
       self.n_out = params['n_output']
       n_fc=512

       self.W_hy = init_weight((self.n_lstm, n_fc), rng=rng,name='W_hy', sample= 'glorot')
       self.b_y = init_bias(n_fc,rng=rng, sample='zero')

       self.numOfLayers=1
       layer1=LSTMLayer(rng,0,self.n_in,self.n_lstm)

       # Copy the list so the appends below do not modify layer1.params.
       self.params = list(layer1.params)
       self.params.append(self.W_hy)
       self.params.append(self.b_y)

       def step_lstm(x_t,h_tm1_1,c_tm1_1):
           # One recurrent step followed by the linear projection to n_fc.
           [h_t_1,c_t_1,y_t_1]=layer1.run(x_t,h_tm1_1,c_tm1_1)
           y = T.dot(y_t_1, self.W_hy) + self.b_y
           return [h_t_1,c_t_1,y]

       X = T.tensor3() # batch of sequence of vector
       Y = T.tensor3() # batch of sequence of vector
       is_train = T.iscalar('is_train') # pseudo boolean for switching between training and prediction
       H = T.matrix(name="H",dtype=dtype) # initial hidden state
       C = T.matrix(name="C",dtype=dtype) # initial cell state

       # Gaussian input noise, applied only when is_train is non-zero.
       noise= rng.normal(size=(batch_size,sequence_length,self.n_in), std=0.008, avg=0.0,dtype=theano.config.floatX)
       X_train=noise+X

       X_tilde= T.switch(T.neq(is_train, 0), X_train, X)

       [h_t_1,c_t_1,y_vals], _ = theano.scan(fn=step_lstm,
                                         sequences=[X_tilde.dimshuffle(1,0,2)],
                                         outputs_info=[H, C, None])

       # Flatten (batch, time) into one axis for the mixture density layer.
       mdn_input=y_vals.dimshuffle(1,0,2)
       mdn_input=T.reshape(mdn_input,(batch_size*sequence_length,n_fc))
       Y_ll=T.reshape(Y,(batch_size*sequence_length,params['n_output']))

       mdn =  MDNoutputLayer(rng=rng,
                                           input=mdn_input,
                                           n_in=n_fc,
                                           n_out=params['n_output'],
                                           mu_activation=do_nothing,
                                         n_components=240)
       self.params=self.params+mdn.params

       cost = nll(mu = mdn.mu,
                 sigma = mdn.sigma,
                 mixing = mdn.mixing,
                 y = Y_ll)

       # L2 penalty over every element of every parameter. Indexing
       # param[0] / param[1] only covered the first two rows (or the first
       # two entries of a bias vector).
       L2_reg=0.0001
       L2_sqr = theano.shared(0.)
       for param in self.params:
           L2_sqr += T.sum(param ** 2)

       cost += L2_reg*L2_sqr


       _optimizer = optimizer(
            cost,
            self.params,
            lr=lr
        )

       #Sampling from the GMM
       component = rng.multinomial(pvals=mdn.mixing)
       component_mean =  T.sum(mdn.mu * component.dimshuffle(0,'x',1),axis=2)
       component_std = T.sum(mdn.sigma * component, axis=1, keepdims=True)
       samples=rng.normal(size=(batch_size*sequence_length,params['n_output']),avg = component_mean, std=component_std)

       self.output = T.reshape(samples,(batch_size,sequence_length,params['n_output']))

       self.train = theano.function(inputs=[X,Y,is_train,H,C],outputs=[cost,h_t_1[-1],c_t_1[-1]],updates=_optimizer.getUpdates(),allow_input_downcast=True)
       self.predictions = theano.function(inputs = [X,is_train,H,C], outputs = [self.output,h_t_1[-1],c_t_1[-1]],allow_input_downcast=True)
       self.n_param=count_params(self.params)
Esempio n. 19
0
    def __init__(self,rng,params,cost_function='mse',optimizer = RMSprop):
        """Assemble a CNN -> dense -> single-LSTM graph and compile its Theano functions.

        The input tensor is reshaped to ``(batch_size * seq_length, 3, 112, 112)``
        and passed through four conv/pool stages (dropout after the first), a
        1024-unit ``HiddenLayer`` and a 2048-wide ``LogisticRegression`` layer.
        ``lreg.y_pred`` is regrouped per sequence and scanned by one
        ``LSTMLayer`` whose output goes through the linear read-out
        ``W_hy`` / ``b_y``.

        Args:
            rng: random generator forwarded to every layer and initialiser.
            params: dict providing ``lr``, ``batch_size``, ``seq_length``,
                ``n_hidden`` and ``n_output``.
            cost_function: name forwarded to ``get_err_fn``.
            optimizer: called as ``optimizer(cost, params, lr=...)``; its
                ``getUpdates()`` result drives ``self.train``.
        """
        learning_rate = params["lr"]
        n_batch = params["batch_size"]
        n_steps = params["seq_length"]

        # Symbolic inputs shared by both compiled functions.
        X = T.tensor3()
        Y = T.tensor3()
        is_train = T.iscalar('is_train')  # pseudo boolean: training vs prediction

        # Settings common to every convolution stage.
        stride = (1, 1)
        drop_prob = 0.5
        padding = "same"
        pool = (2, 2)
        n_frames = n_batch * n_steps

        def _conv_pool(source, source_shape, kernel):
            # One convolution layer followed by one pooling layer.
            conv = ConvLayer(rng, source, kernel, source_shape, padding, stride, activation=nn.relu)
            pooled = PoolLayer(conv.output, pool_size=pool, input_shape=conv.output_shape)
            return conv, pooled

        # Stage 1: conv + pool + dropout on the flattened frame batch.
        frame_shape = (n_frames, 3, 112, 112)  # (samples, channels, rows, cols)
        c1, p1 = _conv_pool(X.reshape(frame_shape), frame_shape, (64, 3, 9, 9))
        dl1 = DropoutLayer(rng, input=p1.output, prob=drop_prob, is_train=is_train)

        # Stages 2-4: conv + pool; kernel depth follows the previous pooling output.
        c2, p2 = _conv_pool(dl1.output, p1.output_shape, (128, p1.output_shape[1], 3, 3))
        c3, p3 = _conv_pool(p2.output, p2.output_shape, (128, p2.output_shape[1], 3, 3))
        c4, p4 = _conv_pool(p3.output, p3.output_shape, (64, p3.output_shape[1], 3, 3))

        # Stage 5: fully connected layer over the flattened feature maps.
        flat_dim = reduce(lambda a, b: a * b, p4.output_shape[1:])
        h1 = HiddenLayer(rng, p4.output.flatten(2), flat_dim, 1024, activation=nn.relu)

        # Stage 6: regression layer (1024 -> 2048).
        lreg = LogisticRegression(rng, h1.output, 1024, 2048)

        # Recurrent part: sizes, linear read-out and the LSTM layer itself.
        self.n_in = 2048
        self.n_lstm = params['n_hidden']
        self.n_out = params['n_output']

        self.W_hy = init_weight((self.n_lstm, self.n_out), rng=rng, name='W_hy', sample='glorot')
        self.b_y = init_bias(self.n_out, rng=rng, sample='zero')

        lstm = LSTMLayer(rng, 0, self.n_in, self.n_lstm)
        self.params = (c1.params + c2.params + c3.params + c4.params
                       + h1.params + lreg.params + lstm.params
                       + [self.W_hy, self.b_y])

        def step_lstm(x_t, h_prev, c_prev):
            # One time step: LSTM update followed by the linear read-out.
            h_new, c_new, lstm_out = lstm.run(x_t, h_prev, c_prev)
            return [h_new, c_new, T.dot(lstm_out, self.W_hy) + self.b_y]

        # Initial recurrent states are supplied by the caller on every call.
        H = T.matrix(name="H", dtype=dtype)  # initial h state
        C = T.matrix(name="C", dtype=dtype)  # initial c state

        seq_features = lreg.y_pred.reshape((n_batch, n_steps, self.n_in))

        # scan iterates over the leading axis, so move the time axis to the front.
        (h_seq, c_seq, y_seq), _ = theano.scan(fn=step_lstm,
                                               sequences=[seq_features.dimshuffle(1, 0, 2)],
                                               outputs_info=[H, C, None])

        self.output = y_seq.dimshuffle(1, 0, 2)
        cost = get_err_fn(self, cost_function, Y)

        _optimizer = optimizer(cost, self.params, lr=learning_rate)

        # Both functions also return the last h/c states of the scan.
        self.train = theano.function(inputs=[X, Y, is_train, H, C],
                                     outputs=[cost, h_seq[-1], c_seq[-1]],
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X, is_train, H, C],
                                           outputs=[self.output, h_seq[-1], c_seq[-1]],
                                           allow_input_downcast=True)
        self.n_param = count_params(self.params)
Esempio n. 20
0
   def __init__(self,rng, params,cost_function='mse',optimizer = RMSprop):
       """Build a 3-layer LSTM with a mixture-density output and compile its Theano functions.

       The input (width fixed to 48) optionally receives Gaussian noise, is
       scanned by three stacked ``LSTMLayer`` objects (dropout between the
       first and the second), projected linearly to 512 units and handed to an
       ``MDNoutputLayer`` with 5 components. The training cost is ``nll`` of
       the mixture; ``self.output`` holds samples drawn from the mixture.

       :param rng: random generator used for initialisation, dropout masks,
           input noise and mixture sampling.
       :param params: dict read for ``batch_size``, ``seq_length``, ``lr``,
           ``n_hidden`` and ``n_output``.
       :param cost_function: accepted but not read in this constructor (the
           cost is always ``nll``).
       :param optimizer: called as ``optimizer(cost, params, lr=...)``; its
           ``getUpdates()`` result drives ``self.train``.
       """
       batch_size=params['batch_size']
       sequence_length=params["seq_length"]

       lr=params['lr']
       self.n_in = 48  # input feature width is hard-coded here
       self.n_lstm = params['n_hidden']
       self.n_out = params['n_output']
       n_fc=512  # width of the linear projection fed to the MDN layer

       X = T.tensor3() # batch of sequence of vector
       Y = T.tensor3() # batch of sequence of vector
       is_train = T.iscalar('is_train') # pseudo boolean for switching between training and prediction

       # Linear read-out from the last LSTM layer to the n_fc-wide MDN input.
       self.W_hy = init_weight((self.n_lstm, n_fc), rng=rng,name='W_hy', sample= 'glorot')
       self.b_y = init_bias(n_fc,rng=rng, sample='zero')

       self.numOfLayers=3
       layer1=LSTMLayer(rng,0,self.n_in,self.n_lstm)
       layer2=LSTMLayer(rng,1,self.n_lstm,self.n_lstm)
       layer3=LSTMLayer(rng,2,self.n_lstm,self.n_lstm)

       # Concatenation creates a new list, so the appends below do not touch the layers' lists.
       self.params = layer1.params+layer2.params+layer3.params
       self.params.append(self.W_hy)
       self.params.append(self.b_y)

       def step_lstm(x_t,mask,h_tm1_1,c_tm1_1,h_tm1_2,c_tm1_2,h_tm1_3,c_tm1_3):
           # One time step through the three layers; `mask` is this step's slice of the dropout mask.
           [h_t_1,c_t_1,y_t_1]=layer1.run(x_t,h_tm1_1,c_tm1_1)
           dl1=DropoutLayer(rng,input=y_t_1,prob=0.5,is_train=is_train,mask=mask)
           [h_t_2,c_t_2,y_t_2]=layer2.run(dl1.output,h_tm1_2,c_tm1_2)
           [h_t_3,c_t_3,y_t_3]=layer3.run(y_t_2,h_tm1_3,c_tm1_3)
           y = T.dot(y_t_3, self.W_hy) + self.b_y
           return [h_t_1,c_t_1,h_t_2,c_t_2,h_t_3,c_t_3,y]

       # Initial recurrent states are symbolic inputs supplied by the caller.
       h0_1 = T.matrix(name="h0_1",dtype=dtype) # initial h state, layer 1
       c0_1 = T.matrix(name="c0_1",dtype=dtype) # initial c state, layer 1
       h0_2 = T.matrix(name="h0_2",dtype=dtype) # initial h state, layer 2
       c0_2 = T.matrix(name="c0_2",dtype=dtype) # initial c state, layer 2
       h0_3 = T.matrix(name="h0_3",dtype=dtype) # initial h state, layer 3
       c0_3 = T.matrix(name="c0_3",dtype=dtype) # initial c state, layer 3

       # One binomial mask entry per (time step, sample, hidden unit); scan slices it along the first axis.
       mask_shape=(sequence_length,batch_size,self.n_lstm)
       p_1=0.5
       mask= rng.binomial(size=mask_shape, p=p_1, dtype=X.dtype)

       # Zero-mean Gaussian noise (std 0.008) is added to the input only when is_train != 0.
       noise= rng.normal(size=(batch_size,sequence_length,self.n_in), std=0.008, avg=0.0,dtype=theano.config.floatX)
       X_train=noise+X
       X_tilde= T.switch(T.neq(is_train, 0), X_train, X)

       # dimshuffle(1,0,2) moves the second axis first so scan iterates over it.
       [h_t_1,c_t_1,h_t_2,c_t_2,h_t_3,c_t_3,y_vals], _ = theano.scan(fn=step_lstm,
                                         sequences=[X_tilde.dimshuffle(1,0,2),mask],
                                         outputs_info=[h0_1, c0_1,h0_2, c0_2, h0_3, c0_3, None])

       # self.output = y_vals.dimshuffle(1,0,2)
       # Flatten batch and time into one row axis for the MDN layer and for the targets.
       mdn_input=y_vals.dimshuffle(1,0,2)
       mdn_input=T.reshape(mdn_input,(batch_size*sequence_length,n_fc))
       Y_ll=T.reshape(Y,(batch_size*sequence_length,params['n_output']))

       mdn =  MDNoutputLayer(rng=rng,
                                           input=mdn_input,
                                           n_in=n_fc,
                                           n_out=params['n_output'],
                                           mu_activation=do_nothing,
                                         n_components=5)
       self.params=self.params+mdn.params
       # self.params.append(mdn.W_mixing)
       # self.params.append(mdn.W_mu)
       # self.params.append(mdn.W_sigma)

       # Training objective: nll of the targets under the predicted mixture.
       cost = nll(mu = mdn.mu,
                 sigma = mdn.sigma,
                 mixing = mdn.mixing,
                 y = Y_ll) #+ L2_reg * self.frame_pred.L2_sqr


       _optimizer = optimizer(
            cost,
            self.params,
            lr=lr
        )

       #Sampling from the GMM
       # NOTE(review): presumably `component` is a one-hot row per sample and mdn.mu is
       # (rows, n_output, n_components) -- confirm against rng.multinomial and MDNoutputLayer.
       component = rng.multinomial(pvals=mdn.mixing)
       # Weight mu by the drawn component (broadcast over the inserted axis) and sum over axis 2.
       component_mean =  T.sum(mdn.mu * component.dimshuffle(0,'x',1),axis=2)
       # Same selection for sigma; keepdims leaves a trailing axis of length 1.
       component_std = T.sum(mdn.sigma * component, axis=1, keepdims=True)
       samples=rng.normal(size=(batch_size*sequence_length,params['n_output']),avg = component_mean, std=component_std)
       self.output = T.reshape(samples,(batch_size,sequence_length,params['n_output']))

       # Both functions take the six initial states and return the last h/c state of each layer.
       self.train = theano.function(inputs=[X,Y,is_train,h0_1, c0_1,h0_2, c0_2, h0_3, c0_3],outputs=[cost,h_t_1[-1],c_t_1[-1],h_t_2[-1],c_t_2[-1],h_t_3[-1],c_t_3[-1]],updates=_optimizer.getUpdates(),allow_input_downcast=True)
       self.predictions = theano.function(inputs = [X,is_train,h0_1, c0_1,h0_2, c0_2, h0_3, c0_3], outputs = [self.output,h_t_1[-1],c_t_1[-1],h_t_2[-1],c_t_2[-1],h_t_3[-1],c_t_3[-1]],allow_input_downcast=True)
       self.n_param=count_params(self.params)
Esempio n. 21
0
    def __init__(self,
                 n_in,
                 n_lstm,
                 n_out,
                 lr=0.05,
                 batch_size=64,
                 single_output=True,
                 output_activation=theano.tensor.nnet.relu,
                 cost_function='nll',
                 optimizer=RMSprop):
        """Build a single LSTM followed by two tanh FC layers and compile it.

        The LSTM gates also receive the cell state through full matrices
        (``W_ci``, ``W_cf``, ``W_co``). The per-step read-out (width 1024) is
        multiplied element-wise with the input and then passed through
        FC 1024 -> 512 -> ``n_out``.

        Args:
            n_in: input feature width.
            n_lstm: number of LSTM units.
            n_out: width of the final output.
            lr: learning rate forwarded to ``optimizer``.
            batch_size: fixes the shape of the zero initial states ``h0``/``c0``.
            single_output: accepted for interface compatibility; not read here.
            output_activation: accepted for interface compatibility; not read
                here (the read-out and FC layers use ``tanh``).
            cost_function: name forwarded to ``get_err_fn``.
            optimizer: called as ``optimizer(cost, params, lr=lr)``; its
                ``getUpdates()`` result drives ``self.train``.
        """

        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out
        self.nzeros_fc1 = 500
        self.n_fc1 = 1024
        self.n_fc2 = 512

        # Fully connected stack applied after the LSTM read-out.
        self.W_fc1 = init_weight((self.n_fc1, self.n_fc2), 'W_fc1')
        self.b_fc1 = init_bias(self.n_fc2, sample='zero')

        self.W_fc2 = init_weight((self.n_fc2, self.n_out), 'W_fc2')
        self.b_fc2 = init_bias(self.n_out, sample='zero')

        # LSTM parameters: input (W_x*), recurrent (W_h*) and cell-to-gate (W_c*) matrices.
        self.W_xi = init_weight((self.n_in, self.n_lstm), 'W_xi')
        self.W_hi = init_weight((self.n_lstm, self.n_lstm), 'W_hi', 'svd')
        self.W_ci = init_weight((self.n_lstm, self.n_lstm), 'W_ci', 'svd')
        self.b_i = init_bias(self.n_lstm, sample='zero')
        self.W_xf = init_weight((self.n_in, self.n_lstm), 'W_xf')
        self.W_hf = init_weight((self.n_lstm, self.n_lstm), 'W_hf', 'svd')
        self.W_cf = init_weight((self.n_lstm, self.n_lstm), 'W_cf', 'svd')
        self.b_f = init_bias(self.n_lstm, sample='one')
        self.W_xc = init_weight((self.n_in, self.n_lstm), 'W_xc')
        self.W_hc = init_weight((self.n_lstm, self.n_lstm), 'W_hc', 'svd')
        self.b_c = init_bias(self.n_lstm, sample='zero')
        self.W_xo = init_weight((self.n_in, self.n_lstm), 'W_xo')
        self.W_ho = init_weight((self.n_lstm, self.n_lstm), 'W_ho', 'svd')
        self.W_co = init_weight((self.n_lstm, self.n_lstm), 'W_co', 'svd')
        self.b_o = init_bias(self.n_lstm, sample='zero')
        # Read-out from the hidden state to the first FC width.
        self.W_hy = init_weight((self.n_lstm, self.n_fc1), 'W_hy')
        self.b_y = init_bias(self.n_fc1, sample='zero')

        self.params = [
            self.W_xi, self.W_hi, self.W_ci, self.b_i, self.W_xf, self.W_hf,
            self.W_cf, self.b_f, self.W_xc, self.W_hc, self.b_c, self.W_xo,
            self.W_ho, self.W_co, self.b_o, self.W_hy, self.b_y, self.W_fc1,
            self.b_fc1, self.W_fc2, self.b_fc2
        ]

        def step_lstm(x_t, h_tm1, c_tm1):
            # Input and forget gates see the previous cell state; the output
            # gate sees the freshly updated one.
            i_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xi) + T.dot(h_tm1, self.W_hi) +
                T.dot(c_tm1, self.W_ci) + self.b_i)
            f_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xf) + T.dot(h_tm1, self.W_hf) +
                T.dot(c_tm1, self.W_cf) + self.b_f)
            c_t = f_t * c_tm1 + i_t * T.tanh(
                T.dot(x_t, self.W_xc) + T.dot(h_tm1, self.W_hc) + self.b_c)
            o_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xo) + T.dot(h_tm1, self.W_ho) +
                T.dot(c_t, self.W_co) + self.b_o)
            h_t = o_t * T.tanh(c_t)
            y_t = T.tanh(T.dot(h_t, self.W_hy) + self.b_y)
            return [h_t, c_t, y_t]

        X = T.tensor3()  # batch of sequence of vector
        Y = T.tensor3(
        )  # batch of sequence of vector (should be 0 when X is not null)
        # Zero initial states with a fixed (batch_size, n_lstm) shape.
        h0 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                             dtype=dtype))  # initial hidden state
        c0 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                             dtype=dtype))  # initial cell state

        # dimshuffle(1, 0, 2) moves the second axis first so scan iterates over it.
        [h_vals, c_vals,
         y_vals], _ = theano.scan(fn=step_lstm,
                                  sequences=X.dimshuffle(1, 0, 2),
                                  outputs_info=[h0, c0, None])

        # NOTE(review): element-wise product of the n_fc1-wide read-out with the
        # input; this presumably requires n_in == n_fc1 -- confirm with callers.
        fc_in = y_vals * X.dimshuffle(1, 0, 2)
        #Hidden layer
        fc1_out = T.tanh(T.dot(fc_in, self.W_fc1) + self.b_fc1)
        fc2_out = T.tanh(T.dot(fc1_out, self.W_fc2) + self.b_fc2)

        self.output = fc2_out.dimshuffle(1, 0, 2)

        cost = get_err_fn(self, cost_function, Y)

        _optimizer = optimizer(cost, self.params, lr=lr)

        self.train = theano.function(inputs=[X, Y],
                                     outputs=cost,
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X],
                                           outputs=self.output,
                                           allow_input_downcast=True)

        # Number of scalar entries across everything in self.params, derived
        # from the shapes requested from init_weight/init_bias above.
        n_input_weights = 4 * n_in * n_lstm  # W_x{i,f,c,o}
        n_square_weights = 7 * n_lstm * n_lstm  # W_h{i,f,c,o} and W_c{i,f,o}
        n_gate_biases = 4 * n_lstm  # b_{i,f,c,o}
        n_readout = n_lstm * self.n_fc1 + self.n_fc1  # W_hy, b_y
        n_fc = (self.n_fc1 * self.n_fc2 + self.n_fc2  # W_fc1, b_fc1
                + self.n_fc2 * n_out + n_out)  # W_fc2, b_fc2
        self.n_param = (n_input_weights + n_square_weights + n_gate_biases
                        + n_readout + n_fc)
Esempio n. 22
0
    def __init__(self,rng,params,cost_function='mse',optimizer = RMSprop):
        """Assemble a 3-stage CNN feeding a 3-layer LSTM stack and compile it.

        The input tensor is reshaped to ``(batch_size * seq_length, 1, 120, 60)``,
        run through three conv/pool stages (dropout after the first) and a
        1024-unit ``HiddenLayer``. The features are regrouped per sequence and
        scanned by three stacked ``LSTMLayer`` objects (dropout between the
        first and second) followed by the linear read-out ``W_hy`` / ``b_y``.

        Args:
            rng: random generator forwarded to layers, initialisers and the
                dropout mask.
            params: dict providing ``lr``, ``n_hidden``, ``n_output``,
                ``batch_size`` and ``seq_length``.
            cost_function: name forwarded to ``get_err_fn``.
            optimizer: called as ``optimizer(cost, params, lr=...)``; its
                ``getUpdates()`` result drives ``self.train``.
        """
        learning_rate = params["lr"]
        hidden_units = params['n_hidden']
        output_units = params['n_output']
        n_batch = params["batch_size"]
        n_steps = params["seq_length"]

        # Symbolic inputs of the compiled functions.
        X = T.tensor3()
        Y = T.tensor3()
        is_train = T.iscalar('is_train')  # pseudo boolean: training vs prediction

        # Settings common to every convolution stage.
        stride = (1, 1)
        drop_prob = 0.5
        padding = "valid"
        pool = (2, 2)
        frame_shape = (n_batch * n_steps, 1, 120, 60)  # (samples, channels, rows, cols)

        # Stage 1: conv + pool + dropout.
        c1 = ConvLayer(rng, X.reshape(frame_shape), (64, 1, 9, 9), frame_shape,
                       padding, stride, activation=nn.relu)
        p1 = PoolLayer(c1.output, pool_size=pool, input_shape=c1.output_shape)
        conv_drop = DropoutLayer(rng, input=p1.output, prob=drop_prob, is_train=is_train)
        # When is_train == 0 the pooled activations are scaled by the keep
        # probability instead of using the dropout layer's output.
        keep_prob = 1. - drop_prob
        scaled_pool = p1.output * keep_prob
        stage1_out = T.switch(T.neq(is_train, 0), conv_drop.output, scaled_pool)

        # Stage 2: conv + pool.
        c2 = ConvLayer(rng, stage1_out, (128, p1.output_shape[1], 3, 3), p1.output_shape,
                       padding, stride, activation=nn.relu)
        p2 = PoolLayer(c2.output, pool_size=pool, input_shape=c2.output_shape)

        # Stage 3: conv + pool.
        c3 = ConvLayer(rng, p2.output, (128, p2.output_shape[1], 3, 3), p2.output_shape,
                       padding, stride, activation=nn.relu)
        p3 = PoolLayer(c3.output, pool_size=pool, input_shape=c3.output_shape)

        # Stage 4: fully connected layer over the flattened feature maps.
        feature_dim = 1024
        flat_dim = reduce(lambda a, b: a * b, p3.output_shape[1:])
        h1 = HiddenLayer(rng, p3.output.flatten(2), flat_dim, feature_dim, activation=nn.relu)
        seq_features = h1.output.reshape((n_batch, n_steps, feature_dim))

        # Stage 5: stacked LSTM layers with a linear read-out.
        self.n_in = feature_dim
        self.n_lstm = hidden_units
        self.n_out = output_units
        self.W_hy = init_weight((self.n_lstm, self.n_out), rng=rng, name='W_hy', sample='glorot')
        self.b_y = init_bias(self.n_out, rng=rng, sample='zero')

        lstm1 = LSTMLayer(rng, 0, self.n_in, self.n_lstm)
        lstm2 = LSTMLayer(rng, 1, self.n_lstm, self.n_lstm)
        lstm3 = LSTMLayer(rng, 2, self.n_lstm, self.n_lstm)

        self.params = lstm1.params + lstm2.params + lstm3.params + [self.W_hy, self.b_y]

        def step_lstm(x_t, mask, h1_prev, c1_prev, h2_prev, c2_prev, h3_prev, c3_prev):
            # One time step through the stack; `mask` is this step's dropout mask slice.
            h1_new, c1_new, out1 = lstm1.run(x_t, h1_prev, c1_prev)
            dropped = DropoutLayer(rng, input=out1, prob=0.5, is_train=is_train, mask=mask)
            h2_new, c2_new, out2 = lstm2.run(dropped.output, h2_prev, c2_prev)
            h3_new, c3_new, out3 = lstm3.run(out2, h3_prev, c3_prev)
            readout = T.dot(out3, self.W_hy) + self.b_y
            return [h1_new, c1_new, h2_new, c2_new, h3_new, c3_new, readout]

        def _zero_state():
            # Fixed-shape zero state stored as a Theano shared variable.
            return shared(np.zeros(shape=(n_batch, self.n_lstm), dtype=dtype))

        # Order matches step_lstm's recurrent arguments: h/c for layers 1, 2, 3.
        initial_states = [_zero_state() for _ in range(6)]

        # One mask entry per (time step, sample, hidden unit).
        step_masks = rng.binomial(size=(n_steps, n_batch, self.n_lstm), p=drop_prob, dtype=X.dtype)

        # scan iterates over the leading axis, so move the time axis to the front.
        scan_outputs, _ = theano.scan(fn=step_lstm,
                                      sequences=[seq_features.dimshuffle(1, 0, 2), step_masks],
                                      outputs_info=initial_states + [None])
        y_seq = scan_outputs[-1]

        self.output = y_seq.dimshuffle(1, 0, 2)

        # Convolutional and dense parameters go in front of the recurrent ones.
        self.params = c1.params + c2.params + c3.params + h1.params + self.params

        cost = get_err_fn(self, cost_function, Y)
        _optimizer = optimizer(cost, self.params, lr=learning_rate)
        self.train = theano.function(inputs=[X, Y, is_train],
                                     outputs=cost,
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X, is_train],
                                           outputs=self.output,
                                           allow_input_downcast=True)
        self.n_param = count_params(self.params)
Esempio n. 23
0
    def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
        """Build a 3-stage CNN feeding a 3-layer LSTM stack and compile it.

        The input tensor is reshaped to ``(batch_size * seq_length, 1, 120, 60)``,
        run through three conv/pool stages (dropout after the first) and a
        1024-unit ``HiddenLayer``. The features are regrouped per sequence and
        scanned by three stacked ``LSTMLayer`` objects (dropout between the
        first and second) followed by the linear read-out ``W_hy`` / ``b_y``.

        :param rng: random generator forwarded to layers, initialisers and the
            dropout mask.
        :param params: dict read for ``lr``, ``n_hidden``, ``n_output``,
            ``batch_size`` and ``seq_length``.
        :param cost_function: name forwarded to ``get_err_fn``.
        :param optimizer: called as ``optimizer(cost, params, lr=lr)``; its
            ``getUpdates()`` result drives ``self.train``.
        """

        lr = params["lr"]
        n_lstm = params['n_hidden']
        n_out = params['n_output']
        batch_size = params["batch_size"]
        sequence_length = params["seq_length"]

        # Symbolic minibatch inputs.
        X = T.tensor3()  # batch of sequence of vector
        Y = T.tensor3()  # batch of sequence of vector
        is_train = T.iscalar(
            'is_train'
        )  # pseudo boolean for switching between training and prediction

        #CNN global parameters.
        subsample = (1, 1)
        p_1 = 0.5
        border_mode = "valid"
        # Every frame of every sequence becomes one CNN sample.
        cnn_batch_size = batch_size * sequence_length
        pool_size = (2, 2)

        #Layer1: conv2+pool+drop
        filter_shape = (64, 1, 9, 9)
        input_shape = (cnn_batch_size, 1, 120, 60
                       )  #input_shape= (samples, channels, rows, cols)
        input = X.reshape(input_shape)
        c1 = ConvLayer(rng,
                       input,
                       filter_shape,
                       input_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p1 = PoolLayer(c1.output,
                       pool_size=pool_size,
                       input_shape=c1.output_shape)
        dl1 = DropoutLayer(rng, input=p1.output, prob=p_1, is_train=is_train)
        # When is_train == 0 the pooled activations are scaled by the keep
        # probability instead of using the dropout layer's output.
        retain_prob = 1. - p_1
        test_output = p1.output * retain_prob
        d1_output = T.switch(T.neq(is_train, 0), dl1.output, test_output)

        #Layer2: conv2+pool
        filter_shape = (128, p1.output_shape[1], 3, 3)
        c2 = ConvLayer(rng,
                       d1_output,
                       filter_shape,
                       p1.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p2 = PoolLayer(c2.output,
                       pool_size=pool_size,
                       input_shape=c2.output_shape)

        #Layer3: conv2+pool
        filter_shape = (128, p2.output_shape[1], 3, 3)
        c3 = ConvLayer(rng,
                       p2.output,
                       filter_shape,
                       p2.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p3 = PoolLayer(c3.output,
                       pool_size=pool_size,
                       input_shape=c3.output_shape)

        #Layer4: hidden
        # Product of all non-batch dimensions = width of the flattened feature maps.
        n_in = reduce(lambda x, y: x * y, p3.output_shape[1:])
        x_flat = p3.output.flatten(2)
        h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)
        # From here on n_in is the width of the hidden layer output.
        n_in = 1024
        rnn_input = h1.output.reshape((batch_size, sequence_length, n_in))

        #Layer5: stacked LSTM layers with a linear read-out
        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out
        self.W_hy = init_weight((self.n_lstm, self.n_out),
                                rng=rng,
                                name='W_hy',
                                sample='glorot')
        self.b_y = init_bias(self.n_out, rng=rng, sample='zero')

        layer1 = LSTMLayer(rng, 0, self.n_in, self.n_lstm)
        layer2 = LSTMLayer(rng, 1, self.n_lstm, self.n_lstm)
        layer3 = LSTMLayer(rng, 2, self.n_lstm, self.n_lstm)

        # Concatenation creates a new list, so the appends below do not touch
        # the layers' own parameter lists.
        self.params = layer1.params + layer2.params + layer3.params
        self.params.append(self.W_hy)
        self.params.append(self.b_y)

        def step_lstm(x_t, mask, h_tm1_1, c_tm1_1, h_tm1_2, c_tm1_2, h_tm1_3,
                      c_tm1_3):
            # One time step through the stack; `mask` is this step's slice of
            # the dropout mask passed to scan as a sequence.
            [h_t_1, c_t_1, y_t_1] = layer1.run(x_t, h_tm1_1, c_tm1_1)
            dl1 = DropoutLayer(rng,
                               input=y_t_1,
                               prob=0.5,
                               is_train=is_train,
                               mask=mask)
            [h_t_2, c_t_2, y_t_2] = layer2.run(dl1.output, h_tm1_2, c_tm1_2)
            [h_t_3, c_t_3, y_t_3] = layer3.run(y_t_2, h_tm1_3, c_tm1_3)
            y = T.dot(y_t_3, self.W_hy) + self.b_y
            return [h_t_1, c_t_1, h_t_2, c_t_2, h_t_3, c_t_3, y]

        # Zero initial states with a fixed (batch_size, n_lstm) shape, stored
        # as shared variables rather than taken as function inputs.
        h0_1 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial hidden state
        c0_1 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial cell state
        h0_2 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial hidden state
        c0_2 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial cell state
        h0_3 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial hidden state
        c0_3 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                               dtype=dtype))  # initial cell state

        # One binomial mask entry per (time step, sample, hidden unit).
        mask_shape = (sequence_length, batch_size, self.n_lstm)
        p_1 = 0.5
        mask = rng.binomial(size=mask_shape, p=p_1, dtype=X.dtype)

        # dimshuffle(1, 0, 2) swaps the first two axes:
        # (batch_size, sequence_length, n_in) -> (sequence_length, batch_size, n_in)
        # so that scan, which iterates over the leading axis, hands step_lstm
        # one (batch_size, n_in) slice per time step.

        [h_t_1, c_t_1, h_t_2, c_t_2, h_t_3, c_t_3, y_vals], _ = theano.scan(
            fn=step_lstm,
            sequences=[rnn_input.dimshuffle(1, 0, 2), mask],
            outputs_info=[h0_1, c0_1, h0_2, c0_2, h0_3, c0_3, None])

        # Back to (batch_size, sequence_length, n_out).
        self.output = y_vals.dimshuffle(1, 0, 2)

        # Convolutional and dense parameters go in front of the recurrent ones.
        self.params = c1.params + c2.params + c3.params + h1.params + self.params

        cost = get_err_fn(self, cost_function, Y)
        _optimizer = optimizer(cost, self.params, lr=lr)
        self.train = theano.function(inputs=[X, Y, is_train],
                                     outputs=cost,
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X, is_train],
                                           outputs=self.output,
                                           allow_input_downcast=True)
        self.n_param = count_params(self.params)
Esempio n. 24
0
    def __init__(self,rng,params,cost_function='mse',optimizer = RMSprop):
        """Build a 3-stage CNN feeding one LSTM layer, with an L2 penalty, and compile it.

        The input tensor is reshaped to ``(batch_size * seq_length, 1, 120, 60)``,
        run through three conv/pool stages (dropout after the first) and a
        1024-unit ``HiddenLayer``. The features are regrouped per sequence and
        scanned by one ``LSTMLayer`` followed by the linear read-out
        ``W_hy`` / ``b_y``. The cost from ``get_err_fn`` is augmented with
        ``1e-4 * sum(param ** 2)`` over every entry of ``self.params``.

        :param rng: random generator forwarded to layers and initialisers.
        :param params: dict read for ``lr``, ``n_hidden``, ``n_output``,
            ``batch_size`` and ``seq_length``.
        :param cost_function: name forwarded to ``get_err_fn``.
        :param optimizer: called as ``optimizer(cost, params, lr=lr)``; its
            ``getUpdates()`` result drives ``self.train``.
        """

        lr=params["lr"]
        n_lstm=params['n_hidden']
        n_out=params['n_output']
        batch_size=params["batch_size"]
        sequence_length=params["seq_length"]

        X = T.tensor3() # batch of sequence of vector
        Y = T.tensor3() # batch of sequence of vector
        is_train = T.iscalar('is_train') # pseudo boolean for switching between training and prediction

        #CNN global parameters.
        subsample=(1,1)
        p_1=0.5
        border_mode="valid"
        # Every frame of every sequence becomes one CNN sample.
        cnn_batch_size=batch_size*sequence_length
        pool_size=(2,2)

        #Layer1: conv2+pool+drop
        filter_shape=(64,1,9,9)
        input_shape=(cnn_batch_size,1,120,60) #input_shape= (samples, channels, rows, cols)
        input= X.reshape(input_shape)
        c1=ConvLayer(rng, input,filter_shape, input_shape,border_mode,subsample, activation=nn.relu)
        p1=PoolLayer(c1.output,pool_size=pool_size,input_shape=c1.output_shape)
        dl1=DropoutLayer(rng,input=p1.output,prob=p_1,is_train=is_train)

        #Layer2: conv2+pool
        filter_shape=(128,p1.output_shape[1],3,3)
        c2=ConvLayer(rng, dl1.output, filter_shape,p1.output_shape,border_mode,subsample, activation=nn.relu)
        p2=PoolLayer(c2.output,pool_size=pool_size,input_shape=c2.output_shape)


        #Layer3: conv2+pool
        filter_shape=(128,p2.output_shape[1],3,3)
        c3=ConvLayer(rng, p2.output,filter_shape,p2.output_shape,border_mode,subsample, activation=nn.relu)
        p3=PoolLayer(c3.output,pool_size=pool_size,input_shape=c3.output_shape)

        #Layer4: hidden
        # Product of all non-batch dimensions = width of the flattened feature maps.
        n_in= reduce(lambda x, y: x*y, p3.output_shape[1:])
        x_flat = p3.output.flatten(2)
        h1=HiddenLayer(rng,x_flat,n_in,1024,activation=nn.relu)
        # From here on n_in is the width of the hidden layer output.
        n_in=1024
        rnn_input = h1.output.reshape((batch_size,sequence_length, n_in))


        #Layer5: LSTM
        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out
        self.W_hy = init_weight((self.n_lstm, self.n_out), rng=rng,name='W_hy', sample= 'glorot')
        self.b_y = init_bias(self.n_out,rng=rng, sample='zero')

        layer1=LSTMLayer(rng,0,self.n_in,self.n_lstm)

        # Build a new list: binding self.params to layer1.params and appending
        # to it would also grow the layer's own parameter list.
        self.params = layer1.params + [self.W_hy, self.b_y]

        def step_lstm(x_t,h_tm1,c_tm1):
           # One time step: LSTM update followed by the linear read-out.
           [h_t,c_t,y_t]=layer1.run(x_t,h_tm1,c_tm1)
           y = T.dot(y_t, self.W_hy) + self.b_y
           return [h_t,c_t,y]

        # Initial recurrent states are symbolic inputs supplied by the caller.
        H = T.matrix(name="H",dtype=dtype) # initial h state
        C = T.matrix(name="C",dtype=dtype) # initial c state

        # dimshuffle(1,0,2) moves the sequence axis first so scan iterates over time steps.
        [h_t,c_t,y_vals], _ = theano.scan(fn=step_lstm,
                                         sequences=[rnn_input.dimshuffle(1,0,2)],
                                         outputs_info=[H, C, None])

        self.output = y_vals.dimshuffle(1,0,2)

        # Convolutional and dense parameters go in front of the recurrent ones.
        self.params =c1.params+c2.params+c3.params+h1.params+self.params

        cost=get_err_fn(self,cost_function,Y)
        # L2 penalty over every entry of self.params (biases included).
        # NOTE(review): the accumulator starts from theano.shared(0.), whose
        # dtype may differ from floatX -- confirm this is intended.
        L2_reg=0.0001
        L2_sqr = theano.shared(0.)
        for param in self.params:
            L2_sqr += (T.sum(param ** 2))

        cost += L2_reg*L2_sqr
        _optimizer = optimizer(cost, self.params, lr=lr)
        # Both functions also return the last h/c states of the scan.
        self.train = theano.function(inputs=[X,Y,is_train,H,C],outputs=[cost,h_t[-1],c_t[-1]],updates=_optimizer.getUpdates(),allow_input_downcast=True)
        self.predictions = theano.function(inputs = [X,is_train,H,C], outputs = [self.output,h_t[-1],c_t[-1]],allow_input_downcast=True)
        self.n_param=count_params(self.params)
Esempio n. 25
0
   def __init__(self, n_in, n_lstm, n_out, lr=0.05, batch_size=64, output_activation=theano.tensor.nnet.relu,cost_function='mse',optimizer = RMSprop):
       """Build a 2-layer LSTM followed by three tanh FC layers and compile it.

       Both LSTM layers feed the cell state to their gates through full
       matrices (``W_c*``). The second layer's hidden state goes through a
       tanh read-out (``W_hy_2``) and then through FC 256 -> 256 -> 256 ->
       ``n_out``; that final tensor is ``self.output`` and is what
       ``self.predictions`` returns.

       :param n_in: input feature width.
       :param n_lstm: number of units in each LSTM layer.
       :param n_out: width of the LSTM read-out and of the final output.
       :param lr: learning rate forwarded to ``optimizer``.
       :param batch_size: fixes the shape of the zero initial states.
       :param output_activation: accepted for interface compatibility; not
           read here (read-out and FC layers use ``tanh``).
       :param cost_function: name forwarded to ``get_err_fn``.
       :param optimizer: called as ``optimizer(cost, params, lr=lr)``; its
           ``getUpdates()`` result drives ``self.train``.
       """
       self.n_in = n_in
       self.n_lstm = n_lstm
       self.n_out = n_out

       self.n_fc1=256
       self.n_fc2=256
       self.n_fc3=256

       # Fully connected stack applied after the LSTM read-out.
       self.W_fc1 = init_weight((self.n_fc1, self.n_fc2),'W_fc1', 'glorot')
       self.b_fc1 = init_bias(self.n_fc2, sample='zero')

       self.W_fc2 = init_weight((self.n_fc2, self.n_fc3),'W_fc2', 'glorot')
       self.b_fc2 =init_bias(self.n_fc3, sample='zero')

       self.W_fc3 = init_weight((self.n_fc3, self.n_out),'w_fc3', 'glorot')
       self.b_fc3 =init_bias(self.n_out, sample='zero')

       #1st layer: input (W_x*), recurrent (W_h*) and cell-to-gate (W_c*) matrices
       self.W_xi_1 = init_weight((self.n_in, self.n_lstm), 'W_xi_1', 'glorot')
       self.W_hi_1 = init_weight((self.n_lstm, self.n_lstm), 'W_hi_1', 'glorot')
       self.W_ci_1 = init_weight((self.n_lstm, self.n_lstm), 'W_ci_1', 'glorot')
       self.b_i_1 = init_bias(self.n_lstm, sample='zero')
       self.W_xf_1 = init_weight((self.n_in, self.n_lstm), 'W_xf_1', 'glorot')
       self.W_hf_1 = init_weight((self.n_lstm, self.n_lstm), 'W_hf_1', 'glorot')
       self.W_cf_1 = init_weight((self.n_lstm, self.n_lstm), 'W_cf_1', 'glorot')
       self.b_f_1 = init_bias(self.n_lstm, sample='one')
       self.W_xc_1 = init_weight((self.n_in, self.n_lstm), 'W_xc_1', 'glorot')
       self.W_hc_1 = init_weight((self.n_lstm, self.n_lstm), 'W_hc_1', 'glorot')
       self.b_c_1 = init_bias(self.n_lstm, sample='zero')
       self.W_xo_1 = init_weight((self.n_in, self.n_lstm), 'W_xo_1', 'glorot')
       self.W_ho_1 = init_weight((self.n_lstm, self.n_lstm), 'W_ho_1', 'glorot')
       self.W_co_1 = init_weight((self.n_lstm, self.n_lstm), 'W_co_1', 'glorot')
       self.b_o_1 = init_bias(self.n_lstm, sample='zero')
       #self.W_hy_1 = init_weight((self.n_lstm, self.n_out), 'W_hy_1')
       #self.b_y_1 = init_bias(self.n_lstm, sample='zero')

       #2nd layer: its input is the first layer's hidden state, so W_x* are square
       self.W_xi_2 = init_weight((self.n_lstm, self.n_lstm), 'W_xi_2', 'glorot')
       self.W_hi_2 = init_weight((self.n_lstm, self.n_lstm), 'W_hi_2', 'glorot')
       self.W_ci_2 = init_weight((self.n_lstm, self.n_lstm), 'W_ci_2', 'glorot')
       self.b_i_2 = init_bias(self.n_lstm, sample='zero')
       self.W_xf_2 = init_weight((self.n_lstm, self.n_lstm), 'W_xf_2', 'glorot')
       self.W_hf_2 = init_weight((self.n_lstm, self.n_lstm), 'W_hf_2', 'glorot')
       self.W_cf_2 = init_weight((self.n_lstm, self.n_lstm), 'W_cf_2', 'glorot')
       self.b_f_2 = init_bias(self.n_lstm, sample='one')
       self.W_xc_2 = init_weight((self.n_lstm, self.n_lstm), 'W_xc_2', 'glorot')
       self.W_hc_2 = init_weight((self.n_lstm, self.n_lstm), 'W_hc_2', 'glorot')
       self.b_c_2 = init_bias(self.n_lstm, sample='zero')
       self.W_xo_2 = init_weight((self.n_lstm, self.n_lstm), 'W_xo_2', 'glorot')
       self.W_ho_2 = init_weight((self.n_lstm, self.n_lstm), 'W_ho_2', 'glorot')
       self.W_co_2 = init_weight((self.n_lstm, self.n_lstm), 'W_co_2', 'glorot')
       self.b_o_2 = init_bias(self.n_lstm, sample='zero')
       self.W_hy_2 = init_weight((self.n_lstm, self.n_out), 'W_hy_2', 'glorot')
       self.b_y_2 = init_bias(self.n_out, sample='zero')


       self.params = [
                      self.W_xi_1, self.W_hi_1, self.W_ci_1, self.b_i_1,
                      self.W_xf_1, self.W_hf_1, self.W_cf_1, self.b_f_1,
                      self.W_xc_1, self.W_hc_1, self.b_c_1, self.W_xo_1, self.W_ho_1,
                      self.W_co_1, self.b_o_1, # self.W_hy_1, self.b_y_1,
                      self.W_xi_2, self.W_hi_2, self.W_ci_2, self.b_i_2,
                      self.W_xf_2, self.W_hf_2, self.W_cf_2, self.b_f_2,
                      self.W_xc_2, self.W_hc_2, self.b_c_2, self.W_xo_2,  self.W_ho_2,
                      self.W_co_2, self.b_o_2,  self.W_hy_2, self.b_y_2,
                      self.W_fc1, self.b_fc1,self.W_fc2, self.b_fc2,self.W_fc3, self.b_fc3
                      ]


       def step_lstm(x_t, h_tm1, c_tm1,h_tm2,c_tm2):
           # h_tm1/c_tm1 are the previous states of layer 1, h_tm2/c_tm2 those of layer 2.
           i_t_1 = T.nnet.sigmoid(T.dot(x_t, self.W_xi_1) + T.dot(h_tm1, self.W_hi_1) + T.dot(c_tm1, self.W_ci_1) + self.b_i_1)
           f_t_1 = T.nnet.sigmoid(T.dot(x_t, self.W_xf_1) + T.dot(h_tm1, self.W_hf_1) + T.dot(c_tm1, self.W_cf_1) + self.b_f_1)
           c_t_1 = f_t_1 * c_tm1 + i_t_1 * T.tanh(T.dot(x_t, self.W_xc_1) + T.dot(h_tm1, self.W_hc_1) + self.b_c_1)
           o_t_1 = T.nnet.sigmoid(T.dot(x_t, self.W_xo_1) + T.dot(h_tm1, self.W_ho_1) + T.dot(c_t_1, self.W_co_1) + self.b_o_1)
           h_t_1 = o_t_1 * T.tanh(c_t_1)
           #y_t_1 = output_activation(T.dot(h_t_1, self.W_hy_1) + self.b_y_1)

           # Layer 2 consumes layer 1's hidden state as its input.
           i_t_2 = T.nnet.sigmoid(T.dot(h_t_1, self.W_xi_2) + T.dot(h_tm2, self.W_hi_2) + T.dot(c_tm2, self.W_ci_2) + self.b_i_2)
           f_t_2 = T.nnet.sigmoid(T.dot(h_t_1, self.W_xf_2) + T.dot(h_tm2, self.W_hf_2) + T.dot(c_tm2, self.W_cf_2) + self.b_f_2)
           c_t_2 = f_t_2 * c_tm2 + i_t_2 * T.tanh(T.dot(h_t_1, self.W_xc_2) + T.dot(h_tm2, self.W_hc_2) + self.b_c_2)
           o_t_2 = T.nnet.sigmoid(T.dot(h_t_1, self.W_xo_2) + T.dot(h_tm2, self.W_ho_2) + T.dot(c_t_2, self.W_co_2) + self.b_o_2)
           h_t_2 = o_t_2 * T.tanh(c_t_2)
           y_t_2 = T.tanh(T.dot(h_t_2, self.W_hy_2) + self.b_y_2)

           return [h_t_1,c_t_1,h_t_2,c_t_2, y_t_2]

       X = T.tensor3() # batch of sequence of vector
       Y = T.tensor3() # batch of sequence of vector (should be 0 when X is not null)
       # Zero initial states with a fixed (batch_size, n_lstm) shape.
       h0_1 = shared(np.zeros(shape=(batch_size,self.n_lstm), dtype=dtype)) # initial hidden state, layer 1
       c0_1 = shared(np.zeros(shape=(batch_size,self.n_lstm), dtype=dtype)) # initial cell state, layer 1
       h0_2 = shared(np.zeros(shape=(batch_size,self.n_lstm), dtype=dtype)) # initial hidden state, layer 2
       c0_2 = shared(np.zeros(shape=(batch_size,self.n_lstm), dtype=dtype)) # initial cell state, layer 2

       # dimshuffle(1,0,2) moves the second axis first so scan iterates over it.
       [h_vals_1, c_vals_1,h_vals_2, c_vals_2, y_vals], _ = theano.scan(fn=step_lstm,
                                         sequences=X.dimshuffle(1,0,2),
                                         outputs_info=[h0_1, c0_1,h0_2, c0_2, None])
       #Hidden layers
       # NOTE(review): y_vals is n_out wide while W_fc1 has n_fc1 rows, so this
       # presumably requires n_out == n_fc1 (256) -- confirm with callers.
       fc1_out = T.tanh(T.dot(y_vals, self.W_fc1)  + self.b_fc1)
       fc2_out = T.tanh(T.dot(fc1_out, self.W_fc2)  + self.b_fc2)
       fc3_out = T.tanh(T.dot(fc2_out, self.W_fc3)  + self.b_fc3)

       self.output=fc3_out.dimshuffle(1,0,2)

       cost=get_err_fn(self,cost_function,Y)
       _optimizer = optimizer(
            cost,
            self.params,
            lr=lr
        )

       self.train = theano.function(inputs=[X, Y],outputs=cost,updates=_optimizer.getUpdates(),allow_input_downcast=True)
       # Return the network output after the FC stack (self.output), not the
       # intermediate LSTM read-out y_vals, so the FC layers are applied at prediction time.
       self.predictions = theano.function(inputs = [X], outputs = self.output,allow_input_downcast=True)

       # Number of scalar entries across everything in self.params, derived
       # from the shapes requested from init_weight/init_bias above.
       n_layer1 = 4*n_in*n_lstm + 7*n_lstm*n_lstm + 4*n_lstm   # W_x*_1, W_h*_1 + W_c*_1, biases
       n_layer2 = 11*n_lstm*n_lstm + 4*n_lstm                  # W_x*_2 + W_h*_2 + W_c*_2, biases
       n_readout = n_lstm*n_out + n_out                        # W_hy_2, b_y_2
       n_fc = (self.n_fc1*self.n_fc2 + self.n_fc2              # W_fc1, b_fc1
               + self.n_fc2*self.n_fc3 + self.n_fc3            # W_fc2, b_fc2
               + self.n_fc3*n_out + n_out)                     # W_fc3, b_fc3
       self.n_param = n_layer1 + n_layer2 + n_readout + n_fc
Esempio n. 26
0
    def __init__(self,
                 n_in,
                 n_lstm,
                 n_out,
                 lr=0.05,
                 batch_size=64,
                 output_activation=theano.tensor.nnet.relu,
                 cost_function='mse',
                 optimizer=RMSprop):
        """Build a single-layer LSTM graph and compile train/predict functions.

        Args:
            n_in: Size of the feature axis of the input ``X``.
            n_lstm: Number of LSTM units.
            n_out: Size of the feature axis of the output.
            lr: Learning rate forwarded to ``optimizer``.
            batch_size: Row count of the zero-filled initial hidden and cell
                states.
            output_activation: Unused; kept for interface compatibility. The
                output projection always goes through ``tanh``.
            cost_function: ``'mse'`` or ``'cxe'``; any other value selects
                the hand-written negative log-likelihood.
            optimizer: Called as ``optimizer(cost, params, lr=lr)``; the
                result must provide ``getUpdates()``.
        """
        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out

        # Gate parameters are created in a fixed order (input, forget, cell,
        # output, then the output projection) so initialisation stays
        # reproducible if the helpers draw from a shared random state.
        self.W_xi = init_weight((self.n_in, self.n_lstm), 'W_xi', 'glorot')
        self.W_hi = init_weight((self.n_lstm, self.n_lstm), 'W_hi', 'ortho')
        self.b_i = init_bias(self.n_lstm, sample='zero')
        self.W_xf = init_weight((self.n_in, self.n_lstm), 'W_xf', 'glorot')
        self.W_hf = init_weight((self.n_lstm, self.n_lstm), 'W_hf', 'ortho')
        # The forget bias is the only one initialised with sample='one'.
        self.b_f = init_bias(self.n_lstm, sample='one')
        self.W_xc = init_weight((self.n_in, self.n_lstm), 'W_xc', 'glorot')
        self.W_hc = init_weight((self.n_lstm, self.n_lstm), 'W_hc', 'ortho')
        self.b_c = init_bias(self.n_lstm, sample='zero')
        self.W_xo = init_weight((self.n_in, self.n_lstm), 'W_xo', 'glorot')
        self.W_ho = init_weight((self.n_lstm, self.n_lstm), 'W_ho', 'ortho')
        self.b_o = init_bias(self.n_lstm, sample='zero')
        self.W_hy = init_weight((self.n_lstm, self.n_out), 'W_hy', 'glorot')
        self.b_y = init_bias(self.n_out, sample='zero')

        self.params = [
            self.W_xi, self.W_hi, self.b_i, self.W_xf, self.W_hf, self.b_f,
            self.W_xc, self.W_hc, self.b_c, self.W_xo, self.W_ho, self.b_o,
            self.W_hy, self.b_y
        ]

        def step_lstm(x_t, h_tm1, c_tm1):
            """Advance the LSTM by one time step and emit that step's output."""
            i_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xi) + T.dot(h_tm1, self.W_hi) + self.b_i)
            f_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xf) + T.dot(h_tm1, self.W_hf) + self.b_f)
            c_t = f_t * c_tm1 + i_t * T.tanh(
                T.dot(x_t, self.W_xc) + T.dot(h_tm1, self.W_hc) + self.b_c)
            o_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xo) + T.dot(h_tm1, self.W_ho) + self.b_o)
            h_t = o_t * T.tanh(c_t)
            y_t = T.tanh(T.dot(h_t, self.W_hy) + self.b_y)
            return [h_t, c_t, y_t]

        X = T.tensor3()  # batch of sequence of vector
        Y = T.tensor3()  # target, same layout as the network output
        state_shape = (batch_size, self.n_lstm)
        h0 = shared(np.zeros(shape=state_shape, dtype=dtype))  # initial hidden state
        c0 = shared(np.zeros(shape=state_shape, dtype=dtype))  # initial cell state

        # scan iterates over the leading axis, so the sequence axis is moved
        # to the front and moved back once the outputs are stacked.
        scan_outputs, _ = theano.scan(fn=step_lstm,
                                      sequences=X.dimshuffle(1, 0, 2),
                                      outputs_info=[h0, c0, None])
        y_vals = scan_outputs[-1]
        self.output = y_vals.dimshuffle(1, 0, 2)

        # Only the selected cost expression is built.
        if cost_function == 'mse':
            cost = T.mean((self.output - Y)**2)
        elif cost_function == 'cxe':
            cost = T.mean(T.nnet.binary_crossentropy(self.output, Y))
        else:
            cost = -T.mean(Y * T.log(self.output) +
                           (1. - Y) * T.log(1. - self.output))
        _optimizer = optimizer(cost, self.params, lr=lr)

        self.train = theano.function(inputs=[X, Y],
                                     outputs=cost,
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X],
                                           outputs=y_vals.dimshuffle(1, 0, 2),
                                           allow_input_downcast=True)
        # Four gates, each with an input matrix, a recurrent matrix and a
        # bias vector, plus the output projection and its bias.
        self.n_param = (4 * (n_in * n_lstm + n_lstm * n_lstm + n_lstm) +
                        n_lstm * n_out + n_out)
Esempio n. 27
0
   def __init__(self, n_in, n_lstm, n_out, lr=0.00001, batch_size=64, output_activation=theano.tensor.nnet.relu,cost_function='mse',optimizer = RMSprop):
       """Build a two-direction GRU graph and compile train/predict functions.

       Two independent GRUs are scanned over ``X_f`` and ``X_b``; their hidden
       sequences are projected to the output size, summed with a shared bias
       and passed through ``tanh``.

       Args:
           n_in: Size of the feature axis of both inputs.
           n_lstm: Number of recurrent units per direction.
           n_out: Size of the feature axis of the output.
           lr: Learning rate forwarded to ``optimizer``.
           batch_size: Row count of the zero-filled initial hidden state.
           output_activation: Unused; kept for interface compatibility.
           cost_function: Forwarded to ``get_err_fn``.
           optimizer: Called as ``optimizer(cost, params, lr=lr)``; the
               result must provide ``getUpdates()``.
       """
       self.n_in = n_in
       self.n_lstm = n_lstm
       self.n_out = n_out

       # First GRU (scanned over X_f).
       self.W_xr_1 = init_weight((self.n_in, self.n_lstm), 'W_xr_1', 'glorot')
       self.W_hr_1 = init_weight((self.n_lstm, self.n_lstm), 'W_hr_1', 'ortho')
       self.b_r_1  = init_bias(self.n_lstm, sample='zero')
       self.W_xz_1 = init_weight((self.n_in, self.n_lstm), 'W_xz_1', 'glorot')
       self.W_hz_1 = init_weight((self.n_lstm, self.n_lstm), 'W_hz_1', 'ortho')
       self.b_z_1 = init_bias(self.n_lstm, sample='zero')
       self.W_xh_1 = init_weight((self.n_in, self.n_lstm), 'W_xh_1', 'glorot')
       self.W_hh_1 = init_weight((self.n_lstm, self.n_lstm), 'W_hh_1', 'ortho')
       self.b_h_1 = init_bias(self.n_lstm, sample='zero')

       # Second GRU (scanned over X_b).
       self.W_xr_2 = init_weight((self.n_in, self.n_lstm), 'W_xr', 'glorot')
       self.W_hr_2 = init_weight((self.n_lstm, self.n_lstm), 'W_hr', 'ortho')
       self.b_r_2  = init_bias(self.n_lstm, sample='zero')
       self.W_xz_2 = init_weight((self.n_in, self.n_lstm), 'W_xz', 'glorot')
       self.W_hz_2 = init_weight((self.n_lstm, self.n_lstm), 'W_hz', 'ortho')
       self.b_z_2 = init_bias(self.n_lstm, sample='zero')
       self.W_xh_2 = init_weight((self.n_in, self.n_lstm), 'W_xh', 'glorot')
       self.W_hh_2 = init_weight((self.n_lstm, self.n_lstm), 'W_hh', 'ortho')
       self.b_h_2 = init_bias(self.n_lstm, sample='zero')

       # Output projection: one matrix per direction and a shared bias.
       self.W_hy_f = init_weight((self.n_lstm, self.n_out), 'W_hy_f', 'glorot')
       self.W_hy_b = init_weight((self.n_lstm, self.n_out), 'W_hy_b', 'glorot')
       self.b_y = init_bias(self.n_out, sample='zero')
       # Older attribute names kept as aliases of the same shared variables.
       self.W_hy_2 = self.W_hy_b
       self.b_y_2 = self.b_y

       self.params = [self.W_xr_1, self.W_hr_1, self.b_r_1,
                      self.W_xz_1, self.W_hz_1, self.b_z_1,
                      self.W_xh_1, self.W_hh_1, self.b_h_1,

                      self.W_xr_2, self.W_hr_2, self.b_r_2,
                      self.W_xz_2, self.W_hz_2, self.b_z_2,
                      self.W_xh_2, self.W_hh_2, self.b_h_2,
                      self.W_hy_f, self.W_hy_b, self.b_y
                      ]

       def f_step_lstm(x_t, h_tm1_1):
           """One GRU step using the first parameter set."""
           r_t_1 = T.nnet.sigmoid(T.dot(x_t, self.W_xr_1) + T.dot(h_tm1_1, self.W_hr_1) + self.b_r_1)
           z_t_1 = T.nnet.sigmoid(T.dot(x_t, self.W_xz_1) + T.dot(h_tm1_1, self.W_hz_1) + self.b_z_1)
           h_t_1 = T.tanh(T.dot(x_t, self.W_xh_1) + T.dot((r_t_1 * h_tm1_1), self.W_hh_1) + self.b_h_1)
           return z_t_1 * h_t_1 + (1 - z_t_1) * h_tm1_1

       def b_step_lstm(x_t, h_tm1_2):
           """One GRU step using the second parameter set."""
           r_t_2 = T.nnet.sigmoid(T.dot(x_t, self.W_xr_2) + T.dot(h_tm1_2, self.W_hr_2) + self.b_r_2)
           z_t_2 = T.nnet.sigmoid(T.dot(x_t, self.W_xz_2) + T.dot(h_tm1_2, self.W_hz_2) + self.b_z_2)
           h_t_2 = T.tanh(T.dot(x_t, self.W_xh_2) + T.dot((r_t_2 * h_tm1_2), self.W_hh_2) + self.b_h_2)
           return z_t_2 * h_t_2 + (1 - z_t_2) * h_tm1_2

       X_f = T.tensor3() # batch of sequence of vector
       X_b = T.tensor3() # batch of sequence of vector
       Y = T.tensor3() # target, same layout as the network output
       h0 = shared(np.zeros(shape=(batch_size,self.n_lstm), dtype=dtype)) # initial hidden state

       # A GRU carries a single recurrent state, so each scan gets exactly one
       # entry in outputs_info and returns one stacked tensor.
       h_f, _ = theano.scan(fn=f_step_lstm,
                            sequences=X_f.dimshuffle(1,0,2),
                            outputs_info=[h0])

       h_b, _ = theano.scan(fn=b_step_lstm,
                            sequences=X_b.dimshuffle(1,0,2),
                            outputs_info=[h0])
       # scan stacks its outputs along axis 0 (the step axis); flipping that
       # axis puts the second GRU's states in the same step order as h_f.
       # NOTE(review): assumes X_b is the time-reversed copy of X_f - confirm
       # against the caller.
       h_b = h_b[::-1]
       y_vals = T.tanh(T.dot(h_f, self.W_hy_f) + T.dot(h_b, self.W_hy_b) + self.b_y)

       self.output = y_vals.dimshuffle(1,0,2)

       cost = get_err_fn(self, cost_function, Y)

       _optimizer = optimizer(
            cost,
            self.params,
            lr=lr
        )

       self.train = theano.function(inputs=[X_f,X_b, Y],outputs=cost,updates=_optimizer.getUpdates(),allow_input_downcast=True)
       self.predictions = theano.function(inputs = [X_f,X_b], outputs = y_vals.dimshuffle(1,0,2),allow_input_downcast=True)
       # Per direction: three gates (input matrix, recurrent matrix, bias)
       # plus one output projection; the output bias is shared.
       self.n_param = 2 * (3 * (n_in * n_lstm + n_lstm * n_lstm + n_lstm) + n_lstm * n_out) + n_out
Esempio n. 28
0
    def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
        """Wire one ``LSTMLayer`` to a linear read-out and compile it.

        The recurrent state is not stored in the model: both compiled
        functions take the initial hidden/cell matrices as inputs and return
        the last-step hidden/cell values alongside their main output.

        Args:
            rng: Random source handed to the weight helpers and the layer,
                and used to draw the additive input noise.
            params: Mapping read for ``'batch_size'``, ``'seq_length'``,
                ``'lr'``, ``'n_hidden'`` and ``'n_output'``.
            cost_function: Forwarded to ``get_err_fn``.
            optimizer: Called as ``optimizer(cost, params, lr=lr)``; the
                result must provide ``getUpdates()``.
        """
        n_batch = params['batch_size']
        n_steps = params["seq_length"]
        learning_rate = params['lr']

        self.n_in = 2048  # input feature size is fixed by this model
        self.n_lstm = params['n_hidden']
        self.n_out = params['n_output']

        # Read-out parameters are created before the recurrent layer.
        self.W_hy = init_weight((self.n_lstm, self.n_out),
                                rng=rng, name='W_hy', sample='glorot')
        self.b_y = init_bias(self.n_out, rng=rng, sample='zero')

        layer1 = LSTMLayer(rng, 0, self.n_in, self.n_lstm)

        # The layer's own parameter list is reused and grown in place.
        self.params = layer1.params
        for readout_param in (self.W_hy, self.b_y):
            self.params.append(readout_param)

        def step_lstm(x_t, h_prev, c_prev):
            """Run the layer for one step and apply the linear read-out."""
            h_new, c_new, lstm_out = layer1.run(x_t, h_prev, c_prev)
            return [h_new, c_new, T.dot(lstm_out, self.W_hy) + self.b_y]

        X = T.tensor3()  # batch of sequence of vector
        Y = T.tensor3()  # batch of sequence of vector
        # pseudo boolean for switching between training and prediction
        is_train = T.iscalar('is_train')
        H = T.matrix(name="H", dtype=dtype)  # initial hidden state
        C = T.matrix(name="C", dtype=dtype)  # initial cell state

        # Small zero-mean Gaussian noise is added to the input only when
        # is_train is non-zero.
        noise = rng.normal(size=(n_batch, n_steps, self.n_in),
                           std=0.0002,
                           avg=0.0,
                           dtype=theano.config.floatX)
        X_tilde = T.switch(T.neq(is_train, 0), noise + X, X)

        # scan walks the leading axis, hence the swap of batch and step axes.
        (h_seq, c_seq, y_vals), _ = theano.scan(
            fn=step_lstm,
            sequences=[X_tilde.dimshuffle(1, 0, 2)],
            outputs_info=[H, C, None])

        self.output = y_vals.dimshuffle(1, 0, 2)
        cost = get_err_fn(self, cost_function, Y)

        _optimizer = optimizer(cost, self.params, lr=learning_rate)

        self.train = theano.function(
            inputs=[X, Y, is_train, H, C],
            outputs=[cost, h_seq[-1], c_seq[-1]],
            updates=_optimizer.getUpdates(),
            allow_input_downcast=True)
        self.predictions = theano.function(
            inputs=[X, is_train, H, C],
            outputs=[self.output, h_seq[-1], c_seq[-1]],
            allow_input_downcast=True)
        self.n_param = count_params(self.params)
Esempio n. 29
0
    def __init__(self,rng,params,cost_function='mse',optimizer = RMSprop):
        """Build a CNN feature extractor followed by an LSTM and compile it.

        Every frame of every sequence goes through three conv+pool stages and
        a 1024-unit hidden layer; the per-frame features are regrouped into
        sequences and fed to one ``LSTMLayer`` with a linear read-out. An L2
        penalty over all parameters is added to the cost.

        Args:
            rng: Random source handed to every layer and weight helper.
            params: Mapping read for ``"lr"``, ``'n_hidden'``, ``'n_output'``,
                ``"batch_size"`` and ``"seq_length"``.
            cost_function: Forwarded to ``get_err_fn``.
            optimizer: Called as ``optimizer(cost, params, lr=lr)``; the
                result must provide ``getUpdates()``.
        """
        lr = params["lr"]
        n_lstm = params['n_hidden']
        n_out = params['n_output']
        batch_size = params["batch_size"]
        sequence_length = params["seq_length"]

        X = T.tensor3()  # batch of sequence of vector
        Y = T.tensor3()  # batch of sequence of vector
        is_train = T.iscalar('is_train')  # pseudo boolean for switching between training and prediction

        # CNN global parameters.
        subsample = (1, 1)
        p_1 = 0.5
        border_mode = "valid"
        # The CNN sees every frame of every sequence as an independent sample.
        cnn_batch_size = batch_size * sequence_length
        pool_size = (2, 2)

        # Layer 1: conv + pool + dropout
        filter_shape = (64, 1, 9, 9)
        input_shape = (cnn_batch_size, 1, 120, 60)  # (samples, channels, rows, cols)
        cnn_input = X.reshape(input_shape)
        c1 = ConvLayer(rng, cnn_input, filter_shape, input_shape, border_mode, subsample, activation=nn.relu)
        p1 = PoolLayer(c1.output, pool_size=pool_size, input_shape=c1.output_shape)
        dl1 = DropoutLayer(rng, input=p1.output, prob=p_1, is_train=is_train)

        # Layer 2: conv + pool
        filter_shape = (128, p1.output_shape[1], 3, 3)
        c2 = ConvLayer(rng, dl1.output, filter_shape, p1.output_shape, border_mode, subsample, activation=nn.relu)
        p2 = PoolLayer(c2.output, pool_size=pool_size, input_shape=c2.output_shape)

        # Layer 3: conv + pool
        filter_shape = (128, p2.output_shape[1], 3, 3)
        c3 = ConvLayer(rng, p2.output, filter_shape, p2.output_shape, border_mode, subsample, activation=nn.relu)
        p3 = PoolLayer(c3.output, pool_size=pool_size, input_shape=c3.output_shape)

        # Layer 4: fully connected. Its input size is the product of every
        # non-batch dimension of the last pooling output; an explicit loop
        # avoids the `reduce` builtin, which only exists on Python 2.
        n_in = 1
        for dim in p3.output_shape[1:]:
            n_in *= dim
        x_flat = p3.output.flatten(2)
        h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)
        n_in = 1024
        # Regroup per-frame features into (batch, sequence, feature).
        rnn_input = h1.output.reshape((batch_size, sequence_length, n_in))

        # Layer 5: LSTM with a linear read-out
        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out
        self.W_hy = init_weight((self.n_lstm, self.n_out), rng=rng, name='W_hy', sample='glorot')
        self.b_y = init_bias(self.n_out, rng=rng, sample='zero')

        layer1 = LSTMLayer(rng, 0, self.n_in, self.n_lstm)

        def step_lstm(x_t, h_tm1, c_tm1):
            """Run the LSTM layer for one step and apply the read-out."""
            h_t, c_t, y_t = layer1.run(x_t, h_tm1, c_tm1)
            y = T.dot(y_t, self.W_hy) + self.b_y
            return [h_t, c_t, y]

        H = T.matrix(name="H", dtype=dtype)  # initial hidden state
        C = T.matrix(name="C", dtype=dtype)  # initial cell state

        # dimshuffle(1, 0, 2) turns (batch_size, sequence_length, n_in) into
        # (sequence_length, batch_size, n_in) because scan iterates over the
        # leading axis.
        [h_t, c_t, y_vals], _ = theano.scan(fn=step_lstm,
                                            sequences=[rnn_input.dimshuffle(1, 0, 2)],
                                            outputs_info=[H, C, None])

        self.output = y_vals.dimshuffle(1, 0, 2)

        # The layer's list is copied so collecting parameters here does not
        # grow layer1.params as a side effect.
        rnn_params = list(layer1.params) + [self.W_hy, self.b_y]
        self.params = c1.params + c2.params + c3.params + h1.params + rnn_params

        cost = get_err_fn(self, cost_function, Y)
        L2_reg = 0.0001
        L2_sqr = theano.shared(0.)
        for param in self.params:
            L2_sqr += (T.sum(param ** 2))

        cost += L2_reg * L2_sqr
        _optimizer = optimizer(cost, self.params, lr=lr)
        self.train = theano.function(inputs=[X, Y, is_train, H, C], outputs=[cost, h_t[-1], c_t[-1]], updates=_optimizer.getUpdates(), allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X, is_train, H, C], outputs=[self.output, h_t[-1], c_t[-1]], allow_input_downcast=True)
        self.n_param = count_params(self.params)
Esempio n. 30
0
    def __init__(self, rng, params, cost_function="mse", optimizer=RMSprop):
        """Build a three-layer convolutional LSTM classifier and compile it.

        Each time step runs pool -> CLSTM -> pool -> dropout -> CLSTM -> pool
        -> CLSTM -> pool inside a single ``theano.scan``; the stacked outputs
        go through a 1024-unit hidden layer and a logistic-regression layer.
        An L2 penalty over all parameters is added to the cost.

        Args:
            rng: Random source handed to the weight helpers and layers, and
                used to draw the dropout mask.
            params: Mapping read for ``"lr"``, ``"batch_size"`` and
                ``"seq_length"``.
            cost_function: Forwarded to ``get_err_fn``.
            optimizer: Called as ``optimizer(cost, params, lr=lr)``; the
                result must provide ``getUpdates()``.
        """
        lr = params["lr"]
        batch_size = params["batch_size"]
        sequence_length = params["seq_length"]

        X = T.tensor3()  # batch of sequence of vector
        Y = T.tensor3()  # batch of sequence of vector
        is_train = T.iscalar("is_train")  # pseudo boolean for switching between training and prediction

        # CNN global parameters.
        subsample = (1, 1)
        p_1 = 0.5
        border_mode = "same"
        pool_size = (2, 2)

        # Filter shapes per CLSTM layer: "filter_shape_k" is applied to the
        # layer input, "s_filter_shape_k" to the layer's own state, so its
        # channel count equals the layer's number of filters.
        f_dict = dict()
        f_dict["filter_shape_0"] = (64, 1, 9, 9)
        f_dict["s_filter_shape_0"] = (64, f_dict["filter_shape_0"][0], 9, 9)

        f_dict["filter_shape_1"] = (128, f_dict["filter_shape_0"][0], 3, 3)
        f_dict["s_filter_shape_1"] = (128, f_dict["filter_shape_1"][0], 3, 3)

        f_dict["filter_shape_2"] = (128, f_dict["filter_shape_1"][0], 3, 3)
        f_dict["s_filter_shape_2"] = (128, f_dict["filter_shape_2"][0], 3, 3)

        # Step-major layout so scan can iterate over the leading axis.
        frames = X.reshape((batch_size, sequence_length, 1, 120, 60)).dimshuffle(1, 0, 2, 3, 4)
        input_shape = (batch_size, 1, 120, 60)  # (samples, channels, rows, cols)

        # Walk the layer sequence once to derive the input/state shape of every
        # CLSTM layer: "p" halves rows and cols, "l" is a CLSTM layer, and any
        # other entry ("d", dropout) leaves the shape untouched.
        s_dict = dict()
        layer_list = ["p", "l", "p", "d", "l", "p", "l", "p"]
        rows, cols = input_shape[2], input_shape[3]
        counter = 0
        outputs_info = []
        for layer in layer_list:
            if layer == "l":
                s_index = str(counter)
                if counter == 0:
                    pre_nfilter = input_shape[1]
                else:
                    pre_nfilter = f_dict["filter_shape_" + str(counter - 1)][0]
                i_shape = (batch_size, pre_nfilter, rows, cols)
                s_shape = (batch_size, f_dict["s_filter_shape_" + s_index][0], rows, cols)
                s_dict["i_shape_" + s_index] = i_shape
                s_dict["s_shape_" + s_index] = s_shape
                # Zero initial hidden and cell state for this layer.
                outputs_info.append(shared(np.zeros(shape=s_shape, dtype=dtype)))
                outputs_info.append(shared(np.zeros(shape=s_shape, dtype=dtype)))
                counter += 1
            elif layer == "p":
                # Floor division keeps the sizes integral on Python 3 as well.
                rows = rows // 2
                cols = cols // 2
        s_dict["final_shape"] = (batch_size, sequence_length, pre_nfilter, rows, cols)

        # The last scan output (the pooled layer-3 output) is not recurrent.
        outputs_info.append(None)
        outputs_info = tuple(outputs_info)

        # Gate parameters, created in a fixed order per layer (input, forget,
        # cell, output). The cell gate has no W_c* weight and the forget bias
        # is the only one created with sample="one".
        gate_specs = (
            ("i", True, {}),
            ("f", True, {"sample": "one"}),
            ("c", False, {}),
            ("o", True, {}),
        )
        p_dict = dict()
        for index in range(counter):
            s_index = str(index)
            x_filter = f_dict["filter_shape_" + s_index]
            s_filter = f_dict["s_filter_shape_" + s_index]
            for gate, has_cell_weight, bias_kwargs in gate_specs:
                suffix = gate + "_" + s_index
                p_dict["W_x" + suffix] = u.init_weight(x_filter, rng=rng, name="W_x" + suffix, sample="glorot")
                p_dict["W_h" + suffix] = u.init_weight(s_filter, rng=rng, name="W_h" + suffix, sample="glorot")
                if has_cell_weight:
                    p_dict["W_c" + suffix] = u.init_weight(s_filter, rng=rng, name="W_c" + suffix, sample="glorot")
                p_dict["b_" + suffix] = u.init_bias(x_filter[0], rng=rng, name="b_" + suffix, **bias_kwargs)

        def step_lstm(x_t, mask, h_tm1_1, c_tm1_1, h_tm1_2, c_tm1_2, h_tm1_3, c_tm1_3):
            """Run one time step through the three pooled CLSTM layers."""
            # NOTE(review): this pool receives the post-pool shape
            # s_dict["i_shape_0"], while the later pools receive the pre-pool
            # shape of their input - confirm what PoolLayer expects.
            pool_in = PoolLayer(x_t, pool_size=pool_size, input_shape=s_dict["i_shape_0"])
            layer_1 = CLSTMLayer(rng, 0, p_dict, f_dict, s_dict, pool_in.output, h_tm1_1, c_tm1_1, border_mode, subsample)
            [h_t_1, c_t_1, y_t_1] = layer_1.output
            pool_1 = PoolLayer(y_t_1, pool_size=pool_size, input_shape=layer_1.yt_shape)
            dl1 = DropoutLayer(rng, input=pool_1.output, prob=p_1, is_train=is_train, mask=mask)

            layer_2 = CLSTMLayer(rng, 1, p_dict, f_dict, s_dict, dl1.output, h_tm1_2, c_tm1_2, border_mode, subsample)
            [h_t_2, c_t_2, y_t_2] = layer_2.output
            # This pool consumes layer 2's output, so it is given layer 2's shape.
            pool_2 = PoolLayer(y_t_2, pool_size=pool_size, input_shape=layer_2.yt_shape)

            layer_3 = CLSTMLayer(rng, 2, p_dict, f_dict, s_dict, pool_2.output, h_tm1_3, c_tm1_3, border_mode, subsample)
            [h_t_3, c_t_3, y_t_3] = layer_3.output
            pool_3 = PoolLayer(y_t_3, pool_size=pool_size, input_shape=layer_3.yt_shape)

            return [h_t_1, c_t_1, h_t_2, c_t_2, h_t_3, c_t_3, pool_3.output]

        # One dropout mask per time step, shaped like the input of CLSTM
        # layer 1 (the tensor the dropout layer is applied to).
        mask_shape = (sequence_length,) + tuple(s_dict["i_shape_1"])
        mask = rng.binomial(size=mask_shape, p=p_1, dtype=frames.dtype)

        scan_outputs, _ = theano.scan(
            fn=step_lstm, outputs_info=outputs_info, sequences=[frames, mask]
        )
        y_vals = scan_outputs[-1]

        # Back to batch-major, then flatten everything after the sequence axis.
        hidden_input = y_vals.dimshuffle(1, 0, 2, 3, 4)
        n_in = 1
        for dim in s_dict["final_shape"][2:]:
            n_in *= dim
        x_flat = hidden_input.flatten(3)
        h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)
        n_in = 1024
        # NOTE(review): the output size is hard-coded to 42 and
        # params["n_output"] is not used - confirm this is intended.
        lreg = LogisticRegression(rng, h1.output, n_in, 42)

        self.output = lreg.y_pred
        # list(...) is required because dict.values() is a view on Python 3.
        self.params = list(p_dict.values())
        self.params.append(h1.params[0])
        self.params.append(h1.params[1])
        self.params.append(lreg.params[0])
        self.params.append(lreg.params[1])

        cost = get_err_fn(self, cost_function, Y)
        L2_reg = 0.0001
        L2_sqr = theano.shared(0.0)
        for param in self.params:
            L2_sqr += T.sum(param ** 2)

        cost += L2_reg * L2_sqr
        _optimizer = optimizer(cost, self.params, lr=lr)
        self.train = theano.function(
            inputs=[X, Y, is_train], outputs=cost, updates=_optimizer.getUpdates(), allow_input_downcast=True
        )
        self.predictions = theano.function(inputs=[X, is_train], outputs=self.output, allow_input_downcast=True)
        self.n_param = count_params(self.params)
Esempio n. 31
0
    def __init__(self,
                 n_in,
                 n_lstm,
                 n_out,
                 lr=0.05,
                 batch_size=64,
                 output_activation=theano.tensor.nnet.relu,
                 cost_function='mse',
                 optimizer=RMSprop):
        """Build a two-direction LSTM graph and compile train/predict functions.

        Two independent LSTMs are scanned over ``X_f`` and ``X_b``; their
        hidden sequences are projected to the output size, summed with a
        shared bias and passed through ``tanh``.

        Args:
            n_in: Size of the feature axis of both inputs.
            n_lstm: Number of LSTM units per direction.
            n_out: Size of the feature axis of the output.
            lr: Learning rate forwarded to ``optimizer``.
            batch_size: Row count of the zero-filled initial hidden and cell
                states.
            output_activation: Unused; kept for interface compatibility.
            cost_function: Forwarded to ``get_err_fn``.
            optimizer: Called as ``optimizer(cost, params, lr=lr)``; the
                result must provide ``getUpdates()``.
        """
        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out
        #Forward weights
        self.W_xi_f = init_weight((self.n_in, self.n_lstm), 'W_xif', 'glorot')
        self.W_hi_f = init_weight((self.n_lstm, self.n_lstm), 'W_hif', 'ortho')
        self.b_i_f = init_bias(self.n_lstm, sample='zero')
        self.W_xf_f = init_weight((self.n_in, self.n_lstm), 'W_xf', 'glorot')
        self.W_hf_f = init_weight((self.n_lstm, self.n_lstm), 'W_hf', 'ortho')
        self.b_f_f = init_bias(self.n_lstm, sample='one')
        self.W_xc_f = init_weight((self.n_in, self.n_lstm), 'W_xcf', 'glorot')
        self.W_hc_f = init_weight((self.n_lstm, self.n_lstm), 'W_hcf', 'ortho')
        self.b_c_f = init_bias(self.n_lstm, sample='zero')
        self.W_xo_f = init_weight((self.n_in, self.n_lstm), 'W_xof', 'glorot')
        self.W_ho_f = init_weight((self.n_lstm, self.n_lstm), 'W_hof', 'ortho')
        self.b_o_f = init_bias(self.n_lstm, sample='zero')
        self.W_hy_f = init_weight((self.n_lstm, self.n_out), 'W_hyf', 'glorot')

        #Backward weights
        self.W_xi_b = init_weight((self.n_in, self.n_lstm), 'W_xib', 'glorot')
        self.W_hi_b = init_weight((self.n_lstm, self.n_lstm), 'W_hib', 'ortho')
        self.b_i_b = init_bias(self.n_lstm, sample='zero')
        self.W_xf_b = init_weight((self.n_in, self.n_lstm), 'W_xfb', 'glorot')
        self.W_hf_b = init_weight((self.n_lstm, self.n_lstm), 'W_hfb', 'ortho')
        self.b_f_b = init_bias(self.n_lstm, sample='one')
        self.W_xc_b = init_weight((self.n_in, self.n_lstm), 'W_xcb', 'glorot')
        self.W_hc_b = init_weight((self.n_lstm, self.n_lstm), 'W_hcb', 'ortho')
        self.b_c_b = init_bias(self.n_lstm, sample='zero')
        self.W_xo_b = init_weight((self.n_in, self.n_lstm), 'W_xob', 'glorot')
        self.W_ho_b = init_weight((self.n_lstm, self.n_lstm), 'W_hob', 'ortho')
        self.b_o_b = init_bias(self.n_lstm, sample='zero')
        self.W_hy_b = init_weight((self.n_lstm, self.n_out), 'W_hyb', 'glorot')

        # Output bias shared by both directions.
        self.b_y = init_bias(self.n_out, sample='zero')

        self.params = [
            self.W_xi_f, self.W_hi_f, self.b_i_f, self.W_xf_f, self.W_hf_f,
            self.b_f_f, self.W_xc_f, self.W_hc_f, self.b_c_f, self.W_xo_f,
            self.W_ho_f, self.b_o_f, self.W_hy_f, self.W_xi_b, self.W_hi_b,
            self.b_i_b, self.W_xf_b, self.W_hf_b, self.b_f_b, self.W_xc_b,
            self.W_hc_b, self.b_c_b, self.W_xo_b, self.W_ho_b, self.b_o_b,
            self.W_hy_b, self.b_y
        ]

        def f_step_lstm(x_t, h_tm1, c_tm1):
            """One LSTM step using the forward parameter set."""
            i_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xi_f) + T.dot(h_tm1, self.W_hi_f) +
                self.b_i_f)
            f_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xf_f) + T.dot(h_tm1, self.W_hf_f) +
                self.b_f_f)
            c_t = f_t * c_tm1 + i_t * T.tanh(
                T.dot(x_t, self.W_xc_f) + T.dot(h_tm1, self.W_hc_f) +
                self.b_c_f)
            o_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xo_f) + T.dot(h_tm1, self.W_ho_f) +
                self.b_o_f)
            h_t = o_t * T.tanh(c_t)
            return [h_t, c_t]

        def b_step_lstm(x_t, h_tm1, c_tm1):
            """One LSTM step using the backward parameter set."""
            i_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xi_b) + T.dot(h_tm1, self.W_hi_b) +
                self.b_i_b)
            f_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xf_b) + T.dot(h_tm1, self.W_hf_b) +
                self.b_f_b)
            c_t = f_t * c_tm1 + i_t * T.tanh(
                T.dot(x_t, self.W_xc_b) + T.dot(h_tm1, self.W_hc_b) +
                self.b_c_b)
            o_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xo_b) + T.dot(h_tm1, self.W_ho_b) +
                self.b_o_b)
            h_t = o_t * T.tanh(c_t)
            return [h_t, c_t]

        X_f = T.tensor3()  # batch of sequence of vector
        X_b = T.tensor3()  # batch of sequence of vector
        Y = T.tensor3()  # target, same layout as the network output
        h0 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                             dtype=dtype))  # initial hidden state
        c0 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                             dtype=dtype))  # initial cell state

        # Only the hidden sequences are needed downstream; the stacked cell
        # states are discarded.
        [h_f, _c_f], _ = theano.scan(fn=f_step_lstm,
                                     sequences=X_f.dimshuffle(1, 0, 2),
                                     outputs_info=[h0, c0])

        [h_b, _c_b], _ = theano.scan(fn=b_step_lstm,
                                     sequences=X_b.dimshuffle(1, 0, 2),
                                     outputs_info=[h0, c0])

        # scan stacks its outputs along axis 0 (the step axis); flipping that
        # axis puts the backward states in the same step order as h_f.
        # Flipping axis 1 would reverse the batch instead and pair each
        # sample with another sample's backward states.
        # NOTE(review): assumes X_b is the time-reversed copy of X_f - confirm
        # against the caller.
        h_b = h_b[::-1]
        y_vals = T.tanh(
            T.dot(h_f, self.W_hy_f) + T.dot(h_b, self.W_hy_b) + self.b_y)

        self.output = y_vals.dimshuffle(1, 0, 2)

        cost = get_err_fn(self, cost_function, Y)

        _optimizer = optimizer(cost, self.params, lr=lr)

        self.train = theano.function(inputs=[X_f, X_b, Y],
                                     outputs=cost,
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X_f, X_b],
                                           outputs=y_vals.dimshuffle(1, 0, 2),
                                           allow_input_downcast=True)
        # Per direction: four gates (input matrix, recurrent matrix, bias)
        # plus one output projection; the output bias is shared.
        self.n_param = 2 * (4 * (n_in * n_lstm + n_lstm * n_lstm + n_lstm) +
                            n_lstm * n_out) + n_out
Esempio n. 32
0
   def __init__(self, n_in, n_lstm, n_out, lr=0.05, batch_size=64,
                output_activation=theano.tensor.nnet.relu,
                cost_function='mse', optimizer=RMSprop):
       """Build a bidirectional LSTM and compile its Theano train/predict functions.

       Two independent LSTMs are scanned over ``X_f`` and ``X_b``; their hidden
       states are projected with separate matrices, summed with a shared bias
       and squashed with ``tanh`` to give the per-step output.

       Parameters:
           n_in: size of each input vector (rows of the input weight matrices).
           n_lstm: number of LSTM units in each direction.
           n_out: size of each output vector.
           lr: learning rate forwarded to ``optimizer``.
           batch_size: leading dimension of the shared initial hidden/cell
               states, so compiled functions expect batches of this size.
           output_activation: accepted for interface compatibility; it is not
               referenced here (the output layer always applies ``tanh``).
           cost_function: identifier forwarded to ``get_err_fn``.
           optimizer: callable invoked as ``optimizer(cost, params, lr=lr)``
               whose result provides ``getUpdates()``.

       Attributes set:
           params: list of every trainable variable, forward weights first.
           output: symbolic prediction with the batch axis first.
           train: compiled function ``(X_f, X_b, Y) -> cost`` applying updates.
           predictions: compiled function ``(X_f, X_b) -> output``.
           n_param: number of trainable scalars in ``params``.
       """
       self.n_in = n_in
       self.n_lstm = n_lstm
       self.n_out = n_out

       # Forward-direction weights (input gate, forget gate, cell, output gate).
       self.W_xi_f = init_weight((self.n_in, self.n_lstm), 'W_xif', 'glorot')
       self.W_hi_f = init_weight((self.n_lstm, self.n_lstm), 'W_hif', 'ortho')
       self.b_i_f = init_bias(self.n_lstm, sample='zero')
       self.W_xf_f = init_weight((self.n_in, self.n_lstm), 'W_xf', 'glorot')
       self.W_hf_f = init_weight((self.n_lstm, self.n_lstm), 'W_hf', 'ortho')
       # The forget-gate bias is the only bias requested with sample='one'.
       self.b_f_f = init_bias(self.n_lstm, sample='one')
       self.W_xc_f = init_weight((self.n_in, self.n_lstm), 'W_xcf', 'glorot')
       self.W_hc_f = init_weight((self.n_lstm, self.n_lstm), 'W_hcf', 'ortho')
       self.b_c_f = init_bias(self.n_lstm, sample='zero')
       self.W_xo_f = init_weight((self.n_in, self.n_lstm), 'W_xof', 'glorot')
       self.W_ho_f = init_weight((self.n_lstm, self.n_lstm), 'W_hof', 'ortho')
       self.b_o_f = init_bias(self.n_lstm, sample='zero')
       self.W_hy_f = init_weight((self.n_lstm, self.n_out), 'W_hyf', 'glorot')

       # Backward-direction weights, same layout as the forward ones.
       self.W_xi_b = init_weight((self.n_in, self.n_lstm), 'W_xib', 'glorot')
       self.W_hi_b = init_weight((self.n_lstm, self.n_lstm), 'W_hib', 'ortho')
       self.b_i_b = init_bias(self.n_lstm, sample='zero')
       self.W_xf_b = init_weight((self.n_in, self.n_lstm), 'W_xfb', 'glorot')
       self.W_hf_b = init_weight((self.n_lstm, self.n_lstm), 'W_hfb', 'ortho')
       self.b_f_b = init_bias(self.n_lstm, sample='one')
       self.W_xc_b = init_weight((self.n_in, self.n_lstm), 'W_xcb', 'glorot')
       self.W_hc_b = init_weight((self.n_lstm, self.n_lstm), 'W_hcb', 'ortho')
       self.b_c_b = init_bias(self.n_lstm, sample='zero')
       self.W_xo_b = init_weight((self.n_in, self.n_lstm), 'W_xob', 'glorot')
       self.W_ho_b = init_weight((self.n_lstm, self.n_lstm), 'W_hob', 'ortho')
       self.b_o_b = init_bias(self.n_lstm, sample='zero')
       self.W_hy_b = init_weight((self.n_lstm, self.n_out), 'W_hyb', 'glorot')

       # Output bias shared by both directions.
       self.b_y = init_bias(self.n_out, sample='zero')

       self.params = [self.W_xi_f, self.W_hi_f, self.b_i_f, self.W_xf_f,
                      self.W_hf_f, self.b_f_f, self.W_xc_f, self.W_hc_f,
                      self.b_c_f, self.W_xo_f, self.W_ho_f, self.b_o_f,
                      self.W_hy_f,
                      self.W_xi_b, self.W_hi_b, self.b_i_b, self.W_xf_b,
                      self.W_hf_b, self.b_f_b, self.W_xc_b, self.W_hc_b,
                      self.b_c_b, self.W_xo_b, self.W_ho_b, self.b_o_b,
                      self.W_hy_b, self.b_y]

       def f_step_lstm(x_t, h_tm1, c_tm1):
           # One forward-direction LSTM step: returns the new hidden and cell state.
           i_t = T.nnet.sigmoid(T.dot(x_t, self.W_xi_f) + T.dot(h_tm1, self.W_hi_f) + self.b_i_f)
           f_t = T.nnet.sigmoid(T.dot(x_t, self.W_xf_f) + T.dot(h_tm1, self.W_hf_f) + self.b_f_f)
           c_t = f_t * c_tm1 + i_t * T.tanh(T.dot(x_t, self.W_xc_f) + T.dot(h_tm1, self.W_hc_f) + self.b_c_f)
           o_t = T.nnet.sigmoid(T.dot(x_t, self.W_xo_f) + T.dot(h_tm1, self.W_ho_f) + self.b_o_f)
           h_t = o_t * T.tanh(c_t)
           return [h_t, c_t]

       def b_step_lstm(x_t, h_tm1, c_tm1):
           # One backward-direction LSTM step, using the *_b weights.
           i_t = T.nnet.sigmoid(T.dot(x_t, self.W_xi_b) + T.dot(h_tm1, self.W_hi_b) + self.b_i_b)
           f_t = T.nnet.sigmoid(T.dot(x_t, self.W_xf_b) + T.dot(h_tm1, self.W_hf_b) + self.b_f_b)
           c_t = f_t * c_tm1 + i_t * T.tanh(T.dot(x_t, self.W_xc_b) + T.dot(h_tm1, self.W_hc_b) + self.b_c_b)
           o_t = T.nnet.sigmoid(T.dot(x_t, self.W_xo_b) + T.dot(h_tm1, self.W_ho_b) + self.b_o_b)
           h_t = o_t * T.tanh(c_t)
           return [h_t, c_t]

       X_f = T.tensor3()  # batch of sequence of vector (forward input)
       # NOTE(review): presumably X_b is X_f reversed along time -- confirm against caller.
       X_b = T.tensor3()  # batch of sequence of vector (backward input)
       Y = T.tensor3()  # batch of sequence of vector (should be 0 when X is not null)
       h0 = shared(np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype))  # initial hidden state
       c0 = shared(np.zeros(shape=(batch_size, self.n_lstm), dtype=dtype))  # initial cell state

       # dimshuffle(1, 0, 2) moves the sequence axis first so scan iterates over
       # steps; the stacked outputs are therefore (step, batch, n_lstm).
       [h_f, _], _ = theano.scan(fn=f_step_lstm,
                                 sequences=X_f.dimshuffle(1, 0, 2),
                                 outputs_info=[h0, c0])

       [h_b, _], _ = theano.scan(fn=b_step_lstm,
                                 sequences=X_b.dimshuffle(1, 0, 2),
                                 outputs_info=[h0, c0])

       # Flip the backward states along the step axis (axis 0) so that step t
       # lines up with the forward state for the same step. The previous
       # h_b[:, ::-1] flipped axis 1, which is the batch axis of the scan output
       # and paired each sample with another sample's backward states.
       h_b = h_b[::-1]
       y_vals = T.tanh(T.dot(h_f, self.W_hy_f) + T.dot(h_b, self.W_hy_b) + self.b_y)

       # Back to batch-first layout; set before get_err_fn, which receives self.
       self.output = y_vals.dimshuffle(1, 0, 2)

       cost = get_err_fn(self, cost_function, Y)

       _optimizer = optimizer(cost, self.params, lr=lr)

       self.train = theano.function(inputs=[X_f, X_b, Y],
                                    outputs=cost,
                                    updates=_optimizer.getUpdates(),
                                    allow_input_downcast=True)
       self.predictions = theano.function(inputs=[X_f, X_b],
                                          outputs=y_vals.dimshuffle(1, 0, 2),
                                          allow_input_downcast=True)

       # Trainable scalar count matching self.params: per direction, four gates
       # (input matrix, recurrent matrix, bias) plus the hidden-to-output
       # matrix; the output bias is shared. Assumes init_bias(k) allocates k values.
       per_direction = (4 * (n_in * n_lstm + n_lstm * n_lstm + n_lstm)
                        + n_lstm * n_out)
       self.n_param = 2 * per_direction + n_out
Esempio n. 33
0
    def __init__(self,
                 n_in,
                 n_lstm,
                 n_out,
                 lr=0.05,
                 batch_size=64,
                 single_output=True,
                 output_activation=theano.tensor.nnet.relu,
                 cost_function='mse',
                 optimizer=RMSprop):
        """Build a GRU followed by three dense layers and compile train/predict.

        The recurrent step uses a reset gate, an update gate and a candidate
        state, then projects the new hidden state to ``n_fc1`` features. Those
        per-step features pass through three ``tanh`` dense layers, the last of
        which has ``n_out`` units.

        Parameters:
            n_in: size of each input vector.
            n_lstm: number of recurrent units.
            n_out: size of each output vector.
            lr: learning rate forwarded to ``optimizer``.
            batch_size: leading dimension of the shared initial hidden state,
                so compiled functions expect batches of this size.
            single_output: accepted for interface compatibility; not
                referenced in this constructor.
            output_activation: accepted for interface compatibility; not
                referenced here (every layer applies ``tanh``).
            cost_function: identifier forwarded to ``get_err_fn``.
            optimizer: callable invoked as ``optimizer(cost, params, lr=lr)``
                whose result provides ``getUpdates()``.

        Attributes set:
            params: list of every trainable variable.
            output: symbolic prediction with the batch axis first.
            train: compiled function ``(X, Y) -> cost`` applying updates.
            predictions: compiled function ``(X) -> output``.
            n_param: number of trainable scalars in ``params``.
        """
        self.n_in = n_in
        self.n_lstm = n_lstm
        self.n_out = n_out
        # Widths of the dense stack: n_fc1 is the recurrent projection size and
        # the input width of the first dense layer.
        self.n_fc1 = 256
        self.n_fc2 = 256
        self.n_fc3 = 256

        # Dense layers applied to every step's projected features.
        self.W_fc1 = init_weight((self.n_fc1, self.n_fc2), 'W_fc1', 'glorot')
        self.b_fc1 = init_bias(self.n_fc2, sample='zero')

        self.W_fc2 = init_weight((self.n_fc2, self.n_fc3), 'W_fc2', 'glorot')
        self.b_fc2 = init_bias(self.n_fc3, sample='zero')

        self.W_fc3 = init_weight((self.n_fc3, self.n_out), 'w_fc3', 'glorot')
        self.b_fc3 = init_bias(self.n_out, sample='zero')

        # Recurrent weights: reset gate (r), update gate (z), candidate state (h).
        self.W_xr = init_weight((self.n_in, self.n_lstm), 'W_xr', 'glorot')
        self.W_hr = init_weight((self.n_lstm, self.n_lstm), 'W_hr', 'ortho')
        self.b_r = init_bias(self.n_lstm, sample='zero')
        self.W_xz = init_weight((self.n_in, self.n_lstm), 'W_xz', 'glorot')
        self.W_hz = init_weight((self.n_lstm, self.n_lstm), 'W_hz', 'ortho')
        self.b_z = init_bias(self.n_lstm, sample='zero')
        self.W_xh = init_weight((self.n_in, self.n_lstm), 'W_xh', 'glorot')
        self.W_hh = init_weight((self.n_lstm, self.n_lstm), 'W_hh', 'ortho')
        self.b_h = init_bias(self.n_lstm, sample='zero')
        # Projection from the hidden state to the first dense layer's input.
        self.W_hy = init_weight((self.n_lstm, self.n_fc1), 'W_hy', 'glorot')
        self.b_y = init_bias(self.n_fc1, sample='zero')

        self.params = [
            self.W_xr, self.W_hr, self.b_r, self.W_xz, self.W_hz, self.b_z,
            self.W_xh, self.W_hh, self.b_h, self.W_hy, self.b_y, self.W_fc1,
            self.b_fc1, self.W_fc2, self.b_fc2, self.W_fc3, self.b_fc3
        ]

        def step_gru(x_t, h_tm1):
            # One gated recurrent step: returns the new hidden state and its
            # tanh projection to n_fc1 features.
            r_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xr) + T.dot(h_tm1, self.W_hr) + self.b_r)
            z_t = T.nnet.sigmoid(
                T.dot(x_t, self.W_xz) + T.dot(h_tm1, self.W_hz) + self.b_z)
            h_t = T.tanh(
                T.dot(x_t, self.W_xh) + T.dot((r_t * h_tm1), self.W_hh) +
                self.b_h)
            # Blend candidate and previous state with the update gate.
            hh_t = z_t * h_t + (1 - z_t) * h_tm1
            y_t = T.tanh(T.dot(hh_t, self.W_hy) + self.b_y)
            return [hh_t, y_t]

        X = T.tensor3()  # batch of sequence of vector
        Y = T.tensor3()  # batch of sequence of vector
        h0 = shared(np.zeros(shape=(batch_size, self.n_lstm),
                             dtype=dtype))  # initial hidden state

        # dimshuffle(1, 0, 2) moves the sequence axis first so scan iterates
        # over steps. Only the hidden state is fed back (outputs_info None for
        # the projection); the stacked hidden states are not used afterwards.
        [_, y_vals], _ = theano.scan(fn=step_gru,
                                     sequences=X.dimshuffle(1, 0, 2),
                                     outputs_info=[h0, None])

        # Dense stack applied to the projected features of every step.
        fc1_out = T.tanh(T.dot(y_vals, self.W_fc1) + self.b_fc1)
        fc2_out = T.tanh(T.dot(fc1_out, self.W_fc2) + self.b_fc2)
        fc3_out = T.tanh(T.dot(fc2_out, self.W_fc3) + self.b_fc3)

        # Back to batch-first layout; set before get_err_fn, which receives self.
        self.output = fc3_out.dimshuffle(1, 0, 2)

        cost = get_err_fn(self, cost_function, Y)
        _optimizer = optimizer(cost, self.params, lr=lr)

        self.train = theano.function(inputs=[X, Y],
                                     outputs=cost,
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X],
                                           outputs=self.output,
                                           allow_input_downcast=True)

        # Trainable scalar count matching self.params. The previous formula was
        # the four-gate LSTM count and ignored the dense layers entirely.
        # Assumes init_bias(k) allocates k values.
        n_recurrent = 3 * (n_in * n_lstm + n_lstm * n_lstm + n_lstm)
        n_projection = n_lstm * self.n_fc1 + self.n_fc1
        n_dense = (self.n_fc1 * self.n_fc2 + self.n_fc2
                   + self.n_fc2 * self.n_fc3 + self.n_fc3
                   + self.n_fc3 * n_out + n_out)
        self.n_param = n_recurrent + n_projection + n_dense