import theano
import theano.tensor as T


def __theano_build__(self):
    W_xh, W_hy, W_hh, b_h, b_y = self.params
    x = T.ivector('x')
    y = T.ivector('y')

    def forward_prop_step(x_t, h_t_prev, W_xh, W_hy, W_hh, b_h, b_y):
        # W_xh[:, x_t] selects the input column for token x_t (one-hot trick)
        h_t = T.tanh(W_xh[:, x_t] + T.dot(W_hh, h_t_prev) + b_h)
        o_t = T.nnet.softmax(T.dot(W_hy, h_t) + b_y)
        return [o_t[0], h_t]

    h_0 = T.zeros(self.n_hidden)
    [o, h], _ = theano.scan(forward_prop_step,
                            sequences=x,
                            outputs_info=[None, dict(initial=h_0)],
                            non_sequences=[W_xh, W_hy, W_hh, b_h, b_y],
                            truncate_gradient=self.bptt_truncate,
                            strict=True)

    prediction = T.argmax(o, axis=1)
    learning_rate = T.scalar('learning_rate')
    self.cost = T.sum(T.nnet.categorical_crossentropy(o, y))
    self.gparams, self.updates = gradientUpdates(self.cost, self.params, learning_rate)

    # Assign functions
    self.forward_propagation = theano.function([x], o)
    self.predict = theano.function([x], prediction)
    self.ce_error = theano.function([x, y], self.cost)
    self.bptt = theano.function([x, y], self.gparams)
    # SGD
    self.sgd_step = theano.function([x, y, learning_rate], [], updates=self.updates)
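
# All five builders call a gradientUpdates helper that is not defined in this
# section. A minimal sketch of what it presumably returns, assuming plain SGD
# (the real helper may clip gradients or use a different optimizer):
def gradientUpdates(cost, params, learning_rate):
    # One symbolic gradient per parameter, and one (param, param - lr * grad)
    # pair for the updates argument of theano.function.
    gparams = [T.grad(cost, param) for param in params]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(params, gparams)]
    return gparams, updates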
def __theano_build__(self):
    E, W_x, W_h, W_y, b, b_y = self.params
    x = T.ivector('x')
    y = T.ivector('y')

    def forward_prop_step(x_t, h1_t_prev, c1_t_prev, h2_t_prev, c2_t_prev,
                          E, W_x, W_h, W_y, b, b_y):
        # Layer 1: input, forget, output gates and candidate cell state
        x_layer = E[:, x_t]
        i1_t = T.nnet.sigmoid(T.dot(W_x[0], x_layer) + T.dot(W_h[0], h1_t_prev) + b[0])
        f1_t = T.nnet.sigmoid(T.dot(W_x[1], x_layer) + T.dot(W_h[1], h1_t_prev) + b[1])
        o1_t = T.nnet.sigmoid(T.dot(W_x[2], x_layer) + T.dot(W_h[2], h1_t_prev) + b[2])
        c1_t_cap = T.tanh(T.dot(W_x[3], x_layer) + T.dot(W_h[3], h1_t_prev) + b[3])
        c1_t = i1_t * c1_t_cap + f1_t * c1_t_prev
        h1_t = o1_t * T.tanh(c1_t)

        # Layer 2: takes layer 1's hidden state as its input
        x_layer = h1_t
        i2_t = T.nnet.sigmoid(T.dot(W_x[4], x_layer) + T.dot(W_h[4], h2_t_prev) + b[4])
        f2_t = T.nnet.sigmoid(T.dot(W_x[5], x_layer) + T.dot(W_h[5], h2_t_prev) + b[5])
        o2_t = T.nnet.sigmoid(T.dot(W_x[6], x_layer) + T.dot(W_h[6], h2_t_prev) + b[6])
        c2_t_cap = T.tanh(T.dot(W_x[7], x_layer) + T.dot(W_h[7], h2_t_prev) + b[7])
        c2_t = i2_t * c2_t_cap + f2_t * c2_t_prev
        h2_t = o2_t * T.tanh(c2_t)

        # The output layer reads the top layer's hidden state h2_t
        y_t = T.nnet.softmax(T.dot(W_y, h2_t) + b_y)
        return [y_t[0], h1_t, c1_t, h2_t, c2_t]

    h1_0 = T.zeros(self.n_hidden)
    c1_0 = T.zeros(self.n_hidden)
    h2_0 = T.zeros(self.n_hidden)
    c2_0 = T.zeros(self.n_hidden)
    [o, h1, c1, h2, c2], _ = theano.scan(
        forward_prop_step,
        sequences=x,
        outputs_info=[None, dict(initial=h1_0), dict(initial=c1_0),
                      dict(initial=h2_0), dict(initial=c2_0)],
        non_sequences=[E, W_x, W_h, W_y, b, b_y],
        truncate_gradient=self.bptt_truncate,
        strict=True)

    prediction = T.argmax(o, axis=1)
    learning_rate = T.scalar('learning_rate')
    self.cost = T.sum(T.nnet.categorical_crossentropy(o, y))
    self.gparams, self.updates = gradientUpdates(self.cost, self.params, learning_rate)

    # Assign functions
    self.forward_propagation = theano.function([x], o)
    self.predict = theano.function([x], prediction)
    self.ce_error = theano.function([x, y], self.cost)
    self.bptt = theano.function([x, y], self.gparams)
    # SGD
    self.sgd_step = theano.function([x, y, learning_rate], [], updates=self.updates)
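
# Hypothetical initializer matching the parameter layout unpacked above: the
# eight gate matrices (i, f, o, c for layer 1, then layer 2) are stacked along
# axis 0 of W_x and W_h. This sketch assumes the embedding size equals
# n_hidden so all eight W_x slices share one shape; the actual code may differ.
import numpy as np


def init_stacked_lstm_params(vocab_size, n_hidden, rng=np.random):
    def uniform(shape):
        bound = np.sqrt(1.0 / shape[-1])
        return theano.shared(
            rng.uniform(-bound, bound, shape).astype(theano.config.floatX))

    E = uniform((n_hidden, vocab_size))       # word embeddings, read as E[:, x_t]
    W_x = uniform((8, n_hidden, n_hidden))    # input weights, one slice per gate
    W_h = uniform((8, n_hidden, n_hidden))    # recurrent weights, one slice per gate
    W_y = uniform((vocab_size, n_hidden))     # output projection
    b = theano.shared(np.zeros((8, n_hidden), dtype=theano.config.floatX))
    b_y = theano.shared(np.zeros(vocab_size, dtype=theano.config.floatX))
    return [E, W_x, W_h, W_y, b, b_y]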
def __theano_build__(self):
    f_W_x, b_W_x, f_W_h, b_W_h, f_W_y, b_W_y, f_b, b_b, b_y = self.params
    f_x = T.ivector('f_x')
    b_x = f_x[::-1]  # the backward LSTM reads the same sequence reversed
    y = T.ivector('y')

    # Argument order must follow scan's convention: sequences, then the
    # recurrent states in outputs_info order (f_h, f_c, b_h, b_c), then
    # non_sequences.
    def forward_prop_step(f_x_t, b_x_t, f_h_t_prev, f_c_t_prev, b_h_t_prev, b_c_t_prev,
                          f_W_x, b_W_x, f_W_h, b_W_h, f_W_y, b_W_y, f_b, b_b, b_y):
        # Forward LSTM
        f_i_t = T.nnet.sigmoid(f_W_x[0, :, f_x_t] + T.dot(f_W_h[0], f_h_t_prev) + f_b[0])
        f_f_t = T.nnet.sigmoid(f_W_x[1, :, f_x_t] + T.dot(f_W_h[1], f_h_t_prev) + f_b[1])
        f_o_t = T.nnet.sigmoid(f_W_x[2, :, f_x_t] + T.dot(f_W_h[2], f_h_t_prev) + f_b[2])
        f_c_t_cap = T.tanh(f_W_x[3, :, f_x_t] + T.dot(f_W_h[3], f_h_t_prev) + f_b[3])
        f_c_t = f_i_t * f_c_t_cap + f_f_t * f_c_t_prev
        f_h_t = f_o_t * T.tanh(f_c_t)

        # Backward LSTM
        b_i_t = T.nnet.sigmoid(b_W_x[0, :, b_x_t] + T.dot(b_W_h[0], b_h_t_prev) + b_b[0])
        b_f_t = T.nnet.sigmoid(b_W_x[1, :, b_x_t] + T.dot(b_W_h[1], b_h_t_prev) + b_b[1])
        b_o_t = T.nnet.sigmoid(b_W_x[2, :, b_x_t] + T.dot(b_W_h[2], b_h_t_prev) + b_b[2])
        b_c_t_cap = T.tanh(b_W_x[3, :, b_x_t] + T.dot(b_W_h[3], b_h_t_prev) + b_b[3])
        b_c_t = b_i_t * b_c_t_cap + b_f_t * b_c_t_prev
        b_h_t = b_o_t * T.tanh(b_c_t)

        # Both directions are combined in the output layer
        y_t = T.nnet.softmax(T.dot(f_W_y, f_h_t) + T.dot(b_W_y, b_h_t) + b_y)
        return [y_t[0], f_h_t, f_c_t, b_h_t, b_c_t]

    f_h_0 = T.zeros(self.n_hidden)
    f_c_0 = T.zeros(self.n_hidden)
    b_h_0 = T.zeros(self.n_hidden)
    b_c_0 = T.zeros(self.n_hidden)
    [o, fh, fc, bh, bc], _ = theano.scan(
        forward_prop_step,
        sequences=[f_x, b_x],
        outputs_info=[None, dict(initial=f_h_0), dict(initial=f_c_0),
                      dict(initial=b_h_0), dict(initial=b_c_0)],
        non_sequences=[f_W_x, b_W_x, f_W_h, b_W_h, f_W_y, b_W_y, f_b, b_b, b_y],
        truncate_gradient=self.bptt_truncate,
        strict=True)

    prediction = T.argmax(o, axis=1)
    learning_rate = T.scalar('learning_rate')
    self.cost = T.sum(T.nnet.categorical_crossentropy(o, y))
    self.gparams, self.updates = gradientUpdates(self.cost, self.params, learning_rate)

    # Assign functions (f_x is the only symbolic input; b_x is derived from it)
    self.forward_propagation = theano.function([f_x], o)
    self.predict = theano.function([f_x], prediction)
    self.ce_error = theano.function([f_x, y], self.cost)
    self.bptt = theano.function([f_x, y], self.gparams)
    # SGD
    self.sgd_step = theano.function([f_x, y, learning_rate], [], updates=self.updates)
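
# Hypothetical smoke test for the bidirectional model; the class name and
# hyperparameters below are assumptions for illustration only:
#
#   model = BiLSTM(vocab_size=8000, n_hidden=128, bptt_truncate=-1)
#   sent = [0, 42, 7, 99, 1]                  # token ids (int32)
#   probs = model.forward_propagation(sent)   # (len(sent), vocab_size) softmax rows
#   print(model.predict(sent))                # argmax token id per position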
def __theano_build__(self):
    W_xz, W_xr, W_x_hcap, W_sz, W_sr, W_s_hcap, W_o, b_z, b_r, b_hcap, b_o = self.params
    x = T.ivector('x')
    y = T.ivector('y')

    def forward_prop_step(x_t, s_t_prev,
                          W_xz, W_xr, W_x_hcap, W_sz, W_sr, W_s_hcap, W_o,
                          b_z, b_r, b_hcap, b_o):
        # Update and reset gates
        z_t = T.nnet.sigmoid(W_xz[:, x_t] + T.dot(W_sz, s_t_prev) + b_z)
        r_t = T.nnet.sigmoid(W_xr[:, x_t] + T.dot(W_sr, s_t_prev) + b_r)
        # Candidate state, with the reset gate masking the previous state
        h_t_cap = T.tanh(W_x_hcap[:, x_t] + T.dot(W_s_hcap, (r_t * s_t_prev)) + b_hcap)
        # GRU update: s_t = (1 - z_t) * s_{t-1} + z_t * h_t_cap
        s_t = (T.ones_like(z_t) - z_t) * s_t_prev + z_t * h_t_cap
        o_t = T.nnet.softmax(T.dot(W_o, s_t) + b_o)
        return [o_t[0], s_t]

    h_0 = T.zeros(self.n_hidden)
    [o, h], _ = theano.scan(
        fn=forward_prop_step,
        sequences=x,
        outputs_info=[None, dict(initial=h_0)],
        non_sequences=[W_xz, W_xr, W_x_hcap, W_sz, W_sr, W_s_hcap, W_o,
                       b_z, b_r, b_hcap, b_o],
        truncate_gradient=self.bptt_truncate,
        strict=True)

    prediction = T.argmax(o, axis=1)
    learning_rate = T.scalar('learning_rate')
    self.cost = T.sum(T.nnet.categorical_crossentropy(o, y))
    self.gparams, self.updates = gradientUpdates(self.cost, self.params, learning_rate)

    # Assign functions
    self.forward_propagation = theano.function([x], o)
    self.predict = theano.function([x], prediction)
    self.ce_error = theano.function([x, y], self.cost)
    self.bptt = theano.function([x, y], self.gparams)
    # SGD
    self.sgd_step = theano.function([x, y, learning_rate], [], updates=self.updates)
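
# Gradient inspection through the compiled bptt function: a quick sketch for
# checking that truncated BPTT yields finite gradients and for watching
# vanishing/exploding magnitudes. Works for any of the models built here,
# assuming the shared parameters were created with name= set.
def check_gradients(model, x_seq, y_seq):
    grads = model.bptt(x_seq, y_seq)  # one numpy array per entry of model.params
    for param, grad in zip(model.params, grads):
        print('%s: max |grad| = %g' % (param.name, np.abs(grad).max()))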
def __theano_build__(self):
    W_x, W_h, W_y, b, b_y = self.params
    x = T.ivector('x')
    y = T.ivector('y')

    def forward_prop_step(x_t, h_t_prev, c_t_prev, W_x, W_h, W_y, b, b_y):
        # Input, forget, output gates and candidate cell state
        i_t = T.nnet.sigmoid(W_x[0, :, x_t] + T.dot(W_h[0], h_t_prev) + b[0])
        f_t = T.nnet.sigmoid(W_x[1, :, x_t] + T.dot(W_h[1], h_t_prev) + b[1])
        o_t = T.nnet.sigmoid(W_x[2, :, x_t] + T.dot(W_h[2], h_t_prev) + b[2])
        c_t_cap = T.tanh(W_x[3, :, x_t] + T.dot(W_h[3], h_t_prev) + b[3])
        c_t = i_t * c_t_cap + f_t * c_t_prev
        h_t = o_t * T.tanh(c_t)
        y_t = T.nnet.softmax(T.dot(W_y, h_t) + b_y)
        return [y_t[0], h_t, c_t]

    h_0 = T.zeros(self.n_hidden)
    c_0 = T.zeros(self.n_hidden)
    [o, h, c], _ = theano.scan(
        forward_prop_step,
        sequences=x,
        outputs_info=[None, dict(initial=h_0), dict(initial=c_0)],
        non_sequences=[W_x, W_h, W_y, b, b_y],
        truncate_gradient=self.bptt_truncate,
        strict=True)

    prediction = T.argmax(o, axis=1)
    learning_rate = T.scalar('learning_rate')
    self.cost = T.sum(T.nnet.categorical_crossentropy(o, y))
    self.gparams, self.updates = gradientUpdates(self.cost, self.params, learning_rate)

    # Assign functions
    self.forward_propagation = theano.function([x], o)
    self.predict = theano.function([x], prediction)
    self.ce_error = theano.function([x, y], self.cost)
    self.bptt = theano.function([x, y], self.gparams)
    # SGD
    self.sgd_step = theano.function([x, y, learning_rate], [], updates=self.updates)
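
# A minimal training loop over the compiled functions; the function name and
# the data preparation are assumptions for illustration. X_train and y_train
# are lists of int32 token-id vectors (inputs and next-token targets).
def train_sgd(model, X_train, y_train, learning_rate=0.005, n_epochs=10):
    for epoch in range(n_epochs):
        for x_seq, y_seq in zip(X_train, y_train):
            model.sgd_step(x_seq, y_seq, learning_rate)  # one update per sequence
        total = sum(model.ce_error(x_seq, y_seq)
                    for x_seq, y_seq in zip(X_train, y_train))
        n_words = sum(len(y_seq) for y_seq in y_train)
        print('epoch %d: mean cross-entropy %.4f' % (epoch, total / n_words))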