Example #1
    def get_updates(self, params, cost):
        grads = self.get_gradients(cost, params)
        accumulators = [shared_zeros(p.get_value().shape) for p in params]
        delta_accumulators = [shared_zeros(p.get_value().shape) for p in params]
        updates = []

        for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
            new_a = self.rho * a + (1 - self.rho) * g ** 2 # update accumulator
            updates.append((a, new_a))

            # use the new accumulator and the *old* delta_accumulator
            update = g * T.sqrt(d_a + self.epsilon) / T.sqrt(new_a + self.epsilon)

            new_p = p - self.lr * update
            updates.append((p, new_p))

            # update delta_accumulator
            new_d_a = self.rho * d_a + (1 - self.rho) * update ** 2
            updates.append((d_a, new_d_a))
        return updates
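
This snippet is the AdaDelta update rule: one accumulator tracks a decayed mean of squared gradients, a second tracks a decayed mean of squared updates, and the ratio of their square roots scales each step. Every example on this page relies on a `shared_zeros` helper that is not shown; a minimal sketch, assuming Theano shared variables (as the `T.*` calls in the snippets suggest):

    # Minimal sketch of the shared_zeros helper the examples rely on
    # (an assumption; the helper itself is not shown in the source).
    import numpy as np
    import theano

    def shared_zeros(shape, dtype=theano.config.floatX, name=None):
        # Allocate a zero-filled Theano shared variable of the given shape.
        return theano.shared(np.zeros(shape, dtype=dtype), name=name)
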
Example #2
    def __init__(self, input_shape, epsilon=1e-6, weights=None):
        self.init = initializations.get("uniform")
        self.input_shape = input_shape
        self.epsilon = epsilon

        self.gamma = self.init((self.input_shape))
        self.beta = shared_zeros(self.input_shape)

        self.params = [self.gamma, self.beta]
        if weights is not None:
            self.set_weights(weights)
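
Here `gamma` and `beta` are the learnable scale and shift of a batch-normalization layer. A sketch of the forward pass such a layer would compute, assuming a hypothetical `get_output` method (the rest of the class body is not shown in the source):

    # Hypothetical forward pass for the layer above: normalize per
    # feature over the batch, then rescale with the learned gamma and beta.
    import theano.tensor as T

    def get_output(self, X):
        mean = X.mean(axis=0, keepdims=True)
        std = T.sqrt(X.var(axis=0, keepdims=True) + self.epsilon)
        X_normed = (X - mean) / std
        return self.gamma * X_normed + self.beta
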
Example #3
    def __init__(self, input_dim, output_dim=128, 
        init='uniform', inner_init='orthogonal', 
        activation='tanh', inner_activation='hard_sigmoid',
        truncate_gradient=-1, weights=None, return_sequences=False):

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences

        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.activation = activations.get(activation)
        self.inner_activation = activations.get(inner_activation)
        self.input = T.matrix()

        self.W_i = self.init((self.input_dim, self.output_dim))
        self.U_i = self.inner_init((self.output_dim, self.output_dim))
        self.b_i = shared_zeros((self.output_dim))

        self.W_f = self.init((self.input_dim, self.output_dim))
        self.U_f = self.inner_init((self.output_dim, self.output_dim))
        self.b_f = shared_zeros((self.output_dim))

        self.W_c = self.init((self.input_dim, self.output_dim))
        self.U_c = self.inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros((self.output_dim))

        self.W_o = self.init((self.input_dim, self.output_dim))
        self.U_o = self.inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros((self.output_dim))

        self.params = [
            self.W_i, self.U_i, self.b_i,
            self.W_c, self.U_c, self.b_c,
            self.W_f, self.U_f, self.b_f,
            self.W_o, self.U_o, self.b_o,
        ]

        if weights is not None:
            self.set_weights(weights)
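
The four weight triples correspond to the LSTM input (`i`), forget (`f`), cell (`c`), and output (`o`) gates. A sketch of the per-timestep recurrence these parameters would drive, assuming a hypothetical `_step` helper (the original scan-based loop is not shown):

    # Hypothetical single LSTM step using the parameters above.
    import theano.tensor as T

    def _step(self, x_t, h_tm1, c_tm1):
        # Gate activations from the current input and previous hidden state.
        i = self.inner_activation(T.dot(x_t, self.W_i) + T.dot(h_tm1, self.U_i) + self.b_i)
        f = self.inner_activation(T.dot(x_t, self.W_f) + T.dot(h_tm1, self.U_f) + self.b_f)
        o = self.inner_activation(T.dot(x_t, self.W_o) + T.dot(h_tm1, self.U_o) + self.b_o)
        # New cell state mixes the retained old state and the candidate update.
        c = f * c_tm1 + i * self.activation(T.dot(x_t, self.W_c) + T.dot(h_tm1, self.U_c) + self.b_c)
        h = o * self.activation(c)
        return h, c
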
Example #4
    def get_updates(self, params, cost):
        grads = self.get_gradients(cost, params)
        accumulators = [shared_zeros(p.get_value().shape) for p in params]
        updates = []

        for p, g, a in zip(params, grads, accumulators):
            new_a = a + g**2  # update accumulator
            updates.append((a, new_a))

            new_p = p - self.lr * g / T.sqrt(new_a + self.epsilon)
            updates.append((p, new_p))
        return updates
Example #5
    def get_updates(self, params, cost):
        grads = self.get_gradients(cost, params)
        accumulators = [shared_zeros(p.get_value().shape) for p in params]
        updates = []

        for p, g, a in zip(params, grads, accumulators):
            new_a = a + g ** 2 # update accumulator
            updates.append((a, new_a))

            new_p = p - self.lr * g / T.sqrt(new_a + self.epsilon)
            updates.append((p, new_p))
        return updates
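
Examples #4 and #5 are the same Adagrad rule: the accumulator sums squared gradients without decay, so the effective step size `lr / sqrt(sum(g**2) + epsilon)` only ever shrinks. A tiny numeric illustration (plain NumPy, independent of the Theano code above):

    # Adagrad step size shrinking as the accumulator grows.
    import numpy as np

    lr, eps = 0.01, 1e-6
    a = 0.0
    for g in [1.0, 1.0, 1.0]:
        a += g ** 2
        print(lr * g / np.sqrt(a + eps))  # 0.0100, 0.0071, 0.0058 (decreasing)
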
Example #6
    def __init__(self, input_dim, output_dim, init='uniform', activation='linear', weights=None):
        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.input_dim = input_dim
        self.output_dim = output_dim

        self.input = T.matrix()
        self.W = self.init((self.input_dim, self.output_dim))
        self.b = shared_zeros((self.output_dim))

        self.params = [self.W, self.b]

        if weights is not None:
            self.set_weights(weights)
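
This is a fully connected layer. A sketch of its forward pass, assuming a hypothetical `get_output` method (not shown in the source):

    # Hypothetical forward pass for the dense layer above:
    # an affine transform followed by the configured activation.
    import theano.tensor as T

    def get_output(self, X):
        return self.activation(T.dot(X, self.W) + self.b)
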
Example #7
    def get_updates(self, loss, params, t=10):
        grads = self.get_gradients(loss, params)
        grad_c = []
        for g in grads:
            grad_c.append(clip_l2(g, t))
        accumulators = [shared_zeros(p.get_value().shape) for p in params]
        delta_accumulators = [shared_zeros(p.get_value().shape) for p in params]
        self.updates = []

        for p, g, a, d_a in zip(params, grad_c, accumulators, delta_accumulators):
            new_a = self.rho * a + (1 - self.rho) * g ** 2  # update accumulator
            self.updates.append((a, new_a))

            # use the new accumulator and the *old* delta_accumulator
            delta = (-g) * T.sqrt(d_a + self.epsilon) / T.sqrt(new_a + self.epsilon)
            new_p = p + self.lr * delta
            self.updates.append((p, new_p))

            # update delta_accumulator
            new_d_a = self.rho * d_a + (1 - self.rho) * delta ** 2
            self.updates.append((d_a, new_d_a))

        return self.updates
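
`clip_l2` is not shown in the source; presumably it rescales each gradient so its L2 norm stays below the threshold `t`. A sketch under that assumption:

    # Assumed implementation of the clip_l2 helper used above:
    # rescale g so that its L2 norm does not exceed the threshold t.
    import theano.tensor as T

    def clip_l2(g, t):
        norm = T.sqrt(T.sum(g ** 2))
        return T.switch(T.gt(norm, t), g * t / norm, g)
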
Example #8
    def init_params(self):

        W = self.init((self.in_dim, self.h_dim))
        self.W = np.concatenate([
            ortho_weight((self.in_dim, self.h_dim)),
            ortho_weight((self.in_dim, self.h_dim)),
            ortho_weight((self.in_dim, self.h_dim)),
            ortho_weight((self.in_dim, self.h_dim))
        ],
                                axis=1)
        self.W = sharedX(self.W)
        self.U = np.concatenate([
            ortho_weight((self.h_dim, self.h_dim)),
            ortho_weight((self.h_dim, self.h_dim)),
            ortho_weight((self.h_dim, self.h_dim)),
            ortho_weight((self.h_dim, self.h_dim))
        ],
                                axis=1)
        self.U = sharedX(self.U)

        self.b = shared_zeros((4 * self.h_dim, ))
        # attention params
        # e_i = Ua . tanh(Wpc.dot(context) + Uph.dot(h_tm1) + bc) + ba
        self.Wpc = self.init((self.ctx_dim, self.pctx_dim))
        self.Uph = self.init((self.h_dim, self.pctx_dim))
        self.bc = shared_zeros((self.pctx_dim, ))
        self.Ua = self.init((self.pctx_dim, 1))
        self.ba = shared_zeros((1, ))

        self.Wc = self.init((self.ctx_dim, self.h_dim * 4))
        if self.selector:
            self.Wsel = self.init(
                (self.h_dim, 1)
            )  # if Wsel were (h_dim, h_dim), would it select different features for different samples?
            self.bsel = shared_zeros((1, ))
        self.pack_params()
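
`ortho_weight` and `sharedX` are external helpers not shown in the source. Conventionally, `ortho_weight` draws a Gaussian random matrix and orthogonalizes it via SVD, and `sharedX` wraps a NumPy array in a Theano shared variable; a sketch under those assumptions:

    # Assumed implementations of the helpers used above.
    import numpy as np
    import theano

    def ortho_weight(shape):
        # Orthogonal initialization via SVD of a Gaussian random matrix.
        W = np.random.randn(*shape)
        u, _, v = np.linalg.svd(W, full_matrices=False)
        q = u if u.shape == shape else v
        return q.astype(theano.config.floatX)

    def sharedX(value, name=None):
        return theano.shared(np.asarray(value, dtype=theano.config.floatX), name=name)
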
Example #9
    def get_updates(self, params, cost):
        grads = self.get_gradients(cost, params)
        accumulators = [shared_zeros(p.get_value().shape) for p in params]
        delta_accumulators = [
            shared_zeros(p.get_value().shape) for p in params
        ]
        updates = []

        for p, g, a, d_a in zip(params, grads, accumulators,
                                delta_accumulators):
            new_a = self.rho * a + (1 - self.rho) * g**2  # update accumulator
            updates.append((a, new_a))

            # use the new accumulator and the *old* delta_accumulator
            update = g * T.sqrt(d_a + self.epsilon) / T.sqrt(new_a +
                                                             self.epsilon)

            new_p = p - self.lr * update
            updates.append((p, new_p))

            # update delta_accumulator
            new_d_a = self.rho * d_a + (1 - self.rho) * update**2
            updates.append((d_a, new_d_a))
        return updates
Example #10
    def get_updates(self, params, cost):
        grads = self.get_gradients(cost, params)
        lr = self.lr - self.decay * self.iterations
        updates = [(self.iterations, self.iterations + 1.)]

        for p, g in zip(params, grads):
            m = shared_zeros(p.get_value().shape)  # momentum
            v = self.momentum * m - lr * g  # velocity
            updates.append((m, v))

            if self.nesterov:
                new_p = p + self.momentum * v - lr * g
            else:
                new_p = p + v
            updates.append((p, new_p))
        return updates
Example #11
    def get_updates(self, params, cost):
        grads = self.get_gradients(cost, params)
        lr = self.lr * (1.0 / (1.0 + self.decay * self.iterations))
        updates = [(self.iterations, self.iterations+1.)]

        for p, g in zip(params, grads):
            m = shared_zeros(p.get_value().shape) # momentum
            v = self.momentum * m - lr * g # velocity
            updates.append((m, v)) 

            if self.nesterov:
                new_p = p + self.momentum * v - lr * g
            else:
                new_p = p + v
            updates.append((p, new_p))
        return updates
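
Examples #10 and #11 are the same momentum/Nesterov SGD and differ only in the decay schedule: #10 decays the learning rate linearly (`lr - decay * iterations`, which goes negative once `iterations > lr / decay`), while #11 uses the safer inverse-time decay `lr / (1 + decay * iterations)`. A quick numeric comparison:

    # Comparing the two decay schedules (plain Python, illustration only).
    lr, decay = 0.1, 0.01
    for it in [0, 10, 1000]:
        linear = lr - decay * it            # Example #10: 0.1, 0.0, -9.9
        inverse = lr / (1.0 + decay * it)   # Example #11: 0.1, 0.0909, 0.0091
        print(it, linear, inverse)
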
Example #12
    def __init__(self, input_dim, output_dim, 
        init='uniform', inner_init='orthogonal', activation='sigmoid', weights=None,
        truncate_gradient=-1, return_sequences=False):
        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.activation = activations.get(activation)
        self.return_sequences = return_sequences
        self.input = T.matrix()

        self.W = self.init((self.input_dim, self.output_dim))
        self.U = self.init((self.output_dim, self.output_dim))
        self.b = shared_zeros((self.output_dim))
        self.params = [self.W, self.U, self.b]

        if weights is not None:
            self.set_weights(weights)
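
With a single `W`, `U`, `b` triple this is a vanilla recurrent layer, where each step presumably computes `h_t = activation(x_t.W + h_tm1.U + b)`. A sketch of that step, assuming a hypothetical `_step` helper (the original scan loop is not shown):

    # Hypothetical per-timestep recurrence for the simple RNN above.
    import theano.tensor as T

    def _step(self, x_t, h_tm1):
        return self.activation(T.dot(x_t, self.W) + T.dot(h_tm1, self.U) + self.b)
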
Example #13
    def __init__(self, nb_filter, stack_size, nb_row, nb_col, 
        init='uniform', activation='linear', weights=None, 
        image_shape=None, border_mode='valid', subsample=(1,1)):

        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.subsample = subsample
        self.border_mode = border_mode
        self.image_shape = image_shape
        
        self.input = T.tensor4()
        self.W_shape = (nb_filter, stack_size, nb_row, nb_col)
        self.W = self.init(self.W_shape)
        self.b = shared_zeros((nb_filter,))

        self.params = [self.W, self.b]

        if weights is not None:
            self.set_weights(weights)
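
`W_shape` follows Theano's filter layout `(output channels, input channels, rows, cols)`. A sketch of the forward convolution, assuming a hypothetical `get_output` method (not shown in the source):

    # Hypothetical forward pass for the convolutional layer above, using
    # Theano's conv2d with the layer's border mode and stride.
    from theano.tensor.nnet import conv2d

    def get_output(self, X):
        conv_out = conv2d(X, self.W, border_mode=self.border_mode,
                          subsample=self.subsample)
        # Broadcast the per-filter bias over batch and spatial dimensions.
        return self.activation(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
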
Example #14
    def get_updates(self, loss, params, t=10):
        grads = self.get_gradients(loss, params)
        grads_clipped = []
        for g in grads:
            grads_clipped.append(clip_l2(g, t))
        lr = self.lr * (1.0 / (1.0 + self.decay * self.iterations))
        self.updates = [(self.iterations, self.iterations + 1.)]
        for p, g in zip(params, grads_clipped):
            m = shared_zeros(p.get_value().shape)  # momentum
            v = self.momentum * m - lr * g  # velocity
            self.updates.append((m, v))

            if self.nesterov:
                new_p = p + self.momentum * v - lr * g
            else:
                new_p = p + v
            self.updates.append((p, new_p))

        return self.updates
Example #15
    def init_params(self):

        W = self.init((self.in_dim, self.h_dim))
        self.W = np.concatenate([
            ortho_weight((self.in_dim, self.h_dim)),
            ortho_weight((self.in_dim, self.h_dim)),
            ortho_weight((self.in_dim, self.h_dim)),
            ortho_weight((self.in_dim, self.h_dim))
        ],
                                axis=1)
        self.W = sharedX(self.W)
        self.U = np.concatenate([
            ortho_weight((self.h_dim, self.h_dim)),
            ortho_weight((self.h_dim, self.h_dim)),
            ortho_weight((self.h_dim, self.h_dim)),
            ortho_weight((self.h_dim, self.h_dim))
        ],
                                axis=1)
        self.U = sharedX(self.U)

        self.b = shared_zeros((4 * self.h_dim, ))

        self.pack_params()
Example #16
def zero(shape):
    return shared_zeros(shape)
Example #17
    def __init__(self, input_shape):
        self.alphas = shared_zeros(input_shape)
        self.params = [self.alphas]
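
A layer whose only parameters are per-unit `alphas` is typical of PReLU, where `alphas` is the learned slope applied to negative inputs. A sketch of the activation under that reading (the original class body is not shown):

    # Hypothetical PReLU-style forward pass for the layer above: the positive
    # part passes through, the negative part is scaled by the learned alphas.
    def get_output(self, X):
        pos = (X + abs(X)) / 2.0
        neg = self.alphas * (X - abs(X)) / 2.0
        return pos + neg
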
Example #18
    def init_params(self):
        self.W = self.init((self.in_dim, self.out_dim))
        self.b = shared_zeros((self.out_dim, ))
        self.pack_params()