Example #1
 def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, eps=1e-8, **kwargs):
     super(Adam, self).__init__(**kwargs)
     self.lr = sharedX(learning_rate)
     self.iter = sharedX(0)
     self.beta_1 = sharedX(beta_1)
     self.beta_2 = sharedX(beta_2)
     self.eps = sharedX(eps)
Example #2
 def __init__(self, learning_rate=0.9, momentum=0., k=1.0, lr_decay_factor=0.9, decay_batch=10000):
     """
     dx = -learning_rate / sqrt(k + sum(gparam^2)) * gparam
     ref : Chris Dyer : Notes on AdaGrad
     """
     self.lr = sharedX(learning_rate)
     self.mom = sharedX(momentum)
     self.k = sharedX(k)
     # store the decay schedule; the original silently dropped these two arguments
     self.batch = sharedX(0)
     self.decay_batch = sharedX(decay_batch)
     self.lr_decay_factor = asfloatX(lr_decay_factor)
Example #3
 def __init__(self, learning_rate=0.9, momentum=0., k=1.0, **kwargs):
     """
     dx = -learning_rate / sqrt(k + sum(gparam^2)) * gparam
     ref : Chris Dyer : Notes on AdaGrad
     """
     super(AdaGrad, self).__init__(**kwargs)
     self.lr = sharedX(learning_rate)
     self.mom = sharedX(momentum)
     self.k = sharedX(k)
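The two AdaGrad constructors above only store the hyperparameters; the rule in their docstrings is easy to sanity-check in plain NumPy. A minimal sketch with hypothetical names (`adagrad_step`, `accum`), where `k` is the damping constant from the docstring:

import numpy as np

def adagrad_step(param, grad, accum, learning_rate=0.9, k=1.0):
    """One AdaGrad step: dx = -learning_rate / sqrt(k + sum(grad^2)) * grad."""
    accum += grad ** 2                              # running sum of squared gradients
    dx = -learning_rate / np.sqrt(k + accum) * grad
    return param + dx, accum

# usage: the accumulator grows over time, shrinking the effective step size
param, accum = np.array([1.0, -2.0]), np.zeros(2)
for _ in range(3):
    param, accum = adagrad_step(param, grad=2 * param, accum=accum)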
Example #4
 def __init__(self, eps=1e-6, rho=0.95):
     """
     dx_t = -rms(dx_{t-1}) / rms(gparam_t) * gparam_t
     rms(dx) = sqrt(E_t(dx^2) + eps)
     E_t(dx^2) = rho E_{t-1}(dx^2) + (1-rho) dx^2
     ref : Matthew D. Zeiler: ADADELTA: AN ADAPTIVE LEARNING RATE METHOD
     """
     self.eps = sharedX(eps)
     self.rho = sharedX(rho)
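The AdaDelta docstring packs three formulas into a few lines; a minimal NumPy sketch of one step, assuming the standard reading of Zeiler's paper (all names here are hypothetical, not library code):

import numpy as np

def adadelta_step(param, grad, E_g2, E_dx2, rho=0.95, eps=1e-6):
    """One AdaDelta step:
    E_t(g^2) = rho * E_{t-1}(g^2) + (1 - rho) * g^2
    dx_t     = -rms(dx_{t-1}) / rms(g_t) * g_t,  with rms(x) = sqrt(E(x^2) + eps)
    """
    E_g2 = rho * E_g2 + (1 - rho) * grad ** 2           # accumulate squared gradients
    dx = -np.sqrt(E_dx2 + eps) / np.sqrt(E_g2 + eps) * grad
    E_dx2 = rho * E_dx2 + (1 - rho) * dx ** 2           # accumulate squared updates
    return param + dx, E_g2, E_dx2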
Example #5
 def update(self, deltas, params, gparams):
     t = self.iter + 1
     # bias-corrected step size from the Adam paper
     lr_t = self.lr * T.sqrt(1 - self.beta_2**t) / (1 - self.beta_1**t)
     updates = []
     for delta, param, gparam in zip(deltas, params, gparams):
         # per-parameter first- and second-moment accumulators, initialised to zero
         m = sharedX(param.get_value() * 0.)
         v = sharedX(param.get_value() * 0.)
         m_t = (self.beta_1 * m) + (1 - self.beta_1) * gparam
         v_t = (self.beta_2 * v) + (1 - self.beta_2) * gparam**2
         param_t = param - lr_t * m_t / (T.sqrt(v_t) + self.eps)
         updates.append((m, m_t))
         updates.append((v, v_t))
         updates.append((param, param_t))
     # advance the timestep, otherwise the bias correction stays frozen at t=1
     updates.append((self.iter, t))
     updates += self.decay()
     return updates
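Because the symbolic graph above is hard to step through, here is a plain-NumPy sketch of the same Adam update for checking the algebra (hypothetical names, not part of the library):

import numpy as np

def adam_step(param, grad, m, v, t, lr=0.001, beta_1=0.9, beta_2=0.999, eps=1e-8):
    """One Adam step mirroring the symbolic update built above."""
    t += 1
    m = beta_1 * m + (1 - beta_1) * grad                       # first-moment estimate
    v = beta_2 * v + (1 - beta_2) * grad ** 2                  # second-moment estimate
    lr_t = lr * np.sqrt(1 - beta_2 ** t) / (1 - beta_1 ** t)   # bias correction
    param = param - lr_t * m / (np.sqrt(v) + eps)
    return param, m, v, t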
Example #6
 def __call__(self, dim, name='W', **kwargs):
     if len(dim) != 2 or dim[0] != dim[1]:
         raise ValueError(
             "Identity matrix initialization can only be used for 2D square matrices"
         )
     return sharedX(name=name, value=self.scale * np.identity(dim[0]), **kwargs)
Example #7
 def decay(self):
     updates = []
     # once decay_batch batches have passed, reset the counter and scale down the lr
     new_batch = ifelse(T.gt(self.batch, self.decay_batch), sharedX(0), self.batch+1)
     new_lr = ifelse(T.gt(self.batch, self.decay_batch), self.lr*self.lr_decay_factor, self.lr)
     updates.append((self.batch, new_batch))
     updates.append((self.lr, new_lr))
     return updates
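The schedule this graph encodes is plain step decay; a hypothetical pure-Python equivalent makes the control flow explicit:

def step_decay(lr, batch, decay_batch=10000, lr_decay_factor=0.9):
    """Multiply lr by lr_decay_factor every decay_batch batches, as in decay() above."""
    if batch > decay_batch:
        return lr * lr_decay_factor, 0   # decay the lr and reset the counter
    return lr, batch + 1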
Example #8
    def update(self, delta, gparam):
        # T.gt builds a symbolic expression, so the branch must be taken with
        # ifelse inside the update graph rather than a Python `if`
        new_batch = ifelse(T.gt(self.batch, self.decay_batch), sharedX(0), self.batch + 1)
        new_lr = ifelse(T.gt(self.batch, self.decay_batch), self.lr * self.lr_decay_factor, self.lr)
        return [(self.batch, new_batch),
                (self.lr, new_lr),
                (delta, self.mom * delta - self.lr * gparam)]
Example #9
    def __init__(self, input_dim, output_dim, init=UniformWeight(scale=0.1), weights=None):
        self.input_dim = input_dim
        self.output_dim = output_dim
        if weights is None:
            # draw fresh weights from the initializer
            self.W = init((input_dim, output_dim))
        else:
            # wrap the supplied weights in a shared variable
            self.W = sharedX(weights)
        self.params = [self.W]
Example #10
 def __init__(self, dim, alpha=0.2):
     '''
     y = wx + b
     z = y if y > 0 else alpha * y
     alpha: the slope of the negative part, learned by backpropagation
     '''
     self.alpha = sharedX(np.ones(dim) * alpha, name='PRELU_gradient')
     self.params = [self.alpha]
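The PReLU class above only stores the learnable slope; a minimal NumPy sketch of the forward pass the docstring describes (names are hypothetical):

import numpy as np

def prelu(y, alpha):
    """z = y if y > 0 else alpha * y, with one learnable alpha per dimension."""
    return np.where(y > 0, y, alpha * y)

y = np.array([-1.5, 0.3, 2.0])
print(prelu(y, alpha=np.full(3, 0.2)))   # [-0.3  0.3  2. ]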
Example #11
 def __call__(self, dim, name='W', **kwargs):
     ''' Orthogonal initialization, adapted from Lasagne.
     '''
     flat_shape = (dim[0], np.prod(dim[1:]))
     a = np.random.normal(0.0, 1.0, flat_shape)
     u, _, v = np.linalg.svd(a, full_matrices=False)
     # pick the SVD factor with the correct shape; both are orthonormal
     q = u if u.shape == flat_shape else v
     q = q.reshape(dim)
     return sharedX(name=name,
                    value=self.scale * q[:dim[0], :dim[1]],
                    borrow=True,
                    **kwargs)
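The initializer relies on the SVD factor being orthonormal; a quick standalone NumPy check of that property (not library code):

import numpy as np

dim = (64, 64)
a = np.random.normal(0.0, 1.0, dim)
u, _, v = np.linalg.svd(a, full_matrices=False)
q = u if u.shape == dim else v
# columns are orthonormal: q.T @ q should be the identity up to float error
assert np.allclose(q.T @ q, np.eye(dim[1]), atol=1e-6)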
Example #12
 def __call__(self, dim, name='W', **kwargs):
     W_values = np.random.normal(loc=self.mean, scale=self.std, size=dim)
     return sharedX(name=name, value=W_values, borrow=True, **kwargs)
Example #13
 def __init__(self, alpha=0.01):
     self.alpha = sharedX(alpha)
     self.params = []
Example #14
 def __call__(self, dim, name='W', **kwargs):
     fan_in, fan_out = get_fans(dim)
     # Glorot uniform bound sqrt(6 / (fan_in + fan_out)), scaled by 4 for sigmoid units
     W_values = np.random.uniform(low=-4 * np.sqrt(6. / (fan_in + fan_out)),
                                  high=4 * np.sqrt(6. / (fan_in + fan_out)),
                                  size=dim)
     return sharedX(name=name, value=W_values, borrow=True, **kwargs)
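`get_fans` is not shown in these examples; a plausible minimal stand-in for 2D weight matrices is sketched below, together with the sigmoid-scaled Glorot bound the initializer uses (the `get_fans` body here is an assumption):

import numpy as np

def get_fans(dim):
    # assumed behaviour for a 2D weight matrix of shape (fan_in, fan_out)
    fan_in, fan_out = dim[0], dim[1]
    return fan_in, fan_out

fan_in, fan_out = get_fans((784, 256))
bound = 4 * np.sqrt(6. / (fan_in + fan_out))
W = np.random.uniform(-bound, bound, size=(784, 256))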
Example #15
 def __init__(self, learning_rate=0.01, momentum=0.9, lr_decay_factor=0.9, decay_batch=10000):
     self.lr = sharedX(learning_rate)
     self.mom = sharedX(momentum)
     self.batch = sharedX(0)
     self.decay_batch = sharedX(decay_batch)
     self.lr_decay_factor = asfloatX(lr_decay_factor)
Example #16
 def __call__(self, dim, name='W', **kwargs):
     W_values = np.random.uniform(low=-self.scale,
                                  high=self.scale,
                                  size=dim)
     return sharedX(name=name, value=W_values, borrow=True, **kwargs)
Example #17
 def __init__(self, learning_rate=0.01, momentum=0.9, **kwargs):
     super(SGD, self).__init__(**kwargs)
     self.lr = sharedX(learning_rate)
     self.mom = sharedX(momentum)
Example #18
 def __init__(self, lr_decay_factor=1.0, decay_batch=10000):
     self.batch = sharedX(0)
     self.decay_batch = sharedX(decay_batch)
     self.lr_decay_factor = asfloatX(lr_decay_factor)
Example #19
 def __init__(self, learning_rate=0.01, eps=1e-6, rho=0.9, **kwargs):
     super(RMSprop, self).__init__(**kwargs)
     self.lr = sharedX(learning_rate)
     self.eps = sharedX(eps)
     self.rho = sharedX(rho)
Example #20
 def _layer_stats(self, state_below, layer_output):
     return [('moving_mean', T.mean(self.moving_mean)),
             ('moving_var', T.mean(self.moving_var)),
             ('gamma_mean', T.mean(self.gamma)),
             ('beta_mean', T.mean(self.beta)),
             ('memory', sharedX(self.mem))]