Exemple #1
0
    def updates(self, cost):
        """Build the update pairs for one adaptive-rate step on ``self.param``.

        The gradient of ``cost`` with respect to ``self.param`` and the value
        returned by ``hessian_diagonal`` are folded into three moving averages
        (gradient, squared gradient, absolute ``hessian_diagonal`` output)
        weighted by ``1 / self.tau``.  A per-step rate and a new ``tau`` are
        derived from those averages; whenever either expression evaluates to
        NaN or +/-inf the fallback (``self.learning_rate`` for the rate, the
        current ``self.tau`` for tau) is used instead.

        :param cost: symbolic cost expression to differentiate
            (presumably a Theano scalar -- confirm against the caller).
        :return: list of ``(variable, new_expression)`` tuples, in the order
            ``g_avg``, ``v_avg``, ``h_avg``, ``tau``, ``last_grad``,
            ``last_grad2``, ``last_rate``, ``param``.
        """
        first_deriv = T.grad(cost, self.param)
        second_deriv = hessian_diagonal(cost, self.param, grad=first_deriv)

        # Weights of the moving averages: the newest sample gets 1/tau,
        # the stored average keeps the remainder.
        fresh_weight = 1.0 / self.tau
        stale_weight = 1.0 - fresh_weight

        def blend(stored, sample):
            # Mix a stored average with the newest sample.
            return stale_weight * stored + fresh_weight * sample

        def guarded(candidate, fallback):
            # Substitute ``fallback`` wherever ``candidate`` is inf or NaN.
            return T.switch(
                T.isinf(candidate) | T.isnan(candidate), fallback, candidate)

        mean_grad = blend(self.g_avg, first_deriv)
        mean_sq_grad = blend(self.v_avg, first_deriv**2)
        mean_curv = blend(self.h_avg, T.abs_(second_deriv))

        step = guarded(
            (mean_grad**2) / (mean_sq_grad * mean_curv), self.learning_rate)
        memory = guarded(
            (1 - (mean_grad**2) / mean_sq_grad) * self.tau + 1, self.tau)

        return [
            (self.g_avg, mean_grad),
            (self.v_avg, mean_sq_grad),
            (self.h_avg, mean_curv),
            (self.tau, memory),
            (self.last_grad, first_deriv),
            (self.last_grad2, second_deriv),
            (self.last_rate, step),
            (self.param, self.param - step * first_deriv),
        ]
    def updates(self, cost):
        """Return the update pairs for one step of the adaptive-rate rule.

        Computes the gradient of ``cost`` w.r.t. ``self.param`` plus the
        result of ``hessian_diagonal``, refreshes the moving averages stored
        in ``self.g_avg``, ``self.v_avg`` and ``self.h_avg`` using the weight
        ``1 / self.tau``, and derives a step size and a new ``tau`` from them.
        A NaN or infinite step size is replaced by ``self.learning_rate``;
        a NaN or infinite new ``tau`` is replaced by the current ``self.tau``.

        :param cost: symbolic cost expression
            (presumably a Theano scalar -- confirm against the caller).
        :return: list of ``(variable, new_expression)`` tuples covering the
            three averages, ``tau``, the ``last_*`` bookkeeping variables and
            finally ``self.param`` itself.
        """
        g = T.grad(cost, self.param)
        h = hessian_diagonal(cost, self.param, grad=g)

        # Memory constants: how much of the new sample to mix in and how
        # much of the previous average to keep.
        mix_in = 1.0 / self.tau
        keep = 1.0 - mix_in

        next_g_avg = keep * self.g_avg + mix_in * g              # gradient
        next_v_avg = keep * self.v_avg + mix_in * g ** 2         # squared gradient
        next_h_avg = keep * self.h_avg + mix_in * T.abs_(h)      # |hessian_diagonal|

        # Step size, with a fallback for non-finite values.
        raw_rate = (next_g_avg ** 2) / (next_v_avg * next_h_avg)
        rate_is_bad = T.isinf(raw_rate) | T.isnan(raw_rate)
        step_size = T.switch(rate_is_bad, self.learning_rate, raw_rate)

        # Memory size, left unchanged when the new value is non-finite.
        raw_tau = (1 - (next_g_avg ** 2) / next_v_avg) * self.tau + 1
        tau_is_bad = T.isnan(raw_tau) | T.isinf(raw_tau)
        next_tau = T.switch(tau_is_bad, self.tau, raw_tau)

        pairs = []
        pairs.append((self.g_avg, next_g_avg))
        pairs.append((self.v_avg, next_v_avg))
        pairs.append((self.h_avg, next_h_avg))
        pairs.append((self.tau, next_tau))
        pairs.append((self.last_grad, g))
        pairs.append((self.last_grad2, h))
        pairs.append((self.last_rate, step_size))
        pairs.append((self.param, self.param - step_size * g))
        return pairs
Exemple #3
0
 def burn_in_updates(self, cost):
     """Build the update pairs that accumulate raw gradient statistics.

     Each returned pair adds the newest sample to a running sum: the
     gradient of ``cost`` w.r.t. ``self.param`` into ``self.g_avg``, the
     absolute value of the ``hessian_diagonal`` result into ``self.h_avg``,
     the squared gradient into ``self.v_avg``, and ``1`` into ``self.N``.
     A progress line naming ``self.param`` is printed every time this
     method is called.

     :param cost: symbolic cost expression to differentiate
         (presumably a Theano scalar -- confirm against the caller).
     :return: list of ``(variable, new_expression)`` tuples in the order
         ``g_avg``, ``h_avg``, ``v_avg``, ``N``.
     """
     grad = T.grad(cost, self.param)
     grad2 = hessian_diagonal(cost, self.param, grad=grad)
     # Parenthesised single-argument form: prints the same text under the
     # Python 2 print statement and the Python 3 print function, whereas
     # the bare statement form is a SyntaxError on Python 3.
     print('burn in updates for %s' % self.param)
     return [(self.g_avg, self.g_avg + grad),
             (self.h_avg, self.h_avg + T.abs_(grad2)),
             (self.v_avg, self.v_avg + grad**2),
             (self.N, self.N + 1)]
 def burn_in_updates(self, cost):
     """Return the accumulation updates used while gathering statistics.

     The gradient of ``cost`` with respect to ``self.param`` is summed into
     ``self.g_avg``, its square into ``self.v_avg``, the absolute value of
     the ``hessian_diagonal`` output into ``self.h_avg``, and ``self.N`` is
     incremented by one.  Calling this method also prints a line that
     names ``self.param``.

     :param cost: symbolic cost expression
         (presumably a Theano scalar -- confirm against the caller).
     :return: list of ``(variable, new_expression)`` tuples in the order
         ``g_avg``, ``h_avg``, ``v_avg``, ``N``.
     """
     grad = T.grad(cost, self.param)
     grad2 = hessian_diagonal(cost, self.param, grad=grad)
     # Use the call form of print so the module also parses on Python 3;
     # with a single argument the output on Python 2 is unchanged.
     print('burn in updates for %s' % self.param)
     return [
         (self.g_avg, self.g_avg + grad),
         (self.h_avg, self.h_avg + T.abs_(grad2)),
         (self.v_avg, self.v_avg + grad**2),
         (self.N, self.N + 1),
     ]