Beispiel #1
0
    def forward_pass(self, X, training=True):
        """Normalize ``X`` along axis 0, then scale by gamma and shift by beta.

        Args:
            X: Input handed to ``R.mean`` / ``R.variance`` with ``axis=0``.
            training: When truthy (and ``self.trainable`` is truthy as well)
                the mean/variance of ``X`` itself are used and blended into
                the running statistics. Otherwise the stored running
                statistics are used for normalization.

        Returns:
            ``self.gamma * X_norm + self.beta`` where ``X_norm`` is the
            centered input multiplied by the inverse standard deviation.

        Side effects:
            May set/update ``self.running_mean`` and ``self.running_var``;
            always stores ``self.X_centered`` and ``self.stddev_inv`` (kept
            for the backward pass).
        """
        # On the very first call, seed the running statistics from this input.
        if self.running_mean is None:
            self.running_mean = R.mean(X, axis=0)
            self.running_var = R.variance(X, axis=0)

        use_batch_stats = training and self.trainable
        if not use_batch_stats:
            # Inference (or frozen layer): rely on the accumulated statistics.
            mean, var = self.running_mean, self.running_var
        else:
            mean = R.mean(X, axis=0)
            var = R.variance(X, axis=0)
            # Exponential moving average: momentum weights the old value,
            # (1 - momentum) weights the statistic of the current input.
            self.running_mean = (
                self.momentum * self.running_mean
                + (R.t(1) - self.momentum) * mean
            )
            self.running_var = (
                self.momentum * self.running_var
                + (R.t(1) - self.momentum) * var
            )

        # Keep the intermediates the backward pass needs.
        self.X_centered = X - mean
        # eps is added under the square root so the division stays finite.
        self.stddev_inv = R.div(R.t(1), R.square_root(var + self.eps))

        return self.gamma * (self.X_centered * self.stddev_inv) + self.beta
Beispiel #2
0
 def initialize(self, optimizer):
     """Create the weight matrix, the bias row and their optimizers.

     ``self.W`` is drawn uniformly from ``[-limit, limit)`` with
     ``limit = 1 / sqrt(input_shape[0])`` and has shape
     ``(input_shape[0], n_units)``; ``self.w0`` is a ``(1, n_units)``
     array of zeros. Both are wrapped with ``R.t``.

     Args:
         optimizer: Optimizer object; ``self.W_opt`` and ``self.w0_opt``
             each receive their own shallow copy (``copy.copy``).
     """
     fan_in = int(self.input_shape[0])

     # Build the bound with R ops, then call the result to obtain the value
     # passed on to NumPy (presumably this evaluates the op; confirm
     # against the R library).
     bound_op = R.div(R.t(1), R.square_root(R.t(fan_in)))
     bound = bound_op()

     initial_weights = np.random.uniform(
         low=-bound, high=bound, size=(fan_in, self.n_units)
     )
     self.W = R.t(initial_weights)
     self.w0 = R.t(np.zeros((1, self.n_units)))

     # Separate optimizer copies for the weights and the bias.
     self.W_opt = copy.copy(optimizer)
     self.w0_opt = copy.copy(optimizer)
Beispiel #3
0
    def update(self, w, grad_wrt_w):
        """Return ``w`` after one step scaled by a running gradient magnitude.

        Args:
            w: Current weights.
            grad_wrt_w: Gradient with respect to ``w``. It is also called
                (``grad_wrt_w()``) on the first step to read its shape.

        Returns:
            ``w - learning_rate * grad_wrt_w / sqrt(Eg + eps)``.

        Side effects:
            Lazily creates ``self.Eg`` (zeros shaped like the gradient) and
            updates it to ``rho * Eg + (1 - rho) * grad_wrt_w ** 2``.
        """
        # First call: start the running average at zero.
        if self.Eg is None:
            grad_shape = np.shape(grad_wrt_w())
            self.Eg = R.t(np.zeros(grad_shape))

        # Running average of squared gradients.
        decayed_history = self.rho * self.Eg
        new_contribution = (R.t(1) - self.rho) * R.pow(grad_wrt_w, R.t(2))
        self.Eg = decayed_history + new_contribution

        # Each weight's step is divided by the root of its recent squared
        # gradient average; eps sits under the root to avoid division by 0.
        magnitude = R.square_root(self.Eg + self.eps)
        step = self.learning_rate * R.div(grad_wrt_w, magnitude)
        return w - step
Beispiel #4
0
 def initialize(self, optimizer):
     """Create the filter weights, the per-filter bias and their optimizers.

     ``self.W`` is drawn uniformly from ``[-limit, limit)`` with
     ``limit = 1 / sqrt(prod(filter_shape))`` and has shape
     ``(n_filters, input_shape[0], filter_height, filter_width)``;
     ``self.w0`` is an ``(n_filters, 1)`` array of zeros. Both are wrapped
     with ``R.t``.

     Args:
         optimizer: Optimizer object; ``self.W_opt`` and ``self.w0_opt``
             each receive their own shallow copy (``copy.copy``).
     """
     kernel_h, kernel_w = self.filter_shape
     in_channels = self.input_shape[0]

     # Build the bound with R ops, then call the result to obtain the value
     # passed on to NumPy (presumably this evaluates the op; confirm
     # against the R library).
     kernel_area = int(np.prod(self.filter_shape))
     bound_op = R.div(R.t(1), R.square_root(R.t(kernel_area)))
     bound = bound_op()

     weight_shape = (self.n_filters, in_channels, kernel_h, kernel_w)
     self.W = R.t(np.random.uniform(-bound, bound, size=weight_shape))
     self.w0 = R.t(np.zeros((self.n_filters, 1)))

     # Separate optimizer copies for the weights and the bias.
     self.W_opt = copy.copy(optimizer)
     self.w0_opt = copy.copy(optimizer)
Beispiel #5
0
    def update(self, w, grad_wrt_w):
        """Return ``w`` after one Adam step with time-dependent bias correction.

        The moment estimates start at zero and are therefore biased towards
        zero during the first steps. Adam compensates by dividing them by
        ``1 - b1**t`` and ``1 - b2**t`` where ``t`` is the number of updates
        performed so far. The powers are tracked as running products so the
        correction fades out as ``t`` grows instead of staying fixed at
        ``1 - b1`` / ``1 - b2`` forever.

        Args:
            w: Current weights.
            grad_wrt_w: Gradient with respect to ``w``. It is also called
                (``grad_wrt_w()``) on the first step to read its shape.

        Returns:
            ``w - self.w_updt`` where
            ``w_updt = learning_rate * m_hat / (sqrt(v_hat) + eps)``.

        Side effects:
            Lazily creates and then updates ``self.m``, ``self.v`` and the
            running powers ``self._b1_pow`` / ``self._b2_pow``; stores the
            applied step in ``self.w_updt``.
        """
        # If not initialized
        if self.m is None:
            self.m = R.t(np.zeros(np.shape(grad_wrt_w())))
            self.v = R.t(np.zeros(np.shape(grad_wrt_w())))
            # b1**0 and b2**0; multiplied by b1 / b2 once per update below.
            self._b1_pow = R.t(1)
            self._b2_pow = R.t(1)

        # Exponential moving averages of the gradient and its square.
        self.m = self.b1 * self.m + (R.t(1) - self.b1) * grad_wrt_w
        self.v = self.b2 * self.v + (R.t(1) - self.b2) * R.pow(
            grad_wrt_w, R.t(2))

        # Advance the step count implicitly: after t updates these hold
        # b1**t and b2**t.
        self._b1_pow = self.b1 * self._b1_pow
        self._b2_pow = self.b2 * self._b2_pow

        # Bias-corrected moment estimates.
        m_hat = R.div(self.m, R.t(1) - self._b1_pow)
        v_hat = R.div(self.v, R.t(1) - self._b2_pow)

        self.w_updt = R.div(self.learning_rate * m_hat,
                            R.square_root(v_hat) + self.eps)

        return w - self.w_updt