def forward_pass(self, X, training=True):
    # Initialize running mean and variance on the first run
    if self.running_mean is None:
        self.running_mean = R.mean(X, axis=0)
        self.running_var = R.variance(X, axis=0)

    if training and self.trainable:
        mean = R.mean(X, axis=0)
        var = R.variance(X, axis=0)
        self.running_mean = self.momentum * self.running_mean + (R.t(1) - self.momentum) * mean
        self.running_var = self.momentum * self.running_var + (R.t(1) - self.momentum) * var
    else:
        mean = self.running_mean
        var = self.running_var

    # Statistics saved for the backward pass
    self.X_centered = X - mean
    self.stddev_inv = R.div(R.t(1), R.square_root(var + self.eps))

    X_norm = self.X_centered * self.stddev_inv
    output = self.gamma * X_norm + self.beta

    return output
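# A minimal NumPy sketch of what the training-mode branch above computes,
# using made-up example values for gamma, beta and eps (not part of the layer):
import numpy as np

X = np.random.randn(8, 4)                     # batch of 8 samples, 4 features
gamma, beta, eps = np.ones(4), np.zeros(4), 0.01

mean = X.mean(axis=0)
var = X.var(axis=0)
X_norm = (X - mean) / np.sqrt(var + eps)      # zero mean, unit variance per feature
output = gamma * X_norm + beta                # learnable scale and shift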
def update(self, w, grad_wrt_w):
    # If not initialized
    if self.m is None:
        self.m = R.t(np.zeros(np.shape(grad_wrt_w())))
        self.v = R.t(np.zeros(np.shape(grad_wrt_w())))

    self.m = self.b1 * self.m + (R.t(1) - self.b1) * grad_wrt_w
    self.v = self.b2 * self.v + (R.t(1) - self.b2) * R.pow(grad_wrt_w, R.t(2))

    m_hat = R.div(self.m, R.t(1) - self.b1)
    v_hat = R.div(self.v, R.t(1) - self.b2)

    self.w_updt = R.div(self.learning_rate * m_hat, R.square_root(v_hat) + self.eps)

    return w - self.w_updt
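# A rough NumPy sketch of the Adam step above, with example hyperparameters.
# Note that this version of the bias correction divides by (1 - b1) and (1 - b2)
# once, rather than by (1 - b1**t) and (1 - b2**t) per time step t as in the
# original Adam paper.
import numpy as np

learning_rate, b1, b2, eps = 0.001, 0.9, 0.999, 1e-8
w = np.array([0.5, -0.3])
grad = np.array([0.1, 0.2])
m, v = np.zeros_like(w), np.zeros_like(w)

m = b1 * m + (1 - b1) * grad                  # first-moment estimate
v = b2 * v + (1 - b2) * grad ** 2             # second-moment estimate
m_hat, v_hat = m / (1 - b1), v / (1 - b2)     # bias correction
w = w - learning_rate * m_hat / (np.sqrt(v_hat) + eps)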
def initialize(self, optimizer):
    # Initialize the weights uniformly in [-limit, limit] with limit = 1 / sqrt(fan_in)
    limit = R.div(R.t(1), R.square_root(R.t(int(self.input_shape[0]))))
    limit_value = limit()
    self.W = R.t(np.random.uniform(-limit_value, limit_value,
                                   (int(self.input_shape[0]), self.n_units)))
    self.w0 = R.t(np.zeros((1, self.n_units)))

    # Weight optimizers
    self.W_opt = copy.copy(optimizer)
    self.w0_opt = copy.copy(optimizer)
def update(self, w, grad_wrt_w):
    # If not initialized
    if self.Eg is None:
        self.Eg = R.t(np.zeros(np.shape(grad_wrt_w())))

    self.Eg = self.rho * self.Eg + (R.t(1) - self.rho) * R.pow(grad_wrt_w, R.t(2))

    # Divide the learning rate for a weight by a running average of the
    # magnitudes of recent gradients for that weight
    return w - self.learning_rate * R.div(grad_wrt_w, R.square_root(self.Eg + self.eps))
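# A small NumPy sketch of the RMSprop rule above, with example values for
# learning_rate, rho and eps; the running average Eg shrinks the step for
# weights whose recent gradients have been large.
import numpy as np

learning_rate, rho, eps = 0.01, 0.9, 1e-8
w = np.array([0.5, -0.3])
grad = np.array([0.1, 0.2])
Eg = np.zeros_like(w)

Eg = rho * Eg + (1 - rho) * grad ** 2
w = w - learning_rate * grad / np.sqrt(Eg + eps)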
def initialize(self, optimizer):
    # Initialize the weights uniformly in [-limit, limit],
    # where limit = 1 / sqrt(prod(filter_shape))
    filter_height, filter_width = self.filter_shape
    channels = self.input_shape[0]
    limit = R.div(R.t(1), R.square_root(R.t(int(np.prod(self.filter_shape)))))
    limit_value = limit()
    self.W = R.t(np.random.uniform(-limit_value, limit_value,
                                   size=(self.n_filters, channels, filter_height, filter_width)))
    self.w0 = R.t(np.zeros((self.n_filters, 1)))

    # Weight optimizers
    self.W_opt = copy.copy(optimizer)
    self.w0_opt = copy.copy(optimizer)
def backward_pass(self, accum_grad):
    # Keep the gamma that was used during the forward pass
    # (the trainable gamma may be updated below)
    gamma = self.gamma

    # If the layer is trainable the parameters are updated
    if self.trainable:
        X_norm = self.X_centered * self.stddev_inv
        grad_gamma = R.sum(accum_grad * X_norm, axis=0)
        grad_beta = R.sum(accum_grad, axis=0)

        self.gamma = self.gamma_opt.update(self.gamma, grad_gamma)
        self.beta = self.beta_opt.update(self.beta, grad_beta)

    batch_size = R.t(accum_grad().shape[0])

    # The gradient of the loss with respect to the layer inputs
    # (uses the saved forward-pass statistics X_centered and stddev_inv)
    accum_grad = R.div(R.t(1), batch_size) * gamma * self.stddev_inv * (
        batch_size * accum_grad
        - R.sum(accum_grad, axis=0)
        - self.X_centered * R.square(self.stddev_inv) * R.sum(accum_grad * self.X_centered, axis=0)
    )

    return accum_grad
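# The input gradient above is the standard batch-norm backward formula,
#   dX = (1/N) * gamma * stddev_inv *
#        (N * dOut - sum(dOut) - X_centered * stddev_inv**2 * sum(dOut * X_centered)),
# shown here as a NumPy sketch with made-up shapes (illustrative only):
import numpy as np

N, D = 8, 4
X = np.random.randn(N, D)
dOut = np.random.randn(N, D)                  # accum_grad from the next layer
gamma, eps = np.ones(D), 0.01

X_centered = X - X.mean(axis=0)
stddev_inv = 1.0 / np.sqrt(X.var(axis=0) + eps)

dX = (1.0 / N) * gamma * stddev_inv * (
    N * dOut
    - dOut.sum(axis=0)
    - X_centered * stddev_inv ** 2 * (dOut * X_centered).sum(axis=0)
)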
def accuracy_score(y_true, y_pred):
    """ Compare y_true to y_pred and return the accuracy """
    equality = y_true() == y_pred()
    accuracy = R.div(R.sum(R.t(equality.tolist()), axis=0), R.t(len(y_true())))
    return accuracy
def __call__(self, x):
    # Sigmoid: 1 / (1 + exp(-x))
    return R.div(R.t(1), R.add(R.t(1), R.exp(R.neg(x))))
def __call__(self, x):
    # TanH: 2 / (1 + exp(-2x)) - 1
    return R.div(R.t(2), R.t(1) + R.exp(R.neg(R.t(2)) * x)) - R.t(1)
def __call__(self, x):
    # Softmax: subtract the row-wise max before exponentiating for numerical stability
    e_x = R.exp(x - R.t(np.max(x(), axis=-1, keepdims=True)))
    return R.div(e_x, R.t(np.sum(e_x(), axis=-1, keepdims=True)))
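# Quick NumPy check of the same softmax trick: subtracting the row-wise max
# keeps exp() from overflowing without changing the result (illustrative only).
import numpy as np

x = np.array([[1000.0, 1001.0, 1002.0]])
e_x = np.exp(x - x.max(axis=-1, keepdims=True))
probs = e_x / e_x.sum(axis=-1, keepdims=True)  # finite, sums to 1 per row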
def gradient(self, y, p):
    # Clip predictions to avoid division by zero
    p = R.t(np.clip(p(), 1e-15, 1 - 1e-15))
    # Derivative of the cross-entropy loss with respect to p: -y/p + (1-y)/(1-p)
    return R.neg(R.div(y, p)) + R.div(R.t(1) - y, R.t(1) - p)
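# The expression above is the derivative of the binary cross-entropy
# -(y * log(p) + (1 - y) * log(1 - p)) with respect to p, i.e. -y/p + (1-y)/(1-p).
# A quick NumPy version with the same clipping (illustrative only):
import numpy as np

y = np.array([1.0, 0.0, 1.0])
p = np.clip(np.array([0.9, 0.2, 0.6]), 1e-15, 1 - 1e-15)
grad = -(y / p) + (1 - y) / (1 - p)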