def backward_pass(self, accum_grad):
    # Reshape accumulated gradient into column shape
    accum_grad = R.t(accum_grad().transpose(1, 2, 3, 0).reshape(self.n_filters, -1))

    if self.trainable:
        # Take the dot product between the column-shaped accumulated gradient and the
        # column-shaped layer input to determine the gradient at the layer with
        # respect to the layer weights
        grad_w = R.t(accum_grad().dot(self.X_col().T).reshape(self.W().shape))
        # The gradient with respect to the bias terms is a sum, as in the Dense layer
        grad_w0 = R.t(np.sum(accum_grad(), axis=1, keepdims=True))

        # Update the layer's weights
        self.W = self.W_opt.update(self.W, grad_w)
        self.w0 = self.w0_opt.update(self.w0, grad_w0)

    # Recalculate the gradient which will be propagated back to the previous layer
    accum_grad = R.transpose(self.W_col).dot(accum_grad)
    # Reshape from column shape to image shape
    accum_grad = column_to_image(accum_grad,
                                 self.layer_input().shape,
                                 self.filter_shape,
                                 stride=self.stride,
                                 output_shape=self.padding)
    return accum_grad
def load_weights_file(self):
    for layer in self.layers:
        l_name = layer.get_layer_name()
        # Use .get so layers without saved weights (e.g. activations) are skipped
        layer_w = self.loaded_weights.get(l_name)
        if layer_w is not None:
            layer.W = R.t(layer_w[0])
            layer.w0 = R.t(layer_w[1])
def initialize(self, optimizer):
    # Initialize the parameters
    self.gamma = R.t(np.ones(self.input_shape))
    self.beta = R.t(np.zeros(self.input_shape))
    # Parameter optimizers
    self.gamma_opt = copy.copy(optimizer)
    self.beta_opt = copy.copy(optimizer)
def forward_pass(self, X, training=True):
    # Initialize running mean and variance on the first run
    if self.running_mean is None:
        self.running_mean = R.mean(X, axis=0)
        self.running_var = R.variance(X, axis=0)

    if training and self.trainable:
        mean = R.mean(X, axis=0)
        var = R.variance(X, axis=0)
        self.running_mean = self.momentum * self.running_mean + (R.t(1) - self.momentum) * mean
        self.running_var = self.momentum * self.running_var + (R.t(1) - self.momentum) * var
    else:
        mean = self.running_mean
        var = self.running_var

    # Statistics saved for the backward pass
    self.X_centered = X - mean
    self.stddev_inv = R.div(R.t(1), R.square_root(var + self.eps))

    X_norm = self.X_centered * self.stddev_inv
    output = self.gamma * X_norm + self.beta
    return output
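# A minimal plain-numpy sketch (not part of the layer) of the running-statistics
# update used in forward_pass above: an exponential moving average of the batch
# statistics, controlled by the layer's momentum. All names here are illustrative.
import numpy as np

momentum = 0.99
X_batch = np.random.randn(32, 4)        # toy batch: 32 samples, 4 features
running_mean = np.zeros(4)
batch_mean = X_batch.mean(axis=0)
running_mean = momentum * running_mean + (1 - momentum) * batch_mean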
def __init__(self, learning_rate=0.001, b1=0.9, b2=0.999):
    self.learning_rate = R.t(learning_rate)
    self.eps = R.t(1e-8)
    self.m = None
    self.v = None
    # Decay rates
    self.b1 = R.t(b1)
    self.b2 = R.t(b2)
def initialize(self, optimizer):
    # Initialize the weights
    limit = R.div(R.t(1), R.square_root(R.t(int(self.input_shape[0]))))
    limit_value = limit()
    self.W = R.t(np.random.uniform(-limit_value, limit_value,
                                   (int(self.input_shape[0]), self.n_units)))
    self.w0 = R.t(np.zeros((1, self.n_units)))
    # Weight optimizers
    self.W_opt = copy.copy(optimizer)
    self.w0_opt = copy.copy(optimizer)
def batch_iterator(X, y=None, batch_size=64):
    """ Simple batch generator """
    n_samples = X().shape[0]
    for i in np.arange(0, n_samples, batch_size):
        begin, end = i, min(i + batch_size, n_samples)
        if y is not None:
            yield R.t(X()[begin:end]), R.t(y()[begin:end])
        else:
            yield R.t(X()[begin:end])
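# Hedged usage sketch for batch_iterator, assuming (as the surrounding code
# suggests) that R.t wraps a numpy array and calling a tensor returns its value.
X = R.t(np.random.randn(100, 8))
y = R.t(np.random.randint(0, 2, size=(100, 1)))
for X_batch, y_batch in batch_iterator(X, y, batch_size=32):
    print(X_batch().shape, y_batch().shape)   # three (32, 8) batches, then (4, 8)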
def update(self, w, grad_wrt_w):
    # If not initialized
    if self.Eg is None:
        self.Eg = R.t(np.zeros(np.shape(grad_wrt_w())))

    self.Eg = self.rho * self.Eg + (R.t(1) - self.rho) * R.pow(grad_wrt_w, R.t(2))

    # Divide the learning rate for a weight by a running average of the
    # magnitudes of recent gradients for that weight
    return w - self.learning_rate * R.div(grad_wrt_w, R.square_root(self.Eg + self.eps))
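# Plain-numpy sketch of the RMSprop rule implemented above, for reference only:
# keep a running average Eg of squared gradients and scale each step by 1/sqrt(Eg).
import numpy as np

rho, lr, eps = 0.9, 0.01, 1e-8
w = np.array([1.0, -2.0])
grad = np.array([0.5, -0.1])
Eg = np.zeros_like(grad)
Eg = rho * Eg + (1 - rho) * grad ** 2
w = w - lr * grad / np.sqrt(Eg + eps)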
def __init__(self, optimizer, loss, validation_data=None):
    self.optimizer = optimizer
    self.layers = []
    self.errors = {"training": [], "validation": []}
    self.loss_function = loss()
    self.val_set = None
    if validation_data:
        X, y = validation_data
        self.val_set = {"X": R.t(X), "y": R.t(y)}
def initialize(self, optimizer):
    # Initialize the weights
    filter_height, filter_width = self.filter_shape
    channels = self.input_shape[0]
    limit = R.div(R.t(1), R.square_root(R.t(int(np.prod(self.filter_shape)))))
    limit_value = limit()
    self.W = R.t(np.random.uniform(-limit_value, limit_value,
                                   size=(self.n_filters, channels, filter_height, filter_width)))
    self.w0 = R.t(np.zeros((self.n_filters, 1)))
    # Weight optimizers
    self.W_opt = copy.copy(optimizer)
    self.w0_opt = copy.copy(optimizer)
def backward_pass(self, accum_grad):
    batch_size, _, _, _ = accum_grad().shape
    channels, height, width = self.input_shape
    accum_grad = R.t(accum_grad().transpose(2, 3, 0, 1).ravel())

    # MaxPool or AveragePool specific method
    accum_grad_col = self._pool_backward(accum_grad)

    accum_grad = column_to_image(accum_grad_col,
                                 (batch_size * channels, 1, height, width),
                                 self.pool_shape, self.stride, 0)
    accum_grad = accum_grad().reshape((batch_size,) + self.input_shape)
    return R.t(accum_grad)
def forward_pass(self, X, training=True):
    batch_size, channels, height, width = X().shape
    self.layer_input = X
    # Turn image shape into column shape
    # (enables dot product between input and weights)
    self.X_col = image_to_column(X, self.filter_shape, stride=self.stride,
                                 output_shape=self.padding)
    # Turn weights into column shape
    self.W_col = R.t(self.W().reshape((self.n_filters, -1)))
    # Calculate output
    output = self.W_col.dot(self.X_col) + self.w0
    # Reshape into (n_filters, out_height, out_width, batch_size)
    output = output().reshape(self.output_shape() + (batch_size,))
    # Redistribute axes so that batch size comes first
    return R.t(output.transpose(3, 0, 1, 2))
def column_to_image(cols, images_shape, filter_shape, stride, output_shape='same'):
    batch_size, channels, height, width = images_shape
    pad_h, pad_w = determine_padding(filter_shape, output_shape)
    height_padded = height + np.sum(pad_h)
    width_padded = width + np.sum(pad_w)
    images_padded = np.zeros((batch_size, channels, height_padded, width_padded))

    # Calculate the indices where the dot products were applied between weights
    # and the image
    k, i, j = get_im2col_indices(images_shape, filter_shape, (pad_h, pad_w), stride)

    cols = cols().reshape(channels * np.prod(filter_shape), -1, batch_size)
    cols = cols.transpose(2, 0, 1)
    # Add column content to the images at the indices
    np.add.at(images_padded, (slice(None), k, i, j), cols)

    # Return image without padding
    return R.t(images_padded[:, :, pad_h[0]:height + pad_h[0], pad_w[0]:width + pad_w[0]])
def forward_pass(self, X, training=True):
    self.layer_input = X
    batch_size, channels, height, width = X().shape
    _, out_height, out_width = self.output_shape()

    X = R.t(X().reshape(batch_size * channels, 1, height, width))
    X_col = image_to_column(X, self.pool_shape, self.stride, self.padding)

    # MaxPool or AveragePool specific method
    output = self._pool_forward(X_col)

    output = output().reshape(out_height, out_width, batch_size, channels)
    output = output.transpose(2, 3, 0, 1)
    return R.t(output)
def fit(self, X, y, n_epochs, batch_size, training=True, callbacks=None):
    """ Trains the model for a fixed number of epochs """
    X = R.t(X)
    y = R.t(y)

    if self.loaded_weights is not None:
        self.load_weights_file()

    self.callbacks = callbacks
    cb = Callback(self.callbacks, model=self)
    if training is True:
        # Callback on training begin
        cb.on_train_begin()

    for epoch in range(1, n_epochs + 1):
        # Callback on epoch begin
        cb.on_epoch_begin()
        print('\nEpoch: ', epoch)
        batch_error = []
        for X_batch, y_batch in batch_iterator(X, y, batch_size=batch_size):
            # Callback on batch begin
            cb.on_batch_begin()
            loss, _ = self.train_on_batch(X_batch, y_batch)
            batch_error.append(loss())
            self.loss = loss()
            # Callback on batch end
            cb.on_batch_end()
        print("Batch Error: ", batch_error)
        self.errors["training"].append(np.mean(batch_error))

        if self.val_set is not None:
            val_loss, _ = self.test_on_batch(self.val_set["X"], self.val_set["y"])
            self.errors["validation"].append(val_loss())

        # Callback on epoch end
        cb.on_epoch_end()
        if self.save_weight is True:
            self.save_model()

    # Callback on training end (must run before returning, otherwise it is unreachable)
    cb.on_train_end()
    return self.errors["training"], self.errors["validation"]
def fit(self, X, y, n_epochs, batch_size):
    """ Trains the model for a fixed number of epochs """
    X = R.t(X)
    y = R.t(y)

    for epoch in range(1, n_epochs + 1):
        print('\nEpoch: ', epoch)
        batch_error = []
        for X_batch, y_batch in batch_iterator(X, y, batch_size=batch_size):
            loss, _ = self.train_on_batch(X_batch, y_batch)
            batch_error.append(loss())
        print(" Batch Error: ", batch_error)
        self.errors["training"].append(np.mean(batch_error))

        if self.val_set is not None:
            val_loss, _ = self.test_on_batch(self.val_set["X"], self.val_set["y"])
            self.errors["validation"].append(val_loss())

    return self.errors["training"], self.errors["validation"]
def __init__(self, optimizer, loss, validation_data=None, save_weight=None, load_weights=None):
    self.optimizer = optimizer
    self.layers = []
    self.errors = {"training": [], "validation": []}
    self.loss_function = loss()
    self.save_weight = save_weight
    self.load_weights = load_weights
    self.loaded_weights = None
    if self.load_weights is not None:
        with open(self.load_weights, "rb") as f:
            self.loaded_weights = json.load(f)
    self.layer_type = []
    self.loss = None
    self.val_set = None
    if validation_data:
        X, y = validation_data
        self.val_set = {"X": R.t(X), "y": R.t(y)}
def backward_pass(self, accum_grad):
    # Save parameters used during the forward pass
    gamma = self.gamma

    # If the layer is trainable, the parameters are updated
    if self.trainable:
        X_norm = self.X_centered * self.stddev_inv
        grad_gamma = R.sum(accum_grad * X_norm, axis=0)
        grad_beta = R.sum(accum_grad, axis=0)

        self.gamma = self.gamma_opt.update(self.gamma, grad_gamma)
        self.beta = self.beta_opt.update(self.beta, grad_beta)

    batch_size = R.t(accum_grad().shape[0])

    # The gradient of the loss with respect to the layer inputs
    # (uses the weights and statistics saved in the forward pass)
    accum_grad = R.div(R.t(1), batch_size) * gamma * self.stddev_inv * (
        batch_size * accum_grad
        - R.sum(accum_grad, axis=0)
        - self.X_centered * R.square(self.stddev_inv) * R.sum(accum_grad * self.X_centered, axis=0)
    )
    return accum_grad
def backward_pass(self, accum_grad):
    # Save the weights used during the forward pass
    W = self.W

    if self.trainable:
        # Calculate gradient w.r.t. layer weights
        grad_w = R.transpose(self.layer_input).dot(accum_grad)
        grad_w0 = R.t(np.sum(accum_grad(), axis=0, keepdims=True))

        # Update the layer weights
        self.W = self.W_opt.update(self.W, grad_w)
        self.w0 = self.w0_opt.update(self.w0, grad_w0)

    # Return accumulated gradient for the next layer,
    # calculated based on the weights used during the forward pass
    accum_grad = accum_grad.dot(R.transpose(W))
    return accum_grad
def image_to_column(images, filter_shape, stride, output_shape='same'):
    filter_height, filter_width = filter_shape
    pad_h, pad_w = determine_padding(filter_shape, output_shape)

    # Add padding to the image
    images_padded = np.pad(images(), ((0, 0), (0, 0), pad_h, pad_w), mode='constant')

    # Calculate the indices where the dot products are to be applied between weights
    # and the image
    k, i, j = get_im2col_indices(images().shape, filter_shape, (pad_h, pad_w), stride)

    # Get content from image at those indices
    cols = images_padded[:, k, i, j]
    channels = images().shape[1]
    # Reshape content into column shape
    cols = cols.transpose(1, 2, 0).reshape(filter_height * filter_width * channels, -1)
    return R.t(cols)
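# Hedged round-trip sketch for the two helpers above, assuming R.t wraps numpy
# arrays and that determine_padding / get_im2col_indices behave as used here.
# With 'same' padding and stride 1, each of the 8*8 positions per image yields
# one column of length filter_h * filter_w * channels.
images = R.t(np.random.randn(2, 3, 8, 8))   # (batch, channels, height, width)
cols = image_to_column(images, filter_shape=(3, 3), stride=1, output_shape='same')
print(cols().shape)                          # (3*3*3, 8*8*2) = (27, 128)
# Note: column_to_image sums overlapping patches, so it is the adjoint of
# image_to_column, not an exact inverse.
restored = column_to_image(cols, (2, 3, 8, 8), (3, 3), stride=1, output_shape='same')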
def update(self, w, grad_wrt_w):
    # If not initialized
    if self.m is None:
        self.m = R.t(np.zeros(np.shape(grad_wrt_w())))
        self.v = R.t(np.zeros(np.shape(grad_wrt_w())))

    self.m = self.b1 * self.m + (R.t(1) - self.b1) * grad_wrt_w
    self.v = self.b2 * self.v + (R.t(1) - self.b2) * R.pow(grad_wrt_w, R.t(2))

    # Bias correction; note this divides by (1 - b1) and (1 - b2) directly,
    # rather than the per-timestep (1 - b1^t) and (1 - b2^t) of the original paper
    m_hat = R.div(self.m, R.t(1) - self.b1)
    v_hat = R.div(self.v, R.t(1) - self.b2)

    self.w_updt = R.div(self.learning_rate * m_hat, R.square_root(v_hat) + self.eps)
    return w - self.w_updt
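# Plain-numpy sketch of a single Adam step as implemented above, for reference.
# It mirrors this code's simplified bias correction (no timestep exponent).
import numpy as np

lr, b1, b2, eps = 0.001, 0.9, 0.999, 1e-8
w = np.array([0.5, -0.5])
grad = np.array([0.2, -0.3])
m = np.zeros_like(grad)
v = np.zeros_like(grad)
m = b1 * m + (1 - b1) * grad          # first-moment estimate
v = b2 * v + (1 - b2) * grad ** 2     # second-moment estimate
m_hat = m / (1 - b1)
v_hat = v / (1 - b2)
w = w - lr * m_hat / (np.sqrt(v_hat) + eps)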
def accuracy_score(y_true, y_pred):
    """ Compare y_true to y_pred and return the accuracy """
    equality = y_true() == y_pred()
    accuracy = R.div(R.sum(R.t(equality.tolist()), axis=0), R.t(len(y_true())))
    return accuracy
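# Hedged usage sketch, assuming R.t wraps numpy arrays as elsewhere in this code
# and that summing booleans counts the matches.
y_true = R.t(np.array([1, 0, 1, 1]))
y_pred = R.t(np.array([1, 0, 0, 1]))
print(accuracy_score(y_true, y_pred)())   # expected: 0.75 (3 of 4 correct)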
def gradient(self, x):
    return self.__call__(x) * (R.t(1) - self.__call__(x))
def __call__(self, x):
    return R.div(R.t(1), R.add(R.t(1), R.exp(R.neg(x))))
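# Quick plain-numpy check of the identity used by Sigmoid.gradient above:
# sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).
import numpy as np

x = np.linspace(-3, 3, 7)
s = 1 / (1 + np.exp(-x))
analytic = s * (1 - s)
numeric = (1 / (1 + np.exp(-(x + 1e-5))) - s) / 1e-5   # forward difference
assert np.allclose(analytic, numeric, atol=1e-4)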
def gradient(self, x):
    return R.t(np.where(x() >= 0, 1, 0))
def __call__(self, x):
    x_value = x()
    return R.t(np.where(x_value >= 0, x_value, 0))
def gradient(self, x):
    return R.t(1) - R.pow(self.__call__(x), R.t(2))
def __call__(self, x):
    return R.div(R.t(2), R.t(1) + R.exp(R.neg(R.t(2)) * x)) - R.t(1)
def __call__(self, x):
    e_x = R.exp(x - R.t(np.max(x(), axis=-1, keepdims=True)))
    return R.div(e_x, R.t(np.sum(e_x(), axis=-1, keepdims=True)))
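# Plain-numpy sketch of why Softmax subtracts the row-wise max above: shifting
# the logits leaves the result unchanged but avoids overflow in the exponential.
import numpy as np

x = np.array([[1000.0, 1001.0, 1002.0]])
# naive = np.exp(x) would overflow to inf
shifted = np.exp(x - x.max(axis=-1, keepdims=True))
softmax = shifted / shifted.sum(axis=-1, keepdims=True)
print(softmax)   # ~[[0.090, 0.245, 0.665]]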
def parameters(self):
    return R.t(int(np.prod(self.W().shape))) + R.t(int(np.prod(self.w0().shape)))