def get_weight_delta(self, output_train, target_train):
    gradients = self.get_gradient(output_train, target_train)
    gradient = matrix_list_in_one_vector(gradients)

    if norm(gradient) < self.gradient_tol:
        raise StopIteration("Gradient norm less than {}"
                            "".format(self.gradient_tol))

    train_layers = self.train_layers
    weight = matrix_list_in_one_vector(
        (layer.weight for layer in train_layers)
    )

    if hasattr(self, 'prev_gradient'):
        inverse_hessian = self.update_function(
            self.prev_inverse_hessian,
            weight - self.prev_weight,
            gradient - self.prev_gradient
        )
    else:
        # On the first epoch there are no previous weights and
        # gradients, so we skip the quasi-Newton update and start
        # from a scaled identity matrix.
        inverse_hessian = self.h0_scale * eye(weight.size, dtype=int)

    self.prev_weight = weight.copy()
    self.prev_gradient = gradient.copy()
    self.prev_inverse_hessian = inverse_hessian

    return vector_to_list_of_matrix(
        -inverse_hessian.dot(gradient),
        (layer.size for layer in train_layers)
    )
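# The `update_function` used above is expected to map the previous inverse
# Hessian approximation plus the weight and gradient differences to a new
# approximation. Below is a minimal sketch of the standard BFGS
# inverse-Hessian update under that assumption; the name `bfgs_update` and
# the small-denominator guard are illustrative, not the library's own code.
import numpy as np

def bfgs_update(inverse_hessian, weight_delta, gradient_delta, epsilon=1e-7):
    # rho = 1 / (y^T s); guard against a near-zero denominator
    denominator = gradient_delta.dot(weight_delta)
    if abs(denominator) < epsilon:
        return inverse_hessian

    rho = 1. / denominator
    identity = np.eye(len(weight_delta))

    # H_new = (I - rho * s y^T) H (I - rho * y s^T) + rho * s s^T
    left = identity - rho * np.outer(weight_delta, gradient_delta)
    right = identity - rho * np.outer(gradient_delta, weight_delta)
    return left.dot(inverse_hessian).dot(right) + rho * np.outer(
        weight_delta, weight_delta)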
def get_weight_delta(self, output_train, target_train):
    gradients = super(ConjugateGradient, self).get_gradient(output_train,
                                                            target_train)
    epoch = self.epoch
    gradient = matrix_list_in_one_vector(gradients)
    weight_delta = -gradient

    if epoch > 1 and epoch % self.n_weights == 0:
        # The direction must be reset every N iterations, because the
        # algorithm loses conjugacy.
        self.logs.info("TRAIN", "Reset conjugate gradient vector")
        del self.prev_gradient

    if hasattr(self, 'prev_gradient'):
        gradient_old = self.prev_gradient
        weight_delta_old = self.prev_weight_delta
        beta = self.update_function(gradient_old, gradient,
                                    weight_delta_old)
        weight_delta += beta * weight_delta_old

    weight_deltas = vector_to_list_of_matrix(
        weight_delta,
        (layer.size for layer in self.train_layers)
    )

    self.prev_weight_delta = weight_delta.copy()
    self.prev_gradient = gradient.copy()

    return weight_deltas
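# Here `update_function` computes the conjugation coefficient beta from the
# previous and current gradients (the previous direction is also passed in).
# Minimal sketches of two standard choices, Fletcher-Reeves and
# Polak-Ribiere, are shown below; the function names and the epsilon guard
# are illustrative assumptions rather than the library's exact code.
import numpy as np

def fletcher_reeves(gradient_old, gradient_new, weight_delta_old,
                    epsilon=1e-7):
    # beta = (g_new^T g_new) / (g_old^T g_old)
    return gradient_new.dot(gradient_new) / (
        gradient_old.dot(gradient_old) + epsilon)

def polak_ribiere(gradient_old, gradient_new, weight_delta_old,
                  epsilon=1e-7):
    # beta = g_new^T (g_new - g_old) / (g_old^T g_old)
    return gradient_new.dot(gradient_new - gradient_old) / (
        gradient_old.dot(gradient_old) + epsilon)

# Example: beta for two small gradient vectors
g_old, g_new = np.array([0.5, -0.2]), np.array([0.3, 0.1])
print(polak_ribiere(g_old, g_new, weight_delta_old=None))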
def get_gradient_by_weights(self, weights):
    weights = vector_to_list_of_matrix(
        weights,
        (layer.size for layer in self.train_layers)
    )
    self.set_weights(weights)
    gradient = self.get_gradient(self.output_train, self.target_train)
    return matrix_list_in_one_vector(gradient)
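# `matrix_list_in_one_vector` and `vector_to_list_of_matrix` are the
# flatten/unflatten helpers these methods rely on: the first concatenates a
# list of weight matrices into a single 1-D vector, the second cuts such a
# vector back into matrices of the given shapes. A minimal sketch under
# those assumptions (not necessarily the library's exact implementation):
import numpy as np

def matrix_list_in_one_vector(matrices):
    # Flatten every matrix and concatenate into one long vector.
    return np.concatenate([np.asarray(matrix).ravel()
                           for matrix in matrices])

def vector_to_list_of_matrix(vector, shapes):
    # Slice the vector back into matrices with the requested shapes.
    matrices, position = [], 0
    for shape in shapes:
        n_elements = int(np.prod(shape))
        matrices.append(
            vector[position:position + n_elements].reshape(shape))
        position += n_elements
    return matrices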
def update_weights(self, weight_deltas):
    real_weights = [layer.weight for layer in self.train_layers]
    weights_vector = matrix_list_in_one_vector(real_weights)
    gradients_vector = matrix_list_in_one_vector(self.gradients)

    res = line_search(self.check_updates,
                      self.get_gradient_by_weights,
                      xk=weights_vector,
                      pk=matrix_list_in_one_vector(weight_deltas),
                      gfk=gradients_vector,
                      amax=self.maxstep,
                      c1=self.c1,
                      c2=self.c2)

    step = (res[0] if res[0] is not None else self.step)
    # SciPy sometimes ignores the `amax` argument and returns a
    # bigger step, so clip it manually.
    self.step = min(self.maxstep, step)

    self.set_weights(real_weights)
    return super(WolfeSearch, self).update_weights(weight_deltas)
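# The `line_search` call above is SciPy's Wolfe-condition line search
# (scipy.optimize.line_search): given an objective, its gradient, the
# current point `xk` and search direction `pk`, it returns a step length
# `alpha` satisfying the Wolfe conditions (or None when it fails). A small
# standalone example on a quadratic objective:
import numpy as np
from scipy.optimize import line_search

def objective(x):
    return x.dot(x)

def objective_gradient(x):
    return 2 * x

x_current = np.array([1.0, -2.0])
direction = -objective_gradient(x_current)  # steepest descent direction

result = line_search(objective, objective_gradient,
                     xk=x_current, pk=direction,
                     c1=1e-4, c2=0.9, amax=1.0)
alpha = result[0]  # step length, or None when the search did not converge
print(alpha)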