Example 1
    def get_weight_delta(self, output_train, target_train):
        gradients = self.get_gradient(output_train, target_train)
        gradient = matrix_list_in_one_vector(gradients)

        if norm(gradient) < self.gradient_tol:
            raise StopIteration("Gradient norm less than {}"
                                "".format(self.gradient_tol))

        train_layers = self.train_layers
        weight = matrix_list_in_one_vector(
            (layer.weight for layer in train_layers)
        )

        # On the first epoch there are no previous weights or gradients,
        # so the quasi-Newton update is skipped and a scaled identity
        # matrix is used as the initial inverse Hessian approximation.
        if hasattr(self, 'prev_gradient'):
            inverse_hessian = self.update_function(
                self.prev_inverse_hessian,
                weight - self.prev_weight,
                gradient - self.prev_gradient
            )
        else:
            inverse_hessian = self.h0_scale * eye(weight.size, dtype=int)

        self.prev_weight = weight.copy()
        self.prev_gradient = gradient.copy()
        self.prev_inverse_hessian = inverse_hessian

        return vector_to_list_of_matrix(
            -inverse_hessian.dot(gradient),
            (layer.size for layer in train_layers)
        )
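NeuPy's `update_function` here is pluggable, so the exact quasi-Newton rule depends on configuration. Purely as an illustration, a minimal NumPy sketch of the standard BFGS inverse-Hessian update with the same argument order as the call above (the function name and tolerance below are my own, not the library's):

    import numpy as np

    def bfgs_inverse_hessian_update(inv_hessian, weight_delta, gradient_delta):
        # Standard BFGS update of the inverse Hessian approximation:
        # H_new = (I - rho*s*y^T) H (I - rho*y*s^T) + rho*s*s^T
        # where s = weight_delta, y = gradient_delta, rho = 1 / (y^T s).
        denom = np.dot(gradient_delta, weight_delta)
        if abs(denom) < 1e-10:
            # Curvature information is unreliable; keep the old approximation.
            return inv_hessian
        rho = 1.0 / denom
        identity = np.eye(weight_delta.size)
        left = identity - rho * np.outer(weight_delta, gradient_delta)
        right = identity - rho * np.outer(gradient_delta, weight_delta)
        return (left.dot(inv_hessian).dot(right) +
                rho * np.outer(weight_delta, weight_delta))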
Example 2
    def get_weight_delta(self, output_train, target_train):
        gradients = super(ConjugateGradient, self).get_gradient(output_train,
                                                                target_train)
        epoch = self.epoch
        gradient = matrix_list_in_one_vector(gradients)
        weight_delta = -gradient

        if epoch > 1 and epoch % self.n_weights == 0:
            # Reset the search direction every N iterations, because
            # the algorithm gradually loses conjugacy.
            self.logs.info("TRAIN", "Reset conjugate gradient vector")
            del self.prev_gradient

        if hasattr(self, 'prev_gradient'):
            gradient_old = self.prev_gradient
            weight_delta_old = self.prev_weight_delta
            beta = self.update_function(gradient_old, gradient,
                                        weight_delta_old)

            weight_delta += beta * weight_delta_old

        weight_deltas = vector_to_list_of_matrix(
            weight_delta,
            (layer.size for layer in self.train_layers)
        )

        self.prev_weight_delta = weight_delta.copy()
        self.prev_gradient = gradient.copy()

        return weight_deltas
Example 3
    def get_weight_delta(self, output_train, target_train):
        gradients = super(ConjugateGradient,
                          self).get_gradient(output_train, target_train)
        epoch = self.epoch
        gradient = matrix_list_in_one_vector(gradients)
        weight_delta = -gradient

        if epoch > 1 and epoch % self.n_weights == 0:
            # Reset the search direction every N iterations, because
            # the algorithm gradually loses conjugacy.
            self.logs.info("TRAIN", "Reset conjugate gradient vector")
            del self.prev_gradient

        if hasattr(self, 'prev_gradient'):
            gradient_old = self.prev_gradient
            weight_delta_old = self.prev_weight_delta
            beta = self.update_function(gradient_old, gradient,
                                        weight_delta_old)

            weight_delta += beta * weight_delta_old

        weight_deltas = vector_to_list_of_matrix(
            weight_delta, (layer.size for layer in self.train_layers))

        self.prev_weight_delta = weight_delta.copy()
        self.prev_gradient = gradient.copy()

        return weight_deltas
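In these two conjugate gradient examples, `update_function(gradient_old, gradient, weight_delta_old)` returns the conjugate coefficient beta. The exact rule depends on how the optimizer is configured; as a sketch only, here are the two classic choices with the same signature (function names are mine):

    import numpy as np

    def fletcher_reeves(gradient_old, gradient, weight_delta_old):
        # beta = (g_new . g_new) / (g_old . g_old); this rule ignores
        # the previous search direction.
        return np.dot(gradient, gradient) / np.dot(gradient_old, gradient_old)

    def polak_ribiere(gradient_old, gradient, weight_delta_old):
        # beta = g_new . (g_new - g_old) / (g_old . g_old)
        return (np.dot(gradient, gradient - gradient_old) /
                np.dot(gradient_old, gradient_old))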
Example 4
    def get_gradient_by_weights(self, weights):
        weights = vector_to_list_of_matrix(weights,
                                           (layer.size
                                            for layer in self.train_layers))
        self.set_weights(weights)
        gradient = self.get_gradient(self.output_train, self.target_train)
        return matrix_list_in_one_vector(gradient)
Example 5
    def get_gradient_by_weights(self, weights):
        weights = vector_to_list_of_matrix(
            weights,
            (layer.size for layer in self.train_layers)
        )
        self.set_weights(weights)
        gradient = self.get_gradient(self.output_train,
                                     self.target_train)
        return matrix_list_in_one_vector(gradient)
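Both variants of `get_gradient_by_weights` rely on converting between a list of weight matrices and a single flat parameter vector. A minimal sketch of that round trip, assuming each size entry is a matrix shape (the helper names below are my own, not NeuPy's, and the real implementations may differ):

    import numpy as np

    def flatten_matrices(matrices):
        # Concatenate every weight matrix into one flat parameter vector.
        return np.concatenate([np.asarray(m).ravel() for m in matrices])

    def split_vector(vector, shapes):
        # Cut the flat vector back into matrices of the given shapes.
        matrices, start = [], 0
        for shape in shapes:
            size = int(np.prod(shape))
            matrices.append(vector[start:start + size].reshape(shape))
            start += size
        return matrices

    # Round trip: the two helpers are inverses of each other.
    weights = [np.ones((3, 2)), np.zeros((2, 1))]
    vector = flatten_matrices(weights)
    restored = split_vector(vector, [(3, 2), (2, 1)])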
Example 6
    def update_weights(self, weight_deltas):
        real_weights = [layer.weight for layer in self.train_layers]

        weights_vector = matrix_list_in_one_vector(real_weights)
        gradients_vector = matrix_list_in_one_vector(self.gradients)

        res = line_search(self.check_updates,
                          self.get_gradient_by_weights,
                          xk=weights_vector,
                          pk=matrix_list_in_one_vector(weight_deltas),
                          gfk=gradients_vector,
                          amax=self.maxstep,
                          c1=self.c1,
                          c2=self.c2)

        step = (res[0] if res[0] is not None else self.step)
        # SciPy sometimes ignores the `amax` argument and returns
        # a bigger step, so clamp it explicitly.
        self.step = min(self.maxstep, step)
        self.set_weights(real_weights)

        return super(WolfeSearch, self).update_weights(weight_deltas)
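`line_search` here is `scipy.optimize.line_search`, which looks for a step along `pk` that satisfies the strong Wolfe conditions and returns a tuple whose first element is the step length, or `None` when the search fails; that is why the code above falls back to `self.step` and clamps the result with `maxstep`. A minimal standalone call on a toy quadratic objective (the objective and variable names below are my own illustration):

    import numpy as np
    from scipy.optimize import line_search

    def objective(x):
        return float(np.dot(x, x))

    def objective_grad(x):
        return 2 * x

    xk = np.array([1.0, 1.5])     # current point
    pk = -objective_grad(xk)      # descent direction
    result = line_search(objective, objective_grad, xk, pk,
                         gfk=objective_grad(xk), c1=1e-4, c2=0.9, amax=1.0)

    step = result[0]              # step length, or None if the search failed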