Example #1
class Momentum(Backpropagation):
    """ Momentum algorithm for :network:`Backpropagation` optimization.

    Parameters
    ----------
    momentum : float
        Controls how strongly the previous gradient contributes to the
        current update. Defaults to ``0.9``.
    {optimizations}
    {raw_predict_param}
    {full_params}

    Methods
    -------
    {supervised_train}
    {full_methods}

    Examples
    --------
    Simple example

    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> mnet = algorithms.Momentum(
    ...     (2, 3, 1),
    ...     verbose=False
    ... )
    >>> mnet.train(x_train, y_train)

    See Also
    --------
    :network:`Backpropagation` : Backpropagation algorithm.
    """
    momentum = BetweenZeroAndOneProperty(default=0.9)

    def layer_weight_update(self, delta, layer_number):
        update = super(Momentum, self).layer_weight_update(delta, layer_number)
        if not hasattr(self, 'prev_gradients'):
            return update
        return -self.momentum * self.prev_gradients[layer_number] + update

    def update_weights(self, weight_deltas):
        super(Momentum, self).update_weights(weight_deltas)
        self.prev_gradients = copy.copy(self.gradients)
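
The momentum trick above reuses the previous gradient step so that updates keep moving in a consistent direction. A minimal standalone sketch of the same idea on a toy quadratic (plain NumPy, hypothetical values, not the NeuPy API):

import numpy as np

# Toy quadratic loss f(w) = 0.5 * w^T A w with gradient A @ w.
A = np.array([[3.0, 0.0], [0.0, 1.0]])
w = np.array([1.0, 1.0])

step, momentum = 0.1, 0.9
velocity = np.zeros_like(w)

for _ in range(200):
    gradient = A @ w
    # Mix a fraction of the previous update into the new one,
    # mirroring layer_weight_update/update_weights above.
    velocity = momentum * velocity - step * gradient
    w = w + velocity

print(w)  # approaches the minimum at [0, 0]
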
Example #2
class StepOutputLayer(OutputLayer):
    """ The behaviour for this layer is the same as for step function.

    Parameters
    ----------
    output_bounds : tuple
        Two-element tuple where the first element defines the lower output
        value and the second one the upper output value. Defaults
        to ``(0, 1)``.
    critical_point : float
        Threshold that defines the step function's bias. Defaults to ``0.5``.
    {layer_params}
    """
    output_bounds = NumberBoundProperty(default=(0, 1))
    critical_point = BetweenZeroAndOneProperty(default=0.5)

    def format_output(self, value):
        lower_bound, upper_bound = self.output_bounds
        return where(value < self.critical_point, lower_bound, upper_bound)
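
A minimal sketch of the same thresholding behaviour with plain NumPy (hypothetical values, not the NeuPy API):

import numpy as np

output_bounds = (0, 1)
critical_point = 0.5

value = np.array([0.2, 0.5, 0.8])
# Everything below the critical point maps to the lower bound,
# everything else to the upper bound.
print(np.where(value < critical_point, output_bounds[0], output_bounds[1]))
# [0 1 1]
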
Example #3
class Quickprop(Backpropagation):
    """ Quickprop :network:`Backpropagation` algorithm optimization.

    Parameters
    ----------
    upper_bound : float
        Maximum possible value for weight update. Defaults to ``1``.
    {optimizations}
    {raw_predict_param}
    {full_params}

    Methods
    -------
    {supervised_train}
    {full_methods}

    Examples
    --------
    Simple example

    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> qpnet = algorithms.Quickprop(
    ...     (2, 3, 1),
    ...     verbose=False
    ... )
    >>> qpnet.train(x_train, y_train)

    See Also
    --------
    :network:`Backpropagation` : Backpropagation algorithm.
    """
    upper_bound = NonNegativeNumberProperty(default=1)
    gradient_tol = BetweenZeroAndOneProperty(default=1e-10)

    def layer_weight_update(self, delta, layer_number):
        if not hasattr(self, 'prev_gradients'):
            weight_delta = delta
        else:
            gradient = self.gradients[layer_number]
            prev_gradient = self.prev_gradients[layer_number]
            prev_weight_delta = self.prev_weight_deltas[layer_number]

            if norm(prev_gradient - gradient) < self.gradient_tol:
                raise StopIteration("Gradient norm after update is "
                                    "less than {}".format(self.gradient_tol))

            weight_delta = prev_weight_delta * (gradient /
                                                (prev_gradient - gradient))
            upper_bound = self.upper_bound
            weight_delta = where(
                np_abs(weight_delta) < upper_bound, weight_delta,
                sign(weight_delta) * upper_bound)

        self.weight_deltas.append(weight_delta)
        return weight_delta

    def update_weights(self, weight_deltas):
        self.weight_deltas = []
        super(Quickprop, self).update_weights(weight_deltas)

        self.prev_weight_deltas = copy.copy(self.weight_deltas)
        self.prev_gradients = copy.copy(self.gradients)
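
Quickprop treats each weight's error curve as a parabola and jumps towards its minimum using the last two gradients; a minimal per-weight sketch of that secant-style step (toy numbers, not the NeuPy API):

import numpy as np

prev_weight_delta = np.array([0.5, -0.2])
prev_gradient = np.array([1.0, -0.8])
gradient = np.array([0.4, -0.5])
upper_bound = 1.0

# Secant step: scale the previous delta by g / (g_prev - g).
weight_delta = prev_weight_delta * (gradient / (prev_gradient - gradient))
# Clip the magnitude so a nearly flat denominator cannot blow the step up.
weight_delta = np.where(np.abs(weight_delta) < upper_bound,
                        weight_delta,
                        np.sign(weight_delta) * upper_bound)
print(weight_delta)  # approximately [ 0.333, -0.333 ]
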
Example #4
class HessianDiagonal(Backpropagation):
    """ Hissian diagonal is a Hessian algorithm approximation which require
    only computation of hessian matrix diagonal elements and makes it
    invertion much easier and faster.

    Parameters
    ----------
    min_eigenvalue : float
        Sets the minimum eigenvalue for the diagonal Hessian matrix. After a
        few iterations the diagonal elements can become extremely small, and
        inverting the matrix then produces huge values. This parameter bounds
        the size of the diagonal elements from below. Defaults to ``1e-10``.
    {optimizations}
    {raw_predict_param}
    {full_params}

    Methods
    -------
    {supervised_train}
    {full_methods}

    Examples
    --------
    Simple example

    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> hdnet = algorithms.HessianDiagonal(
    ...     (2, 3, 1),
    ...     verbose=False
    ... )
    >>> hdnet.train(x_train, y_train)

    Diabetes dataset example

    >>> import numpy as np
    >>> from sklearn.cross_validation import train_test_split
    >>> from sklearn import datasets, preprocessing
    >>> from neupy import algorithms, layers
    >>> from neupy.functions import rmsle
    >>>
    >>> np.random.seed(0)
    >>>
    >>> dataset = datasets.load_diabetes()
    >>> data, target = dataset.data, dataset.target
    >>>
    >>> input_scaler = preprocessing.StandardScaler()
    >>> target_scaler = preprocessing.StandardScaler()
    >>>
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     input_scaler.fit_transform(data),
    ...     target_scaler.fit_transform(target),
    ...     train_size=0.8
    ... )
    >>>
    >>> nw = algorithms.HessianDiagonal(
    ...     connection=[
    ...         layers.SigmoidLayer(10),
    ...         layers.SigmoidLayer(20),
    ...         layers.OutputLayer(1)
    ...     ],
    ...     step=1.5,
    ...     use_raw_predict_at_error=False,
    ...     shuffle_data=False,
    ...     verbose=False,
    ...     min_eigenvalue=1e-10
    ... )
    >>> nw.train(x_train, y_train, epochs=10)
    >>> y_predict = nw.predict(x_test)
    >>>
    >>> error = rmsle(target_scaler.inverse_transform(y_test),
    ...               target_scaler.inverse_transform(y_predict).round())
    >>> error
    0.50315919814691346

    See Also
    --------
    :network:`Backpropagation` : Backpropagation algorithm.
    """
    min_eigenvalue = BetweenZeroAndOneProperty(default=1e-10)

    def get_weight_delta(self, output_train, target_train):
        weight_deltas = []
        gradients = self.gradients = []
        state_delta = self.delta = []

        update_first_order = self.error.deriv(output_train, target_train)
        min_eigenvalue = self.min_eigenvalue
        prev_weight = None
        update_second_order = None

        for i, layer in enumerate(reversed(self.train_layers), start=1):
            summated_data = self.summated_data[-i]
            current_layer_input = self.layer_outputs[-i]
            weight = layer.weight_without_bias.T
            weight_shape = layer.weight.shape

            activation_function_deriv = layer.activation_function.deriv
            deriv = activation_function_deriv(summated_data)
            second_deriv = activation_function_deriv.deriv(summated_data)

            if i == 1:
                # For last layer update
                delta = deriv ** 2 - update_first_order * second_deriv
            else:
                # For the hidden layers
                update_first_order = update_first_order.dot(prev_weight)
                delta = (
                    deriv ** 2 * update_second_order +
                    update_first_order * second_deriv
                )

            update_first_order *= deriv
            update_second_order = delta.dot(weight ** 2)

            # Compute gradient
            gradient = current_layer_input.T.dot(update_first_order).ravel()
            gradients.insert(0, reshape(gradient, weight_shape))

            # Compute hessian matrix
            weight_delta = asmatrix(dot(current_layer_input.T ** 2, delta))
            hessain_shape = (weight_delta.size, weight_delta.size)
            inverted_hessian = lil_matrix(hessain_shape)
            # The inverse of a diagonal matrix is just the reciprocal of
            # every diagonal element
            hessian_diagonal = weight_delta.ravel().T
            hessian_diagonal = where(
                hessian_diagonal < min_eigenvalue,
                min_eigenvalue,
                hessian_diagonal
            )
            inverted_hessian.setdiag(1 / hessian_diagonal)
            weight_delta = inverted_hessian.dot(gradient)

            weight_deltas.insert(0, reshape(-weight_delta, weight_shape))
            state_delta.insert(0, delta)
            prev_weight = weight

        return weight_deltas
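
Because the inverse of a diagonal matrix is just the reciprocal of each diagonal entry, the Newton-like step per weight reduces to an element-wise division; a minimal sketch with the ``min_eigenvalue`` clamp (hypothetical numbers, not the NeuPy API):

import numpy as np

min_eigenvalue = 1e-2

gradient = np.array([0.3, -0.8, 0.1])
hessian_diagonal = np.array([2.0, 1e-6, 0.5])

# Clamp tiny diagonal entries so their reciprocals stay bounded.
hessian_diagonal = np.where(hessian_diagonal < min_eigenvalue,
                            min_eigenvalue, hessian_diagonal)
# Newton-like step: element-wise division instead of a full matrix inverse.
weight_delta = -gradient / hessian_diagonal
print(weight_delta)  # approximately [-0.15, 80.0, -0.2]
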
Example #5
class RPROP(Backpropagation):
    """ RPROP :network:`Backpropagation` algorithm optimization.

    Parameters
    ----------
    {rprop_params}
    {optimizations}
    {full_params}

    Methods
    -------
    {supervised_train}
    {raw_predict}
    {full_methods}

    Examples
    --------
    Simple example

    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> rpropnet = algorithms.RPROP(
    ...     (2, 3, 1),
    ...     verbose=False
    ... )
    >>> rpropnet.train(x_train, y_train)

    See Also
    --------
    :network:`IRPROPPlus` : iRPROP+ algorithm.
    :network:`Backpropagation` : Backpropagation algorithm.
    """

    __rprop_params = """minimum_step : float
        Minimum possible value for step. Defaults to ``0.1``.
    maximum_step : float
        Maximum possible value for step. Defaults to ``50``.
    increase_factor : float
        Increase factor for the step when the gradient keeps the same
        sign as in the previous epoch. Defaults to ``1.2``.
    decrease_factor : float
        Decrease factor for the step when the gradient changes sign
        compared to the previous epoch. Defaults to ``0.5``.
    """

    shared_docs = {"rprop_params": __rprop_params}

    # These properties set the upper and lower bounds for the step.
    minimum_step = NonNegativeNumberProperty(default=0.1)
    maximum_step = NonNegativeNumberProperty(default=50)

    # These properties increase/decrease the step by multiplying it
    # by a coefficient.
    increase_factor = NonNegativeNumberProperty(min_size=1, default=1.2)
    decrease_factor = BetweenZeroAndOneProperty(default=0.5)

    def init_layers(self):
        super(RPROP, self).init_layers()
        steps = self.steps = []

        for layer in self.train_layers:
            steps.append(ones(layer.size) * self.step)

    def get_flip_sign_weight_delta(self, layer_number):
        return self.prev_weight_deltas[layer_number]

    def layer_weight_update(self, delta, layer_number):
        if not hasattr(self, 'prev_gradients'):
            prev_gradient = 0
            prev_weight_delta = 0
        else:
            prev_gradient = self.prev_gradients[layer_number]
            prev_weight_delta = self.get_flip_sign_weight_delta(layer_number)

        step = self.steps[layer_number]
        gradient = self.gradients[layer_number]

        grad_product = prev_gradient * gradient
        negative_gradients = grad_product < 0

        step = self.steps[layer_number] = clip(
            where(
                grad_product > 0,
                # Increase step for gradients that kept the same sign
                step * self.increase_factor,
                where(
                    negative_gradients,
                    # Decrease step for gradients that switched sign
                    step * self.decrease_factor,
                    # Otherwise keep the same step value
                    step)),
            self.minimum_step,
            self.maximum_step,
        )

        output = where(negative_gradients, -prev_weight_delta,
                       -sign(gradient) * step)
        gradient[negative_gradients] = 0

        self.weight_deltas.append(output)

        del negative_gradients
        del grad_product

        return output

    def update_weights(self, weight_deltas):
        self.weight_deltas = []
        super(RPROP, self).update_weights(weight_deltas)

        self.prev_weight_deltas = self.weight_deltas
        self.prev_gradients = self.gradients
        self.prev_steps = self.steps
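
RPROP adapts a separate step per weight based only on gradient signs; a minimal sketch of one adaptation step (toy gradients, not the NeuPy API):

import numpy as np

minimum_step, maximum_step = 0.1, 50.0
increase_factor, decrease_factor = 1.2, 0.5

step = np.array([1.0, 1.0, 1.0])
prev_gradient = np.array([0.4, -0.3, 0.0])
gradient = np.array([0.2, 0.5, -0.1])

grad_product = prev_gradient * gradient
# Same sign -> grow the step, flipped sign -> shrink it, otherwise keep it.
step = np.clip(
    np.where(grad_product > 0, step * increase_factor,
             np.where(grad_product < 0, step * decrease_factor, step)),
    minimum_step, maximum_step)
update = -np.sign(gradient) * step
print(step, update)  # steps become [1.2, 0.5, 1.0]
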
Example #6
class WolfeSearch(SingleStep):
    """ Wolfe line search for the step selection.

    Parameters
    ----------
    maxstep : float
        Maximum step value. Defaults to ``50``.
    c1 : float
        Parameter for Armijo condition rule. Defaults to ``1e-4``.
    c2 : float
        Parameter for curvature condition rule. Defaults to ``0.9``.

    Attributes
    ----------
    {first_step}

    Warns
    -----
    {bp_depending}

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn import datasets, metrics
    >>> from sklearn.cross_validation import StratifiedShuffleSplit
    >>> from neupy import algorithms, layers
    >>>
    >>> np.random.seed(0)
    >>>
    >>> X, y = datasets.make_classification(n_samples=100, n_features=10,
    ...                                     random_state=33)
    >>> shuffle_split = StratifiedShuffleSplit(y, 1, train_size=0.6,
    ...                                        random_state=33)
    >>>
    >>> train_index, test_index = next(shuffle_split.__iter__())
    >>> x_train, x_test = X[train_index], X[test_index]
    >>> y_train, y_test = y[train_index], y[test_index]
    >>>
    >>> qnnet = algorithms.QuasiNewton(
    ...     connection=[
    ...         layers.SigmoidLayer(10, init_method='ortho'),
    ...         layers.SigmoidLayer(20, init_method='ortho'),
    ...         layers.OutputLayer(1)
    ...     ],
    ...     step=0.1,
    ...     use_raw_predict_at_error=False,
    ...     shuffle_data=True,
    ...     show_epoch=20,
    ...     verbose=False,
    ...
    ...     update_function='bfgs',
    ...     h0_scale=5,
    ...     gradient_tol=1e-5,
    ...     optimizations=[algorithms.WolfeSearch]
    ... )
    >>> qnnet.train(x_train, y_train, x_test, y_test, epochs=10)
    >>> result = qnnet.predict(x_test).round()
    >>>
    >>> roc_curve_score = metrics.roc_auc_score(result, y_test)
    >>> metrics.roc_auc_score(result, y_test)
    0.91666666666666674
    """

    maxstep = NonNegativeNumberProperty(default=50)
    c1 = BetweenZeroAndOneProperty(default=1e-4)
    c2 = BetweenZeroAndOneProperty(default=0.9)

    def set_weights(self, new_weights):
        for layer, new_weight in zip(self.train_layers, new_weights):
            layer.weight = new_weight.copy()

    def check_updates(self, new_step):
        weights = vector_to_list_of_matrix(new_step,
                                           (layer.size
                                            for layer in self.train_layers))
        self.set_weights(weights)
        predicted_output = self.predict(self.input_train)
        return self.error(predicted_output, self.target_train)

    def get_gradient_by_weights(self, weights):
        weights = vector_to_list_of_matrix(weights,
                                           (layer.size
                                            for layer in self.train_layers))
        self.set_weights(weights)
        gradient = self.get_gradient(self.output_train, self.target_train)
        return matrix_list_in_one_vector(gradient)

    def update_weights(self, weight_deltas):
        real_weights = [layer.weight for layer in self.train_layers]

        weights_vector = matrix_list_in_one_vector(real_weights)
        gradients_vector = matrix_list_in_one_vector(self.gradients)

        res = line_search(self.check_updates,
                          self.get_gradient_by_weights,
                          xk=weights_vector,
                          pk=matrix_list_in_one_vector(weight_deltas),
                          gfk=gradients_vector,
                          amax=self.maxstep,
                          c1=self.c1,
                          c2=self.c2)

        step = (res[0] if res[0] is not None else self.step)
        # SciPy sometimes ignores the `amax` argument and returns a
        # bigger step
        self.step = min(self.maxstep, step)
        self.set_weights(real_weights)

        return super(WolfeSearch, self).update_weights(weight_deltas)
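
``update_weights`` above delegates the step choice to SciPy's ``scipy.optimize.line_search``, which looks for a step length satisfying the Armijo (``c1``) and curvature (``c2``) Wolfe conditions; a minimal standalone sketch on a toy quadratic (not the NeuPy API):

import numpy as np
from scipy.optimize import line_search

def f(w):
    return 0.5 * np.dot(w, w)   # simple quadratic objective

def grad(w):
    return w                    # its gradient

w = np.array([3.0, -2.0])
direction = -grad(w)            # steepest-descent search direction

# Returns (step, ...) where step satisfies the Wolfe conditions, or None
# when the search fails, so fall back to a default step in that case.
result = line_search(f, grad, xk=w, pk=direction, c1=1e-4, c2=0.9)
step = result[0] if result[0] is not None else 0.1
print(step)  # typically 1.0, the exact minimizer along this direction
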
Example #7
class ART1(Clustering, BaseNetwork):
    """ Adaptive Resonance Theory (ART1) Network for binary
    data clustering.

    Notes
    -----
    * Weights are not initialized randomly, so the result is always
      reproducible.

    Parameters
    ----------
    rho : float
        Controls the reset action during the training process. Value must be
        between ``0`` and ``1``, defaults to ``0.5``.
    n_clusters : int
        Number of clusters, defaults to ``2``. Min value is also ``2``.
    {full_params}

    Methods
    -------
    train(input_data):
        Trains network until it has clustered all samples
    {predict}
    {plot_errors}
    {last_error}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> data = np.array([
    ...     [0, 1, 0],
    ...     [1, 0, 0],
    ...     [1, 1, 0],
    ... ])
    >>>
    >>> artnet = algorithms.ART1(
    ...     step=2,
    ...     rho=0.7,
    ...     n_clusters=2,
    ...     verbose=False
    ... )
    >>> artnet.predict(data)
    array([ 0.,  1.,  1.])
    """
    rho = BetweenZeroAndOneProperty(default=0.5)
    n_clusters = NonNegativeIntProperty(default=2, min_size=2)

    def __init__(self, **options):
        super(ART1, self).__init__(FAKE_CONNECTION, **options)

    def train(self, input_data):
        input_data = format_data(input_data)

        if input_data.ndim != 2:
            raise ValueError("Input value must be 2 dimentional, got "
                             "{0}".format(input_data.ndim))

        data_size = input_data.shape[1]
        n_clusters = self.n_clusters
        step = self.step
        rho = self.rho

        if list(sort(unique(input_data))) != [0, 1]:
            raise ValueError("ART1 Network works only with binary matrix, "
                             "all matix must contains only 0 and 1")

        if not hasattr(self, 'weight_21'):
            self.weight_21 = ones((data_size, n_clusters))

        if not hasattr(self, 'weight_12'):
            self.weight_12 = step / (step + n_clusters - 1) * self.weight_21.T

        weight_21 = self.weight_21
        weight_12 = self.weight_12

        if data_size != weight_21.shape[0]:
            raise ValueError(
                "Data dimention is invalid. Get {} columns data set. "
                "Must be - {} columns".format(data_size, weight_21.shape[0]))

        classes = zeros(input_data.shape[0])

        # Train network
        for i, p in enumerate(input_data):
            disabled_neurons = []
            reseted_values = []
            reset = True

            while reset:
                output1 = p
                input2 = dot(weight_12, output1.T)

                output2 = zeros(input2.size)
                input2[disabled_neurons] = -inf
                winner_index = input2.argmax()
                output2[winner_index] = 1

                expectation = dot(weight_21, output2)
                output1 = logical_and(p, expectation).astype(int)

                reset_value = dot(output1.T, output1) / dot(p.T, p)
                reset = reset_value < rho

                if reset:
                    disabled_neurons.append(winner_index)
                    reseted_values.append((reset_value, winner_index))

                if len(disabled_neurons) >= n_clusters:
                    # We only reach this case after testing all possible clusters
                    reset = False
                    winner_index = None

                if not reset:
                    if winner_index is not None:
                        weight_12[winner_index, :] = (step * output1) / (
                            step + dot(output1.T, output1) - 1)
                        weight_21[:, winner_index] = output1
                    else:
                        # Pick the winner with the best reset value
                        winner_index = max(reseted_values)[1]

                    classes[i] = winner_index

        return classes

    def predict(self, input_data):
        return self.train(input_data)

    def train_epoch(self, input_data, target_data):
        pass
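
The ``reset_value < rho`` check above is the ART vigilance test: the winning cluster is accepted only when the overlap between the input and the cluster's expectation is a large enough fraction of the input; a minimal sketch of that test (toy binary vectors, not the NeuPy API):

import numpy as np

rho = 0.7
p = np.array([1, 1, 0, 1])                # binary input sample
expectation = np.array([1, 0, 0, 1])      # top-down expectation of the winner

match = np.logical_and(p, expectation).astype(int)
reset_value = match.sum() / p.sum()       # |p AND expectation| / |p|
reset = reset_value < rho                 # True -> disable winner, search again
print(reset_value, reset)                 # about 0.67, so the winner is reset
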
Example #8
class QuasiNewton(Backpropagation):
    """ Quasi-Newton :network:`Backpropagation` algorithm optimization.

    Parameters
    ----------
    update_function : {{'bfgs', 'dfp', 'psb', 'sr1'}}
        Update function. Defaults to ``bfgs``.
    h0_scale : float
        Factor that scales the identity matrix H0 on the first
        iteration step. Defaults to ``1``.
    gradient_tol : float
        If the gradient norm is less than this value, the algorithm stops
        the training procedure. Defaults to ``1e-5``.
    {optimizations}
    {raw_predict_param}
    {full_params}

    Methods
    -------
    {supervised_train}
    {full_methods}

    Examples
    --------
    Simple example

    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> qnnet = algorithms.QuasiNewton(
    ...     (2, 3, 1),
    ...     update_function='bfgs',
    ...     verbose=False
    ... )
    >>> qnnet.train(x_train, y_train)

    See Also
    --------
    :network:`Backpropagation` : Backpropagation algorithm.
    """
    update_function = ChoiceProperty(
        default='bfgs',
        choices={
            'bfgs': bfgs,
            'dfp': dfp,
            'psb': psb,
            'sr1': sr1,
        }
    )
    h0_scale = NonNegativeNumberProperty(default=1)
    gradient_tol = BetweenZeroAndOneProperty(default=1e-5)

    default_optimizations = [WolfeSearch]

    def get_weight_delta(self, output_train, target_train):
        gradients = self.get_gradient(output_train, target_train)
        gradient = matrix_list_in_one_vector(gradients)

        if norm(gradient) < self.gradient_tol:
            raise StopIteration("Gradient norm less than {}"
                                "".format(self.gradient_tol))

        train_layers = self.train_layers
        weight = matrix_list_in_one_vector(
            (layer.weight for layer in train_layers)
        )

        if hasattr(self, 'prev_gradient'):
            # In the first epoch there are no previous weights and
            # gradients, so the quasi-Newton coefficient computation
            # is skipped and a scaled identity matrix is used instead.
            inverse_hessian = self.update_function(
                self.prev_inverse_hessian,
                weight - self.prev_weight,
                gradient - self.prev_gradient
            )
        else:
            inverse_hessian = self.h0_scale * eye(weight.size, dtype=int)

        self.prev_weight = weight.copy()
        self.prev_gradient = gradient.copy()
        self.prev_inverse_hessian = inverse_hessian

        return vector_to_list_of_matrix(
            -inverse_hessian.dot(gradient),
            (layer.size for layer in train_layers)
        )
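
Each ``update_function`` rebuilds the inverse Hessian approximation from the latest weight and gradient differences; a minimal sketch of the standard BFGS inverse-Hessian update on toy vectors (this is the textbook formula, not necessarily NeuPy's ``bfgs`` implementation):

import numpy as np

def bfgs_inverse_update(inv_hessian, weight_delta, gradient_delta):
    # H+ = (I - rho*s*y^T) H (I - rho*y*s^T) + rho*s*s^T
    # with s = weight difference, y = gradient difference, rho = 1 / (y^T s).
    rho = 1.0 / np.dot(gradient_delta, weight_delta)
    identity = np.eye(len(weight_delta))
    left = identity - rho * np.outer(weight_delta, gradient_delta)
    right = identity - rho * np.outer(gradient_delta, weight_delta)
    return left @ inv_hessian @ right + rho * np.outer(weight_delta, weight_delta)

inv_hessian = np.eye(2)                    # H0 = h0_scale * identity
weight_delta = np.array([0.1, -0.2])       # w_new - w_old
gradient_delta = np.array([0.3, -0.1])     # g_new - g_old
print(bfgs_inverse_update(inv_hessian, weight_delta, gradient_delta))
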
Example #9
class ErrorDifferenceStepUpdate(SingleStep):
    """ This algorithm make step update base on error difference between
    epochs.

    Parameters
    ----------
    update_for_smaller_error : float
        ``step`` is multiplied by this value if the error is smaller than in
        the previous epoch. Defaults to ``1.05``. Value can't be less
        than ``1``.
    update_for_bigger_error : float
        ``step`` is multiplied by this value if the error grew compared to
        the previous epoch by at least the ``error_difference`` ratio.
        Defaults to ``0.7``.
    error_difference : float
        Ratio by which the error must grow compared to the previous
        epoch before the step gets reduced. Defaults
        to ``1.04``. Value can't be less than ``1``.

    Attributes
    ----------
    {first_step}

    Warns
    -----
    {bp_depending}

    Examples
    --------
    >>> from neupy import algorithms
    >>>
    >>> bpnet = algorithms.Backpropagation(
    ...     (2, 4, 1),
    ...     step=0.1,
    ...     verbose=False,
    ...     optimizations=[algorithms.ErrorDifferenceStepUpdate]
    ... )
    >>>
    """
    update_for_smaller_error = NonNegativeNumberProperty(default=1.05,
                                                         min_size=1)
    update_for_bigger_error = BetweenZeroAndOneProperty(default=0.7)
    error_difference = NonNegativeNumberProperty(default=1.04, min_size=1)

    def new_step(self):
        current_step = self.step

        if not self.errors_in:
            return current_step

        last_error = self.last_error_in()
        previous_error = self.previous_error()

        if previous_error is None:
            return current_step

        elif last_error < previous_error:
            return self.update_for_smaller_error * current_step

        elif last_error >= self.error_difference * previous_error:
            return self.update_for_bigger_error * current_step

        return current_step

    def after_weight_update(self, input_train, target_train):
        super(ErrorDifferenceStepUpdate,
              self).after_weight_update(input_train, target_train)
        self.step = self.new_step()
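
A minimal standalone sketch of the same step rule with hypothetical error values (not the NeuPy API):

def adapt_step(step, last_error, previous_error,
               update_for_smaller_error=1.05,
               update_for_bigger_error=0.7,
               error_difference=1.04):
    # Error went down -> grow the step slightly.
    if last_error < previous_error:
        return update_for_smaller_error * step
    # Error grew by more than the tolerated ratio -> shrink the step.
    if last_error >= error_difference * previous_error:
        return update_for_bigger_error * step
    return step

print(adapt_step(0.1, last_error=0.20, previous_error=0.25))  # ~0.105
print(adapt_step(0.1, last_error=0.30, previous_error=0.25))  # ~0.07
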
Example #10
class LeakStepAdaptation(MultiSteps):
    """ Leak Learning Rate Adaptation algorithm for step adaptation procedure
    in backpropagation algortihm. By default every layer has the same value
    as ``step`` parameter in network, but after first training epoch they
    must be different.

    Parameters
    ----------
    leak_size : float
        Leak size controls the ratio of the update variable that accumulates
        weight deltas from previous epochs, defaults to ``0.5``.
    alpha : float
        ``alpha`` controls the total step update ratio (its role is similar
        to the step's role in the weight update procedure). Defaults to ``0.5``.
    beta : float
        Similar to ``alpha``, but it controls the ratio only for the update
        matrix norms. Defaults to ``0.5``.

    Attributes
    ----------
    {steps}

    Warns
    -----
    {bp_depending}

    Examples
    --------
    >>> from neupy import algorithms
    >>>
    >>> bpnet = algorithms.Backpropagation(
    ...     (2, 4, 1),
    ...     step=0.1,
    ...     verbose=False,
    ...     optimizations=[algorithms.LeakStepAdaptation]
    ... )
    >>>
    """
    leak_size = BetweenZeroAndOneProperty(default=0.5)
    alpha = NonNegativeNumberProperty(default=0.5)
    beta = NonNegativeNumberProperty(default=0.5)

    def init_layers(self):
        super(LeakStepAdaptation, self).init_layers()
        updates = self.updates = []

        for layer in self.train_layers:
            updates.append(zeros(layer.size))

    def after_weight_update(self, input_train, target_train):
        super(LeakStepAdaptation, self).after_weight_update(input_train,
                                                            target_train)
        alpha = self.alpha
        beta = self.beta
        leak_size = self.leak_size

        weight_delta = self.weight_delta
        steps = self.steps
        updates = self.updates

        for i, layer in enumerate(self.train_layers):
            step = steps[i]
            update = updates[i]

            updates[i] = (1 - leak_size) * update + (
                leak_size * weight_delta[i]
            )
            steps[i] += alpha * step * (beta * norm(updates[i]) - step)
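
A minimal per-layer sketch of the leak rule above with toy arrays (hypothetical values, not the NeuPy API):

import numpy as np
from numpy.linalg import norm

alpha, beta, leak_size = 0.5, 0.5, 0.5

step = 0.1
update = np.zeros(3)
weight_delta = np.array([0.2, -0.1, 0.4])   # hypothetical deltas for one layer

# Leaky running average of the weight deltas across epochs.
update = (1 - leak_size) * update + leak_size * weight_delta
# The step grows when the accumulated update norm is large relative
# to the current step, and shrinks otherwise.
step += alpha * step * (beta * norm(update) - step)
print(update, step)
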
Example #11
class DeltaBarDelta(MultiSteps):
    beta = BetweenZeroAndOneProperty(default=0.5)
    increase_factor = NonNegativeNumberProperty(default=0.1)
    decrease_factor = BetweenZeroAndOneProperty(default=0.9)