class AveragePooling(BasePooling):
    """
    Average pooling layer.

    Parameters
    ----------
    mode : {{'include_padding', 'exclude_padding'}}
        Gives you the choice to include or exclude padding.
        Defaults to ``include_padding``.
    {BasePooling.Parameters}

    Methods
    -------
    {BasePooling.Methods}

    Attributes
    ----------
    {BasePooling.Attributes}
    """
    mode = ChoiceProperty(default='include_padding',
                          choices={
                              'include_padding': 'average_inc_pad',
                              'exclude_padding': 'average_exc_pad'
                          })

    def output(self, input_value):
        return pool.pool_2d(input_value,
                            ds=self.size,
                            mode=self.mode,
                            ignore_border=True,
                            st=self.stride_size,
                            padding=self.padding)
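A minimal NumPy sketch, independent of the Theano-based layer above, of how the two ``mode`` choices behave when a pooling window overlaps zero padding: ``include_padding`` (``average_inc_pad``) divides by the full window size, while ``exclude_padding`` (``average_exc_pad``) divides only by the number of non-padded values. The helper name ``average_pool_1d_padded`` is purely illustrative.

import numpy as np

def average_pool_1d_padded(row, size=2, pad=1, include_padding=True):
    # Pad with zeros on both sides, then average non-overlapping windows.
    padded = np.pad(row, pad, mode='constant')
    n_windows = len(padded) // size
    output = []
    for i in range(n_windows):
        window = padded[i * size:(i + 1) * size]
        if include_padding:
            # 'average_inc_pad': padded zeros count towards the denominator.
            output.append(window.sum() / size)
        else:
            # 'average_exc_pad': only positions inside the original row count.
            positions = np.arange(i * size, (i + 1) * size)
            n_real = np.count_nonzero((positions >= pad) &
                                      (positions < pad + len(row)))
            output.append(window.sum() / max(n_real, 1))
    return np.array(output)

row = np.array([2.0, 4.0, 6.0, 8.0])
print(average_pool_1d_padded(row, include_padding=True))   # [1. 5. 4.]
print(average_pool_1d_padded(row, include_padding=False))  # [2. 5. 8.]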
Example #2
class AveragePooling(BasePooling):
    """
    Average pooling layer.

    Parameters
    ----------
    mode : {{``include_padding``, ``exclude_padding``}}
        Gives a choice to include or exclude padding.
        Defaults to ``include_padding``.

    {BasePooling.Parameters}

    Methods
    -------
    {BasePooling.Methods}

    Attributes
    ----------
    {BasePooling.Attributes}

    Examples
    --------
    2D pooling

    >>> from neupy import layers
    >>>
    >>> network = layers.join(
    ...     layers.Input((3, 10, 10)),
    ...     layers.AveragePooling((2, 2)),
    ... )
    >>> network.output_shape
    (3, 5, 5)

    1D pooling

    >>> from neupy import layers
    >>>
    >>> network = layers.join(
    ...     layers.Input((10, 30)),
    ...     layers.Reshape((10, 30, 1)),
    ...     layers.AveragePooling((2, 1)),
    ... )
    >>> network.output_shape
    (10, 15, 1)
    """
    mode = ChoiceProperty(default='include_padding',
                          choices={
                              'include_padding': 'average_inc_pad',
                              'exclude_padding': 'average_exc_pad'
                          })

    def output(self, input_value):
        return pool.pool_2d(input_value,
                            ws=self.size,
                            mode=self.mode,
                            ignore_border=self.ignore_border,
                            stride=self.stride,
                            pad=self.padding)
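For reference, a small standalone sketch of how the pooled output length follows from size and stride when ``ignore_border=True`` (the configuration used above); ``pooled_length`` is not a neupy function.

def pooled_length(length, size, stride):
    # With ignore_border=True, windows that would cross the border
    # are dropped, so the count is floor((length - size) / stride) + 1.
    return (length - size) // stride + 1

# (3, 10, 10) input, 2x2 pooling, stride defaults to the pool size:
# each spatial dimension shrinks from 10 to 5, matching the docstring
# example above.
print(pooled_length(10, size=2, stride=2))  # 5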
Example #3
class DiscreteMemory(BaseSkeleton, Configurable):
    """
    Base class for discrete memory networks.

    Notes
    -----
    - Input and output vectors should contain only binary values.

    Parameters
    ----------
    mode : {{``sync``, ``async``}}
        Identifies the pattern recovery mode.

        - ``sync`` mode tries to recover the pattern using all
          values from the input vector.

        - ``async`` mode randomly chooses some values from the
          input vector and iteratively repeats this procedure.
          The number of iterations is defined by the ``n_times``
          parameter.

        Defaults to ``sync``.

    n_times : int
        Available only in ``async`` mode. Identifies the number
        of random trials. Defaults to ``100``.
    """

    mode = ChoiceProperty(default='sync', choices=['async', 'sync'])
    n_times = IntProperty(default=100, minval=1)

    def __init__(self, **options):
        super(DiscreteMemory, self).__init__(**options)
        self.weight = None

        if 'n_times' in options and self.mode != 'async':
            self.logs.warning("You can use `n_times` property only in "
                              "`async` mode.")

    def discrete_validation(self, matrix):
        """
        Validate discrete matrix.

        Parameters
        ----------
        matrix : array-like
            Matrix for validation.
        """
        if np.any(~np.isin(matrix, [0, 1])):
            raise ValueError(
                "This network expects only descrete inputs. It mean that "
                "it's possible to can use only matrices with binary values "
                "(0 and 1)."
            )
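A rough standalone sketch of the difference between the two recovery modes described in the docstring: ``sync`` recomputes the whole vector in one pass, while ``async`` repeatedly updates randomly chosen positions ``n_times``. The sign-threshold rule below is a generic Hopfield-style update used only to illustrate the control flow, not neupy's exact implementation.

import numpy as np

def recover(weight, x, mode='sync', n_times=100):
    # Hopfield-style threshold update with binary {0, 1} values.
    x = x.astype(float).copy()
    if mode == 'sync':
        # One pass: every value is recomputed from the whole input vector.
        return (weight.dot(x) >= 0).astype(int)
    # 'async': update one randomly chosen position at a time, n_times times.
    for _ in range(n_times):
        i = np.random.randint(len(x))
        x[i] = float(weight[i].dot(x) >= 0)
    return x.astype(int)

# Weight matrix that stores the binary pattern [1, 0, 1].
weight = np.array([[0., -1., 1.], [-1., 0., -1.], [1., -1., 0.]])
print(recover(weight, np.array([1, 1, 1]), mode='sync'))              # [1 0 1]
print(recover(weight, np.array([1, 1, 1]), mode='async', n_times=50))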
Example #4
class DiscreteMemory(BaseSkeleton, Configurable):
    """ Base class for discrete memory networks.

    Notes
    -----
    * {discrete_data_note}
    """
    __discrete_data_note = """ Input and output data must contain only \
    binary values.
    """

    __discrete_params = """mode : {'sync', 'async'}
        Identifies the pattern recovery mode. ``sync`` mode tries to recover
        a pattern using the whole input vector. ``async`` mode randomly
        chooses some values from the input vector and repeats this procedure
        the number of times given by the ``n_times`` variable.
        Defaults to ``sync``.
    n_times : int
        Available only in ``async`` mode. Identifies the number of random trials.
        Defaults to ``100``.
    """

    shared_docs = {
        'discrete_data_note': __discrete_data_note,
        'discrete_params': __discrete_params
    }

    mode = ChoiceProperty(default='sync', choices=['async', 'sync'])
    n_times = NonNegativeIntProperty(default=100)

    def __init__(self, **options):
        super(DiscreteMemory, self).__init__(**options)
        self.weight = None

        if 'n_times' in options and self.mode != 'async':
            self.logs.warning("You can use `n_times` property only in "
                              "`async` mode.")

    def discrete_validation(self, matrix):
        """ Validate discrete matrix.

        Parameters
        ----------
        matrix : array-like
            Matrix for validation.

        Returns
        -------
        bool
            ``True`` if all ``matrix`` values are in the
            `discrete_values` list and ``False`` otherwise.
        """
        if np_any((matrix != 0) & (matrix != 1)):
            raise ValueError("This network is descrete. This mean that you "
                             "can use data which contains 0 and 1 values")
Example #5
class BaseLayer(with_metaclass(LayerMeta, ChainConnection, BaseConfigurable)):
    """ Base class for all layers.

    Parameters
    ----------
    {layer_params}
    """
    __layer_params = """input_size : int
        Layer input size.
    weight : 2D array-like or None
        Defines your layer weights. `None` means that the weights will be
        generated randomly depending on the `init_method` property.
        `None` by default.
    init_method : {'gauss', 'bounded', 'ortho'}
        Weight initialization method.
        `gauss` generates random weights from the standard
        normal distribution.
        `bounded` generates uniformly random weights within the given bounds.
        `ortho` generates a random orthogonal matrix.
    random_weight_bound : tuple of two int
        Available only for `init_method` equal to `bounded`, defaults
        to `(0, 1)`.
    """
    shared_docs = {'layer_params': __layer_params}

    input_size = IntProperty()
    weight = ArrayProperty(default=None)
    random_weight_bound = NumberBoundProperty(default=(0, 1))
    init_method = ChoiceProperty(default=GAUSSIAN,
                                 choices=[GAUSSIAN, BOUNDED, ORTHOGONAL])

    def __init__(self, input_size, **options):
        super(BaseLayer, self).__init__()

        self.input_size = input_size
        self.use_bias = False

        # Default variables which will change after initialization
        self.relate_to_layer = None
        self.size = None

        # If we assign a function to a class attribute, Python treats it
        # as a new class method and will call it with `self` as the
        # first parameter.
        if hasattr(self.__class__, 'activation_function'):
            self.activation_function = self.__class__.activation_function

        # Initialize default options
        BaseConfigurable.__init__(self, **options)

    def relate_to(self, right_layer):
        self.relate_to_layer = right_layer

    def initialize(self, with_bias=False):
        self.use_bias = with_bias
        size = self.input_size + self.use_bias
        self.size = (size, self.relate_to_layer.input_size)
        self.weight = self._init_weight()

    # --------------- Weights manipulations --------------- #

    def _init_weight(self):
        if self.weight is not None:
            return self.weight

        init_method = self.init_method

        if init_method == GAUSSIAN:
            return randn(*self.size)

        elif init_method == BOUNDED:
            return random_bounded(self.size, *self.random_weight_bound)

        elif init_method == ORTHOGONAL:
            return random_orthogonal(self.size)

    @property
    def weight_without_bias(self):
        if self.use_bias:
            return self.weight[1:, :]
        return self.weight

    # --------------- Layer operations --------------- #

    def summator(self, input_value):
        return dot(input_value, self.weight)

    def output(self, input_value):
        input_data = self.preformat_input(input_value)
        summated = self.summator(input_data)
        return self.activation_function(summated)

    def preformat_input(self, input_data):
        if self.use_bias:
            input_data = add_bias_column(input_data)
        return input_data

    def __repr__(self):
        return '{name}({size})'.format(name=self.__class__.__name__,
                                       size=self.input_size)
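The three ``init_method`` options described above can be sketched in plain NumPy. This is a standalone illustration; the QR trick used for the orthogonal case is a common recipe and not necessarily the exact one used by the layer.

import numpy as np

def init_weight(shape, method='gauss', bounds=(0, 1)):
    rows, cols = shape
    if method == 'gauss':
        # Samples from the standard normal distribution.
        return np.random.randn(rows, cols)
    if method == 'bounded':
        # Uniform samples inside the given bounds.
        low, high = bounds
        return np.random.uniform(low, high, size=shape)
    if method == 'ortho':
        # Random orthogonal matrix via QR decomposition of a Gaussian matrix.
        q, _ = np.linalg.qr(np.random.randn(max(shape), max(shape)))
        return q[:rows, :cols]
    raise ValueError("Unknown init method: {}".format(method))

w = init_weight((4, 3), method='ortho')
print(np.allclose(w.T.dot(w), np.eye(3)))  # True: columns are orthonormal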
Example #6
class SOFM(Kohonen):
    """
    Self-Organizing Feature Map (SOFM or SOM).

    Notes
    -----
    - Training data samples should have normalized features.

    Parameters
    ----------
    {BaseAssociative.n_inputs}

    n_outputs : int or None
        Number of outputs. Parameter is optional if
        ``features_grid`` was specified.

        .. code-block:: python

            if n_outputs is None:
                n_outputs = np.prod(features_grid)

    learning_radius : int
        Parameter defines radius within which we consider all
        neurons as neighbours to the winning neuron. The bigger
        the value the more neurons will be updated after each
        iteration.

        The ``0`` value means that we don't update
        neighbour neurons.

        Defaults to ``0``.

    std : int, float
        Parameter controls the learning rate for each neighbour.
        The further a neighbour neuron is from the winning neuron,
        the smaller its learning rate. The learning rate
        scales based on the factors produced by the normal
        distribution centered at the winning neuron with the
        standard deviation specified by this parameter.
        The learning rate for the winning neuron is always equal
        to the value specified in the ``step`` parameter and for
        neighbour neurons it's always lower.

        The bigger the value of this parameter, the bigger
        the learning rate for the neighbour neurons.

        Defaults to ``1``.

    features_grid : list, tuple, None
        Feature grid defines shape of the output neurons.
        The new shape should be compatible with the number
        of outputs. It means that the following condition
        should be true:

        .. code-block:: python

            np.prod(features_grid) == n_outputs

        SOFM implementation supports n-dimensional grids.
        For instance, in order to specify grid as cube instead of
        the regular rectangular shape we can set up options as
        the following:

        .. code-block:: python

            SOFM(
                ...
                features_grid=(5, 5, 5),
                ...
            )

        Defaults to ``(n_outputs, 1)``.

    grid_type : {{``rect``, ``hexagon``}}
        Defines connection type in feature grid. Type defines
        which neurons we will consider as closest to the winning
        neuron during the training.

        - ``rect`` - Connections between neurons will be organized
          in a rectangular grid.

        - ``hexagon`` - Connections between neurons will be organized
          in a hexagonal grid. It works only for 1d or 2d grids.

        Defaults to ``rect``.

    distance : {{``euclid``, ``dot_product``, ``cos``}}
        Defines function that will be used to compute
        closest weight to the input sample.

        - ``dot_product``: Just a regular dot product between
          data sample and network's weights

        - ``euclid``: Euclidean distance between data sample
          and network's weights

        - ``cos``: Cosine distance between data sample and
          network's weights

        Defaults to ``euclid``.

    reduce_radius_after : int or None
        Every specified number of epochs ``learning_radius``
        parameter will be reduced by ``1``. The process continues
        until ``learning_radius`` is equal to ``0``.

        The ``None`` value disables parameter reduction
        during the training.

        Defaults to ``100``.

    reduce_step_after : int or None
        Defines reduction rate at which parameter ``step`` will
        be reduced using the following formula:

        .. code-block:: python

            step = step / (1 + current_epoch / reduce_step_after)

        The ``None`` value disables parameter reduction
        during the training.

        Defaults to ``100``.

    reduce_std_after : int or None
        Defines reduction rate at which parameter ``std`` will
        be reduced using the following formula:

        .. code-block:: python

            std = std / (1 + current_epoch / reduce_std_after)

        The ``None`` value disables parameter reduction
        during the training.

        Defaults to ``100``.

    weight : array-like, Initializer or {{``init_pca``, ``sample_from_data``}}
        Neural network weights.
        Value defined manually should have shape ``(n_inputs, n_outputs)``.

        Also, it's possible to initialize weights based on the
        training data. There are two options:

        - ``sample_from_data`` - Before training starts, randomly
          takes a number of training samples equal to the number
          of expected outputs.

        - ``init_pca`` - Before training starts, SOFM applies PCA
          to a covariance matrix built from the training samples.
          Weights will be generated based on the two eigenvectors
          associated with the largest eigenvalues.

        Defaults to :class:`Normal() <neupy.init.Normal>`.

    {BaseNetwork.step}

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.signals}

    {Verbose.verbose}

    Methods
    -------
    init_weights(train_data)
        Initializes weights based on the input data. It works only
        for the `init_pca` and `sample_from_data` options. For other
        cases it will throw an error.

    {BaseSkeleton.predict}

    {BaseAssociative.train}

    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms, utils
    >>>
    >>> utils.reproducible()
    >>>
    >>> data = np.array([
    ...     [0.1961, 0.9806],
    ...     [-0.1961, 0.9806],
    ...     [-0.5812, -0.8137],
    ...     [-0.8137, -0.5812],
    ... ])
    >>>
    >>> sofm = algorithms.SOFM(
    ...     n_inputs=2,
    ...     n_outputs=2,
    ...     step=0.1,
    ...     learning_radius=0
    ... )
    >>> sofm.train(data, epochs=100)
    >>> sofm.predict(data)
    array([[0, 1],
           [0, 1],
           [1, 0],
           [1, 0]])
    """
    n_outputs = IntProperty(minval=1, allow_none=True, default=None)
    weight = SOFMWeightParameter(default=init.Normal(),
                                 choices={
                                     'init_pca': linear_initialization,
                                     'sample_from_data': sample_data,
                                 })
    features_grid = TypedListProperty(allow_none=True, default=None)

    DistanceParameter = namedtuple('DistanceParameter', 'name func')
    distance = ChoiceProperty(default='euclid',
                              choices={
                                  'dot_product':
                                  DistanceParameter(name='dot_product',
                                                    func=np.dot),
                                  'euclid':
                                  DistanceParameter(name='euclid',
                                                    func=neg_euclid_distance),
                                  'cos':
                                  DistanceParameter(name='cosine',
                                                    func=cosine_similarity),
                              })

    GridTypeMethods = namedtuple('GridTypeMethods',
                                 'name find_neighbours find_step_scaler')

    grid_type = ChoiceProperty(
        default='rect',
        choices={
            'rect':
            GridTypeMethods(name='rectangle',
                            find_neighbours=find_neighbours_on_rect_grid,
                            find_step_scaler=find_step_scaler_on_rect_grid),
            'hexagon':
            GridTypeMethods(name='hexagon',
                            find_neighbours=find_neighbours_on_hexagon_grid,
                            find_step_scaler=find_step_scaler_on_hexagon_grid)
        })

    learning_radius = IntProperty(default=0, minval=0)
    std = NumberProperty(minval=0, default=1)

    reduce_radius_after = IntProperty(default=100, minval=1, allow_none=True)
    reduce_std_after = IntProperty(default=100, minval=1, allow_none=True)
    reduce_step_after = IntProperty(default=100, minval=1, allow_none=True)

    def __init__(self, **options):
        super(BaseAssociative, self).__init__(**options)

        if self.n_outputs is None and self.features_grid is None:
            raise ValueError("One of the following parameters has to be "
                             "specified: n_outputs, features_grid")

        elif self.n_outputs is None:
            self.n_outputs = np.prod(self.features_grid)

        n_grid_elements = np.prod(self.features_grid)
        invalid_feature_grid = (self.features_grid is not None
                                and n_grid_elements != self.n_outputs)

        if invalid_feature_grid:
            raise ValueError(
                "Feature grid should contain the same number of elements "
                "as in the output layer: {0}, but found: {1} (shape: {2})"
                "".format(self.n_outputs, n_grid_elements, self.features_grid))

        if self.features_grid is None:
            self.features_grid = (self.n_outputs, 1)

        if len(self.features_grid) > 2 and self.grid_type.name == 'hexagon':
            raise ValueError("SOFM with hexagon grid type should have "
                             "one or two dimensional feature grid, but got "
                             "{}d instead (shape: {!r})".format(
                                 len(self.features_grid), self.features_grid))

        is_pca_init = (isinstance(options.get('weight'), six.string_types)
                       and options.get('weight') == 'init_pca')

        self.initialized = False
        if not callable(self.weight):
            super(Kohonen, self).init_weights()
            self.initialized = True

            if self.distance.name == 'cosine':
                self.weight /= np.linalg.norm(self.weight, axis=0)

        elif is_pca_init and self.grid_type.name != 'rectangle':
            raise WeightInitializationError(
                "Cannot apply PCA weight initialization for non-rectangular "
                "grid. Grid type: {}".format(self.grid_type.name))

    def predict_raw(self, X):
        X = format_data(X, is_feature1d=(self.n_inputs == 1))

        if X.ndim != 2:
            raise ValueError("Only 2D inputs are allowed")

        n_samples = X.shape[0]
        output = np.zeros((n_samples, self.n_outputs))

        for i, input_row in enumerate(X):
            output[i, :] = self.distance.func(input_row.reshape(1, -1),
                                              self.weight)

        return output

    def update_indexes(self, layer_output):
        neuron_winner = layer_output.argmax(axis=1).item(0)
        winner_neuron_coords = np.unravel_index(neuron_winner,
                                                self.features_grid)

        learning_radius = self.learning_radius
        step = self.step
        std = self.std

        if self.reduce_radius_after is not None:
            learning_radius -= self.last_epoch // self.reduce_radius_after
            learning_radius = max(0, learning_radius)

        if self.reduce_step_after is not None:
            step = decay_function(step, self.last_epoch,
                                  self.reduce_step_after)

        if self.reduce_std_after is not None:
            std = decay_function(std, self.last_epoch, self.reduce_std_after)

        methods = self.grid_type
        output_grid = np.reshape(layer_output, self.features_grid)

        output_with_neighbours = methods.find_neighbours(
            grid=output_grid,
            center=winner_neuron_coords,
            radius=learning_radius)

        step_scaler = methods.find_step_scaler(grid=output_grid,
                                               center=winner_neuron_coords,
                                               std=std)

        index_y, = np.nonzero(output_with_neighbours.reshape(self.n_outputs))

        step_scaler = step_scaler.reshape(self.n_outputs)
        return index_y, step * step_scaler[index_y]

    def init_weights(self, X_train):
        if self.initialized:
            raise WeightInitializationError(
                "Weights have been already initialized")

        weight_initializer = self.weight
        self.weight = weight_initializer(X_train, self.features_grid)
        self.initialized = True

        if self.distance.name == 'cosine':
            self.weight /= np.linalg.norm(self.weight, axis=0)

    def train(self, X_train, epochs=100):
        if not self.initialized:
            self.init_weights(X_train)
        super(SOFM, self).train(X_train, epochs=epochs)

    def one_training_update(self, X_train, y_train=None):
        step = self.step
        predict = self.predict
        update_indexes = self.update_indexes

        error = 0
        for input_row in X_train:
            input_row = np.reshape(input_row, (1, input_row.size))
            layer_output = predict(input_row)

            index_y, step = update_indexes(layer_output)
            distance = input_row.T - self.weight[:, index_y]
            updated_weights = (self.weight[:, index_y] + step * distance)

            if self.distance.name == 'cosine':
                updated_weights /= np.linalg.norm(updated_weights, axis=0)

            self.weight[:, index_y] = updated_weights
            error += np.abs(distance).mean()

        return error / len(X_train)
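A standalone sketch of the parameter decay and neighbourhood scaling described in the docstring above: ``step`` and ``std`` decay as ``value / (1 + epoch / reduce_after)``, and the per-neuron learning-rate scaler follows a Gaussian of the grid distance to the winning neuron (the exact kernel used by neupy may differ).

import numpy as np

def decay(value, epoch, reduce_after):
    # Same formula as in the docstring: value / (1 + epoch / reduce_after).
    return value / (1.0 + float(epoch) / reduce_after)

def gaussian_step_scaler(grid_shape, winner, std=1.0):
    # Learning-rate scaler per neuron: 1 at the winner, smaller further away.
    coords = np.indices(grid_shape).reshape(len(grid_shape), -1).T
    distances = np.linalg.norm(coords - np.array(winner), axis=1)
    return np.exp(-distances ** 2 / (2.0 * std ** 2)).reshape(grid_shape)

print(decay(0.1, epoch=100, reduce_after=100))  # 0.05
print(gaussian_step_scaler((3, 3), winner=(1, 1), std=1.0).round(2))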
Example #7
class QuasiNewton(Backpropagation):
    """ Quasi-Newton :network:`Backpropagation` algorithm optimization.

    Parameters
    ----------
    update_function : {{'bfgs', 'dfp', 'psb', 'sr1'}}
        Update function. Defaults to ``bfgs``.
    h0_scale : float
        Factor that scales the identity matrix H0 on the first
        iteration step. Defaults to ``1``.
    gradient_tol : float
        If the gradient norm is less than this value, the algorithm will
        stop the training procedure. Defaults to ``1e-5``.
    {optimizations}
    {raw_predict_param}
    {full_params}

    Methods
    -------
    {supervised_train}
    {full_methods}

    Examples
    --------
    Simple example

    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> qnnet = algorithms.QuasiNewton(
    ...     (2, 3, 1),
    ...     update_function='bfgs',
    ...     verbose=False
    ... )
    >>> qnnet.train(x_train, y_train)

    See Also
    --------
    :network:`Backpropagation` : Backpropagation algorithm.
    """
    update_function = ChoiceProperty(
        default='bfgs',
        choices={
            'bfgs': bfgs,
            'dfp': dfp,
            'psb': psb,
            'sr1': sr1,
        }
    )
    h0_scale = NonNegativeNumberProperty(default=1)
    gradient_tol = BetweenZeroAndOneProperty(default=1e-5)

    default_optimizations = [WolfeSearch]

    def get_weight_delta(self, output_train, target_train):
        gradients = self.get_gradient(output_train, target_train)
        gradient = matrix_list_in_one_vector(gradients)

        if norm(gradient) < self.gradient_tol:
            raise StopIteration("Gradient norm less than {}"
                                "".format(self.gradient_tol))

        train_layers = self.train_layers
        weight = matrix_list_in_one_vector(
            (layer.weight for layer in train_layers)
        )

        if hasattr(self, 'prev_gradient'):
            # In the first epoch we don't have previous weights and
            # gradients. For this reason we skip the quasi-Newton
            # coefficient computation.
            inverse_hessian = self.update_function(
                self.prev_inverse_hessian,
                weight - self.prev_weight,
                gradient - self.prev_gradient
            )
        else:
            inverse_hessian = self.h0_scale * eye(weight.size, dtype=int)

        self.prev_weight = weight.copy()
        self.prev_gradient = gradient.copy()
        self.prev_inverse_hessian = inverse_hessian

        return vector_to_list_of_matrix(
            -inverse_hessian.dot(gradient),
            (layer.size for layer in train_layers)
        )
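For reference, the classic BFGS update of the inverse Hessian that the ``bfgs`` choice refers to, written as a standalone NumPy function. This is the textbook formula, not necessarily neupy's exact implementation.

import numpy as np

def bfgs_update(inv_hessian, weight_delta, gradient_delta):
    # H_new = (I - rho*s*y^T) H (I - rho*y*s^T) + rho*s*s^T,
    # with s = weight_delta, y = gradient_delta, rho = 1 / (y^T s).
    s = weight_delta.reshape(-1, 1)
    y = gradient_delta.reshape(-1, 1)
    rho = 1.0 / gradient_delta.dot(weight_delta)
    identity = np.eye(len(s))
    left = identity - rho * s.dot(y.T)
    right = identity - rho * y.dot(s.T)
    return left.dot(inv_hessian).dot(right) + rho * s.dot(s.T)

H = np.eye(2)
s = np.array([0.1, 0.1])
y = np.array([0.2, 0.4])   # gradient change for a quadratic with Hessian diag(2, 4)
H_new = bfgs_update(H, s, y)
print(np.allclose(H_new.dot(y), s))  # True: the secant condition holds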
Example #8
class ConjugateGradient(WolfeLineSearchForStep, BaseOptimizer):

    """
    Conjugate Gradient algorithm.

    Parameters
    ----------
    update_function : ``fletcher_reeves``, ``polak_ribiere``,\
    ``hentenes_stiefel``, ``dai_yuan``, ``liu_storey``
        Update function. Defaults to ``fletcher_reeves``.

    epsilon : float
        Ensures computational stability during the division in
        ``update_function`` when the denominator is a very small number.
        Defaults to ``1e-7``.

    {WolfeLineSearchForStep.Parameters}

    {BaseOptimizer.network}

    {BaseOptimizer.loss}

    {BaseOptimizer.show_epoch}

    {BaseOptimizer.shuffle_data}

    {BaseOptimizer.signals}

    {BaseOptimizer.verbose}

    {BaseOptimizer.regularizer}

    Attributes
    ----------
    {BaseOptimizer.Attributes}

    Methods
    -------
    {BaseOptimizer.Methods}

    Examples
    --------
    >>> from sklearn import datasets, preprocessing
    >>> from sklearn.model_selection import train_test_split
    >>> from neupy import algorithms, layers
    >>>
    >>> dataset = datasets.load_boston()
    >>> data, target = dataset.data, dataset.target
    >>>
    >>> data_scaler = preprocessing.MinMaxScaler()
    >>> target_scaler = preprocessing.MinMaxScaler()
    >>>
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     data_scaler.fit_transform(data),
    ...     target_scaler.fit_transform(target),
    ...     test_size=0.15
    ... )
    >>>
    >>> cgnet = algorithms.ConjugateGradient(
    ...     network=[
    ...         layers.Input(13),
    ...         layers.Sigmoid(50),
    ...         layers.Sigmoid(1),
    ...     ],
    ...     update_function='fletcher_reeves',
    ...     verbose=False
    ... )
    >>>
    >>> cgnet.train(x_train, y_train, epochs=100)
    >>> y_predict = cgnet.predict(x_test).round(1)
    >>>
    >>> real = target_scaler.inverse_transform(y_test)
    >>> predicted = target_scaler.inverse_transform(y_predict)

    References
    ----------
    [1] Jorge Nocedal, Stephen J. Wright, Numerical Optimization.
        Chapter 5, Conjugate Gradient Methods, p. 101-133
    """
    epsilon = NumberProperty(default=1e-7, minval=0)
    update_function = ChoiceProperty(
        default='fletcher_reeves',
        choices={
            'fletcher_reeves': fletcher_reeves,
            'polak_ribiere': polak_ribiere,
            'hentenes_stiefel': hentenes_stiefel,
            'liu_storey': liu_storey,
            'dai_yuan': dai_yuan,
        }
    )
    step = WithdrawProperty()

    def init_functions(self):
        n_parameters = self.network.n_parameters
        self.variables.update(
            prev_delta=tf.Variable(
                tf.zeros([n_parameters]),
                name="conj-grad/prev-delta",
                dtype=tf.float32,
            ),
            prev_gradient=tf.Variable(
                tf.zeros([n_parameters]),
                name="conj-grad/prev-gradient",
                dtype=tf.float32,
            ),
            iteration=tf.Variable(
                asfloat(self.last_epoch),
                name='conj-grad/current-iteration',
                dtype=tf.float32
            ),
        )
        super(ConjugateGradient, self).init_functions()

    def init_train_updates(self):
        iteration = self.variables.iteration
        previous_delta = self.variables.prev_delta
        previous_gradient = self.variables.prev_gradient

        n_parameters = self.network.n_parameters
        variables = self.network.variables
        parameters = [var for var in variables.values() if var.trainable]
        param_vector = make_single_vector(parameters)

        gradients = tf.gradients(self.variables.loss, parameters)
        full_gradient = make_single_vector(gradients)

        beta = self.update_function(
            previous_gradient, full_gradient, previous_delta, self.epsilon)

        parameter_delta = tf.where(
            tf.equal(tf.mod(iteration, n_parameters), 0),
            -full_gradient,
            -full_gradient + beta * previous_delta
        )

        step = self.find_optimal_step(param_vector, parameter_delta)
        updated_parameters = param_vector + step * parameter_delta
        updates = setup_parameter_updates(parameters, updated_parameters)

        # We have to compute these values first, otherwise
        # parallelization in tensorflow can mix the update order
        # and, for example, the previous gradient can be equal to
        # the current gradient value. It happens because tensorflow
        # tries to execute operations in parallel.
        with tf.control_dependencies([full_gradient, parameter_delta]):
            updates.extend([
                previous_gradient.assign(full_gradient),
                previous_delta.assign(parameter_delta),
                iteration.assign(iteration + 1),
            ])

        return updates
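A small standalone sketch of the two most common ``update_function`` choices; the ``epsilon`` term plays the same stabilizing role as described in the docstring. These are the standard Fletcher-Reeves and Polak-Ribiere rules, written independently of neupy.

import numpy as np

def fletcher_reeves_beta(old_grad, new_grad, epsilon=1e-7):
    # beta = ||g_new||^2 / ||g_old||^2
    return new_grad.dot(new_grad) / (old_grad.dot(old_grad) + epsilon)

def polak_ribiere_beta(old_grad, new_grad, epsilon=1e-7):
    # beta = g_new . (g_new - g_old) / ||g_old||^2
    return new_grad.dot(new_grad - old_grad) / (old_grad.dot(old_grad) + epsilon)

old_grad = np.array([1.0, -2.0])
new_grad = np.array([0.5, 0.5])
beta = fletcher_reeves_beta(old_grad, new_grad)
prev_delta = -old_grad
# New search direction mixes the steepest-descent step with the old direction.
new_delta = -new_grad + beta * prev_delta
print(beta, new_delta)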
Example #9
class BasePooling(BaseLayer):
    """
    Base class for the pooling layers.

    Parameters
    ----------
    size : tuple with 2 integers
        Factor by which to downscale ``(vertical, horizontal)``.
        ``(2, 2)`` will halve the image in each dimension.

    stride : tuple or int
        Stride size, which is the number of shifts over
        rows/cols to get the next pool region. If stride is
        ``None``, it is considered equal to ``size`` (no overlap
        between pooling regions).

    padding : {{``valid``, ``same``}}
        ``valid`` applies no zero padding. ``same`` pads the input
        with zeros so that, for stride ``1``, the output has the same
        spatial dimensions as the input. Defaults to ``valid``.

    {BaseLayer.Parameters}

    Methods
    -------
    {BaseLayer.Methods}

    Attributes
    ----------
    {BaseLayer.Attributes}
    """
    size = TypedListProperty(required=True, element_type=int)
    stride = Spatial2DProperty(allow_none=True)
    padding = ChoiceProperty(choices=('SAME', 'VALID', 'same', 'valid'))
    pooling_type = None

    def __init__(self, size, stride=None, padding='valid', name=None):
        super(BasePooling, self).__init__(name=name)

        self.size = size
        self.stride = stride
        self.padding = padding

    def fail_if_shape_invalid(self, input_shape):
        if input_shape and input_shape.ndims != 4:
            raise LayerConnectionError(
                "Pooling layer expects an input with 4 "
                "dimensions, got {} with shape {}. Layer: {}"
                "".format(len(input_shape), input_shape, self))

    def get_output_shape(self, input_shape):
        input_shape = tf.TensorShape(input_shape)

        if input_shape.ndims is None:
            return tf.TensorShape((None, None, None, None))

        self.fail_if_shape_invalid(input_shape)

        n_samples, rows, cols, n_kernels = input_shape
        row_filter_size, col_filter_size = self.size

        stride = self.size if self.stride is None else self.stride
        row_stride, col_stride = stride

        output_rows = pooling_output_shape(
            rows, row_filter_size, self.padding, row_stride)

        output_cols = pooling_output_shape(
            cols, col_filter_size, self.padding, col_stride)

        # In Python 2, we can get a float number after the rounding
        # procedure and it might break processing in the subsequent layers.
        return tf.TensorShape((n_samples, output_rows, output_cols, n_kernels))

    def output(self, input_value, **kwargs):
        return tf.nn.pool(
            input_value,
            self.size,
            pooling_type=self.pooling_type,
            padding=self.padding.upper(),
            strides=self.stride or self.size,
            data_format="NHWC")

    def __repr__(self):
        return self._repr_arguments(
            self.size,
            name=self.name,
            stride=self.stride,
            padding=self.padding,
        )
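A standalone sketch of the ``valid``/``same`` output-length rules that a helper like ``pooling_output_shape`` is expected to follow; the formulas below match the usual TensorFlow conventions, and the function here is a hypothetical stand-in rather than the one imported above.

import math

def pooling_output_shape(length, filter_size, padding, stride):
    if padding.upper() == 'VALID':
        # No padding: only windows that fit entirely inside the input.
        return math.floor((length - filter_size) / stride) + 1
    # 'SAME': the input is padded so that every position gets covered.
    return math.ceil(length / stride)

print(pooling_output_shape(10, 2, 'valid', 2))  # 5
print(pooling_output_shape(11, 2, 'same', 2))   # 6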
Example #10
class ConjugateGradient(Backpropagation):
    """ Conjugate Gradient algorithm.

    Parameters
    ----------
    update_function : {{'fletcher_reeves', 'polak_ribiere',\
    'hentenes_stiefel', 'conjugate_descent', 'liu_storey', 'dai_yuan'}}
        Update function. Defaults to ``fletcher_reeves``.
    {optimizations}
    {full_params}

    Methods
    -------
    {supervised_train}
    {raw_predict}
    {full_methods}

    Examples
    --------
    >>> import numpy as np
    >>> np.random.seed(0)
    >>>
    >>> from sklearn import datasets, preprocessing
    >>> from sklearn.cross_validation import train_test_split
    >>> from neupy import algorithms, layers
    >>> from neupy.functions import rmsle
    >>>
    >>> dataset = datasets.load_boston()
    >>> data, target = dataset.data, dataset.target
    >>>
    >>> data_scaler = preprocessing.MinMaxScaler()
    >>> target_scaler = preprocessing.MinMaxScaler()
    >>>
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     data_scaler.fit_transform(data),
    ...     target_scaler.fit_transform(target),
    ...     train_size=0.85
    ... )
    >>>
    >>> cgnet = algorithms.ConjugateGradient(
    ...     connection=[
    ...         layers.SigmoidLayer(13),
    ...         layers.SigmoidLayer(50),
    ...         layers.OutputLayer(1),
    ...     ],
    ...     search_method='golden',
    ...     update_function='fletcher_reeves',
    ...     optimizations=[algorithms.LinearSearch],
    ...     verbose=False
    ... )
    >>>
    >>> cgnet.train(x_train, y_train, epochs=100)
    >>> y_predict = cgnet.predict(x_test)
    >>>
    >>> real = target_scaler.inverse_transform(y_test)
    >>> predicted = target_scaler.inverse_transform(y_predict)
    >>>
    >>> error = rmsle(real, predicted.round(1))
    >>> error
    0.20752676697596578

    See Also
    --------
    :network:`Backpropagation`: Backpropagation algorithm.
    :network:`LinearSearch`: Linear Search important algorithm for step \
    selection in Conjugate Gradient algorithm.
    """
    update_function = ChoiceProperty(default='fletcher_reeves',
                                     choices={
                                         'fletcher_reeves': fletcher_reeves,
                                         'polak_ribiere': polak_ribiere,
                                         'hentenes_stiefel': hentenes_stiefel,
                                         'conjugate_descent':
                                         conjugate_descent,
                                         'liu_storey': liu_storey,
                                         'dai_yuan': dai_yuan,
                                     })

    def init_layers(self):
        super(ConjugateGradient, self).init_layers()
        self.n_weights = sum(mul(*layer.size) for layer in self.train_layers)

    def get_weight_delta(self, output_train, target_train):
        gradients = super(ConjugateGradient,
                          self).get_gradient(output_train, target_train)
        epoch = self.epoch
        gradient = matrix_list_in_one_vector(gradients)
        weight_delta = -gradient

        if epoch > 1 and epoch % self.n_weights == 0:
            # Must reset after every N iterations, because the algorithm
            # loses conjugacy.
            self.logs.info("TRAIN", "Reset conjugate gradient vector")
            del self.prev_gradient

        if hasattr(self, 'prev_gradient'):
            gradient_old = self.prev_gradient
            weight_delta_old = self.prev_weight_delta
            beta = self.update_function(gradient_old, gradient,
                                        weight_delta_old)

            weight_delta += beta * weight_delta_old

        weight_deltas = vector_to_list_of_matrix(
            weight_delta, (layer.size for layer in self.train_layers))

        self.prev_weight_delta = weight_delta.copy()
        self.prev_gradient = gradient.copy()

        return weight_deltas
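To make the restart logic above concrete, here is a tiny standalone conjugate-gradient loop on a quadratic that resets the search direction every ``n_weights`` iterations, mirroring the ``epoch % self.n_weights == 0`` check. Pure NumPy, not neupy code.

import numpy as np

# Minimize f(x) = 0.5 * x^T A x with conjugate directions and a periodic restart.
A = np.array([[3.0, 0.5], [0.5, 1.0]])   # positive definite
x = np.array([4.0, -3.0])
n_weights = x.size
prev_grad, prev_delta = None, None

for epoch in range(1, 21):
    grad = A.dot(x)
    if np.linalg.norm(grad) < 1e-12:
        break
    if epoch > 1 and epoch % n_weights == 0:
        # Restart: drop the previous direction because conjugacy gets lost.
        prev_grad, prev_delta = None, None
    delta = -grad
    if prev_grad is not None:
        beta = grad.dot(grad) / prev_grad.dot(prev_grad)   # Fletcher-Reeves
        delta = -grad + beta * prev_delta
    step = -grad.dot(delta) / delta.dot(A.dot(delta))      # exact line search
    x = x + step * delta
    prev_grad, prev_delta = grad, delta

print(np.round(x, 8))  # close to the minimum at [0, 0]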
Example #11
class SOFM(Kohonen):
    """
    Self-Organizing Feature Map (SOFM).

    Parameters
    ----------
    {BaseAssociative.n_inputs}

    {BaseAssociative.n_outputs}

    learning_radius : int
        Learning radius.

    features_grid : list, tuple, None
        Feature grid defines shape of the output neurons.
        The new shape should be compatible with the number
        of outputs. Defaults to ``(n_outputs, 1)``.

    transform : {{``linear``, ``euclid``, ``cos``}}
        Indicates the transformation operation related to the
        input layer.

        - The ``linear`` value means that input data will be
          multiplied by weights in the typical way.

        - The ``euclid`` method will identify the closest
          weight vector to the input one.

        - The ``cos`` transformation identifies the cosine
          similarity between the input dataset and the
          network's weights.

        Defaults to ``linear``.

    {BaseAssociative.weight}

    {BaseNetwork.step}

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.epoch_end_signal}

    {BaseNetwork.train_end_signal}

    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}

    {BaseAssociative.train}

    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms, environment
    >>>
    >>> environment.reproducible()
    >>>
    >>> data = np.array([
    ...     [0.1961, 0.9806],
    ...     [-0.1961, 0.9806],
    ...     [-0.5812, -0.8137],
    ...     [-0.8137, -0.5812],
    ... ])
    >>>
    >>> sofmnet = algorithms.SOFM(
    ...     n_inputs=2,
    ...     n_outputs=2,
    ...     step=0.1,
    ...     learning_radius=0,
    ...     features_grid=(2, 1),
    ... )
    >>> sofmnet.train(data, epochs=100)
    >>> sofmnet.predict(data)
    array([[0, 1],
           [0, 1],
           [1, 0],
           [1, 0]])
    """
    learning_radius = IntProperty(default=0, minval=0)
    features_grid = TypedListProperty(allow_none=True, default=None)
    transform = ChoiceProperty(default='linear',
                               choices={
                                   'linear': np.dot,
                                   'euclid': neg_euclid_distance,
                                   'cos': cosine_similarity,
                               })

    def __init__(self, **options):
        super(SOFM, self).__init__(**options)

        invalid_feature_grid = (self.features_grid is not None
                                and mul(*self.features_grid) != self.n_outputs)
        if invalid_feature_grid:
            raise ValueError(
                "Feature grid should contain the same number of elements as "
                "in the output layer: {0}, but found: {1} ({2}x{3})"
                "".format(self.n_outputs, mul(*self.features_grid),
                          self.features_grid[0], self.features_grid[1]))

        if self.features_grid is None:
            self.features_grid = (self.n_outputs, 1)

    def predict_raw(self, input_data):
        input_data = format_data(input_data)
        n_samples = input_data.shape[0]
        output = np.zeros((n_samples, self.n_outputs))

        for i, input_row in enumerate(input_data):
            output[i, :] = self.transform(input_row.reshape(1, -1),
                                          self.weight)

        return output

    def update_indexes(self, layer_output):
        neuron_winner = layer_output.argmax(axis=1)
        feature_bound = self.features_grid[1]

        output_with_neightbours = neuron_neighbours(
            np.reshape(layer_output, self.features_grid),
            (neuron_winner // feature_bound, neuron_winner % feature_bound),
            self.learning_radius)
        index_y, _ = np.nonzero(
            np.reshape(output_with_neightbours, (self.n_outputs, 1)))
        return index_y
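The three ``transform`` options above reduce to simple row-versus-weight-matrix operations. A standalone NumPy sketch is shown below; the Euclidean distance is negated so that ``argmax`` picks the closest neuron, which is presumably why the library uses ``neg_euclid_distance``.

import numpy as np

def linear(row, weight):
    return row.dot(weight)

def neg_euclid_distance(row, weight):
    # Negated so that the closest weight column has the largest value.
    return -np.linalg.norm(weight - row.T, axis=0, keepdims=True)

def cosine_similarity(row, weight):
    norms = np.linalg.norm(row) * np.linalg.norm(weight, axis=0)
    return row.dot(weight) / norms

row = np.array([[0.2, 0.98]])            # one sample, shape (1, 2)
weight = np.array([[0.19, -0.58],        # two output neurons, shape (2, 2)
                   [0.98, -0.81]])
print(linear(row, weight))
print(neg_euclid_distance(row, weight))
print(cosine_similarity(row, weight))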
Example #12
class A(Configurable):
    choice = ChoiceProperty(choices='test')
Example #13
class A(Configurable):
    choice = ChoiceProperty(choices=['one', 'two', 'three'],
                            default='two')
Example #14
class A(Configurable):
    choice = ChoiceProperty(choices={'one': 1, 'two': 2, 'three': 3})
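The three short examples above exercise ``ChoiceProperty`` with a string, a list and a dict of choices. A much-simplified descriptor along the same lines, a standalone sketch rather than neupy's implementation, could look like this; with a dict of choices, reading the attribute returns the mapped value while assignment uses the key, which matches how ``self.mode`` is used in the pooling examples above.

class SimpleChoiceProperty(object):
    """Descriptor that only accepts values from a fixed set of choices."""

    def __init__(self, choices, default=None):
        if isinstance(choices, (list, tuple, set)):
            choices = {choice: choice for choice in choices}
        if not isinstance(choices, dict) or not choices:
            raise ValueError("`choices` must be a non-empty list or dict")
        self.choices = choices
        self.default = default

    def __set_name__(self, owner, name):
        self.name = name

    def __get__(self, instance, owner):
        if instance is None:
            return self
        key = instance.__dict__.get(self.name, self.default)
        return None if key is None else self.choices[key]

    def __set__(self, instance, value):
        if value not in self.choices:
            raise ValueError("Invalid choice: {!r}".format(value))
        instance.__dict__[self.name] = value


class A(object):
    choice = SimpleChoiceProperty(choices={'one': 1, 'two': 2, 'three': 3},
                                  default='two')

a = A()
print(a.choice)    # 2
a.choice = 'one'
print(a.choice)    # 1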
Example #15
class LevenbergMarquardt(BaseOptimizer):
    """
    Levenberg-Marquardt algorithm is a variation of Newton's method.
    It minimizes the MSE error. The algorithm approximates the Hessian
    matrix using the dot product of two Jacobian matrices.

    Notes
    -----
    - Method requires all training data during propagation, which means
      it's not allowed to use mini-batches.

    - Network minimizes only Mean Squared Error (MSE) loss function.

    - Efficient for small training datasets, because it
      computes gradient per each sample separately.

    - Efficient for small-sized networks.

    Parameters
    ----------
    {BaseOptimizer.network}

    mu : float
        Controls inversion of the ``J.T * J`` matrix. Defaults to ``0.01``.

    mu_update_factor : float
        Factor by which ``mu`` is decreased if the update decreases the
        error; otherwise ``mu`` is increased by the same factor.
        Defaults to ``1.2``.

    error : {{``mse``}}
        Levenberg-Marquardt works only for quadratic functions.
        Defaults to ``mse``.

    {BaseOptimizer.show_epoch}

    {BaseOptimizer.shuffle_data}

    {BaseOptimizer.signals}

    {BaseOptimizer.verbose}

    Attributes
    ----------
    {BaseOptimizer.Attributes}

    Methods
    -------
    {BaseOptimizer.Methods}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>> from neupy.layers import *
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> network = Input(2) >> Sigmoid(3) >> Sigmoid(1)
    >>> optimizer = algorithms.LevenbergMarquardt(network)
    >>> optimizer.train(x_train, y_train)

    See Also
    --------
    :network:`BaseOptimizer` : BaseOptimizer algorithm.
    """
    mu = BoundedProperty(default=0.01, minval=0)
    mu_update_factor = BoundedProperty(default=1.2, minval=1)
    loss = ChoiceProperty(default='mse', choices={'mse': objectives.mse})

    step = WithdrawProperty()
    regularizer = WithdrawProperty()

    def init_functions(self):
        self.variables.update(
            mu=tf.Variable(self.mu, name='lev-marq/mu'),
            last_error=tf.Variable(np.nan, name='lev-marq/last-error'),
        )
        super(LevenbergMarquardt, self).init_functions()

    def init_train_updates(self):
        training_outputs = self.network.training_outputs
        last_error = self.variables.last_error
        error_func = self.variables.loss
        mu = self.variables.mu

        new_mu = tf.where(
            tf.less(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        err_for_each_sample = flatten((self.target - training_outputs)**2)

        variables = self.network.variables
        params = [var for var in variables.values() if var.trainable]
        param_vector = make_single_vector(params)

        J = compute_jacobian(err_for_each_sample, params)
        J_T = tf.transpose(J)
        n_params = J.shape[1]

        parameter_update = tf.matrix_solve(
            tf.matmul(J_T, J) + new_mu * tf.eye(n_params.value),
            tf.matmul(J_T, tf.expand_dims(err_for_each_sample, 1)))
        updated_params = param_vector - flatten(parameter_update)

        updates = [(mu, new_mu)]
        parameter_updates = setup_parameter_updates(params, updated_params)
        updates.extend(parameter_updates)

        return updates

    def one_training_update(self, X_train, y_train):
        if self.errors.train:
            last_error = self.errors.train[-1]
            self.variables.last_error.load(last_error, tensorflow_session())

        return super(LevenbergMarquardt,
                     self).one_training_update(X_train, y_train)
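A standalone NumPy sketch of the Levenberg-Marquardt step that ``init_train_updates`` builds above: solve ``(J^T J + mu * I) d = J^T r`` and move the parameters by ``-d``. Here it runs on a tiny linear least-squares problem, so the Jacobian of the residuals is just the data matrix.

import numpy as np

def levenberg_marquardt_step(params, jacobian, residuals, mu):
    # Solve (J^T J + mu * I) d = J^T r and move against d.
    JT = jacobian.T
    n = params.size
    delta = np.linalg.solve(JT.dot(jacobian) + mu * np.eye(n),
                            JT.dot(residuals))
    return params - delta

# Fit y = X @ w on a tiny dataset; residuals are X @ w - y.
X = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
y = np.array([5.0, 11.0, 17.0])        # generated with w = [1, 2]
w = np.zeros(2)

for _ in range(20):
    residuals = X.dot(w) - y
    w = levenberg_marquardt_step(w, X, residuals, mu=0.1)

print(np.round(w, 3))  # close to [1, 2]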
Example #16
class ConjugateGradient(NoMultipleStepSelection, GradientDescent):
    """
    Conjugate Gradient algorithm.

    Parameters
    ----------
    update_function : {{``fletcher_reeves``, ``polak_ribiere``,\
    ``hentenes_stiefel``, ``conjugate_descent``, ``liu_storey``,\
    ``dai_yuan``}}
        Update function. Defaults to ``fletcher_reeves``.

    {GradientDescent.Parameters}

    Attributes
    ----------
    {GradientDescent.Attributes}

    Methods
    -------
    {GradientDescent.Methods}

    Examples
    --------
    >>> from sklearn import datasets, preprocessing
    >>> from sklearn.model_selection import train_test_split
    >>> from neupy import algorithms, layers, estimators, environment
    >>>
    >>> environment.reproducible()
    >>>
    >>> dataset = datasets.load_boston()
    >>> data, target = dataset.data, dataset.target
    >>>
    >>> data_scaler = preprocessing.MinMaxScaler()
    >>> target_scaler = preprocessing.MinMaxScaler()
    >>>
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     data_scaler.fit_transform(data),
    ...     target_scaler.fit_transform(target),
    ...     test_size=0.15
    ... )
    >>>
    >>> cgnet = algorithms.ConjugateGradient(
    ...     connection=[
    ...         layers.Input(13),
    ...         layers.Sigmoid(50),
    ...         layers.Sigmoid(1),
    ...     ],
    ...     search_method='golden',
    ...     update_function='fletcher_reeves',
    ...     addons=[algorithms.LinearSearch],
    ...     verbose=False
    ... )
    >>>
    >>> cgnet.train(x_train, y_train, epochs=100)
    >>> y_predict = cgnet.predict(x_test).round(1)
    >>>
    >>> real = target_scaler.inverse_transform(y_test)
    >>> predicted = target_scaler.inverse_transform(y_predict)
    >>>
    >>> error = estimators.rmsle(real, predicted)
    >>> error
    0.2472330191179734

    See Also
    --------
    :network:`GradientDescent`: GradientDescent algorithm.
    :network:`LinearSearch`: Linear Search important algorithm for step \
    selection in Conjugate Gradient algorithm.
    """
    update_function = ChoiceProperty(default='fletcher_reeves',
                                     choices={
                                         'fletcher_reeves': fletcher_reeves,
                                         'polak_ribiere': polak_ribiere,
                                         'hentenes_stiefel': hentenes_stiefel,
                                         'conjugate_descent':
                                         conjugate_descent,
                                         'liu_storey': liu_storey,
                                         'dai_yuan': dai_yuan,
                                     })

    def init_variables(self):
        super(ConjugateGradient, self).init_variables()
        n_parameters = count_parameters(self.connection)

        self.variables.update(prev_delta=theano.shared(
            name="conj-grad/prev-delta",
            value=asfloat(np.zeros(n_parameters)),
        ),
                              prev_gradient=theano.shared(
                                  name="conj-grad/prev-gradient",
                                  value=asfloat(np.zeros(n_parameters)),
                              ))

    def init_train_updates(self):
        step = self.variables.step
        previous_delta = self.variables.prev_delta
        previous_gradient = self.variables.prev_gradient

        n_parameters = count_parameters(self.connection)
        parameters = parameter_values(self.connection)
        param_vector = T.concatenate([param.flatten() for param in parameters])

        gradients = T.grad(self.variables.error_func, wrt=parameters)
        full_gradient = T.concatenate([grad.flatten() for grad in gradients])

        beta = self.update_function(previous_gradient, full_gradient,
                                    previous_delta)
        parameter_delta = ifelse(
            T.eq(T.mod(self.variables.epoch, n_parameters), 1), -full_gradient,
            -full_gradient + beta * previous_delta)
        updated_parameters = param_vector + step * parameter_delta

        updates = [
            (previous_gradient, full_gradient),
            (previous_delta, parameter_delta),
        ]
        parameter_updates = setup_parameter_updates(parameters,
                                                    updated_parameters)
        updates.extend(parameter_updates)

        return updates
Example #17
class ConjugateGradient(NoMultipleStepSelection, GradientDescent):
    """ Conjugate Gradient algorithm.

    Parameters
    ----------
    update_function : {{'fletcher_reeves', 'polak_ribiere',\
    'hentenes_stiefel', 'conjugate_descent', 'liu_storey', 'dai_yuan'}}
        Update function. Defaults to ``fletcher_reeves``.
    {GradientDescent.addons}
    {ConstructableNetwork.connection}
    {ConstructableNetwork.error}
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}

    Methods
    -------
    {BaseSkeleton.predict}
    {SupervisedLearning.train}
    {BaseSkeleton.fit}
    {BaseNetwork.plot_errors}

    Examples
    --------
    >>> from sklearn import datasets, preprocessing
    >>> from sklearn.cross_validation import train_test_split
    >>> from neupy import algorithms, layers, estimators, environment
    >>>
    >>> environment.reproducible()
    >>>
    >>> dataset = datasets.load_boston()
    >>> data, target = dataset.data, dataset.target
    >>>
    >>> data_scaler = preprocessing.MinMaxScaler()
    >>> target_scaler = preprocessing.MinMaxScaler()
    >>>
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     data_scaler.fit_transform(data),
    ...     target_scaler.fit_transform(target),
    ...     train_size=0.85
    ... )
    >>>
    >>> cgnet = algorithms.ConjugateGradient(
    ...     connection=[
    ...         layers.Sigmoid(13),
    ...         layers.Sigmoid(50),
    ...         layers.RoundedOutput(1, decimals=1),
    ...     ],
    ...     search_method='golden',
    ...     update_function='fletcher_reeves',
    ...     addons=[algorithms.LinearSearch],
    ...     verbose=False
    ... )
    >>>
    >>> cgnet.train(x_train, y_train, epochs=100)
    >>> y_predict = cgnet.predict(x_test)
    >>>
    >>> real = target_scaler.inverse_transform(y_test)
    >>> predicted = target_scaler.inverse_transform(y_predict)
    >>>
    >>> error = estimators.rmsle(real, predicted)
    >>> error
    0.20752676697596578

    See Also
    --------
    :network:`GradientDescent`: GradientDescent algorithm.
    :network:`LinearSearch`: Linear Search important algorithm for step \
    selection in Conjugate Gradient algorithm.
    """
    update_function = ChoiceProperty(default='fletcher_reeves',
                                     choices={
                                         'fletcher_reeves': fletcher_reeves,
                                         'polak_ribiere': polak_ribiere,
                                         'hentenes_stiefel': hentenes_stiefel,
                                         'conjugate_descent':
                                         conjugate_descent,
                                         'liu_storey': liu_storey,
                                         'dai_yuan': dai_yuan,
                                     })

    def init_variables(self):
        super(ConjugateGradient, self).init_variables()
        n_parameters = count_parameters(self)

        self.variables.update(prev_delta=theano.shared(
            name="prev_delta",
            value=asfloat(np.zeros(n_parameters)),
        ),
                              prev_gradient=theano.shared(
                                  name="prev_gradient",
                                  value=asfloat(np.zeros(n_parameters)),
                              ))

    def init_train_updates(self):
        step = self.variables.step
        previous_delta = self.variables.prev_delta
        previous_gradient = self.variables.prev_gradient

        n_parameters = count_parameters(self)
        parameters = list(iter_parameters(self))
        param_vector = parameters2vector(self)

        gradients = T.grad(self.variables.error_func, wrt=parameters)
        full_gradient = T.concatenate([grad.flatten() for grad in gradients])

        beta = self.update_function(previous_gradient, full_gradient,
                                    previous_delta)
        parameter_delta = ifelse(
            T.eq(T.mod(self.variables.epoch, n_parameters), 1), -full_gradient,
            -full_gradient + beta * previous_delta)
        updated_parameters = param_vector + step * parameter_delta

        updates = [
            (previous_gradient, full_gradient),
            (previous_delta, parameter_delta),
        ]
        parameter_updates = setup_parameter_updates(parameters,
                                                    updated_parameters)
        updates.extend(parameter_updates)

        return updates
Example #18
class QuasiNewton(NoStepSelection, GradientDescent):
    """
    Quasi-Newton algorithm optimization.

    Parameters
    ----------
    {GradientDescent.Parameters}

    Attributes
    ----------
    {GradientDescent.Attributes}

    Methods
    -------
    {GradientDescent.Methods}

    Examples
    --------
    Simple example

    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> qnnet = algorithms.QuasiNewton(
    ...     (2, 3, 1),
    ...     update_function='bfgs',
    ...     verbose=False
    ... )
    >>> qnnet.train(x_train, y_train, epochs=10)

    See Also
    --------
    :network:`GradientDescent` : GradientDescent algorithm.
    """
    update_function = ChoiceProperty(default='bfgs',
                                     choices={
                                         'bfgs': bfgs,
                                         'dfp': dfp,
                                         'psb': psb,
                                         'sr1': sr1,
                                     })
    h0_scale = NumberProperty(default=1, minval=0)
    gradient_tol = ProperFractionProperty(default=1e-5)

    def init_variables(self):
        super(QuasiNewton, self).init_variables()
        n_params = sum(p.get_value().size for p in iter_parameters(self))
        self.variables.update(
            inv_hessian=theano.shared(
                name='inv_hessian',
                value=asfloat(self.h0_scale * np.eye(int(n_params))),
            ),
            prev_params=theano.shared(
                name='prev_params',
                value=asfloat(np.zeros(n_params)),
            ),
            prev_full_gradient=theano.shared(
                name='prev_full_gradient',
                value=asfloat(np.zeros(n_params)),
            ),
        )

    def init_train_updates(self):
        network_input = self.variables.network_input
        network_output = self.variables.network_output
        inv_hessian = self.variables.inv_hessian
        prev_params = self.variables.prev_params
        prev_full_gradient = self.variables.prev_full_gradient

        params = list(iter_parameters(self))
        param_vector = parameters2vector(self)

        gradients = T.grad(self.variables.error_func, wrt=params)
        full_gradient = T.concatenate([grad.flatten() for grad in gradients])

        new_inv_hessian = ifelse(
            T.eq(self.variables.epoch, 1), inv_hessian,
            self.update_function(inv_hessian, param_vector - prev_params,
                                 full_gradient - prev_full_gradient))
        param_delta = -new_inv_hessian.dot(full_gradient)

        def prediction(step):
            # TODO: I need to update this ugly solution later
            updated_params = param_vector + step * param_delta

            layer_input = network_input
            start_pos = 0
            for layer in self.layers:
                for param in layer.parameters:
                    end_pos = start_pos + param.size
                    parameter_name, parameter_id = param.name.split('_')
                    setattr(
                        layer, parameter_name,
                        T.reshape(updated_params[start_pos:end_pos],
                                  param.shape))
                    start_pos = end_pos
                layer_input = layer.output(layer_input)
            return layer_input

        def phi(step):
            return self.error(network_output, prediction(step))

        def derphi(step):
            error_func = self.error(network_output, prediction(step))
            return T.grad(error_func, wrt=step)

        step = asfloat(line_search(phi, derphi))
        updated_params = param_vector + step * param_delta
        updates = setup_parameter_updates(params, updated_params)

        updates.extend([
            (inv_hessian, new_inv_hessian),
            (prev_params, param_vector),
            (prev_full_gradient, full_gradient),
        ])

        return updates
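
# Hedged NumPy sketch of the "single parameter vector" idea behind
# `parameters2vector` and `setup_parameter_updates` above: every weight and
# bias is flattened into one long vector, the optimizer updates that vector,
# and the result is reshaped back into the original parameter shapes.
# The helper names below are illustrative, not neupy's API.
import numpy as np

def to_vector(parameters):
    return np.concatenate([param.ravel() for param in parameters])

def from_vector(vector, parameters):
    restored, start = [], 0
    for param in parameters:
        end = start + param.size
        restored.append(vector[start:end].reshape(param.shape))
        start = end
    return restored

weights = [np.ones((2, 3)), np.zeros(3)]      # e.g. one weight matrix and one bias
vector = to_vector(weights)                   # shape (9,)
updated = from_vector(vector - 0.1, weights)  # same shapes as `weights`
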
Exemple #19
0
class LevenbergMarquardt(NoStepSelection, GradientDescent):
    """ Levenberg-Marquardt algorithm.

    Notes
    -----
    * Network minimizes only the Mean Squared Error function.

    Parameters
    ----------
    mu : float
        Controls inversion of the ``J.T * J`` matrix. Defaults to ``0.01``.
    mu_update_factor : float
        Factor by which ``mu`` is decreased when an update decreases the
        error and increased when it does not. Defaults to ``5``.
    error : {{``mse``}}
        Levenberg-Marquardt works only with quadratic error functions.
        Defaults to ``mse``.
    {GradientDescent.addons}
    {ConstructableNetwork.connection}
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}
    {SupervisedLearning.train}
    {BaseSkeleton.fit}
    {BaseNetwork.plot_errors}

    Examples
    --------
    Simple example

    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> lmnet = algorithms.LevenbergMarquardt(
    ...     (2, 3, 1),
    ...     verbose=False
    ... )
    >>> lmnet.train(x_train, y_train)

    Diabetes dataset example

    >>> import numpy as np
    >>> from sklearn import datasets, preprocessing
    >>> from sklearn.cross_validation import train_test_split
    >>> from neupy import algorithms, layers
    >>> from neupy.estimators import rmsle
    >>>
    >>> dataset = datasets.load_diabetes()
    >>> data, target = dataset.data, dataset.target
    >>>
    >>> data_scaler = preprocessing.MinMaxScaler()
    >>> target_scaler = preprocessing.MinMaxScaler()
    >>>
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     data_scaler.fit_transform(data),
    ...     target_scaler.fit_transform(target),
    ...     train_size=0.85
    ... )
    >>>
    >>> # Network
    ... lmnet = algorithms.LevenbergMarquardt(
    ...     connection=[
    ...         layers.Sigmoid(10),
    ...         layers.Sigmoid(40),
    ...         layers.Output(1),
    ...     ],
    ...     mu_update_factor=2,
    ...     mu=0.1,
    ...     step=0.25,
    ...     show_epoch=10,
    ...     use_bias=False,
    ...     verbose=False
    ... )
    >>> lmnet.train(x_train, y_train, epochs=100)
    >>> y_predict = lmnet.predict(x_test)
    >>>
    >>> error = rmsle(target_scaler.inverse_transform(y_test),
    ...               target_scaler.inverse_transform(y_predict).round())
    >>> error
    0.47548200957888398

    See Also
    --------
    :network:`GradientDescent` : GradientDescent algorithm.
    """

    mu = BoundedProperty(default=0.01, minval=0)
    mu_update_factor = BoundedProperty(default=5, minval=1)
    error = ChoiceProperty(default='mse', choices={'mse': errors.mse})

    def init_variables(self):
        super(LevenbergMarquardt, self).init_variables()
        self.variables.update(
            mu=theano.shared(name='mu', value=asfloat(self.mu)),
            last_error=theano.shared(name='last_error', value=np.nan),
        )

    def init_train_updates(self):
        network_output = self.variables.network_output
        prediction_func = self.variables.train_prediction_func
        last_error = self.variables.last_error
        error_func = self.variables.error_func
        mu = self.variables.mu

        new_mu = ifelse(
            T.lt(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        mse_for_each_sample = T.mean((network_output - prediction_func)**2,
                                     axis=1)

        params = list(iter_parameters(self))
        param_vector = parameters2vector(self)

        J = compute_jaccobian(mse_for_each_sample, params)
        n_params = J.shape[1]

        updated_params = param_vector - T.nlinalg.matrix_inverse(
            J.T.dot(J) + new_mu * T.eye(n_params)).dot(
                J.T).dot(mse_for_each_sample)

        updates = [(mu, new_mu)]
        parameter_updates = setup_parameter_updates(params, updated_params)
        updates.extend(parameter_updates)

        return updates

    def on_epoch_start_update(self, epoch):
        super(LevenbergMarquardt, self).on_epoch_start_update(epoch)

        last_error = self.errors.last()
        if last_error is not None:
            self.variables.last_error.set_value(last_error)
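
# Hedged NumPy sketch of the Levenberg-Marquardt step computed above:
# delta = (J.T * J + mu * I)^-1 * J.T * e. The class above inverts the matrix
# explicitly with Theano's matrix_inverse; solving the linear system, as
# below, is a common equivalent formulation. `J` and `residuals` are toy values.
import numpy as np

def levenberg_marquardt_step(J, residuals, mu):
    n_params = J.shape[1]
    damped_hessian = J.T.dot(J) + mu * np.eye(n_params)
    return np.linalg.solve(damped_hessian, J.T.dot(residuals))

J = np.array([[1.0, 2.0],
              [3.0, 4.0],
              [5.0, 6.0]])                 # jacobian: n_samples x n_params
residuals = np.array([0.5, -0.2, 0.1])     # per-sample errors
params = np.zeros(2)
params = params - levenberg_marquardt_step(J, residuals, mu=0.01)
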
Exemple #20
0
class QuasiNewton(WolfeLineSearchForStep, BaseGradientDescent):
    """
    Quasi-Newton algorithm. On every iteration the quasi-Newton method
    approximates the inverse Hessian matrix with an iterative update. It
    doesn't have a ``step`` parameter. Instead, the algorithm applies a line
    search for a step that satisfies the strong Wolfe conditions. Parameters
    that control the Wolfe search start with the ``wolfe_`` prefix.

    Parameters
    ----------
    update_function : {{``bfgs``, ``dfp``, ``sr1``}}
        Update function for the iterative inverse hessian matrix
        approximation. Defaults to ``bfgs``.

        - ``bfgs`` - Rank-2 update formula. It can suffer from round-off
          error and inaccurate line searches.

        - ``dfp`` - DFP is very similar to BFGS. It's also a rank-2 update
          formula and can suffer from round-off error and inaccurate
          line searches.

        - ``sr1`` - Symmetric rank 1 (SR1). Updates the inverse Hessian
          matrix by adding a symmetric rank-1 matrix. It's possible that
          no rank-1 update exists for the matrix; in that case the update
          is skipped and the original inverse Hessian is returned.

    h0_scale : float
        Default Hessian matrix is an identity matrix. The
        ``h0_scale`` parameter scales identity matrix.
        Defaults to ``1``.

    epsilon : float
        Controls numerical stability for the ``update_function`` parameter.
        Defaults to ``1e-7``.

    {WolfeLineSearchForStep.Parameters}

    {BaseGradientDescent.connection}

    {BaseGradientDescent.error}

    {BaseGradientDescent.show_epoch}

    {BaseGradientDescent.shuffle_data}

    {BaseGradientDescent.epoch_end_signal}

    {BaseGradientDescent.train_end_signal}

    {BaseGradientDescent.verbose}

    {BaseGradientDescent.addons}

    Notes
    -----
    - Method requires all training data during propagation, which means
      it's not allowed to use mini-batches.

    Attributes
    ----------
    {BaseGradientDescent.Attributes}

    Methods
    -------
    {BaseGradientDescent.Methods}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> qnnet = algorithms.QuasiNewton(
    ...     (2, 3, 1),
    ...     update_function='bfgs'
    ... )
    >>> qnnet.train(x_train, y_train, epochs=10)

    References
    ----------
    [1] Yang Ding, Enkeleida Lushi, Qingguo Li,
        Investigation of quasi-Newton methods for unconstrained optimization.
        http://people.math.sfu.ca/~elushi/project_833.pdf

    [2] Jorge Nocedal, Stephen J. Wright, Numerical Optimization.
        Chapter 6, Quasi-Newton Methods, p. 135-163
    """
    update_function = ChoiceProperty(default='bfgs',
                                     choices={
                                         'bfgs': bfgs,
                                         'dfp': dfp,
                                         'sr1': sr1,
                                     })
    epsilon = NumberProperty(default=1e-7, minval=0)
    h0_scale = NumberProperty(default=1, minval=0)

    step = WithdrawProperty()

    def init_variables(self):
        super(QuasiNewton, self).init_variables()
        n_parameters = count_parameters(self.connection)

        self.variables.update(
            inv_hessian=tf.Variable(
                asfloat(self.h0_scale) * tf.eye(n_parameters),
                name="quasi-newton/inv-hessian",
                dtype=tf.float32,
            ),
            prev_params=tf.Variable(
                tf.zeros([n_parameters]),
                name="quasi-newton/prev-params",
                dtype=tf.float32,
            ),
            prev_full_gradient=tf.Variable(
                tf.zeros([n_parameters]),
                name="quasi-newton/prev-full-gradient",
                dtype=tf.float32,
            ),
        )

    def init_train_updates(self):
        inv_hessian = self.variables.inv_hessian
        prev_params = self.variables.prev_params
        prev_full_gradient = self.variables.prev_full_gradient

        params = parameter_values(self.connection)
        param_vector = make_single_vector(params)

        gradients = tf.gradients(self.variables.error_func, params)
        full_gradient = make_single_vector(gradients)

        new_inv_hessian = tf.where(
            tf.equal(self.variables.epoch, 1), inv_hessian,
            self.update_function(inv_H=inv_hessian,
                                 delta_w=param_vector - prev_params,
                                 delta_grad=full_gradient - prev_full_gradient,
                                 epsilon=self.epsilon))
        param_delta = -dot(new_inv_hessian, full_gradient)
        step = self.find_optimal_step(param_vector, param_delta)
        updated_params = param_vector + step * param_delta
        updates = setup_parameter_updates(params, updated_params)

        # We have to compute these values first, otherwise
        # parallel execution in tensorflow can mix the update order
        # and, for example, the previous gradient can end up equal to
        # the current gradient value, because tensorflow tries to
        # execute operations in parallel.
        required_variables = [new_inv_hessian, param_vector, full_gradient]
        with tf.control_dependencies(required_variables):
            updates.extend([
                inv_hessian.assign(new_inv_hessian),
                prev_params.assign(param_vector),
                prev_full_gradient.assign(full_gradient),
            ])

        return updates
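
# Hedged NumPy sketch of the textbook BFGS inverse-Hessian update that
# `update_function` applies above, with delta_w as the parameter difference
# and delta_grad as the gradient difference. The max(..., epsilon) guard is a
# simplification; neupy's `epsilon` parameter plays a similar stabilising role.
import numpy as np

def bfgs_inverse_hessian_update(inv_H, delta_w, delta_grad, epsilon=1e-7):
    rho = 1.0 / max(delta_grad.dot(delta_w), epsilon)
    identity = np.eye(len(delta_w))
    left = identity - rho * np.outer(delta_w, delta_grad)
    right = identity - rho * np.outer(delta_grad, delta_w)
    return left.dot(inv_H).dot(right) + rho * np.outer(delta_w, delta_w)

new_inv_H = bfgs_inverse_hessian_update(
    inv_H=np.eye(2),
    delta_w=np.array([0.1, -0.2]),
    delta_grad=np.array([0.3, 0.1]),
)
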
Exemple #21
0
 class B(Configurable):
     choice = ChoiceProperty(choices=[])
Exemple #22
0
class LevenbergMarquardt(StepSelectionBuiltIn, BaseGradientDescent):
    """
    The Levenberg-Marquardt algorithm is a variation of Newton's method.
    It minimizes the MSE error. The algorithm approximates the Hessian
    matrix using the dot product of two Jacobian matrices.

    Notes
    -----
    - Method requires all training data during propagation, which means
      it's not allowed to use mini-batches.

    - Network minimizes only the Mean Squared Error (MSE) loss function.

    - Efficient for small training datasets, because it
      computes gradient per each sample separately.

    - Efficient for small-sized networks.

    Parameters
    ----------
    {BaseGradientDescent.connection}

    mu : float
        Controls inversion of the ``J.T * J`` matrix. Defaults to ``0.01``.

    mu_update_factor : float
        Factor by which ``mu`` is decreased when an update decreases the
        error and increased when it does not. Defaults to ``1.2``.

    error : {{``mse``}}
        Levenberg-Marquardt works only with quadratic error functions.
        Defaults to ``mse``.

    {BaseGradientDescent.show_epoch}

    {BaseGradientDescent.shuffle_data}

    {BaseGradientDescent.epoch_end_signal}

    {BaseGradientDescent.train_end_signal}

    {BaseGradientDescent.verbose}

    {BaseGradientDescent.addons}

    Attributes
    ----------
    {BaseGradientDescent.Attributes}

    Methods
    -------
    {BaseGradientDescent.Methods}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> lmnet = algorithms.LevenbergMarquardt((2, 3, 1))
    >>> lmnet.train(x_train, y_train)

    See Also
    --------
    :network:`BaseGradientDescent` : BaseGradientDescent algorithm.
    """
    mu = BoundedProperty(default=0.01, minval=0)
    mu_update_factor = BoundedProperty(default=1.2, minval=1)
    error = ChoiceProperty(default='mse', choices={'mse': errors.mse})

    step = WithdrawProperty()

    def init_variables(self):
        super(LevenbergMarquardt, self).init_variables()
        self.variables.update(
            mu=tf.Variable(self.mu, name='lev-marq/mu'),
            last_error=tf.Variable(np.nan, name='lev-marq/last-error'),
        )

    def init_train_updates(self):
        network_output = self.variables.network_output
        prediction_func = self.variables.train_prediction_func
        last_error = self.variables.last_error
        error_func = self.variables.error_func
        mu = self.variables.mu

        new_mu = tf.where(
            tf.less(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        err_for_each_sample = flatten((network_output - prediction_func) ** 2)

        params = parameter_values(self.connection)
        param_vector = make_single_vector(params)

        J = compute_jacobian(err_for_each_sample, params)
        J_T = tf.transpose(J)
        n_params = J.shape[1]

        parameter_update = tf.matrix_solve(
            tf.matmul(J_T, J) + new_mu * tf.eye(n_params.value),
            tf.matmul(J_T, tf.expand_dims(err_for_each_sample, 1))
        )
        updated_params = param_vector - flatten(parameter_update)

        updates = [(mu, new_mu)]
        parameter_updates = setup_parameter_updates(params, updated_params)
        updates.extend(parameter_updates)

        return updates

    def on_epoch_start_update(self, epoch):
        super(LevenbergMarquardt, self).on_epoch_start_update(epoch)

        last_error = self.errors.last()
        if last_error is not None:
            self.variables.last_error.load(last_error, tensorflow_session())
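
# Minimal plain-Python sketch of the mu adaptation implemented with tf.where
# above: if the error grew since the previous epoch, mu is increased by
# mu_update_factor, otherwise it is decreased by the same factor.
def update_mu(mu, last_error, current_error, mu_update_factor=1.2):
    if last_error < current_error:
        return mu * mu_update_factor   # update made things worse, be more conservative
    return mu / mu_update_factor       # update helped, move closer to Gauss-Newton

assert update_mu(0.01, last_error=0.5, current_error=0.4) < 0.01
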
Exemple #23
0
 def test_choice_property_on_unknown_instance(self):
     prop = ChoiceProperty(choices=[1, 2, 3])
     self.assertEqual(None, prop.__get__(None, None))
Exemple #24
0
class QuasiNewton(StepSelectionBuiltIn, GradientDescent):
    """
    Quasi-Newton optimization algorithm.

    Parameters
    ----------
    update_function : {{'bfgs', 'dfp', 'psb', 'sr1'}}
        Update function for the iterative inverse Hessian matrix
        approximation. Defaults to ``bfgs``.

    h0_scale : float
        Default Hessian matrix is an identity matrix. The
        ``h0_scale`` parameter scales identity matrix.
        Defaults to ``1``.

    {GradientDescent.connection}

    {GradientDescent.error}

    {GradientDescent.show_epoch}

    {GradientDescent.shuffle_data}

    {GradientDescent.epoch_end_signal}

    {GradientDescent.train_end_signal}

    {GradientDescent.verbose}

    {GradientDescent.addons}

    Attributes
    ----------
    {GradientDescent.Attributes}

    Methods
    -------
    {GradientDescent.Methods}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> qnnet = algorithms.QuasiNewton(
    ...     (2, 3, 1),
    ...     update_function='bfgs'
    ... )
    >>> qnnet.train(x_train, y_train, epochs=10)

    See Also
    --------
    :network:`GradientDescent` : GradientDescent algorithm.
    """
    update_function = ChoiceProperty(default='bfgs',
                                     choices={
                                         'bfgs': bfgs,
                                         'dfp': dfp,
                                         'psb': psb,
                                         'sr1': sr1,
                                     })
    h0_scale = NumberProperty(default=1, minval=0)

    step = WithdrawProperty()

    def init_variables(self):
        super(QuasiNewton, self).init_variables()
        n_params = count_parameters(self.connection)
        self.variables.update(
            inv_hessian=theano.shared(
                name='algo:quasi-newton/matrix:inv-hessian',
                value=asfloat(self.h0_scale * np.eye(int(n_params))),
            ),
            prev_params=theano.shared(
                name='algo:quasi-newton/vector:prev-params',
                value=asfloat(np.zeros(n_params)),
            ),
            prev_full_gradient=theano.shared(
                name='algo:quasi-newton/vector:prev-full-gradient',
                value=asfloat(np.zeros(n_params)),
            ),
        )

    def init_train_updates(self):
        network_inputs = self.variables.network_inputs
        network_output = self.variables.network_output
        inv_hessian = self.variables.inv_hessian
        prev_params = self.variables.prev_params
        prev_full_gradient = self.variables.prev_full_gradient

        params = parameter_values(self.connection)
        param_vector = T.concatenate([param.flatten() for param in params])

        gradients = T.grad(self.variables.error_func, wrt=params)
        full_gradient = T.concatenate([grad.flatten() for grad in gradients])

        new_inv_hessian = ifelse(
            T.eq(self.variables.epoch, 1), inv_hessian,
            self.update_function(inv_hessian, param_vector - prev_params,
                                 full_gradient - prev_full_gradient))
        param_delta = -new_inv_hessian.dot(full_gradient)
        layers_and_parameters = list(iter_parameters(self.layers))

        def prediction(step):
            updated_params = param_vector + step * param_delta

            # This trick allows us to replace shared variables
            # with theano variables and get the output from the network
            start_pos = 0
            for layer, attrname, param in layers_and_parameters:
                end_pos = start_pos + param.size
                updated_param_value = T.reshape(
                    updated_params[start_pos:end_pos], param.shape)
                setattr(layer, attrname, updated_param_value)
                start_pos = end_pos

            output = self.connection.output(*network_inputs)

            # Restore previous parameters
            for layer, attrname, param in layers_and_parameters:
                setattr(layer, attrname, param)

            return output

        def phi(step):
            return self.error(network_output, prediction(step))

        def derphi(step):
            error_func = self.error(network_output, prediction(step))
            return T.grad(error_func, wrt=step)

        step = asfloat(line_search(phi, derphi))
        updated_params = param_vector + step * param_delta
        updates = setup_parameter_updates(params, updated_params)

        updates.extend([
            (inv_hessian, new_inv_hessian),
            (prev_params, param_vector),
            (prev_full_gradient, full_gradient),
        ])

        return updates
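
# Hedged sketch of the same step-length idea using SciPy's strong-Wolfe line
# search on a plain quadratic, instead of the Theano-based line_search(phi,
# derphi) used above. The quadratic and its gradient are toy stand-ins for
# the network loss; the exact minimizer along this direction is step = 1.
import numpy as np
from scipy.optimize import line_search

def loss(params):
    return 0.5 * params.dot(params)

def loss_grad(params):
    return params

params = np.array([1.0, -2.0])
direction = -loss_grad(params)            # steepest-descent direction
step = line_search(loss, loss_grad, params, direction)[0]
updated_params = params + step * direction
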
Exemple #25
0
class LinearSearch(SingleStepConfigurable):
    """ Linear search for the step selection. Basicly this algorithms
    try different steps and compute your predicted error, after few
    iteration it will chose one which was better.

    Parameters
    ----------
    tol : float
        Tolerance for termination. Defaults to ``0.1``. Can be any number
        greater than zero.
    maxiter : int
        Maximum number of iterations. Used only when ``search_method`` is
        set to ``brent``. Defaults to ``10``.
    search_method : {{``golden``, ``brent``}}
        Linear search method. Can be ``golden`` for golden-section search
        or ``brent`` for Brent's search. Defaults to ``golden``.

    Warns
    -----
    {SingleStepConfigurable.Warns}

    Examples
    --------
    >>> from sklearn import datasets, preprocessing
    >>> from sklearn.cross_validation import train_test_split
    >>> from neupy import algorithms, layers, estimators, environment
    >>>
    >>> environment.reproducible()
    >>>
    >>> dataset = datasets.load_boston()
    >>> data, target = dataset.data, dataset.target
    >>>
    >>> data_scaler = preprocessing.MinMaxScaler()
    >>> target_scaler = preprocessing.MinMaxScaler()
    >>>
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     data_scaler.fit_transform(data),
    ...     target_scaler.fit_transform(target),
    ...     train_size=0.85
    ... )
    >>>
    >>> cgnet = algorithms.ConjugateGradient(
    ...     connection=[
    ...         layers.Input(13),
    ...         layers.Sigmoid(50),
    ...         layers.Sigmoid(1),
    ...     ],
    ...     search_method='golden',
    ...     addons=[algorithms.LinearSearch],
    ...     verbose=False
    ... )
    >>>
    >>> cgnet.train(x_train, y_train, epochs=100)
    >>> y_predict = cgnet.predict(x_test).round(1)
    >>>
    >>> real = target_scaler.inverse_transform(y_test)
    >>> predicted = target_scaler.inverse_transform(y_predict)
    >>>
    >>> error = estimators.rmsle(real, predicted)
    >>> error
    0.20752676697596578

    See Also
    --------
    :network:`ConjugateGradient`
    """

    tol = BoundedProperty(default=0.1, minval=0)
    maxiter = BoundedProperty(default=10, minval=1)
    search_method = ChoiceProperty(choices=['golden', 'brent'],
                                   default='golden')

    def train_epoch(self, input_train, target_train):
        train_epoch = self.methods.train_epoch
        prediction_error = self.methods.prediction_error

        params = [param for param, _ in self.init_train_updates()]
        param_defaults = [param.get_value() for param in params]

        def setup_new_step(new_step):
            for param_default, param in zip(param_defaults, params):
                param.set_value(param_default)

            self.variables.step.set_value(asfloat(new_step))
            train_epoch(input_train, target_train)
            # The train epoch function returns the network error from
            # before the training step, which is why we need to compute
            # it a second time.
            error = prediction_error(input_train, target_train)

            return np.where(np.isnan(error), np.inf, error)

        options = {'xtol': self.tol}
        if self.search_method == 'brent':
            options['maxiter'] = self.maxiter

        res = minimize_scalar(
            setup_new_step,
            tol=self.tol,
            method=self.search_method,
            options=options,
        )

        return setup_new_step(res.x)
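
# Hedged sketch of the scalar step search that train_epoch performs above:
# the training error is treated as a one-dimensional function of the step and
# minimized with SciPy's golden-section (or Brent) method. The parabola below
# is a toy stand-in for setup_new_step.
from scipy.optimize import minimize_scalar

def error_for_step(step):
    return (step - 0.25) ** 2      # pretend the best step is 0.25

result = minimize_scalar(error_for_step, tol=0.1, method='golden',
                         options={'xtol': 0.1})
best_step = result.x               # close to 0.25
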
Exemple #26
0
 def test_choice_property_on_unknown_instance(self):
     prop = ChoiceProperty(choices=[1, 2, 3])
     self.assertEqual(None, prop.__get__(None, None))
Exemple #27
0
class SOFM(Kohonen):
    """ Self-Organizing Feature Map.

    Parameters
    ----------
    learning_radius : int
        Learning radius.
    features_grid : tuple
        Two-element grid shape for the output neurons, for example
        ``(n_rows, n_columns)``. The product of the two values must be
        equal to ``n_outputs``. Defaults to ``(n_outputs, 1)``.
    transform : {{'linear', 'euclid', 'cos'}}
        Transformation applied to the input layer. The ``linear`` option
        multiplies the input data by the weights in the usual way. The
        ``euclid`` option identifies the weight vector closest to the
        input in terms of euclidean distance. The ``cos`` option does the
        same as ``euclid``, but uses cosine similarity instead of
        euclidean distance. Defaults to ``linear``.
    {BaseAssociative.n_inputs}
    {BaseAssociative.n_outputs}
    {BaseAssociative.weight}
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}
    {BaseAssociative.train}
    {BaseSkeleton.fit}
    """

    learning_radius = IntProperty(default=0, minval=0)
    features_grid = TypedListProperty()
    transform = ChoiceProperty(default='linear',
                               choices={
                                   'linear': dot_product,
                                   'euclid': neg_euclid_distance,
                                   'cos': cosine_similarity,
                               })

    def __init__(self, **options):
        super(SOFM, self).__init__(**options)

        invalid_feature_grid = (self.features_grid is not None
                                and mul(*self.features_grid) != self.n_outputs)
        if invalid_feature_grid:
            raise ValueError(
                "Feature grid should contain the same number of elements as "
                "in the output layer: {0}, but found: {1} ({2}x{3})"
                "".format(self.n_outputs, mul(*self.features_grid),
                          self.features_grid[0], self.features_grid[1]))

    def init_properties(self):
        super(SOFM, self).init_properties()

        if self.features_grid is None:
            self.features_grid = (self.n_outputs, 1)

    def predict_raw(self, input_data):
        input_data = format_data(input_data)
        output = np.zeros((input_data.shape[0], self.n_outputs))
        for i, input_row in enumerate(input_data):
            output[i, :] = self.transform(input_row.reshape(1, -1),
                                          self.weight)
        return output

    def update_indexes(self, layer_output):
        neuron_winner = layer_output.argmax(axis=1)
        feature_bound = self.features_grid[1]

        output_with_neightbours = neuron_neighbours(
            np.reshape(layer_output, self.features_grid),
            (neuron_winner // feature_bound, neuron_winner % feature_bound),
            self.learning_radius)
        index_y, _ = np.nonzero(
            np.reshape(output_with_neightbours, (self.n_outputs, 1)))
        return index_y
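
# Hedged NumPy sketch of the three `transform` choices described in the SOFM
# docstring: a plain dot product, a negative euclidean distance and a cosine
# similarity, each scoring the input against every weight column so that
# argmax picks the winning neuron. Function names mirror the choices above
# but are illustrative, not neupy's implementations.
import numpy as np

def dot_product(input_row, weight):
    return input_row.dot(weight)

def neg_euclid_distance(input_row, weight):
    return -np.linalg.norm(weight - input_row[:, None], axis=0)

def cosine_similarity(input_row, weight):
    norms = np.linalg.norm(input_row) * np.linalg.norm(weight, axis=0)
    return input_row.dot(weight) / norms

input_row = np.array([1.0, 0.0])             # one sample with two features
weight = np.array([[1.0, 0.0],               # shape: n_inputs x n_outputs
                   [0.0, 1.0]])
winner = neg_euclid_distance(input_row, weight).argmax()   # neuron 0 wins
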
Exemple #28
0
class ParameterBasedLayer(BaseLayer):
    """ Layer that creates weight and bias parameters.

    Parameters
    ----------
    size : int
        Layer input size.
    weight : 2D array-like or None
        Defines the layer weights. ``None`` means that the weights will be
        generated randomly according to the ``init_method`` property.
        Defaults to ``None``.
    bias : 1D array-like or None
        Defines the layer bias. ``None`` means that the bias will be
        generated randomly according to the ``init_method`` property.
        Defaults to ``None``.
    init_method : {{'bounded', 'normal', 'ortho', 'xavier_normal',\
    'xavier_uniform', 'he_normal', 'he_uniform'}}
        Weight initialization method. Defaults to ``xavier_normal``.

        * ``normal`` generates random weights from a normal distribution \
        with standard deviation equal to ``0.01``.

        * ``bounded`` generates random weights from a uniform distribution.

        * ``ortho`` generates a random orthogonal matrix.

        * ``xavier_normal`` generates a random matrix from a normal \
        distribution where the variance is equal to \
        :math:`\\frac{{2}}{{fan_{{in}} + fan_{{out}}}}`, where \
        :math:`fan_{{in}}` is the number of layer input units and \
        :math:`fan_{{out}}` is the number of layer output units.

        * ``xavier_uniform`` generates a random matrix from a uniform \
        distribution where :math:`w_{{ij}} \in \
        [-\\sqrt{{\\frac{{6}}{{fan_{{in}} + fan_{{out}}}}}}, \
        \\sqrt{{\\frac{{6}}{{fan_{{in}} + fan_{{out}}}}}}]`.

        * ``he_normal`` generates a random matrix from a normal \
        distribution where the variance is equal to \
        :math:`\\frac{{2}}{{fan_{{in}}}}`, where :math:`fan_{{in}}` \
        is the number of layer input units.

        * ``he_uniform`` generates a random matrix from a uniform \
        distribution where :math:`w_{{ij}} \in [\
        -\\sqrt{{\\frac{{6}}{{fan_{{in}}}}}}, \
        \\sqrt{{\\frac{{6}}{{fan_{{in}}}}}}]`.

    bounds : tuple of two float
        Available only when ``init_method`` is set to ``bounded``. The two
        values identify the minimum and maximum possible values of the
        random weights. Defaults to ``(0, 1)``.
    """
    size = IntProperty(minval=1)
    weight = SharedArrayProperty(default=None)
    bias = SharedArrayProperty(default=None)
    bounds = TypedListProperty(default=(0, 1), element_type=(int, float))
    init_method = ChoiceProperty(default=XAVIER_NORMAL,
                                 choices=VALID_INIT_METHODS)

    def __init__(self, size, **options):
        if size is not None:
            options['size'] = size
        super(ParameterBasedLayer, self).__init__(**options)

    def weight_shape(self):
        output_size = self.relate_to_layer.size
        return (self.size, output_size)

    def bias_shape(self):
        output_size = self.relate_to_layer.size
        return (output_size,)

    def initialize(self):
        super(ParameterBasedLayer, self).initialize()

        self.weight = create_shared_parameter(
            value=self.weight,
            name='weight_{}'.format(self.layer_id),
            shape=self.weight_shape(),
            bounds=self.bounds,
            init_method=self.init_method,
        )
        self.bias = create_shared_parameter(
            value=self.bias,
            name='bias_{}'.format(self.layer_id),
            shape=self.bias_shape(),
            bounds=self.bounds,
            init_method=self.init_method,
        )
        self.parameters = [self.weight, self.bias]

    def __repr__(self):
        classname = self.__class__.__name__
        return '{name}({size})'.format(name=classname, size=self.size)
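
# Hedged NumPy sketch of the two normal-distribution initializers quoted in
# the docstring above: xavier_normal draws from N(0, 2 / (fan_in + fan_out))
# and he_normal from N(0, 2 / fan_in). These helpers are illustrative only;
# neupy's create_shared_parameter performs the actual initialization.
import numpy as np

def xavier_normal(fan_in, fan_out):
    variance = 2.0 / (fan_in + fan_out)
    return np.random.normal(0.0, np.sqrt(variance), size=(fan_in, fan_out))

def he_normal(fan_in, fan_out):
    variance = 2.0 / fan_in
    return np.random.normal(0.0, np.sqrt(variance), size=(fan_in, fan_out))

weight = xavier_normal(fan_in=13, fan_out=50)   # e.g. a 13 -> 50 sigmoid layer
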
Exemple #29
0
class LevenbergMarquardt(NoStepSelection, GradientDescent):
    """ Levenberg-Marquardt algorithm.

    Notes
    -----
    * Network minimizes only the Mean Squared Error function.

    Parameters
    ----------
    mu : float
        Controls inversion of the ``J.T * J`` matrix. Defaults to ``0.01``.
    mu_update_factor : float
        Factor by which ``mu`` is decreased when an update decreases the
        error and increased when it does not. Defaults to ``1.2``.
    error : {{``mse``}}
        Levenberg-Marquardt works only with quadratic error functions.
        Defaults to ``mse``.
    {GradientDescent.addons}
    {ConstructableNetwork.connection}
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}
    {SupervisedLearning.train}
    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> lmnet = algorithms.LevenbergMarquardt(
    ...     (2, 3, 1),
    ...     verbose=False
    ... )
    >>> lmnet.train(x_train, y_train)

    See Also
    --------
    :network:`GradientDescent` : GradientDescent algorithm.
    """

    mu = BoundedProperty(default=0.01, minval=0)
    mu_update_factor = BoundedProperty(default=1.2, minval=1)
    error = ChoiceProperty(default='mse', choices={'mse': errors.mse})

    def init_variables(self):
        super(LevenbergMarquardt, self).init_variables()
        self.variables.update(
            mu=theano.shared(name='mu', value=asfloat(self.mu)),
            last_error=theano.shared(name='last_error', value=np.nan),
        )

    def init_train_updates(self):
        network_output = self.variables.network_output
        prediction_func = self.variables.train_prediction_func
        last_error = self.variables.last_error
        error_func = self.variables.error_func
        mu = self.variables.mu

        new_mu = ifelse(
            T.lt(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        mse_for_each_sample = T.mean((network_output - prediction_func)**2,
                                     axis=1)

        params = list(iter_parameters(self))
        param_vector = parameters2vector(self)

        J = compute_jaccobian(mse_for_each_sample, params)
        n_params = J.shape[1]

        updated_params = param_vector - T.nlinalg.matrix_inverse(
            J.T.dot(J) + new_mu * T.eye(n_params)).dot(
                J.T).dot(mse_for_each_sample)

        updates = [(mu, new_mu)]
        parameter_updates = setup_parameter_updates(params, updated_params)
        updates.extend(parameter_updates)

        return updates

    def on_epoch_start_update(self, epoch):
        super(LevenbergMarquardt, self).on_epoch_start_update(epoch)

        last_error = self.errors.last()
        if last_error is not None:
            self.variables.last_error.set_value(last_error)
Exemple #30
0
class LinearSearch(SingleStep):
    """ Linear search for the step selection. Basicly this algorithms
    try different steps and compute your predicted error, after few
    iteration it will chose one which was better.

    Parameters
    ----------
    tol : float
        Tolerance for termination. Defaults to ``0.3``. Can be any number
        greater than zero.
    search_method : {{``golden``, ``brent``}}
        Linear search method. Can be ``golden`` for golden-section search
        or ``brent`` for Brent's search. Defaults to ``golden``.

    Attributes
    ----------
    {first_step}

    Warns
    -----
    {bp_depending}

    Examples
    --------
    >>> import numpy as np
    >>> np.random.seed(0)
    >>>
    >>> from sklearn import datasets, preprocessing
    >>> from sklearn.cross_validation import train_test_split
    >>> from neupy import algorithms, layers
    >>> from neupy.functions import rmsle
    >>>
    >>> dataset = datasets.load_boston()
    >>> data, target = dataset.data, dataset.target
    >>>
    >>> data_scaler = preprocessing.MinMaxScaler()
    >>> target_scaler = preprocessing.MinMaxScaler()
    >>>
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     data_scaler.fit_transform(data),
    ...     target_scaler.fit_transform(target),
    ...     train_size=0.85
    ... )
    >>>
    >>> cgnet = algorithms.ConjugateGradient(
    ...     connection=[
    ...         layers.SigmoidLayer(13),
    ...         layers.SigmoidLayer(50),
    ...         layers.OutputLayer(1),
    ...     ],
    ...     search_method='golden',
    ...     optimizations=[algorithms.LinearSearch],
    ...     verbose=False
    ... )
    >>>
    >>> cgnet.train(x_train, y_train, epochs=100)
    >>> y_predict = cgnet.predict(x_test)
    >>>
    >>> real = target_scaler.inverse_transform(y_test)
    >>> predicted = target_scaler.inverse_transform(y_predict)
    >>>
    >>> error = rmsle(real, predicted.round(1))
    >>> error
    0.20752676697596578

    See Also
    --------
    :network:`ConjugateGradient`
    """
    tol = NonNegativeNumberProperty(default=0.3)
    search_method = ChoiceProperty(choices=['golden', 'brent'],
                                   default='golden')

    def set_weights(self, new_weights):
        for layer, new_weight in zip(self.train_layers, new_weights):
            layer.weight = new_weight.copy()

    def check_updates(self, new_step, weights, delta):
        self.set_weights(weights)
        self.step = new_step

        super(LinearSearch, self).update_weights(delta)
        predicted_output = self.predict(self.input_train)
        return self.error(predicted_output, self.target_train)

    def update_weights(self, weight_deltas):
        real_weights = [layer.weight for layer in self.train_layers]
        res = minimize_scalar(self.check_updates,
                              args=(real_weights, weight_deltas),
                              tol=self.tol,
                              method=self.search_method,
                              options={'xtol': self.tol})

        self.set_weights(real_weights)
        self.step = res.x

        return super(LinearSearch, self).update_weights(weight_deltas)
class LevenbergMarquardt(StepSelectionBuiltIn, GradientDescent):
    """
    Levenberg-Marquardt algorithm.

    Notes
    -----
    - Network minimizes only the Mean Squared Error function.
    - Efficient for small training datasets, because it
      computes gradient per each sample separately.
    - Efficient for small-sized networks.

    Parameters
    ----------
    {GradientDescent.connection}

    mu : float
        Controls inversion of the ``J.T * J`` matrix. Defaults to ``0.01``.

    mu_update_factor : float
        Factor by which ``mu`` is decreased when an update decreases the
        error and increased when it does not. Defaults to ``1.2``.

    error : {{``mse``}}
        Levenberg-Marquardt works only with quadratic error functions.
        Defaults to ``mse``.

    {GradientDescent.show_epoch}

    {GradientDescent.shuffle_data}

    {GradientDescent.epoch_end_signal}

    {GradientDescent.train_end_signal}

    {GradientDescent.verbose}

    {GradientDescent.addons}

    Attributes
    ----------
    {GradientDescent.Attributes}

    Methods
    -------
    {GradientDescent.Methods}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> lmnet = algorithms.LevenbergMarquardt((2, 3, 1))
    >>> lmnet.train(x_train, y_train)

    See Also
    --------
    :network:`GradientDescent` : GradientDescent algorithm.
    """
    mu = BoundedProperty(default=0.01, minval=0)
    mu_update_factor = BoundedProperty(default=1.2, minval=1)
    error = ChoiceProperty(default='mse', choices={'mse': errors.mse})

    step = WithdrawProperty()

    def init_variables(self):
        super(LevenbergMarquardt, self).init_variables()
        self.variables.update(
            mu=theano.shared(name='lev-marq/mu', value=asfloat(self.mu)),
            last_error=theano.shared(name='lev-marq/last-error', value=np.nan),
        )

    def init_train_updates(self):
        network_output = self.variables.network_output
        prediction_func = self.variables.train_prediction_func
        last_error = self.variables.last_error
        error_func = self.variables.error_func
        mu = self.variables.mu

        new_mu = ifelse(
            T.lt(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        se_for_each_sample = ((network_output - prediction_func)**2).ravel()

        params = parameter_values(self.connection)
        param_vector = T.concatenate([param.flatten() for param in params])

        J = compute_jacobian(se_for_each_sample, params)
        n_params = J.shape[1]

        updated_params = param_vector - slinalg.solve(
            J.T.dot(J) + new_mu * T.eye(n_params), J.T.dot(se_for_each_sample))

        updates = [(mu, new_mu)]
        parameter_updates = setup_parameter_updates(params, updated_params)
        updates.extend(parameter_updates)

        return updates

    def on_epoch_start_update(self, epoch):
        super(LevenbergMarquardt, self).on_epoch_start_update(epoch)

        last_error = self.errors.last()
        if last_error is not None:
            self.variables.last_error.set_value(last_error)
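
# Hedged sketch of what compute_jacobian(se_for_each_sample, params) produces
# above: one row of derivatives per sample and one column per parameter. The
# finite-difference version below is for illustration only; it is far slower
# than the symbolic Theano graph used by the class above.
import numpy as np

def numerical_jacobian(per_sample_error, params, eps=1e-6):
    base = per_sample_error(params)
    jacobian = np.zeros((base.size, params.size))
    for j in range(params.size):
        shifted = params.copy()
        shifted[j] += eps
        jacobian[:, j] = (per_sample_error(shifted) - base) / eps
    return jacobian

# toy "network": per-sample squared error of a one-parameter linear model
x = np.array([1.0, 2.0, 3.0])
y = np.array([2.0, 4.0, 6.5])
errors_per_sample = lambda w: (x * w[0] - y) ** 2
J = numerical_jacobian(errors_per_sample, np.array([1.5]))   # shape (3, 1)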