class Momentum(MinibatchGradientDescent):
    """
    Momentum algorithm.

    Parameters
    ----------
    momentum : float
        Controls the ratio of the previous gradient that contributes
        to the update. Defaults to ``0.9``.

    nesterov : bool
        Computes Nesterov momentum instead of the classic momentum.
        Defaults to ``False``.

    {MinibatchGradientDescent.Parameters}

    Attributes
    ----------
    {MinibatchGradientDescent.Attributes}

    Methods
    -------
    {MinibatchGradientDescent.Methods}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> mnet = algorithms.Momentum((2, 3, 1))
    >>> mnet.train(x_train, y_train)

    See Also
    --------
    :network:`GradientDescent` : GradientDescent algorithm.
    """
    momentum = ProperFractionProperty(default=0.9)
    nesterov = Property(default=False, expected_type=bool)

    def init_param_updates(self, layer, parameter):
        step = self.variables.step

        parameter_shape = parameter.get_value().shape
        previous_velocity = theano.shared(
            name="{}/previous-velocity".format(parameter.name),
            value=asfloat(np.zeros(parameter_shape)),
        )

        gradient = T.grad(self.variables.error_func, wrt=parameter)
        velocity = self.momentum * previous_velocity - step * gradient

        if self.nesterov:
            velocity = self.momentum * velocity - step * gradient

        return [
            (parameter, parameter + velocity),
            (previous_velocity, velocity),
        ]
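
As a quick illustration of the update rule above, here is a minimal
NumPy sketch (the gradient values are made up) that mirrors the classic
and Nesterov velocity computations:

import numpy as np

step, momentum = 0.1, 0.9
previous_velocity = np.zeros(2)
gradient = np.array([1.0, -2.0])  # pretend gradient for one parameter

# classic momentum, as in the code above
velocity = momentum * previous_velocity - step * gradient

# the ``nesterov=True`` branch applies the same correction once more
nesterov_velocity = momentum * velocity - step * gradient

parameter = np.zeros(2)
parameter += velocity  # matches the (parameter, parameter + velocity) update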
Example #2
class Momentum(GradientDescent):
    """
    Momentum algorithm.

    Parameters
    ----------
    momentum : float
        Controls the ratio of the previous gradient that contributes
        to the update. Defaults to ``0.9``.

    nesterov : bool
        Computes Nesterov momentum instead of the classic momentum.
        Defaults to ``False``.

    {GradientDescent.Parameters}

    Attributes
    ----------
    {GradientDescent.Attributes}

    Methods
    -------
    {GradientDescent.Methods}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>> from neupy.layers import *
    >>>
    >>> x_train = np.array([[1, 2], [3, 4]])
    >>> y_train = np.array([[1], [0]])
    >>>
    >>> network = Input(2) >> Sigmoid(3) >> Sigmoid(1)
    >>> optimizer = algorithms.Momentum(network)
    >>> optimizer.train(x_train, y_train)

    See Also
    --------
    :network:`GradientDescent` : GradientDescent algorithm.
    """
    momentum = ProperFractionProperty(default=0.9)
    nesterov = Property(default=False, expected_type=bool)

    def init_train_updates(self):
        optimizer = tf.train.MomentumOptimizer(
            use_nesterov=self.nesterov,
            momentum=self.momentum,
            learning_rate=self.step,
        )
        self.functions.optimizer = optimizer
        return [optimizer.minimize(self.variables.loss)]
Example #3
class GlobalPooling(BaseLayer):
    """
    Global pooling layer.

    Parameters
    ----------
    function : callable
        Function that aggregates over dimensions.
        Defaults to ``theano.tensor.mean``.

        .. code-block:: python

            def agg_func(x, axis=None):
                pass

    {BaseLayer.Parameters}

    Methods
    -------
    {BaseLayer.Methods}

    Attributes
    ----------
    {BaseLayer.Attributes}

    Examples
    --------
    >>> from neupy import layers
    >>>
    >>> network = layers.join(
    ...     layers.Input((16, 4, 4)),
    ...     layers.GlobalPooling(),
    ... )
    >>> network.output_shape
    (16,)
    """
    function = Property(default=T.mean)

    @property
    def output_shape(self):
        if self.input_shape is not None:
            return as_tuple(self.input_shape[0])

    def output(self, input_value):
        if input_value.ndim in (1, 2):
            return input_value

        agg_axis = range(2, input_value.ndim)
        return self.function(input_value, axis=list(agg_axis))
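
A minimal NumPy sketch of what the ``output`` method above computes;
the input shape is made up and ``np.mean`` stands in for the default
``theano.tensor.mean``:

import numpy as np

# hypothetical batch: 2 samples, 16 channels on a 4x4 spatial grid
x = np.random.random((2, 16, 4, 4))

# aggregate over every axis after the first two, which mirrors
# ``agg_axis = range(2, input_value.ndim)``
pooled = x.mean(axis=(2, 3))
print(pooled.shape)  # (2, 16)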
Example #4
class BaseRNNLayer(BaseLayer):
    """
    Base class for the recurrent layers

    Parameters
    ----------
    n_units : int
        Number of hidden units in the layer.

    only_return_final : bool
        If ``True``, only return the final sequential output
        (e.g. for tasks where a single target value for the entire
        sequence is desired). In this case, Tensorflow makes an
        optimization which saves memory. Defaults to ``True``.

    {BaseLayer.name}
    """
    n_units = IntProperty(minval=1)
    only_return_final = Property(expected_type=bool)

    def __init__(self, n_units, only_return_final=True, name=None):
        super(BaseRNNLayer, self).__init__(name=name)
        self.only_return_final = only_return_final
        self.n_units = n_units

    def fail_if_shape_invalid(self, input_shape):
        if input_shape and input_shape.ndims != 3:
            clsname = self.__class__.__name__
            raise LayerConnectionError(
                "{} layer was expected input with three dimensions, "
                "but got input with {} dimensions instead. Layer: {}"
                "".format(clsname, input_shape.ndims, self))

    def get_output_shape(self, input_shape):
        input_shape = tf.TensorShape(input_shape)
        n_samples = input_shape[0]

        self.fail_if_shape_invalid(input_shape)

        if self.only_return_final:
            return tf.TensorShape((n_samples, self.n_units))

        n_time_steps = input_shape[1]
        return tf.TensorShape((n_samples, n_time_steps, self.n_units))
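
A short sketch of the two output shapes that ``get_output_shape``
above can produce (all sizes are illustrative):

import tensorflow as tf

input_shape = tf.TensorShape((32, 10, 8))  # (samples, time steps, features)
n_units = 20

# only_return_final=True collapses the time axis
print(tf.TensorShape((input_shape[0], n_units)))                  # (32, 20)
# only_return_final=False keeps it
print(tf.TensorShape((input_shape[0], input_shape[1], n_units)))  # (32, 10, 20)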
Example #5
class BaseRNNLayer(BaseLayer):
    """
    Base class for the recurrent layers

    Parameters
    ----------
    size : int
        Number of hidden units in the layer.

    only_return_final : bool
        If ``True``, only return the final sequential output
        (e.g. for tasks where a single target value for the entire
        sequence is desired). In this case, Theano makes an
        optimization which saves memory. Defaults to ``True``.

    {BaseLayer.Parameters}
    """
    size = IntProperty(minval=1)
    only_return_final = Property(default=True, expected_type=bool)

    def __init__(self, size, **kwargs):
        super(BaseRNNLayer, self).__init__(size=size, **kwargs)

    def validate(self, input_shape):
        n_input_dims = len(input_shape) + 1  # +1 for batch dimension
        clsname = self.__class__.__name__

        if n_input_dims < 3:
            raise LayerConnectionError(
                "{} layer was expected input with at least three "
                "dimensions, got input with {} dimensions instead"
                "".format(clsname, n_input_dims))

    @property
    def output_shape(self):
        if self.only_return_final:
            return as_tuple(self.size)

        n_time_steps = self.input_shape[0]
        return as_tuple(n_time_steps, self.size)
Example #6
class A(Configurable):
    int_property = Property(expected_type=int)
class BaseNetwork(BaseSkeleton):
    """
    Base class for Neural Network algorithms.

    Parameters
    ----------
    step : float
        Learning rate, defaults to ``0.1``.

    show_epoch : int or str
        This property controls how often the network will
        display information about training. There are two
        main syntaxes for this property.

        - You can define it as a positive integer. It defines
          how often you would like to see summary output in the
          terminal. For instance, number ``100`` means that the
          network shows summary at the 100th, 200th, 300th ...
          epochs.

        - A string defines the number of times you want to see
          the output in the terminal. For instance, value
          ``'2 times'`` means that the network will show output
          twice with approximately equal periods of epochs, and
          one additional output after the final epoch.

        Defaults to ``1``.

    shuffle_data : bool
        If it's ``True``, the network shuffles all training data
        before each training epoch. Defaults to ``False``.

    epoch_end_signal : function
        Calls this function when a training epoch finishes.

    train_end_signal : function
        Calls this function when the training process finishes.

    {Verbose.Parameters}

    Attributes
    ----------
    errors : ErrorHistoryList
        Contains list of training errors. This object behaves like
        a regular list, but it also provides three useful methods:
        `last`, `previous` and `normalized`.

    train_errors : ErrorHistoryList
        Alias to the ``errors`` attribute.

    validation_errors : ErrorHistoryList
        The same as `errors` attribute, but it contains only validation
        errors.

    last_epoch : int
        Number of the last trained epoch. After initialization
        it is equal to ``0``.
    """
    step = NumberProperty(default=0.1, minval=0)

    show_epoch = ShowEpochProperty(minval=1, default=1)
    shuffle_data = Property(default=False, expected_type=bool)

    epoch_end_signal = Property(expected_type=types.FunctionType)
    train_end_signal = Property(expected_type=types.FunctionType)

    def __init__(self, *args, **options):
        self.errors = self.train_errors = ErrorHistoryList()
        self.validation_errors = ErrorHistoryList()
        self.training = AttributeKeyDict()
        self.last_epoch = 0

        super(BaseNetwork, self).__init__(*args, **options)

        if self.verbose:
            show_network_options(self, highlight_options=options)

    def predict(self, input_data):
        """
        Return prediction results for the input data.

        Parameters
        ----------
        input_data : array-like

        Returns
        -------
        array-like
        """
        raise NotImplementedError

    def on_epoch_start_update(self, epoch):
        """
        Triggered before running all training procedures
        related to the current epoch.

        Parameters
        ----------
        epoch : int
            Current epoch number.
        """
        self.last_epoch = epoch

    def train_epoch(self, input_train, target_train=None):
        raise NotImplementedError()

    def prediction_error(self, input_test, target_test):
        raise NotImplementedError()

    def train(self, input_train, target_train=None, input_test=None,
              target_test=None, epochs=100, epsilon=None,
              summary='table'):
        """
        Trains the neural network.

        Parameters
        ----------
        input_train : array-like

        target_train : array-like or None

        input_test : array-like or None

        target_test : array-like or None

        epochs : int
            Defaults to `100`.

        epsilon : float or None
            Defaults to ``None``.
        """
        show_epoch = self.show_epoch
        logs = self.logs
        training = self.training = AttributeKeyDict()

        if epochs <= 0:
            raise ValueError("Number of epochs needs to be greater than 0.")

        if epsilon is not None and epochs <= 2:
            raise ValueError("Network should train at teast 3 epochs before "
                             "check the difference between errors")

        logging_info_about_the_data(self, input_train, input_test)
        logging_info_about_training(self, epochs, epsilon)
        logs.newline()

        if summary == 'table':
            summary = SummaryTable(
                table_builder=table.TableBuilder(
                    table.Column(name="Epoch #"),
                    table.NumberColumn(name="Train err", places=4),
                    table.NumberColumn(name="Valid err", places=4),
                    table.TimeColumn(name="Time", width=10),
                    stdout=logs.write
                ),
                network=self,
                delay_limit=1.,
                delay_history_length=10,
            )

        elif summary == 'inline':
            summary = InlineSummary(network=self)

        else:
            raise ValueError("`{}` is unknown summary type"
                             "".format(summary))

        iterepochs = create_training_epochs_iterator(self, epochs, epsilon)
        show_epoch = parse_show_epoch_property(self, epochs, epsilon)
        training.show_epoch = show_epoch

        # Storing attributes and methods in local variables prevents
        # many useless __getattr__ calls inside the loop. These
        # variables speed up the loop when there is a huge number
        # of iterations.
        training_errors = self.errors
        validation_errors = self.validation_errors
        shuffle_data = self.shuffle_data

        train_epoch = self.train_epoch
        epoch_end_signal = self.epoch_end_signal
        train_end_signal = self.train_end_signal
        on_epoch_start_update = self.on_epoch_start_update

        is_first_iteration = True
        can_compute_validation_error = (input_test is not None)
        last_epoch_shown = 0
        # Debugging block: compile a Theano function that computes
        # eigenvalues and eigenvectors of a symmetric matrix (assumes
        # ``import theano`` and ``import theano.tensor as tt``)
        symMatrix = tt.dmatrix("symMatrix")
        symEigenvalues, eigenvectors = tt.nlinalg.eig(symMatrix)
        get_Eigen = theano.function(
            [symMatrix], [symEigenvalues, eigenvectors])
        with logs.disable_user_input():
            for epoch in iterepochs:
                validation_error = None
                epoch_start_time = time.time()
                on_epoch_start_update(epoch)

                if shuffle_data:
                    data = shuffle(*as_tuple(input_train, target_train))
                    input_train, target_train = data[:-1], data[-1]

                try:
                    train_error = train_epoch(input_train, target_train)

                    # Debugging block: for the Hessian algorithm, inspect
                    # the eigenvalue spectrum of the stored Hessian matrix
                    print(epoch)
                    name = str(self)
                    if name.split('(')[0] == 'Hessian':
                        H = self.variables.hessian.get_value()
                        ev, _ = get_Eigen(H)
                        print("positive EV", np.sum(ev > 0))
                        print("Just zero EV", np.sum(ev == 0))
                        # count eigenvalues that are exactly zero plus
                        # near-zero negative ones (heuristic threshold)
                        print("Zero EV", np.sum(ev == 0) +
                              np.sum((ev < 0) & (ev > (np.min(ev) / 2.0))))
                        print("Neg EV", np.sum(ev < 0))
                        print("Max EV", np.max(ev))
                        print("Min EV", np.min(ev))
                        np.save(str(self.itr) + '.npy', ev)
                    if can_compute_validation_error:
                        validation_error = self.prediction_error(input_test,
                                                                 target_test)

                    training_errors.append(train_error)
                    validation_errors.append(validation_error)

                    epoch_finish_time = time.time()
                    training.epoch_time = epoch_finish_time - epoch_start_time

                    if epoch % training.show_epoch == 0 or is_first_iteration:
                        summary.show_last()
                        last_epoch_shown = epoch

                    if epoch_end_signal is not None:
                        epoch_end_signal(self)

                    is_first_iteration = False

                except StopTraining as err:
                    # TODO: This notification breaks table view in terminal.
                    # I need to show it in a different way.
                    logs.message("TRAIN", "Epoch #{} stopped. {}"
                                          "".format(epoch, str(err)))
                    break

            if epoch != last_epoch_shown:
                summary.show_last()

            if train_end_signal is not None:
                train_end_signal(self)

            summary.finish()
            logs.newline()
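
A hedged usage sketch for the ``epoch_end_signal`` parameter described
above; the architecture is illustrative and the ``StopTraining`` import
path is an assumption:

from neupy import algorithms
from neupy.exceptions import StopTraining  # assumed import path

def on_epoch_end(network):
    # stop the training as soon as the last training error is small enough
    if network.errors.last() < 0.01:
        raise StopTraining("Error is small enough")

gdnet = algorithms.GradientDescent((2, 3, 1),
                                   epoch_end_signal=on_epoch_end)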
Example #8
class D(A):
    property_d = Property()
Example #9
class A(object):
    # Doesn't have Configurable as a parent class
    property_a = Property()
Example #10
class BaseNetwork(BaseSkeleton):
    """
    Base class for Neural Network algorithms.

    Parameters
    ----------
    step : float
        Learning rate, defaults to ``0.1``.

    show_epoch : int
        This property controls how often the network will display
        information about training. It has to be defined as a
        positive integer. For instance, number ``100`` means that
        the network shows summary at the 1st, 100th, 200th,
        300th ... and last epochs.

        Defaults to ``1``.

    shuffle_data : bool
        If it's ``True`` then training data will be shuffled before
        the training. Defaults to ``False``.

    signals : dict, list or function
        Function that will be triggered after certain events during
        the training.

    {Verbose.Parameters}

    Methods
    -------
    {BaseSkeleton.fit}

    predict(X)
        Propagates input ``X`` through the network and
        returns produced output.

    plot_errors(logx=False, show=True, **figkwargs)
        Using errors collected during the training this method
        generates plot that can give additional insight into the
        performance reached during the training.

    Attributes
    ----------
    errors : list
        Information about errors. It has two main attributes, namely
        ``train`` and ``valid``. These attributes provide access to
        the training and validation errors respectively.

    last_epoch : int
        Number of the last trained epoch. After initialization
        it is equal to ``0``.

    n_updates_made : int
        Number of training updates applied to the network.
    """
    step = NumberProperty(default=0.1, minval=0)
    show_epoch = IntProperty(minval=1, default=1)
    shuffle_data = Property(default=False, expected_type=bool)
    signals = Property(expected_type=object)

    def __init__(self, *args, **options):
        super(BaseNetwork, self).__init__(*args, **options)

        self.last_epoch = 0
        self.n_updates_made = 0
        self.errors = base_signals.ErrorCollector()

        signals = list(
            as_tuple(
                base_signals.ProgressbarSignal(),
                base_signals.PrintLastErrorSignal(),
                self.errors,
                self.signals,
            ))

        for i, signal in enumerate(signals):
            if inspect.isfunction(signal):
                signals[i] = base_signals.EpochEndSignal(signal)

            elif inspect.isclass(signal):
                signals[i] = signal()

        self.events = Events(network=self, signals=signals)

    def one_training_update(self, X_train, y_train=None):
        """
        Function would be trigger before run all training procedure
        related to the current epoch.

        Parameters
        ----------
        epoch : int
            Current epoch number.
        """
        raise NotImplementedError()

    def score(self, X, y):
        raise NotImplementedError()

    def plot_errors(self, logx=False, show=True, **figkwargs):
        return plot_optimizer_errors(optimizer=self,
                                     logx=logx,
                                     show=show,
                                     **figkwargs)

    def train(self,
              X_train,
              y_train=None,
              X_test=None,
              y_test=None,
              epochs=100,
              batch_size=None):
        """
        Method train neural network.

        Parameters
        ----------
        X_train : array-like
        y_train : array-like or None
        X_test : array-like or None
        y_test : array-like or None

        epochs : int
            Defaults to ``100``.

        epsilon : float or None
            Defaults to ``None``.
        """
        if epochs <= 0:
            raise ValueError("Number of epochs needs to be a positive number")

        epochs = int(epochs)
        first_epoch = self.last_epoch + 1
        batch_size = batch_size or getattr(self, 'batch_size', None)

        self.events.trigger(
            name='train_start',
            X_train=X_train,
            y_train=y_train,
            epochs=epochs,
            batch_size=batch_size,
            store_data=False,
        )

        try:
            for epoch in range(first_epoch, first_epoch + epochs):
                self.events.trigger('epoch_start')

                self.last_epoch = epoch
                iterator = iters.minibatches(
                    (X_train, y_train),
                    batch_size,
                    self.shuffle_data,
                )

                for X_batch, y_batch in iterator:
                    self.events.trigger('update_start')
                    update_start_time = time.time()

                    train_error = self.one_training_update(X_batch, y_batch)
                    self.n_updates_made += 1

                    self.events.trigger(
                        name='train_error',
                        value=train_error,
                        eta=time.time() - update_start_time,
                        epoch=epoch,
                        n_updates=self.n_updates_made,
                        n_samples=iters.count_samples(X_batch),
                        store_data=True,
                    )
                    self.events.trigger('update_end')

                if X_test is not None:
                    test_start_time = time.time()
                    validation_error = self.score(X_test, y_test)
                    self.events.trigger(
                        name='valid_error',
                        value=validation_error,
                        eta=time.time() - test_start_time,
                        epoch=epoch,
                        n_updates=self.n_updates_made,
                        n_samples=iters.count_samples(X_test),
                        store_data=True,
                    )

                self.events.trigger('epoch_end')

        except StopTraining as err:
            self.logs.message(
                "TRAIN",
                "Epoch #{} was stopped. Message: {}".format(epoch, str(err)))

        self.events.trigger('train_end')
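
A minimal sketch of the ``signals`` parameter from the docstring above.
Plain functions are wrapped into ``EpochEndSignal`` objects by the
constructor, so a callback can be passed directly (the architecture is
illustrative):

from neupy import algorithms, layers

def on_epoch_end(optimizer):
    print("finished epoch:", optimizer.last_epoch)

optimizer = algorithms.Momentum(
    layers.Input(2) >> layers.Sigmoid(3) >> layers.Sigmoid(1),
    signals=on_epoch_end,
)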
Example #11
class B(A):
    property_b = Property()
Example #12
    def test_property_repr_with_name(self):
        prop = Property(default=3)
        prop.name = 'test'

        self.assertEqual('Property(name="test")', repr(prop))
class DiscreteHopfieldNetwork(DiscreteMemory):
    """
    Discrete Hopfield Network. It can memorize binary samples
    and reconstruct them from corrupted samples.

    Notes
    -----
    - Works only with binary data. Input matrix should
      contain only zeros and ones.

    Parameters
    ----------
    {DiscreteMemory.mode}

    {DiscreteMemory.n_times}

    check_limit : bool
        Option enables a limit on the number of patterns that the
        network can store, computed with the logarithmic proportion
        rule below. Defaults to ``True``.

        .. math::

            \\frac{{n_{{features}}}}{{2 \\cdot log_{{e}}(n_{{features}})}}

    Methods
    -------
    energy(input_data)
        Compute Discrete Hopfield Energy.

    train(input_data)
        Save input data pattern into the network memory.

    predict(input_data, n_times=None)
        Recover data from the memory using the input pattern.
        The ``n_times`` argument controls the number of iterations
        during the prediction procedure. If it's ``None``, the value
        from the ``n_times`` property is used instead.

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> def draw_bin_image(image_matrix):
    ...     for row in image_matrix.tolist():
    ...         print('| ' + ' '.join(' *'[val] for val in row))
    ...
    >>> zero = np.matrix([
    ...     0, 1, 1, 1, 0,
    ...     1, 0, 0, 0, 1,
    ...     1, 0, 0, 0, 1,
    ...     1, 0, 0, 0, 1,
    ...     1, 0, 0, 0, 1,
    ...     0, 1, 1, 1, 0
    ... ])
    >>>
    >>> one = np.matrix([
    ...     0, 1, 1, 0, 0,
    ...     0, 0, 1, 0, 0,
    ...     0, 0, 1, 0, 0,
    ...     0, 0, 1, 0, 0,
    ...     0, 0, 1, 0, 0,
    ...     0, 0, 1, 0, 0
    ... ])
    >>>
    >>> two = np.matrix([
    ...     1, 1, 1, 0, 0,
    ...     0, 0, 0, 1, 0,
    ...     0, 0, 0, 1, 0,
    ...     0, 1, 1, 0, 0,
    ...     1, 0, 0, 0, 0,
    ...     1, 1, 1, 1, 1,
    ... ])
    >>>
    >>> half_zero = np.matrix([
    ...     0, 1, 1, 1, 0,
    ...     1, 0, 0, 0, 1,
    ...     1, 0, 0, 0, 1,
    ...     0, 0, 0, 0, 0,
    ...     0, 0, 0, 0, 0,
    ...     0, 0, 0, 0, 0,
    ... ])
    >>>
    >>> draw_bin_image(zero.reshape((6, 5)))
    |   * * *
    | *       *
    | *       *
    | *       *
    | *       *
    |   * * *
    >>> draw_bin_image(half_zero.reshape((6, 5)))
    |   * * *
    | *       *
    | *       *
    |
    |
    |
    >>> data = np.concatenate([zero, one, two], axis=0)
    >>>
    >>> dhnet = algorithms.DiscreteHopfieldNetwork()
    >>> dhnet.train(data)
    >>>
    >>> result = dhnet.predict(half_zero)
    >>> draw_bin_image(result.reshape((6, 5)))
    |   * * *
    | *       *
    | *       *
    | *       *
    | *       *
    |   * * *

    See Also
    --------
    :ref:`password-recovery`: Password recovery with Discrete Hopfield Network.
    :ref:`discrete-hopfield-network`: Discrete Hopfield Network article.
    """
    check_limit = Property(default=True, expected_type=bool)

    def __init__(self, **options):
        super(DiscreteHopfieldNetwork, self).__init__(**options)
        self.n_memorized_samples = 0

    def train(self, input_data):
        self.discrete_validation(input_data)

        input_data = bin2sign(input_data)
        input_data = format_data(input_data, is_feature1d=False)

        n_rows, n_features = input_data.shape
        n_rows_after_update = self.n_memorized_samples + n_rows

        if self.check_limit:
            memory_limit = math.ceil(n_features / (2 * math.log(n_features)))

            if n_rows_after_update > memory_limit:
                raise ValueError("You can't memorize more than {0} "
                                 "samples".format(memory_limit))

        weight_shape = (n_features, n_features)

        if self.weight is None:
            self.weight = np.zeros(weight_shape, dtype=int)

        if self.weight.shape != weight_shape:
            n_features_expected = self.weight.shape[1]
            raise ValueError("Input data has invalid number of features. "
                             "Got {} features instead of {}."
                             "".format(n_features, n_features_expected))

        self.weight = self.weight + input_data.T.dot(input_data)
        np.fill_diagonal(self.weight, np.zeros(len(self.weight)))
        self.n_memorized_samples = n_rows_after_update

    def predict(self, input_data, n_times=None):
        self.discrete_validation(input_data)
        input_data = format_data(bin2sign(input_data), is_feature1d=False)

        if self.mode == 'async':
            if n_times is None:
                n_times = self.n_times

            _, n_features = input_data.shape
            output_data = input_data

            for _ in range(n_times):
                # np.random.randint excludes the upper bound, so use
                # n_features to make every position reachable
                position = np.random.randint(0, n_features)
                raw_new_value = output_data.dot(self.weight[:, position])
                output_data[:, position] = np.sign(raw_new_value)
        else:
            output_data = input_data.dot(self.weight)

        return step_function(output_data).astype(int)

    def energy(self, input_data):
        self.discrete_validation(input_data)
        input_data = bin2sign(input_data)
        input_data = format_data(input_data, is_feature1d=False)
        n_rows, n_features = input_data.shape

        if n_rows == 1:
            return hopfield_energy(self.weight, input_data, input_data)

        output = np.zeros(n_rows)
        for i, row in enumerate(input_data):
            output[i] = hopfield_energy(self.weight, row, row)

        return output
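
A small NumPy sketch of the Hebbian weight update and the logarithmic
capacity rule used in ``train`` above (the patterns are made up and
already converted to the -1/1 representation that ``bin2sign`` produces):

import math
import numpy as np

patterns = np.array([
    [1, -1, 1, -1],
    [1, 1, -1, -1],
])
n_features = patterns.shape[1]

weight = patterns.T.dot(patterns)
np.fill_diagonal(weight, 0)  # no self-connections

# capacity rule: n / (2 * ln(n))
memory_limit = math.ceil(n_features / (2 * math.log(n_features)))
print(memory_limit)  # 2 for n_features=4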
Example #14
def test_property_get_method(self):
    prop = Property(default=3)
    self.assertEqual(None, prop.__get__(None, None))
Example #15
def test_property_repr(self):
    prop = Property(default=3)
    self.assertEqual('Property()', repr(prop))
Example #16
class A(Configurable):
    required_prop = Property(required=True)
Example #17
class B(Configurable):
    int_property = Property(expected_type=(str, set))
Example #18
class A(Configurable):
    prop = Property(default=3)
Example #19
class BaseLayer(BaseConnection, Configurable):
    """
    Base class for all layers.

    Parameters
    ----------
    name : str or None
        Layer's identifier. If name is equal to ``None`` then the
        name will be generated automatically. Defaults to ``None``.

    Methods
    -------
    disable_training_state()
        Switch off the training state.

    initialize()
        Set up important configurations related to the layer.

    Attributes
    ----------
    input_shape : tuple
        Layer's input shape.

    output_shape : tuple
        Layer's output shape.

    training_state : bool
        Defines whether layer in training state or not.

    parameters : dict
        Trainable parameters.

    graph : LayerGraph instance
        Graphs that stores all relations between layers.
    """
    name = Property(expected_type=six.string_types)

    # Stores global identifier index for each layer class
    global_identifiers_map = {}

    def __new__(cls, *args, **kwargs):
        if cls not in cls.global_identifiers_map:
            cls.global_identifiers_map[cls] = 1
        return super(BaseLayer, cls).__new__(cls)

    def __init__(self, *args, **options):
        super(BaseLayer, self).__init__(*args)

        self.updates = []
        self.parameters = OrderedDict()
        self.name = generate_layer_name(layer=self)
        self.input_shape_ = None

        self.graph.add_layer(self)

        Configurable.__init__(self, **options)

    def validate(self, input_shape):
        """
        Validate input shape value before assigning it.

        Parameters
        ----------
        input_shape : tuple with int
        """

    @property
    def input_shape(self):
        return self.input_shape_

    @input_shape.setter
    def input_shape(self, shape):
        self.validate(shape)
        self.input_shape_ = shape

    @property
    def output_shape(self):
        return self.input_shape

    def output(self, input_value):
        return input_value

    def add_parameter(self, value, name, shape=None, trainable=True):
        theano_name = 'layer:{layer_name}/{parameter_name}'.format(
            layer_name=self.name,
            parameter_name=name.replace('_', '-'))

        parameter = create_shared_parameter(value, theano_name, shape)
        parameter.trainable = trainable

        self.parameters[name] = parameter

        setattr(self, name, parameter)
        return parameter

    def __repr__(self):
        classname = self.__class__.__name__
        return '{name}()'.format(name=classname)
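
A hedged sketch of how the automatic naming described above behaves in
practice; the exact generated names depend on ``generate_layer_name``
and are illustrative:

from neupy import layers

relu_1 = layers.Relu(10)
relu_2 = layers.Relu(10)

# each instance receives a unique auto-generated identifier
print(relu_1.name, relu_2.name)  # e.g. relu-1 relu-2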
Example #20
class LSTM(BaseRNNLayer):
    """
    Long Short Term Memory (LSTM) Layer.

    Parameters
    ----------
    {BaseRNNLayer.size}

    input_weights : Initializer, ndarray
        Weight parameters for input connection.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    hidden_weights : Initializer, ndarray
        Weight parameters for hidden connection.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    cell_weights : Initializer, ndarray
        Weight parameters for cell connection. Required only when
        ``peepholes=True``; otherwise it will be ignored.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    biases : Initializer, ndarray
        Bias parameters for all gates.
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    activation_functions : dict, callable
        Activation functions for different gates. Defaults to:

        .. code-block:: python

            # import tensorflow as tf
            dict(
                ingate=tf.nn.sigmoid,
                forgetgate=tf.nn.sigmoid,
                outgate=tf.nn.sigmoid,
                cell=tf.tanh,
            )

        If the application requires modification to only one
        parameter, then it's better to specify only the one that
        you need to modify and ignore the other parameters

        .. code-block:: python

            dict(ingate=tf.tanh)

        Other parameters like ``forgetgate`` or ``outgate`` will be
        equal to their default values.

    learn_init : bool
        If ``True``, make ``cell_init`` and ``hidden_init`` trainable
        variables. Defaults to ``False``.

    cell_init : array-like, Tensorflow variable, scalar or Initializer
        Initializer for initial cell state (:math:`c_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    hidden_init : array-like, Tensorflow variable, scalar or Initializer
        Initializer for initial hidden state (:math:`h_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    backwards : bool
        If ``True``, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`. Defaults to ``False``.

    {BaseRNNLayer.only_return_final}

    peepholes : bool
        If ``True``, the LSTM uses peephole connections.
        When ``False``, cell parameters are ignored.
        Defaults to ``False``.

    unroll_scan : bool
        If ``True`` the recursion is unrolled instead of using scan.
        For some graphs this gives a significant speed up but it
        might also consume more memory. When ``unroll_scan=True``,
        backpropagation always includes the full sequence, so
        ``n_gradient_steps`` must be set to ``-1`` and the input
        sequence length must be known at compile time (i.e.,
        cannot be given as ``None``). Defaults to ``False``.

    gradient_clipping : float or int
        If nonzero, the gradient messages are clipped to the
        given value during the backward pass. Defaults to ``0``.

    {BaseLayer.Parameters}

    Notes
    -----
    Code was adapted from the
    `Lasagne <https://github.com/Lasagne/Lasagne>`_ library.

    Examples
    --------

    Sequence classification

    .. code-block:: python

        from neupy import layers, algorithms

        n_time_steps = 40
        n_categories = 20
        embedded_size = 10

        network = algorithms.RMSProp(
            [
                layers.Input(n_time_steps),
                layers.Embedding(n_categories, embedded_size),
                layers.LSTM(20),
                layers.Sigmoid(1),
            ]
        )
    """
    input_weights = ParameterProperty(default=init.HeNormal())
    hidden_weights = ParameterProperty(default=init.HeNormal())
    cell_weights = ParameterProperty(default=init.HeNormal())
    biases = ParameterProperty(default=init.Constant(0))

    activation_functions = MultiCallableProperty(default=dict(
        ingate=tf.nn.sigmoid,
        forgetgate=tf.nn.sigmoid,
        outgate=tf.nn.sigmoid,
        cell=tf.tanh,
    ))

    learn_init = Property(default=False, expected_type=bool)
    cell_init = ParameterProperty(default=init.Constant(0))
    hidden_init = ParameterProperty(default=init.Constant(0))

    unroll_scan = Property(default=False, expected_type=bool)
    backwards = Property(default=False, expected_type=bool)
    peepholes = Property(default=False, expected_type=bool)
    gradient_clipping = NumberProperty(default=0, minval=0)

    def initialize(self):
        super(LSTM, self).initialize()
        n_inputs = np.prod(self.input_shape[1:])

        # If peephole (cell to gate) connections were enabled, initialize
        # peephole connections.  These are elementwise products with the cell
        # state, so they are represented as vectors.
        if self.peepholes:
            self.weight_cell_to_ingate = self.add_parameter(
                value=self.cell_weights,
                name='weight_cell_to_ingate',
                shape=(self.size, ))
            self.weight_cell_to_forgetgate = self.add_parameter(
                value=self.cell_weights,
                name='weight_cell_to_forgetgate',
                shape=(self.size, ))
            self.weight_cell_to_outgate = self.add_parameter(
                value=self.cell_weights,
                name='weight_cell_to_outgate',
                shape=(self.size, ))

        self.input_weights = self.add_parameter(
            value=self.input_weights,
            name='input_weights',
            shape=(n_inputs, 4 * self.size),
        )
        self.hidden_weights = self.add_parameter(
            value=self.hidden_weights,
            name='hidden_weights',
            shape=(self.size, 4 * self.size),
        )
        self.biases = self.add_parameter(
            value=self.biases,
            name='biases',
            shape=(4 * self.size, ),
        )

        # Initialization parameters
        self.add_parameter(
            value=self.cell_init,
            shape=(1, self.size),
            name="cell_init",
            trainable=self.learn_init,
        )
        self.add_parameter(
            value=self.hidden_init,
            shape=(1, self.size),
            name="hidden_init",
            trainable=self.learn_init,
        )

    def output(self, input_value):
        # Because scan iterates over the first dimension we
        # dimshuffle to (n_time_steps, n_batch, n_features)
        input_value = tf.transpose(input_value, [1, 0, 2])
        input_shape = tf.shape(input_value)
        n_batch = input_shape[1]

        def one_lstm_step(states, input_n):
            with tf.name_scope('lstm-cell'):
                cell_previous, hid_previous = states
                input_n = tf.matmul(input_n, self.input_weights) + self.biases

                # Calculate gates pre-activations and slice
                gates = input_n + tf.matmul(hid_previous, self.hidden_weights)

                # Clip gradients
                if self.gradient_clipping != 0:
                    gates = clip_gradient(gates, self.gradient_clipping)

                # Extract the pre-activation gate values
                ingate, forgetgate, cell_input, outgate = tf.split(gates,
                                                                   4,
                                                                   axis=1)

                if self.peepholes:
                    # Compute peephole connections
                    ingate += cell_previous * self.weight_cell_to_ingate
                    forgetgate += (cell_previous *
                                   self.weight_cell_to_forgetgate)

                # Apply nonlinearities
                ingate = self.activation_functions.ingate(ingate)
                forgetgate = self.activation_functions.forgetgate(forgetgate)
                cell_input = self.activation_functions.cell(cell_input)

                # Compute new cell value
                cell = forgetgate * cell_previous + ingate * cell_input

                if self.peepholes:
                    outgate += cell * self.weight_cell_to_outgate

                outgate = self.activation_functions.outgate(outgate)

                # Compute new hidden unit activation
                hid = outgate * tf.tanh(cell)
                return [cell, hid]

        cell_init = tf.tile(self.cell_init, (n_batch, 1))
        hidden_init = tf.tile(self.hidden_init, (n_batch, 1))
        sequence = input_value

        if self.backwards:
            sequence = tf.reverse(sequence, axis=[0])

        if self.unroll_scan:
            # Explicitly unroll the recurrence instead of using scan
            hid_out = unroll_scan(
                fn=one_lstm_step,
                sequence=sequence,
                outputs_info=[cell_init, hidden_init],
            )
        else:
            _, hid_out = tf.scan(
                fn=one_lstm_step,
                elems=input_value,
                initializer=[cell_init, hidden_init],
                name='lstm-scan',
            )

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            return hid_out[-1]

        # if scan is backward reverse the output
        if self.backwards:
            hid_out = tf.reverse(hid_out, axis=[0])

        # dimshuffle back to (n_batch, n_time_steps, n_features))
        hid_out = tf.transpose(hid_out, [1, 0, 2])

        return hid_out
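
A minimal NumPy sketch of a single ``one_lstm_step`` iteration from the
code above, with peepholes and gradient clipping omitted (all sizes and
values are made up):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

n_features, n_units = 4, 3
rng = np.random.RandomState(0)
W = rng.randn(n_features, 4 * n_units)  # input_weights
U = rng.randn(n_units, 4 * n_units)     # hidden_weights
b = np.zeros(4 * n_units)               # biases

x_t = rng.randn(1, n_features)
c_prev = np.zeros((1, n_units))
h_prev = np.zeros((1, n_units))

# pre-activations for all four gates at once, then slice
gates = x_t.dot(W) + b + h_prev.dot(U)
ingate, forgetgate, cell_input, outgate = np.split(gates, 4, axis=1)

c_t = sigmoid(forgetgate) * c_prev + sigmoid(ingate) * np.tanh(cell_input)
h_t = sigmoid(outgate) * np.tanh(c_t)
print(h_t.shape)  # (1, 3)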
Example #21
class LVQ(BaseNetwork):
    """
    Learning Vector Quantization (LVQ) algorithm.

    Notes
    -----
    - Input data needs to be normalized, because LVQ uses
      Euclidean distance to find clusters.

    - Training error is just a ratio of misclassified
      samples.

    Parameters
    ----------
    n_inputs : int
        Number of input units. It should be equal to the
        number of features in the input data set.

    n_subclasses : int, None
        Defines the total number of subclasses. The value should be
        greater than or equal to the number of classes. ``None`` sets
        the number of subclasses equal to the number of classes.
        Defaults to ``None``.

    n_classes : int
        Number of classes in the data set.

    prototypes_per_class : list, None
        Defines the number of prototypes for each class. For instance,
        if ``n_classes=3`` and ``n_subclasses=8`` then there can be
        3 subclasses for the first class, 3 for the second one
        and 2 for the third one (3 + 3 + 2 == 8). This example
        can be specified as ``prototypes_per_class=[3, 3, 2]``.

        There are two rules that apply to this parameter:

        1. ``sum(prototypes_per_class) == n_subclasses``

        2. ``len(prototypes_per_class) == n_classes``

        The ``None`` value will distribute an approximately equal
        number of subclasses per class. It's approximate, because
        in cases when ``n_subclasses % n_classes != 0`` there is
        no way to distribute an equal number of subclasses per
        class.

        Defaults to ``None``.

    {BaseNetwork.step}

    n_updates_to_stepdrop : int or None
        If this option is not equal to ``None`` then after every
        update LVQ reduces the step size, until the number of
        applied updates reaches the ``n_updates_to_stepdrop``
        value. The minimum possible step size is defined by the
        ``minstep`` parameter.

        Be aware that the number of updates is not the same as the
        number of epochs. LVQ applies an update after each sample
        propagated through the network. The relation between this
        parameter and the maximum number of epochs is the following

        .. code-block:: python

            n_updates_to_stepdrop = n_samples * n_max_epochs

        If the parameter is equal to ``None`` then the step size
        won't be reduced after each update.

        Defaults to ``None``.

    minstep : float
        The step size will never be lower than this value. This
        property is useful only when ``n_updates_to_stepdrop``
        is not ``None``. Defaults to ``1e-5``.

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.epoch_end_signal}

    {BaseNetwork.train_end_signal}

    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}

    {BaseSkeleton.fit}
    """
    n_inputs = IntProperty(minval=1)
    n_subclasses = IntProperty(minval=2, default=None, allow_none=True)
    n_classes = IntProperty(minval=2)

    prototypes_per_class = TypedListProperty(allow_none=True, default=None)
    weight = Property(expected_type=(np.ndarray, init.Initializer),
                      allow_none=True, default=None)

    n_updates_to_stepdrop = IntProperty(default=None, allow_none=True,
                                        minval=1)
    minstep = NumberProperty(minval=0, default=1e-5)

    def __init__(self, **options):
        self.initialized = False
        super(LVQ, self).__init__(**options)

        self.n_updates = 0

        if self.n_subclasses is None:
            self.n_subclasses = self.n_classes

        if isinstance(self.weight, init.Initializer):
            weight_shape = (self.n_inputs, self.n_subclasses)
            self.weight = self.weight.sample(weight_shape)

        if self.weight is not None:
            self.initialized = True

        if self.n_subclasses < self.n_classes:
            raise ValueError("Number of subclasses should be greater "
                             "or equal to the number of classes. Network "
                             "was defined with {} subclasses and {} classes"
                             "".format(self.n_subclasses, self.n_classes))

        if self.prototypes_per_class is None:
            whole, remainder = divmod(self.n_subclasses, self.n_classes)
            self.prototypes_per_class = [whole] * self.n_classes

            if remainder:
                # Since we have a remainder left, it means that we
                # cannot have an equal number of subclasses per each
                # class, therefore we will add +1 to randomly
                # selected classes.
                class_indices = np.random.choice(self.n_classes, remainder,
                                                 replace=False)

                for class_index in class_indices:
                    self.prototypes_per_class[class_index] += 1

        if len(self.prototypes_per_class) != self.n_classes:
            raise ValueError("LVQ defined for classification problem that has "
                             "{} classes, but the `prototypes_per_class` "
                             "variable has defined data for {} classes."
                             "".format(self.n_classes,
                                       len(self.prototypes_per_class)))

        if sum(self.prototypes_per_class) != self.n_subclasses:
            raise ValueError("Invalid distribution of subclasses for the "
                             "`prototypes_per_class` variable. Got total "
                             "of {} subclasses ({}) instead of {} expected"
                             "".format(sum(self.prototypes_per_class),
                                       self.prototypes_per_class,
                                       self.n_subclasses))

        self.subclass_to_class = []
        for class_id, n_prototypes in enumerate(self.prototypes_per_class):
            self.subclass_to_class.extend([class_id] * n_prototypes)

    @property
    def training_step(self):
        if self.n_updates_to_stepdrop is None:
            return self.step

        updates_ratio = (1 - self.n_updates / self.n_updates_to_stepdrop)
        return self.minstep + (self.step - self.minstep) * updates_ratio

    def predict(self, input_data):
        if not self.initialized:
            raise NotTrained("LVQ network hasn't been trained yet")

        input_data = format_data(input_data)
        subclass_to_class = self.subclass_to_class
        weight = self.weight

        predictions = []
        for input_row in input_data:
            output = euclid_distance(input_row, weight)
            winner_subclass = int(output.argmin(axis=1))

            predicted_class = subclass_to_class[winner_subclass]
            predictions.append(predicted_class)

        return np.array(predictions)

    def train(self, input_train, target_train, *args, **kwargs):
        input_train = format_data(input_train)
        target_train = format_data(target_train)

        n_input_samples = len(input_train)

        if n_input_samples <= self.n_subclasses:
            raise ValueError("Number of training input samples should be "
                             "greater than the number of subclasses. "
                             "Training method received {} input samples."
                             "".format(n_input_samples))

        if not self.initialized:
            target_classes = sorted(np.unique(target_train).astype(np.int))
            expected_classes = list(range(self.n_classes))

            if target_classes != expected_classes:
                raise ValueError("All classes should be integers from the "
                                 "range [0, {}], but got the following "
                                 "classes instead {}".format(
                                    self.n_classes - 1, target_classes))

            weights = []
            iterator = zip(target_classes, self.prototypes_per_class)
            for target_class, n_prototypes in iterator:
                is_valid_class = (target_train[:, 0] == target_class)
                is_valid_class = is_valid_class.astype('float64')
                n_samples_per_class = sum(is_valid_class)
                is_valid_class /= n_samples_per_class

                if n_samples_per_class <= n_prototypes:
                    raise ValueError("Input data has {0} samples for class-{1}"
                                     ". Number of samples per specified "
                                     "class-{1} should be greater than {2}."
                                     "".format(n_samples_per_class,
                                               target_class, n_prototypes))

                class_weight_indices = np.random.choice(
                    np.arange(n_input_samples), n_prototypes,
                    replace=False, p=is_valid_class)

                class_weight = input_train[class_weight_indices]
                weights.extend(class_weight)

            self.weight = np.array(weights)
            self.initialized = True

        super(LVQ, self).train(input_train, target_train, *args, **kwargs)

    def train_epoch(self, input_train, target_train):
        weight = self.weight
        subclass_to_class = self.subclass_to_class

        n_correct_predictions = 0
        for input_row, target in zip(input_train, target_train):
            step = self.training_step
            output = euclid_distance(input_row, weight)
            winner_subclass = int(output.argmin())
            predicted_class = subclass_to_class[winner_subclass]

            weight_update = input_row - weight[winner_subclass, :]
            is_correct_prediction = (predicted_class == target)

            if is_correct_prediction:
                weight[winner_subclass, :] += step * weight_update
            else:
                weight[winner_subclass, :] -= step * weight_update

            n_correct_predictions += is_correct_prediction
            self.n_updates += 1

        n_samples = len(input_train)
        return 1 - n_correct_predictions / n_samples
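
A quick numeric check of the linearly decaying ``training_step``
property defined above (the parameter values are made up):

step, minstep, n_updates_to_stepdrop = 0.1, 1e-5, 1000

for n_updates in (0, 500, 1000):
    updates_ratio = 1 - n_updates / n_updates_to_stepdrop
    print(minstep + (step - minstep) * updates_ratio)
# prints 0.1, then ~0.05, then 1e-05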
Example #22
class GRU(BaseRNNLayer):
    """
    Gated Recurrent Unit (GRU) Layer.

    Parameters
    ----------
    {BaseRNNLayer.size}

    input_weights : Initializer, ndarray
        Weight parameters for input connection.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    hidden_weights : Initializer, ndarray
        Weight parameters for hidden connection.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    bias : Initializer, ndarray
        Bias parameters for all gates.
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    activation_functions : dict, callable
        Activation functions for different gates. Defaults to:

        .. code-block:: python

            # import tensorflow as tf
            dict(
                resetgate=tf.nn.sigmoid,
                updategate=tf.nn.sigmoid,
                hidden_update=tf.tanh,
            )

        If the application requires modification to only one
        parameter, then it's better to specify only the one that
        you need to modify and ignore the other parameters

        .. code-block:: python

            dict(resetgate=tf.tanh)

        Other parameters like ``updategate`` or ``hidden_update``
        will be equal to their default values.

    learn_init : bool
        If ``True``, make ``hidden_init`` trainable variable.
        Defaults to ``False``.

    hidden_init : array-like, Tensorflow variable, scalar or Initializer
        Initializer for initial hidden state (:math:`h_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    {BaseRNNLayer.only_return_final}

    backwards : bool
        If ``True``, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`. Defaults to ``False``.

    unroll_scan : bool
        If ``True`` the recursion is unrolled instead of using scan.
        For some graphs this gives a significant speed up but it
        might also consume more memory. When ``unroll_scan=True``,
        backpropagation always includes the full sequence, so
        ``n_gradient_steps`` must be set to ``-1`` and the input
        sequence length must be known at compile time (i.e.,
        cannot be given as ``None``). Defaults to ``False``.

    {BaseLayer.Parameters}

    Notes
    -----
    Code was adapted from the
    `Lasagne <https://github.com/Lasagne/Lasagne>`_ library.

    Examples
    --------

    Sequence classification

    .. code-block:: python

        from neupy import layers, algorithms

        n_time_steps = 40
        n_categories = 20
        embedded_size = 10

        network = algorithms.RMSProp(
            [
                layers.Input(n_time_steps),
                layers.Embedding(n_categories, embedded_size),
                layers.GRU(20),
                layers.Sigmoid(1),
            ]
        )
    """
    input_weights = ParameterProperty(default=init.HeNormal())
    hidden_weights = ParameterProperty(default=init.HeNormal())
    biases = ParameterProperty(default=init.Constant(0))

    activation_functions = MultiCallableProperty(default=dict(
        resetgate=tf.nn.sigmoid,
        updategate=tf.nn.sigmoid,
        hidden_update=tf.tanh,
    ))

    learn_init = Property(default=False, expected_type=bool)
    hidden_init = ParameterProperty(default=init.Constant(0))

    backwards = Property(default=False, expected_type=bool)
    unroll_scan = Property(default=False, expected_type=bool)
    gradient_clipping = NumberProperty(default=0, minval=0)

    def initialize(self):
        super(GRU, self).initialize()
        n_inputs = np.prod(self.input_shape[1:])

        self.input_weights = self.add_parameter(
            value=self.input_weights,
            name='input_weights',
            shape=(n_inputs, 3 * self.size),
        )
        self.hidden_weights = self.add_parameter(
            value=self.hidden_weights,
            name='hidden_weights',
            shape=(self.size, 3 * self.size),
        )
        self.biases = self.add_parameter(
            value=self.biases,
            name='biases',
            shape=(3 * self.size, ),
        )

        self.add_parameter(value=self.hidden_init,
                           shape=(1, self.size),
                           name="hidden_init",
                           trainable=self.learn_init)

    def output(self, input_value):
        # Because scan iterates over the first dimension we
        # dimshuffle to (n_time_steps, n_batch, n_features)
        input_value = tf.transpose(input_value, [1, 0, 2])
        input_shape = tf.shape(input_value)
        n_batch = input_shape[1]

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def one_gru_step(states, input_n):
            with tf.name_scope('gru-cell'):
                hid_previous, = states
                input_n = tf.matmul(input_n, self.input_weights) + self.biases

                # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1},
                # and W_{hc} h_{t - 1}
                hid_input = tf.matmul(hid_previous, self.hidden_weights)

                if self.gradient_clipping != 0:
                    input_n = clip_gradient(input_n, self.gradient_clipping)
                    hid_input = clip_gradient(hid_input,
                                              self.gradient_clipping)

                hid_resetgate, hid_updategate, hid_hidden = tf.split(hid_input,
                                                                     3,
                                                                     axis=1)

                in_resetgate, in_updategate, in_hidden = tf.split(input_n,
                                                                  3,
                                                                  axis=1)

                # Reset and update gates
                resetgate = self.activation_functions.resetgate(hid_resetgate +
                                                                in_resetgate)

                updategate = self.activation_functions.updategate(
                    hid_updategate + in_updategate)

                # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1})
                hidden_update = in_hidden + resetgate * hid_hidden

                if self.gradient_clipping != 0:
                    hidden_update = clip_gradient(hidden_update,
                                                  self.gradient_clipping)

                hidden_update = self.activation_functions.hidden_update(
                    hidden_update)

                # Compute (1 - u_t)h_{t - 1} + u_t c_t, rewritten in the
                # algebraically equivalent form h_{t-1} - u_t (h_{t-1} - c_t)
                return [
                    hid_previous - updategate * (hid_previous - hidden_update)
                ]

        hidden_init = tf.tile(self.hidden_init, (n_batch, 1))
        sequence = input_value

        if self.backwards:
            sequence = tf.reverse(sequence, axis=[0])

        if self.unroll_scan:
            # Explicitly unroll the recurrence instead of using scan
            hid_out = unroll_scan(fn=one_gru_step,
                                  sequence=sequence,
                                  outputs_info=[hidden_init])
        else:
            hid_out, = tf.scan(
                fn=one_gru_step,
                # iterate over the (possibly reversed) sequence
                elems=sequence,
                initializer=[hidden_init],
                name='gru-scan',
            )

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            return hid_out[-1]

        # If the scan ran backwards, reverse the output
        if self.backwards:
            hid_out = tf.reverse(hid_out, axis=[0])

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        hid_out = tf.transpose(hid_out, [1, 0, 2])
        return hid_out
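
# The single GRU step above reduces to a few lines of plain NumPy. This
# is a minimal sketch, not part of the layer above: ``gru_step`` and the
# weight names are hypothetical, and the gates are stacked in the same
# (resetgate, updategate, hidden_update) order that the layer uses.
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def gru_step(x_t, h_prev, W_in, W_hid, b):
    # x_t: (n_features,), h_prev: (size,), W_in: (n_features, 3 * size),
    # W_hid: (size, 3 * size), b: (3 * size,)
    in_r, in_u, in_c = np.split(np.dot(x_t, W_in) + b, 3)
    hid_r, hid_u, hid_c = np.split(np.dot(h_prev, W_hid), 3)

    resetgate = sigmoid(in_r + hid_r)
    updategate = sigmoid(in_u + hid_u)
    hidden_update = np.tanh(in_c + resetgate * hid_c)

    # h_t = (1 - u_t) h_{t-1} + u_t c_t
    return h_prev - updategate * (h_prev - hidden_update)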
Example #23
class BaseOptimizer(BaseNetwork):
    """
    Gradient descent algorithm.

    Parameters
    ----------
    network : list, tuple or LayerConnection instance
        Network's architecture. There are a few ways
        to define it.

        - List of layers.
          For instance, ``[Input(2), Tanh(4), Relu(1)]``.

        - Constructed layers.
          For instance, ``Input(2) >> Tanh(4) >> Relu(1)``.

    regularizer : function or None
        Network's regularizer.

    loss : str or function
        Error/loss function. Defaults to ``mse``.

        - ``mae`` - Mean Absolute Error.

        - ``mse`` - Mean Squared Error.

        - ``rmse`` - Root Mean Squared Error.

        - ``msle`` - Mean Squared Logarithmic Error.

        - ``rmsle`` - Root Mean Squared Logarithmic Error.

        - ``categorical_crossentropy`` - Categorical cross entropy.

        - ``binary_crossentropy`` - Binary cross entropy.

        - ``binary_hinge`` - Binary hinge entropy.

        - ``categorical_hinge`` - Categorical hinge entropy.

        - Custom function which accepts two mandatory arguments.
          The first one is expected value and the second one is
          predicted value. Example:

        .. code-block:: python

            def custom_func(expected, predicted):
                return expected - predicted

    step : float, Variable
        Learning rate, defaults to ``0.1``.

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.signals}

    {BaseNetwork.verbose}

    Attributes
    ----------
    {BaseNetwork.Attributes}

    Methods
    -------
    {BaseSkeleton.predict}

    train(X_train, y_train, X_test=None, y_test=None, epochs=100)
        Train the network. The ``epochs`` parameter controls the number
        of training epochs. ``X_test`` and ``y_test`` must be provided
        together whenever validation after each training epoch is
        required.

    {BaseSkeleton.fit}
    """
    step = ScalarVariableProperty(default=0.1)
    target = Property(default=None, allow_none=True)
    regularizer = Property(default=None, allow_none=True)
    loss = FunctionWithOptionsProperty(default='mse', choices={
        'mae': objectives.mae,
        'mse': objectives.mse,
        'rmse': objectives.rmse,
        'msle': objectives.msle,
        'rmsle': objectives.rmsle,
        'binary_crossentropy': objectives.binary_crossentropy,
        'categorical_crossentropy': objectives.categorical_crossentropy,
        'binary_hinge': objectives.binary_hinge,
        'categorical_hinge': objectives.categorical_hinge,
    })

    def __init__(self, network, options=None, **kwargs):
        options = options or kwargs

        if isinstance(network, (list, tuple)):
            network = layers.join(*network)

        self.network = network

        if len(self.network.output_layers) != 1:
            n_outputs = len(network.output_layers)

            raise InvalidConnection("Connection should have one output "
                                    "layer, got {}".format(n_outputs))

        target = options.get('target')
        if target is not None and isinstance(target, (list, tuple)):
            options['target'] = tf.placeholder(tf.float32, shape=target)

        self.target = self.network.targets
        super(BaseOptimizer, self).__init__(**options)

        start_init_time = time.time()
        self.logs.message("TENSORFLOW",
                          "Initializing Tensorflow variables and functions.")

        self.variables = AttributeKeyDict()
        self.functions = AttributeKeyDict()
        # Accessing the property forces the network to create
        # its output tensors before functions get compiled
        self.network.outputs
        self.init_functions()

        self.logs.message(
            "TENSORFLOW",
            "Initialization finished successfully. It took {:.2f} seconds"
            "".format(time.time() - start_init_time))

    def init_train_updates(self):
        raise NotImplementedError()

    def init_functions(self):
        # Training loss uses training-mode outputs (e.g. with dropout
        # enabled), while validation loss uses inference-mode outputs,
        # matching the ``predict`` function defined below
        loss = self.loss(self.target, self.network.training_outputs)
        val_loss = self.loss(self.target, self.network.outputs)

        if self.regularizer is not None:
            loss += self.regularizer(self.network)

        self.variables.update(
            step=self.step,
            loss=loss,
            val_loss=val_loss,
        )

        with tf.name_scope('training-updates'):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

            with tf.control_dependencies(update_ops):
                training_updates = self.init_train_updates()
                training_updates.extend(update_ops)

        tf_utils.initialize_uninitialized_variables()

        self.functions.update(
            predict=tf_utils.function(inputs=as_tuple(self.network.inputs),
                                      outputs=self.network.outputs,
                                      name='optimizer/predict'),
            one_training_update=tf_utils.function(
                inputs=as_tuple(self.network.inputs, self.target),
                outputs=loss,
                updates=training_updates,
                name='optimizer/one-update-step'),
            score=tf_utils.function(inputs=as_tuple(self.network.inputs,
                                                    self.target),
                                    outputs=val_loss,
                                    name='optimizer/score'))

    def format_input(self, X):
        X = as_tuple(X)
        X_formatted = []

        if len(X) != len(self.network.input_layers):
            raise ValueError("Number of inputs doesn't match number "
                             "of input layers in the network.")

        for input, input_layer in zip(X, self.network.input_layers):
            input_shape = tf.TensorShape(input_layer.input_shape)
            is_feature1d = (input_shape.ndims == 2 and input_shape[1] == 1)
            formatted_input = format_data(input, is_feature1d=is_feature1d)

            if (formatted_input.ndim + 1) == input_shape.ndims:
                # We assume that when one dimension is missing, the user
                # wants to propagate a single sample through the network
                formatted_input = np.expand_dims(formatted_input, axis=0)

            X_formatted.append(formatted_input)

        return X_formatted

    def format_target(self, y):
        output_shape = tf.TensorShape(self.network.output_shape)
        is_feature1d = (output_shape.ndims == 2 and output_shape[1] == 1)
        formatted_target = format_data(y, is_feature1d=is_feature1d)

        if (formatted_target.ndim + 1) == len(output_shape):
            # We assume that when one dimension is missing, the user
            # wants to propagate a single sample through the network
            formatted_target = np.expand_dims(formatted_target, axis=0)

        return formatted_target

    def score(self, X, y):
        """
        Calculate prediction accuracy for input data.

        Parameters
        ----------
        X : array-like
        y : array-like

        Returns
        -------
        float
            Prediction error.
        """
        X = self.format_input(X)
        y = self.format_target(y)
        return self.functions.score(*as_tuple(X, y))

    def predict(self, *X, **kwargs):
        """
        Makes a raw prediction.

        Parameters
        ----------
        X : array-like

        Returns
        -------
        array-like
        """
        default_batch_size = getattr(self, 'batch_size', None)
        predict_kwargs = dict(
            batch_size=kwargs.pop('batch_size', default_batch_size),
            verbose=self.verbose,
        )

        # We have to do this check manually for Python 2
        # compatibility, since it lacks keyword-only arguments
        if kwargs:
            raise TypeError("Unknown arguments: {}".format(kwargs))

        return self.network.predict(*self.format_input(X), **predict_kwargs)

    def train(self,
              X_train,
              y_train,
              X_test=None,
              y_test=None,
              *args,
              **kwargs):

        is_test_data_partially_missing = (
            (X_test is None and y_test is not None)
            or (X_test is not None and y_test is None))

        if is_test_data_partially_missing:
            raise ValueError("Input or target test samples are missing. "
                             "They must be defined together or not at all.")

        X_train = self.format_input(X_train)
        y_train = self.format_target(y_train)

        if X_test is not None:
            X_test = self.format_input(X_test)
            y_test = self.format_target(y_test)

        return super(BaseOptimizer, self).train(X_train=X_train,
                                                y_train=y_train,
                                                X_test=X_test,
                                                y_test=y_test,
                                                *args,
                                                **kwargs)

    def one_training_update(self, X_train, y_train):
        return self.functions.one_training_update(*as_tuple(X_train, y_train))

    def get_params(self, deep=False, with_network=True):
        params = super(BaseOptimizer, self).get_params()
        if with_network:
            params['network'] = self.network
        return params

    def __reduce__(self):
        parameters = self.get_params(with_network=False)

        # We only need to know placeholders shape
        # in order to be able to reconstruct it
        parameters['target'] = tf_utils.shape_to_tuple(
            parameters['target'].shape)

        args = (self.network, parameters)
        return (self.__class__, args)

    def __repr__(self):
        return "{}({}, {})".format(self.__class__.__name__, self.network,
                                   self.repr_options())
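
# A brief usage sketch for the custom ``loss`` hook documented above.
# It assumes a TensorFlow-based optimizer such as ``algorithms.Momentum``;
# the function and variable names are hypothetical.
import tensorflow as tf
from neupy import algorithms, layers

def mean_absolute_loss(expected, predicted):
    # A custom loss receives the expected value first and the
    # predicted value second, and must return a scalar tensor
    return tf.reduce_mean(tf.abs(expected - predicted))

network = layers.Input(2) >> layers.Sigmoid(3) >> layers.Sigmoid(1)
optimizer = algorithms.Momentum(network, loss=mean_absolute_loss, step=0.05)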
Example #24
class BaseNetwork(BaseSkeleton):
    """ Base class for Neural Network algorithms.

    Parameters
    ----------
    step : float
        Learning rate, defaults to ``0.1``.
    show_epoch : int or str
        This property controls how often the network displays information
        about training. There are two supported syntaxes. A positive
        integer defines how often you would like to see the summary
        output in the terminal; for instance, ``100`` means that the
        network shows a summary every 100 epochs (epoch 100, 200, 300
        and so on). A string value must contain the number of times that
        the output will be displayed in the terminal, followed by the
        syntax word ``time`` or ``times`` just to make the text readable.
        For instance, ``'2 times'`` means that the network shows output
        twice, with approximately equal periods of epochs, plus one
        additional output after the final epoch.
        Defaults to ``1``.
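
        For instance (a hypothetical sketch; any ``BaseNetwork``
        subclass accepts this option):

        .. code-block:: python

            net = algorithms.GradientDescent(network, show_epoch=100)
            net = algorithms.GradientDescent(network, show_epoch='2 times')
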
    shuffle_data : bool
        If ``True``, all training data gets shuffled before each
        training epoch. Defaults to ``False``.
    epoch_end_signal : function
        Calls this function when train epoch finishes.
    train_end_signal : function
        Calls this function when train process finishes.
    {Verbose.verbose}

    Attributes
    ----------
    errors : ErrorHistoryList
        Contains the list of training errors. This object behaves like a
        regular list and, in addition, provides three useful methods:
        `last`, `previous` and `normalized`.
    train_errors : ErrorHistoryList
        Alias for the `errors` attribute.
    validation_errors : ErrorHistoryList
        The same as the `errors` attribute, but it contains only
        validation errors.
    last_epoch : int
        Value equals to the last trained epoch. After initialization
        it is equal to ``0``.
    """
    step = NumberProperty(default=0.1, minval=0)

    show_epoch = ShowEpochProperty(minval=1, default=1)
    shuffle_data = Property(default=False, expected_type=bool)

    epoch_end_signal = Property(expected_type=types.FunctionType)
    train_end_signal = Property(expected_type=types.FunctionType)

    def __init__(self, *args, **options):
        self.errors = self.train_errors = ErrorHistoryList()
        self.validation_errors = ErrorHistoryList()
        self.training = AttributeKeyDict()
        self.last_epoch = 0

        super(BaseNetwork, self).__init__(*args, **options)
        self.init_properties()

        if self.verbose:
            show_network_options(self, highlight_options=options)

    def init_properties(self):
        """ Set up default values before the options get populated.
        """

    def predict(self, input_data):
        """ Return prediction results for the input data. The output
        includes a post-processing step, related to the final layer,
        that transforms the output into a format convenient for end use.

        Parameters
        ----------
        input_data : array-like

        Returns
        -------
        array-like
        """

    def on_epoch_start_update(self, epoch):
        """ Triggered before all of the training procedures related
        to the current epoch are run.

        Parameters
        ----------
        epoch : int
            Current epoch number.
        """
        self.last_epoch = epoch

    def train_epoch(self, input_train, target_train=None):
        raise NotImplementedError()

    def prediction_error(self, input_test, target_test):
        raise NotImplementedError()

    def train(self, input_train, target_train=None, input_test=None,
              target_test=None, epochs=100, epsilon=None,
              summary_type='table'):
        """ Train the neural network.

        Parameters
        ----------
        input_train : array-like
        target_train : array-like or None
        input_test : array-like or None
        target_test : array-like or None
        epochs : int
            Defaults to `100`.
        epsilon : float or None
            Defaults to ``None``.
        """

        show_epoch = self.show_epoch
        logs = self.logs
        training = self.training = AttributeKeyDict()

        if epochs <= 0:
            raise ValueError("Number of epochs needs to be greater than 0.")

        if epsilon is not None and epochs <= 2:
            raise ValueError("Network should train at teast 3 epochs before "
                             "check the difference between errors")

        if summary_type == 'table':
            logging_info_about_the_data(self, input_train, input_test)
            logging_info_about_training(self, epochs, epsilon)
            logs.newline()

            summary = SummaryTable(
                table_builder=table.TableBuilder(
                    table.Column(name="Epoch #"),
                    table.NumberColumn(name="Train err"),
                    table.NumberColumn(name="Valid err"),
                    table.TimeColumn(name="Time", width=10),
                    stdout=logs.write
                ),
                network=self,
                delay_limit=1.,
                delay_history_length=10,
            )

        elif summary_type == 'inline':
            summary = InlineSummary(network=self)

        else:
            raise ValueError("`{}` is unknown summary type"
                             "".format(summary_type))

        iterepochs = create_training_epochs_iterator(self, epochs, epsilon)
        show_epoch = parse_show_epoch_property(self, epochs, epsilon)
        training.show_epoch = show_epoch

        # Storing attributes and methods in local variables prevents
        # a lot of useless __getattr__ calls inside the loop. These
        # variables speed up the loop when the number of iterations
        # is huge.
        training_errors = self.errors
        validation_errors = self.validation_errors
        shuffle_data = self.shuffle_data

        train_epoch = self.train_epoch
        epoch_end_signal = self.epoch_end_signal
        train_end_signal = self.train_end_signal
        on_epoch_start_update = self.on_epoch_start_update

        is_first_iteration = True
        can_compute_validation_error = (input_test is not None)
        last_epoch_shown = 0

        with logs.disable_user_input():
            for epoch in iterepochs:
                validation_error = np.nan
                epoch_start_time = time.time()
                on_epoch_start_update(epoch)

                if shuffle_data:
                    input_train, target_train = shuffle(input_train,
                                                        target_train)
                try:
                    train_error = train_epoch(input_train, target_train)

                    if can_compute_validation_error:
                        validation_error = self.prediction_error(input_test,
                                                                 target_test)

                    training_errors.append(train_error)
                    validation_errors.append(validation_error)

                    epoch_finish_time = time.time()
                    training.epoch_time = epoch_finish_time - epoch_start_time

                    if epoch % training.show_epoch == 0 or is_first_iteration:
                        summary.show_last()
                        last_epoch_shown = epoch

                    if epoch_end_signal is not None:
                        epoch_end_signal(self)

                    is_first_iteration = False

                except StopNetworkTraining as err:
                    # TODO: This notification breaks table view in terminal.
                    # I need to show it in a different way.
                    logs.message("TRAIN", "Epoch #{} stopped. {}"
                                          "".format(epoch, str(err)))
                    break

            if epoch != last_epoch_shown:
                summary.show_last()

            if train_end_signal is not None:
                train_end_signal(self)

            summary.finish()
            logs.newline()

        logs.message("TRAIN", "Trainig finished")
Example #25
class A(Configurable):
    property_a = Property()
Example #26
def test_property_get_method(self):
    prop = Property(default=3)
    self.assertEqual(None, prop.__get__(None, None))
Example #27
class C(B):
    property_c = Property()
Example #28
class LSTM(BaseRNNLayer):
    """
    Long Short Term Memory (LSTM) Layer.

    Parameters
    ----------
    {BaseRNNLayer.size}

    weights : dict or Initializer
        Weight parameters for different gates.
        Defaults to :class:`XavierUniform() <neupy.init.XavierUniform>`.

        - If the application requires the same initialization method for
          all weights, then it's possible to specify an initialization
          method that will be automatically applied to all weight
          parameters in the LSTM layer.

          .. code-block:: python

              layers.LSTM(2, weights=init.Normal(0.1))

        - If the application requires different initialization values
          for different weights, then it's possible to specify an exact
          weight by name.

          .. code-block:: python

              dict(
                  weight_in_to_ingate=init.XavierUniform(),
                  weight_hid_to_ingate=init.XavierUniform(),
                  weight_cell_to_ingate=init.XavierUniform(),

                  weight_in_to_forgetgate=init.XavierUniform(),
                  weight_hid_to_forgetgate=init.XavierUniform(),
                  weight_cell_to_forgetgate=init.XavierUniform(),

                  weight_in_to_outgate=init.XavierUniform(),
                  weight_hid_to_outgate=init.XavierUniform(),
                  weight_cell_to_outgate=init.XavierUniform(),

                  weight_in_to_cell=init.XavierUniform(),
                  weight_hid_to_cell=init.XavierUniform(),
              )

          If the application requires modification to only one (or a few)
          parameters, then it's better to specify only the ones that you
          need to modify and omit the others.

          .. code-block:: python

              dict(weight_in_to_ingate=init.Normal(0.1))

          Other parameters like ``weight_cell_to_outgate`` will be
          equal to their default values.

    biases : dict or Initializer
        Bias parameters for different gates.
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

        - If the application requires the same initialization method for
          all biases, then it's possible to specify an initialization
          method that will be automatically applied to all bias
          parameters in the LSTM layer.

          .. code-block:: python

              layers.LSTM(2, biases=init.Constant(1))

        - If the application requires different initialization values
          for different biases, then it's possible to specify an exact
          bias by name.

          .. code-block:: python

              dict(
                  bias_ingate=init.Constant(0),
                  bias_forgetgate=init.Constant(0),
                  bias_cell=init.Constant(0),
                  bias_outgate=init.Constant(0),
              )

          If the application requires modification to only one (or a few)
          parameters, then it's better to specify only the ones that you
          need to modify and omit the others.

          .. code-block:: python

              dict(bias_ingate=init.Constant(1))

          Other parameters like ``bias_cell`` will be
          equal to their default values.

    activation_functions : dict, callable
        Activation functions for different gates. Defaults to:

        .. code-block:: python

            # import theano.tensor as T
            dict(
                ingate=T.nnet.sigmoid,
                forgetgate=T.nnet.sigmoid,
                outgate=T.nnet.sigmoid,
                cell=T.tanh,
            )

        If the application requires modification to only one parameter,
        then it's better to specify only the one that you need to modify
        and omit the others.

        .. code-block:: python

            dict(ingate=T.tanh)

        Other parameters like ``forgetgate`` or ``outgate`` will be
        equal to their default values.
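
        For instance (a hedged sketch; the layer size is arbitrary):

        .. code-block:: python

            layers.LSTM(20, activation_functions=dict(ingate=T.tanh))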

    learn_init : bool
        If ``True``, make ``cell_init`` and ``hid_init`` trainable
        variables. Defaults to ``False``.

    cell_init : array-like, Theano variable, scalar or Initializer
        Initializer for initial cell state (:math:`c_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    hid_init : array-like, Theano variable, scalar or Initializer
        Initializer for initial hidden state (:math:`h_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    backwards : bool
        If ``True``, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`. Defaults to ``False``.

    {BaseRNNLayer.only_return_final}

    precompute_input : bool
        If ``True``, precompute ``input_to_hid`` before iterating
        through the sequence. This can result in a speed up at the
        expense of an increase in memory usage.
        Defaults to ``True``.

    peepholes : bool
        If ``True``, the LSTM uses peephole connections.
        When ``False``, the cell parameters are ignored.
        Defaults to ``False``.

    unroll_scan : bool
        If ``True`` the recursion is unrolled instead of using scan.
        For some graphs this gives a significant speed up but it
        might also consume more memory. When ``unroll_scan=True``,
        backpropagation always includes the full sequence, so
        ``n_gradient_steps`` must be set to ``-1`` and the input
        sequence length must be known at compile time (i.e.,
        cannot be given as ``None``). Defaults to ``False``.

    gradient_clipping : float or int
        If nonzero, the gradient messages are clipped to the
        given value during the backward pass. Defaults to ``0``.

    n_gradient_steps : int
        Number of timesteps to include in the backpropagated gradient.
        If ``-1``, backpropagate through the entire sequence.
        Defaults to ``-1``.

    {BaseLayer.Parameters}

    Notes
    -----
    Code was adapted from the
    `Lasagne <https://github.com/Lasagne/Lasagne>`_ library.

    Examples
    --------

    Sequence classification

    .. code-block:: python

        from neupy import layers, algorithms

        n_time_steps = 40
        n_categories = 20
        embedded_size = 10

        network = algorithms.RMSProp(
            [
                layers.Input(n_time_steps),
                layers.Embedding(n_categories, embedded_size),
                layers.LSTM(20),
                layers.Sigmoid(1),
            ]
        )
    """
    weights = MultiParameterProperty(
        default=dict(
            weight_in_to_ingate=init.XavierUniform(),
            weight_hid_to_ingate=init.XavierUniform(),
            weight_cell_to_ingate=init.XavierUniform(),

            weight_in_to_forgetgate=init.XavierUniform(),
            weight_hid_to_forgetgate=init.XavierUniform(),
            weight_cell_to_forgetgate=init.XavierUniform(),

            weight_in_to_outgate=init.XavierUniform(),
            weight_hid_to_outgate=init.XavierUniform(),
            weight_cell_to_outgate=init.XavierUniform(),

            weight_in_to_cell=init.XavierUniform(),
            weight_hid_to_cell=init.XavierUniform(),
        ))
    biases = MultiParameterProperty(
        default=dict(
            bias_ingate=init.Constant(0),
            bias_forgetgate=init.Constant(0),
            bias_cell=init.Constant(0),
            bias_outgate=init.Constant(0),
        ))
    activation_functions = MultiCallableProperty(
        default=dict(
            ingate=T.nnet.sigmoid,
            forgetgate=T.nnet.sigmoid,
            outgate=T.nnet.sigmoid,
            cell=T.tanh,
        ))

    learn_init = Property(default=False, expected_type=bool)
    cell_init = ParameterProperty(default=init.Constant(0))
    hid_init = ParameterProperty(default=init.Constant(0))

    unroll_scan = Property(default=False, expected_type=bool)
    backwards = Property(default=False, expected_type=bool)
    precompute_input = Property(default=True, expected_type=bool)
    peepholes = Property(default=False, expected_type=bool)

    n_gradient_steps = IntProperty(default=-1)
    gradient_clipping = NumberProperty(default=0, minval=0)

    def initialize(self):
        super(LSTM, self).initialize()

        n_inputs = np.prod(self.input_shape[1:])
        weights = self.weights
        biases = self.biases

        # Input gate parameters
        self.weight_in_to_ingate = self.add_parameter(
            value=weights.weight_in_to_ingate,
            name='weight_in_to_ingate',
            shape=(n_inputs, self.size))
        self.weight_hid_to_ingate = self.add_parameter(
            value=weights.weight_hid_to_ingate,
            name='weight_hid_to_ingate',
            shape=(self.size, self.size))
        self.bias_ingate = self.add_parameter(
            value=biases.bias_ingate, name='bias_ingate',
            shape=(self.size,))

        # Forget gate parameters
        self.weight_in_to_forgetgate = self.add_parameter(
            value=weights.weight_in_to_forgetgate,
            name='weight_in_to_forgetgate',
            shape=(n_inputs, self.size))
        self.weight_hid_to_forgetgate = self.add_parameter(
            value=weights.weight_hid_to_forgetgate,
            name='weight_hid_to_forgetgate',
            shape=(self.size, self.size))
        self.bias_forgetgate = self.add_parameter(
            value=biases.bias_forgetgate, name='bias_forgetgate',
            shape=(self.size,))

        # Cell parameters
        self.weight_in_to_cell = self.add_parameter(
            value=weights.weight_in_to_cell,
            name='weight_in_to_cell',
            shape=(n_inputs, self.size))
        self.weight_hid_to_cell = self.add_parameter(
            value=weights.weight_hid_to_cell,
            name='weight_hid_to_cell',
            shape=(self.size, self.size))
        self.bias_cell = self.add_parameter(
            value=biases.bias_cell, name='bias_cell',
            shape=(self.size,))

        # If peephole (cell to gate) connections were enabled, initialize
        # peephole connections.  These are elementwise products with the cell
        # state, so they are represented as vectors.
        if self.peepholes:
            self.weight_cell_to_ingate = self.add_parameter(
                value=weights.weight_cell_to_ingate,
                name='weight_cell_to_ingate',
                shape=(self.size,))
            self.weight_cell_to_forgetgate = self.add_parameter(
                value=weights.weight_cell_to_forgetgate,
                name='weight_cell_to_forgetgate',
                shape=(self.size,))
            self.weight_cell_to_outgate = self.add_parameter(
                value=weights.weight_cell_to_outgate,
                name='weight_cell_to_outgate',
                shape=(self.size,))

        # Output gate parameters
        self.weight_in_to_outgate = self.add_parameter(
            value=weights.weight_in_to_outgate,
            name='weight_in_to_outgate',
            shape=(n_inputs, self.size))
        self.weight_hid_to_outgate = self.add_parameter(
            value=weights.weight_hid_to_outgate,
            name='weight_hid_to_outgate',
            shape=(self.size, self.size))
        self.bias_outgate = self.add_parameter(
            value=biases.bias_outgate, name='bias_outgate',
            shape=(self.size,))

        # Initialization parameters
        self.add_parameter(value=self.cell_init, shape=(1, self.size),
                           name="cell_init", trainable=self.learn_init)
        self.add_parameter(value=self.hid_init, shape=(1, self.size),
                           name="hid_init", trainable=self.learn_init)

    def output(self, input_value):
        # Treat all dimensions after the second as flattened
        # feature dimensions
        if input_value.ndim > 3:
            input_value = T.flatten(input_value, 3)

        # Because scan iterates over the first dimension we
        # dimshuffle to (n_time_steps, n_batch, n_features)
        input_value = input_value.dimshuffle(1, 0, 2)
        seq_len, n_batch, _ = input_value.shape

        # Stack input weight matrices into a (num_inputs, 4 * num_units)
        # matrix, which speeds up computation
        weight_in_stacked = T.concatenate([
            self.weight_in_to_ingate,
            self.weight_in_to_forgetgate,
            self.weight_in_to_cell,
            self.weight_in_to_outgate], axis=1)

        # Same for hidden weight matrices
        weight_hid_stacked = T.concatenate([
            self.weight_hid_to_ingate,
            self.weight_hid_to_forgetgate,
            self.weight_hid_to_cell,
            self.weight_hid_to_outgate], axis=1)

        # Stack biases into a (4 * num_units) vector
        bias_stacked = T.concatenate([
            self.bias_ingate,
            self.bias_forgetgate,
            self.bias_cell,
            self.bias_outgate], axis=0)

        if self.precompute_input:
            # Because the input is given for all time steps, we can
            # precompute the product with the input weights before
            # scanning. weight_in_stacked is (n_features, 4 * num_units),
            # so the result is (n_time_steps, n_batch, 4 * num_units).
            input_value = T.dot(input_value, weight_in_stacked) + bias_stacked

        # When theano.scan calls step, input_n will be
        # (n_batch, 4 * num_units). We define a slicing function
        # that extracts the input to each LSTM gate
        def slice_w(x, n):
            return x[:, n * self.size:(n + 1) * self.size]

        def one_lstm_step(input_n, cell_previous, hid_previous, *args):
            if not self.precompute_input:
                input_n = T.dot(input_n, weight_in_stacked) + bias_stacked

            # Calculate gates pre-activations and slice
            gates = input_n + T.dot(hid_previous, weight_hid_stacked)

            # Clip gradients
            if self.gradient_clipping:
                gates = theano.gradient.grad_clip(
                    gates, -self.gradient_clipping, self.gradient_clipping)

            # Extract the pre-activation gate values
            ingate = slice_w(gates, 0)
            forgetgate = slice_w(gates, 1)
            cell_input = slice_w(gates, 2)
            outgate = slice_w(gates, 3)

            if self.peepholes:
                # Compute peephole connections
                ingate += cell_previous * self.weight_cell_to_ingate
                forgetgate += cell_previous * self.weight_cell_to_forgetgate

            # Apply nonlinearities
            ingate = self.activation_functions.ingate(ingate)
            forgetgate = self.activation_functions.forgetgate(forgetgate)
            cell_input = self.activation_functions.cell(cell_input)

            # Compute new cell value
            cell = forgetgate * cell_previous + ingate * cell_input

            if self.peepholes:
                outgate += cell * self.weight_cell_to_outgate

            outgate = self.activation_functions.outgate(outgate)

            # Compute new hidden unit activation
            hid = outgate * T.tanh(cell)
            return [cell, hid]

        ones = T.ones((n_batch, 1))
        cell_init = T.dot(ones, self.cell_init)
        hid_init = T.dot(ones, self.hid_init)

        non_sequences = [weight_hid_stacked]
        # When we aren't precomputing the input outside of scan, we need to
        # provide the input weights and biases to the step function
        if not self.precompute_input:
            non_sequences += [weight_in_stacked, bias_stacked]

        # The "peephole" weight matrices are only used
        # when self.peepholes=True
        if self.peepholes:
            non_sequences += [self.weight_cell_to_ingate,
                              self.weight_cell_to_forgetgate,
                              self.weight_cell_to_outgate]

        if self.unroll_scan:
            # Retrieve the dimensionality of the incoming layer
            n_time_steps = self.input_shape[0]

            # Explicitly unroll the recurrence instead of using scan
            _, hid_out = unroll_scan(
                fn=one_lstm_step,
                sequences=[input_value],
                outputs_info=[cell_init, hid_init],
                go_backwards=self.backwards,
                non_sequences=non_sequences,
                n_steps=n_time_steps)

        else:
            (_, hid_out), _ = theano.scan(
                fn=one_lstm_step,
                sequences=input_value,
                outputs_info=[cell_init, hid_init],
                go_backwards=self.backwards,
                truncate_gradient=self.n_gradient_steps,
                non_sequences=non_sequences,
                strict=True)

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            return hid_out[-1]

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        hid_out = hid_out.dimshuffle(1, 0, 2)

        # If the scan ran backwards, reverse the output
        if self.backwards:
            hid_out = hid_out[:, ::-1]

        return hid_out
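
# As with the GRU, the single LSTM step above can be summarized in plain
# NumPy. A minimal sketch without peephole connections: ``lstm_step`` and
# the weight names are hypothetical, and the gates are stacked in the
# same (ingate, forgetgate, cell, outgate) order that the layer uses.
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def lstm_step(x_t, c_prev, h_prev, W_in, W_hid, b):
    # W_in: (n_features, 4 * size), W_hid: (size, 4 * size), b: (4 * size,)
    gates = np.dot(x_t, W_in) + np.dot(h_prev, W_hid) + b
    in_gate, forget_gate, cell_in, out_gate = np.split(gates, 4)

    ingate = sigmoid(in_gate)
    forgetgate = sigmoid(forget_gate)
    cell_input = np.tanh(cell_in)
    outgate = sigmoid(out_gate)

    # c_t = f_t * c_{t-1} + i_t * cell_input
    cell = forgetgate * c_prev + ingate * cell_input
    # h_t = o_t * tanh(c_t)
    hid = outgate * np.tanh(cell)
    return cell, hid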
Example #29
class A(Configurable):
    correct_property = Property()
Example #30
class GRU(BaseRNNLayer):
    """
    Gated Recurrent Unit (GRU) Layer.

    Parameters
    ----------
    {BaseRNNLayer.size}

    weights : dict or Initializer
        Weight parameters for different gates.
        Defaults to :class:`XavierUniform() <neupy.init.XavierUniform>`.

        - If the application requires the same initialization method for
          all weights, then it's possible to specify an initialization
          method that will be automatically applied to all weight
          parameters in the GRU layer.

          .. code-block:: python

              layers.GRU(2, weights=init.Normal(0.1))

        - If the application requires different initialization values
          for different weights, then it's possible to specify an exact
          weight by name.

          .. code-block:: python

              dict(
                  weight_in_to_updategate=init.XavierUniform(),
                  weight_hid_to_updategate=init.XavierUniform(),

                  weight_in_to_resetgate=init.XavierUniform(),
                  weight_hid_to_resetgate=init.XavierUniform(),

                  weight_in_to_hidden_update=init.XavierUniform(),
                  weight_hid_to_hidden_update=init.XavierUniform(),
              )

          If the application requires modification to only one (or a few)
          parameters, then it's better to specify only the ones that you
          need to modify and omit the others.

          .. code-block:: python

              dict(weight_in_to_updategate=init.Normal(0.1))

          Other parameters like ``weight_in_to_resetgate`` will be
          equal to their default values.

    biases : dict or Initializer
        Bias parameters for different gates.
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

        - If the application requires the same initialization method for
          all biases, then it's possible to specify an initialization
          method that will be automatically applied to all bias
          parameters in the GRU layer.

          .. code-block:: python

              layers.GRU(2, biases=init.Constant(1))

        - If the application requires different initialization values
          for different biases, then it's possible to specify an exact
          bias by name.

          .. code-block:: python

              dict(
                  bias_updategate=init.Constant(0),
                  bias_resetgate=init.Constant(0),
                  bias_hidden_update=init.Constant(0),
              )

          If the application requires modification to only one (or a few)
          parameters, then it's better to specify only the ones that you
          need to modify and omit the others.

          .. code-block:: python

              dict(bias_resetgate=init.Constant(1))

          Other parameters like ``bias_updategate`` will be
          equal to their default values.

    activation_functions : dict, callable
        Activation functions for different gates. Defaults to:

        .. code-block:: python

            # import theano.tensor as T
            dict(
                resetgate=T.nnet.sigmoid,
                updategate=T.nnet.sigmoid,
                hidden_update=T.tanh,
            )

        If the application requires modification to only one parameter,
        then it's better to specify only the one that you need to modify
        and omit the others.

        .. code-block:: python

            dict(resetgate=T.tanh)

        Other parameters like ``updategate`` or ``hidden_update``
        will be equal to their default values.

    learn_init : bool
        If ``True``, make ``hid_init`` a trainable variable.
        Defaults to ``False``.

    hid_init : array-like, Theano variable, scalar or Initializer
        Initializer for initial hidden state (:math:`h_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    {BaseRNNLayer.only_return_final}

    backwards : bool
        If ``True``, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`. Defaults to ``False``.

    precompute_input : bool
        If ``True``, precompute ``input_to_hid`` before iterating
        through the sequence. This can result in a speed up at the
        expense of an increase in memory usage.
        Defaults to ``True``.

    unroll_scan : bool
        If ``True`` the recursion is unrolled instead of using scan.
        For some graphs this gives a significant speed up but it
        might also consume more memory. When ``unroll_scan=True``,
        backpropagation always includes the full sequence, so
        ``n_gradient_steps`` must be set to ``-1`` and the input
        sequence length must be known at compile time (i.e.,
        cannot be given as ``None``). Defaults to ``False``.
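
        Conceptually (a sketch), unrolling replaces ``scan`` with an
        explicit loop over a fixed number of time steps:

        .. code-block:: python

            hid = hid_init
            for t in range(n_time_steps):
                hid = one_gru_step(input_value[t], hid)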

    {BaseLayer.Parameters}

    Notes
    -----
    Code was adapted from the
    `Lasagne <https://github.com/Lasagne/Lasagne>`_ library.

    Examples
    --------

    Sequence classification

    .. code-block:: python

        from neupy import layers, algorithms

        n_time_steps = 40
        n_categories = 20
        embedded_size = 10

        network = algorithms.RMSProp(
            [
                layers.Input(n_time_steps),
                layers.Embedding(n_categories, embedded_size),
                layers.GRU(20),
                layers.Sigmoid(1),
            ]
        )
    """
    weights = MultiParameterProperty(
        default=dict(
            weight_in_to_updategate=init.XavierUniform(),
            weight_hid_to_updategate=init.XavierUniform(),

            weight_in_to_resetgate=init.XavierUniform(),
            weight_hid_to_resetgate=init.XavierUniform(),

            weight_in_to_hidden_update=init.XavierUniform(),
            weight_hid_to_hidden_update=init.XavierUniform(),
        ))
    biases = MultiParameterProperty(
        default=dict(
            bias_updategate=init.Constant(0),
            bias_resetgate=init.Constant(0),
            bias_hidden_update=init.Constant(0),
        ))
    activation_functions = MultiCallableProperty(
        default=dict(
            resetgate=T.nnet.sigmoid,
            updategate=T.nnet.sigmoid,
            hidden_update=T.tanh,
        ))

    learn_init = Property(default=False, expected_type=bool)
    hid_init = ParameterProperty(default=init.Constant(0))

    backwards = Property(default=False, expected_type=bool)
    unroll_scan = Property(default=False, expected_type=bool)
    precompute_input = Property(default=True, expected_type=bool)

    n_gradient_steps = IntProperty(default=-1)
    gradient_clipping = NumberProperty(default=0, minval=0)

    def initialize(self):
        super(GRU, self).initialize()

        n_inputs = np.prod(self.input_shape[1:])
        weights = self.weights
        biases = self.biases

        # Update gate parameters
        self.weight_in_to_updategate = self.add_parameter(
            value=weights.weight_in_to_updategate,
            name='weight_in_to_updategate',
            shape=(n_inputs, self.size))
        self.weight_hid_to_updategate = self.add_parameter(
            value=weights.weight_hid_to_updategate,
            name='weight_hid_to_updategate',
            shape=(self.size, self.size))
        self.bias_updategate = self.add_parameter(
            value=biases.bias_updategate, name='bias_updategate',
            shape=(self.size,))

        # Reset gate parameters
        self.weight_in_to_resetgate = self.add_parameter(
            value=weights.weight_in_to_resetgate,
            name='weight_in_to_resetgate',
            shape=(n_inputs, self.size))
        self.weight_hid_to_resetgate = self.add_parameter(
            value=weights.weight_hid_to_resetgate,
            name='weight_hid_to_resetgate',
            shape=(self.size, self.size))
        self.bias_resetgate = self.add_parameter(
            value=biases.bias_resetgate, name='bias_resetgate',
            shape=(self.size,))

        # Hidden update gate parameters
        self.weight_in_to_hidden_update = self.add_parameter(
            value=weights.weight_in_to_hidden_update,
            name='weight_in_to_hidden_update',
            shape=(n_inputs, self.size))
        self.weight_hid_to_hidden_update = self.add_parameter(
            value=weights.weight_hid_to_hidden_update,
            name='weight_hid_to_hidden_update',
            shape=(self.size, self.size))
        self.bias_hidden_update = self.add_parameter(
            value=biases.bias_hidden_update, name='bias_hidden_update',
            shape=(self.size,))

        self.add_parameter(value=self.hid_init, shape=(1, self.size),
                           name="hid_init", trainable=self.learn_init)

    def output(self, input_value):
        # Treat all dimensions after the second as flattened
        # feature dimensions
        if input_value.ndim > 3:
            input_value = T.flatten(input_value, 3)

        # Because scan iterates over the first dimension we
        # dimshuffle to (n_time_steps, n_batch, n_features)
        input_value = input_value.dimshuffle(1, 0, 2)
        seq_len, n_batch, _ = input_value.shape

        # Stack input weight matrices into a (num_inputs, 3 * num_units)
        # matrix, which speeds up computation
        weight_in_stacked = T.concatenate([
            self.weight_in_to_updategate,
            self.weight_in_to_resetgate,
            self.weight_in_to_hidden_update], axis=1)

        # Same for hidden weight matrices
        weight_hid_stacked = T.concatenate([
            self.weight_hid_to_updategate,
            self.weight_hid_to_resetgate,
            self.weight_hid_to_hidden_update], axis=1)

        # Stack biases into a (3 * num_units) vector
        bias_stacked = T.concatenate([
            self.bias_updategate,
            self.bias_resetgate,
            self.bias_hidden_update], axis=0)

        if self.precompute_input:
            # Because the input is given for all time steps, we can
            # precompute the product with the input weights before
            # scanning. weight_in_stacked is (n_features, 3 * num_units),
            # so the result is (n_time_steps, n_batch, 3 * num_units).
            input_value = T.dot(input_value, weight_in_stacked) + bias_stacked

        # When theano.scan calls step, input_n will be
        # (n_batch, 3 * num_units). We define a slicing function
        # that extracts the input to each GRU gate
        def slice_w(x, n):
            s = x[:, n * self.size:(n + 1) * self.size]
            if self.size == 1:
                s = T.addbroadcast(s, 1)  # Theano cannot infer this by itself
            return s

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def one_gru_step(input_n, hid_previous, *args):
            # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1},
            # and W_{hc} h_{t - 1}
            hid_input = T.dot(hid_previous, weight_hid_stacked)

            if self.gradient_clipping:
                input_n = theano.gradient.grad_clip(
                    input_n,
                    -self.gradient_clipping,
                    self.gradient_clipping)

                hid_input = theano.gradient.grad_clip(
                    hid_input,
                    -self.gradient_clipping,
                    self.gradient_clipping)

            if not self.precompute_input:
                # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u,
                # and W_{xc}x_t + b_c
                input_n = T.dot(input_n, weight_in_stacked) + bias_stacked

            # Reset and update gates
            resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
            resetgate = self.activation_functions.resetgate(resetgate)

            updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
            updategate = self.activation_functions.updategate(updategate)

            # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1})
            hidden_update_in = slice_w(input_n, 2)
            hidden_update_hid = slice_w(hid_input, 2)
            hidden_update = hidden_update_in + resetgate * hidden_update_hid

            if self.gradient_clipping:
                hidden_update = theano.gradient.grad_clip(
                    hidden_update,
                    -self.gradient_clipping,
                    self.gradient_clipping)

            hidden_update = self.activation_functions.hidden_update(
                hidden_update)

            # Compute (1 - u_t)h_{t - 1} + u_t c_t
            hid = (1 - updategate) * hid_previous + updategate * hidden_update
            return hid

        hid_init = T.dot(T.ones((n_batch, 1)), self.hid_init)

        # The hidden-to-hidden weight matrix is always used in step
        non_sequences = [weight_hid_stacked]

        # When we aren't precomputing the input outside of scan, we need to
        # provide the input weights and biases to the step function
        if not self.precompute_input:
            non_sequences += [weight_in_stacked, bias_stacked]

        if self.unroll_scan:
            # Retrieve the dimensionality of the incoming layer
            n_time_steps = self.input_shape[0]

            # Explicitly unroll the recurrence instead of using scan
            hid_out, = unroll_scan(
                fn=one_gru_step,
                sequences=[input_value],
                outputs_info=[hid_init],
                go_backwards=self.backwards,
                non_sequences=non_sequences,
                n_steps=n_time_steps)

        else:
            # Scan op iterates over first dimension of input and
            # repeatedly applies the step function
            hid_out, _ = theano.scan(
                fn=one_gru_step,
                sequences=[input_value],
                outputs_info=[hid_init],
                go_backwards=self.backwards,
                non_sequences=non_sequences,
                truncate_gradient=self.n_gradient_steps,
                strict=True)

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            return hid_out[-1]

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        hid_out = hid_out.dimshuffle(1, 0, 2)

        # If the scan ran backwards, reverse the output
        if self.backwards:
            hid_out = hid_out[:, ::-1]

        return hid_out
Example #31
class B(Configurable):
    property_b = Property()
Example #32
    def test_property_repr_with_name(self):
        prop = Property(default=3)
        prop.name = 'test'

        self.assertEqual('Property(name="test")', repr(prop))