Example 1
class RBFKMeans(NoStepSelection, UnsupervisedLearningMixin, BaseNetwork):
    """
    Radial basis function K-means for clustering.

    Parameters
    ----------
    n_clusters : int
        Number of clusters in the dataset.
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Attributes
    ----------
    centers : numpy array [n_clusters, n_features]
        After training, this attribute contains the coordinates
        of the cluster centers.

    Methods
    -------
    {UnsupervisedLearningMixin.train}
    {BaseSkeleton.predict}
    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy.algorithms import RBFKMeans
    >>>
    >>> data = np.array([
    ...     [0.11, 0.20],
    ...     [0.25, 0.32],
    ...     [0.64, 0.60],
    ...     [0.12, 0.42],
    ...     [0.70, 0.73],
    ...     [0.30, 0.27],
    ...     [0.43, 0.81],
    ...     [0.44, 0.87],
    ...     [0.12, 0.92],
    ...     [0.56, 0.67],
    ...     [0.36, 0.35],
    ... ])
    >>> rbfk_net = RBFKMeans(n_clusters=2, verbose=False)
    >>> rbfk_net.train(data, epsilon=1e-5)
    >>> rbfk_net.centers
    array([[ 0.228     ,  0.312     ],
           [ 0.48166667,  0.76666667]])
    >>>
    >>> new_data = np.array([[0.1, 0.1], [0.9, 0.9]])
    >>> rbfk_net.predict(new_data)
    array([[ 0.],
           [ 1.]])
    """
    n_clusters = IntProperty(minval=2)

    def __init__(self, **options):
        self.centers = None
        super(RBFKMeans, self).__init__(**options)

    def predict(self, input_data):
        input_data = format_data(input_data)

        centers = self.centers
        classes = zeros((input_data.shape[0], 1))

        for i, value in enumerate(input_data):
            classes[i] = argmin(norm(centers - value, axis=1))

        return classes

    def train_epoch(self, input_train, target_train):
        centers = self.centers
        old_centers = centers.copy()
        output_train = self.predict(input_train)

        for i, center in enumerate(centers):
            positions = argwhere(output_train[:, 0] == i)

            if not np_any(positions):
                continue

            class_data = take(input_train, positions, axis=0)
            centers[i, :] = (1 / len(class_data)) * np_sum(class_data, axis=0)

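        # Report the element-wise shift of the centers during
        # this epoch.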
        return np_abs(old_centers - centers)

    def train(self, input_train, epsilon=1e-5, epochs=100):
        n_clusters = self.n_clusters
        input_train = format_data(input_train)

        if input_train.shape[0] <= n_clusters:
            raise ValueError("Count of clusters must be less than count of "
                             "input data.")

        self.centers = input_train[:n_clusters, :].copy()
        super(RBFKMeans, self).train(input_train,
                                     epsilon=epsilon,
                                     epochs=epochs)
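
The update performed by ``train_epoch`` above is the classic k-means step:
assign every sample to its nearest center, then move each center to the mean
of its assigned samples. A minimal standalone NumPy sketch of that step (the
``kmeans_step`` helper is hypothetical, not part of neupy):

.. code-block:: python

    import numpy as np

    def kmeans_step(data, centers):
        # Assign each sample to the closest center (Euclidean distance).
        distances = np.linalg.norm(
            data[:, None, :] - centers[None, :, :], axis=2)
        labels = distances.argmin(axis=1)

        new_centers = centers.copy()
        for i in range(len(centers)):
            members = data[labels == i]
            if len(members):  # skip empty clusters, like train_epoch does
                new_centers[i] = members.mean(axis=0)

        # Element-wise center shift, the convergence signal used above.
        return new_centers, np.abs(new_centers - centers)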
Example 2
class GrowingNeuralGas(BaseNetwork):
    """
    Growing Neural Gas (GNG) algorithm.

    This implementation has two modifications that are not mentioned
    in the paper, but they help to speed up training.

    - The ``n_start_nodes`` parameter makes it possible to increase the
      number of nodes during the initialization step. It's useful when
      the algorithm spends a lot of time building up a large number
      of neurons.

    - The ``min_distance_for_update`` parameter speeds up training when
      some data samples already have neurons very close to them. It
      controls the minimum distance below which weight updates
      are skipped.

    Parameters
    ----------
    n_inputs : int
        Number of features in each sample.

    n_start_nodes : int
        Number of nodes that the algorithm samples from the data during
        the initialization step. Defaults to ``2``.

    step : float
        Step (learning rate) for the neuron winner. Defaults to ``0.2``.

    neighbour_step : float
        Step (learning rate) for the neurons connected by an edge to
        the winning neuron. This value typically has to be smaller than
        the ``step`` value. Defaults to ``0.05``.

    max_edge_age : int
        An edge that hasn't been updated for ``max_edge_age`` iterations
        gets removed. The larger the value, the more updates are allowed
        before an edge is removed. Defaults to ``100``.

    n_iter_before_neuron_added : int
        The algorithm adds a new neuron after every
        ``n_iter_before_neuron_added`` weight updates. The smaller the
        value, the more frequently new neurons are added to the
        network. Defaults to ``1000``.

    error_decay_rate : float
        This decay rate is applied to every neuron in the graph after
        each training iteration, ensuring that old errors fade over
        time. Defaults to ``0.995``.

    after_split_error_decay_rate : float
        This decay rate reduces the error of the neurons with the
        largest errors after a new neuron has been added. This value is
        typically lower than ``error_decay_rate``. Defaults to ``0.5``.

    max_nodes : int
        Maximum number of nodes generated during training. Reaching
        this limit does not stop training; it only stops new nodes
        from being added. Defaults to ``1000``.

    min_distance_for_update : float
        Controls which samples trigger updates. If the Euclidean
        distance between a data sample and its closest neuron is less
        than the ``min_distance_for_update`` value, the update is
        skipped for that sample. Setting the value to zero disables
        this behaviour. Defaults to ``0``.

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.signals}

    {Verbose.verbose}

    Methods
    -------
    train(X_train, epochs=100)
        The network learns the topological structure of the data.
        The learned structure is stored in the ``graph`` attribute.

    {BaseSkeleton.fit}

    initialize_nodes(data)
        Network initializes nodes by randomly sampling ``n_start_nodes``
        data points. It's applied automatically before training
        when the graph is empty.

        Note: re-initializing the nodes resets the network.

    Notes
    -----
    - Unlike other algorithms, this network doesn't make predictions.
      Instead, it learns the topological structure of the data in the
      form of a graph. After training, the structure of the network
      can be extracted from the ``graph`` attribute.

    - In order to speed up training, it might be useful to increase
      the ``n_start_nodes`` parameter.

    - During training it can happen that nodes learn the topological
      structure of one part of the data better than another, mostly
      because of differing sample density in different places.
      Increasing ``min_distance_for_update`` can speed up training by
      ignoring updates for neurons that are already very close to a
      data sample (closer than the specified
      ``min_distance_for_update`` value). Training stops automatically
      when none of the neurons was updated during a training epoch.

    Attributes
    ----------
    graph : NeuralGasGraph instance
        This attribute stores all neurons and connections between them
        in the form of undirected graph.

    {BaseNetwork.Attributes}

    Examples
    --------
    >>> from neupy import algorithms
    >>> from sklearn.datasets import make_blobs
    >>>
    >>> data, _ = make_blobs(
    ...     n_samples=1000,
    ...     n_features=2,
    ...     centers=2,
    ...     cluster_std=0.4,
    ... )
    >>>
    >>> neural_gas = algorithms.GrowingNeuralGas(
    ...     n_inputs=2,
    ...     shuffle_data=True,
    ...     verbose=True,
    ...     max_edge_age=10,
    ...     n_iter_before_neuron_added=50,
    ...     max_nodes=100,
    ... )
    >>> neural_gas.graph.n_nodes
    100
    >>> len(neural_gas.graph.edges)
    175
    >>> edges = list(neural_gas.graph.edges.keys())
    >>> neuron_1, neuron_2 = edges[0]
    >>>
    >>> neuron_1.weight
    array([[-6.77166299,  2.4121606 ]])
    >>> neuron_2.weight
    array([[-6.829309  ,  2.27839633]])

    References
    ----------
    [1] A Growing Neural Gas Network Learns Topologies, Bernd Fritzke
    """
    n_inputs = IntProperty(minval=1, required=True)
    n_start_nodes = IntProperty(minval=2, default=2)

    step = NumberProperty(default=0.2, minval=0)
    neighbour_step = NumberProperty(default=0.05, minval=0)
    max_edge_age = IntProperty(default=100, minval=1)
    max_nodes = IntProperty(default=1000, minval=1)

    n_iter_before_neuron_added = IntProperty(default=1000, minval=1)
    after_split_error_decay_rate = ProperFractionProperty(default=0.5)
    error_decay_rate = ProperFractionProperty(default=0.995)
    min_distance_for_update = NumberProperty(default=0.0, minval=0)

    def __init__(self, *args, **kwargs):
        super(GrowingNeuralGas, self).__init__(*args, **kwargs)
        self.n_updates = 0
        self.graph = NeuralGasGraph()

    def format_input_data(self, X):
        is_feature1d = self.n_inputs == 1
        X = format_data(X, is_feature1d)

        if X.ndim != 2:
            raise ValueError("Cannot make prediction, because input "
                             "data has more than 2 dimensions")

        n_samples, n_features = X.shape

        if n_features != self.n_inputs:
            raise ValueError("Input data expected to have {} features, "
                             "but got {}".format(self.n_inputs, n_features))

        return X

    def initialize_nodes(self, data):
        self.graph = NeuralGasGraph()

        for sample in sample_data_point(data, n=self.n_start_nodes):
            self.graph.add_node(NeuronNode(sample.reshape(1, -1)))

    def train(self, X_train, epochs=100):
        X_train = self.format_input_data(X_train)

        if not self.graph.nodes:
            self.initialize_nodes(X_train)

        return super(GrowingNeuralGas, self).train(
            X_train=X_train, y_train=None,
            X_test=None, y_test=None,
            epochs=epochs)

    def one_training_update(self, X_train, y_train=None):
        graph = self.graph
        step = self.step
        neighbour_step = self.neighbour_step

        max_nodes = self.max_nodes
        max_edge_age = self.max_edge_age

        error_decay_rate = self.error_decay_rate
        after_split_error_decay_rate = self.after_split_error_decay_rate
        n_iter_before_neuron_added = self.n_iter_before_neuron_added

        # Distances computed during training are plain Euclidean
        # norms, so the threshold can be compared to them directly.
        min_distance_for_update = self.min_distance_for_update

        n_samples = len(X_train)
        total_error = 0
        did_update = False

        for sample in X_train:
            nodes = graph.nodes
            weights = np.concatenate([node.weight for node in nodes])

            distance = np.linalg.norm(weights - sample, axis=1)
            neuron_ids = np.argsort(distance)

            closest_neuron_id, second_closest_id = neuron_ids[:2]
            closest_neuron = nodes[closest_neuron_id]
            second_closest = nodes[second_closest_id]
            total_error += distance[closest_neuron_id]

            if distance[closest_neuron_id] < min_distance_for_update:
                continue

            self.n_updates += 1
            did_update = True

            closest_neuron.error += distance[closest_neuron_id]
            closest_neuron.weight += step * (sample - closest_neuron.weight)

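            # Competitive Hebbian learning: connect the two neurons
            # that are closest to the current sample.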
            graph.add_edge(closest_neuron, second_closest)

            for to_neuron in list(graph.edges_per_node[closest_neuron]):
                edge_id = graph.find_edge_id(to_neuron, closest_neuron)
                age = graph.edges[edge_id]

                if age >= max_edge_age:
                    graph.remove_edge(to_neuron, closest_neuron)

                    if not graph.edges_per_node[to_neuron]:
                        graph.remove_node(to_neuron)

                else:
                    graph.edges[edge_id] += 1
                    to_neuron.weight += neighbour_step * (
                        sample - to_neuron.weight)

            time_to_add_new_neuron = (
                self.n_updates % n_iter_before_neuron_added == 0 and
                graph.n_nodes < max_nodes)

            if time_to_add_new_neuron:
                nodes = graph.nodes
                largest_error_neuron = max(nodes, key=attrgetter('error'))
                neighbour_neuron = max(
                    graph.edges_per_node[largest_error_neuron],
                    key=attrgetter('error'))

                largest_error_neuron.error *= after_split_error_decay_rate
                neighbour_neuron.error *= after_split_error_decay_rate

                new_weight = 0.5 * (
                    largest_error_neuron.weight + neighbour_neuron.weight
                )
                new_neuron = NeuronNode(weight=new_weight.reshape(1, -1))

                graph.remove_edge(neighbour_neuron, largest_error_neuron)
                graph.add_node(new_neuron)
                graph.add_edge(largest_error_neuron, new_neuron)
                graph.add_edge(neighbour_neuron, new_neuron)

            for node in graph.nodes:
                node.error *= error_decay_rate

        if not did_update and min_distance_for_update != 0 and n_samples > 1:
            raise StopTraining(
                "Distance between every data sample and its closest "
                "neuron is less than {}".format(min_distance_for_update))

        return total_error / n_samples

    def predict(self, *args, **kwargs):
        raise NotImplementedError(
            "Growing Neural Gas algorithm doesn't make predictions. "
            "It only learns the graph structure of the data "
            "(see the `graph` attribute).")
Example 3
class LVQ(BaseNetwork):
    """
    Learning Vector Quantization (LVQ) algorithm.

    Notes
    -----
    - Input data needs to be normalized, because LVQ uses
      Euclidean distance to find clusters.

    - Training error is just the ratio of misclassified
      samples.

    Parameters
    ----------
    n_inputs : int
        Number of input units. It should be equal to the
        number of features in the input data set.

    n_subclasses : int, None
        Defines the total number of subclasses. The value should be
        greater than or equal to the number of classes. ``None`` sets
        the number of subclasses equal to the number of classes.
        Defaults to ``None``.

    n_classes : int
        Number of classes in the data set.

    prototypes_per_class : list, None
        Defines the number of prototypes for each class. For instance,
        if ``n_classes=3`` and ``n_subclasses=8``, there can be 3
        subclasses for the first class, 3 for the second one
        and 2 for the third one (3 + 3 + 2 == 8). This example would
        be specified as ``prototypes_per_class=[3, 3, 2]``.

        There are two rules that apply to this parameter:

        1. ``sum(prototypes_per_class) == n_subclasses``

        2. ``len(prototypes_per_class) == n_classes``

        The ``None`` value distributes an approximately equal number
        of subclasses per class. It's approximate because when
        ``n_subclasses % n_classes != 0`` there is no way to assign
        exactly the same number of subclasses to every class.

        Defaults to ``None``.

    {BaseNetwork.step}

    n_updates_to_stepdrop : int or None
        If this option is not ``None``, LVQ reduces the step size
        after every update until the number of applied updates
        reaches the ``n_updates_to_stepdrop`` value. The minimum
        possible step size is defined by the ``minstep``
        parameter.

        Be aware that the number of updates is not the same as the
        number of epochs. LVQ applies an update after each sample
        propagated through the network, so the relation between this
        parameter and the maximum number of epochs is the following

        .. code-block:: python

            n_updates_to_stepdrop = n_samples * n_max_epochs

        If the parameter is ``None``, the step size is not reduced
        after updates.

        Defaults to ``None``.

    minstep : float
        The step size will never go below this value. This property
        is only used when ``n_updates_to_stepdrop`` is not ``None``.
        Defaults to ``1e-5``.

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.epoch_end_signal}

    {BaseNetwork.train_end_signal}

    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}

    {BaseSkeleton.fit}
    """
    n_inputs = IntProperty(minval=1)
    n_subclasses = IntProperty(minval=2, default=None, allow_none=True)
    n_classes = IntProperty(minval=2)

    prototypes_per_class = TypedListProperty(allow_none=True, default=None)
    weight = Property(expected_type=(np.ndarray, init.Initializer),
                      allow_none=True, default=None)

    n_updates_to_stepdrop = IntProperty(default=None, allow_none=True,
                                        minval=1)
    minstep = NumberProperty(minval=0, default=1e-5)

    def __init__(self, **options):
        self.initialized = False
        super(LVQ, self).__init__(**options)

        self.n_updates = 0

        if self.n_subclasses is None:
            self.n_subclasses = self.n_classes

        if isinstance(self.weight, init.Initializer):
            weight_shape = (self.n_inputs, self.n_subclasses)
            self.weight = self.weight.sample(weight_shape)

        if self.weight is not None:
            self.initialized = True

        if self.n_subclasses < self.n_classes:
            raise ValueError("Number of subclasses should be greater "
                             "or equal to the number of classes. Network "
                             "was defined with {} subclasses and {} classes"
                             "".format(self.n_subclasses, self.n_classes))

        if self.prototypes_per_class is None:
            whole, remainder = divmod(self.n_subclasses, self.n_classes)
            self.prototypes_per_class = [whole] * self.n_classes

            if remainder:
                # Since there is a remainder left, we cannot have an
                # equal number of subclasses per class; therefore we
                # add +1 to a few randomly selected classes.
                class_indices = np.random.choice(self.n_classes, remainder,
                                                 replace=False)

                for class_index in class_indices:
                    self.prototypes_per_class[class_index] += 1

        if len(self.prototypes_per_class) != self.n_classes:
            raise ValueError("LVQ defined for classification problem that has "
                             "{} classes, but the `prototypes_per_class` "
                             "variable has defined data for {} classes."
                             "".format(self.n_classes,
                                       len(self.prototypes_per_class)))

        if sum(self.prototypes_per_class) != self.n_subclasses:
            raise ValueError("Invalid distribution of subclasses for the "
                             "`prototypes_per_class` variable. Got total "
                             "of {} subclasses ({}) instead of {} expected"
                             "".format(sum(self.prototypes_per_class),
                                       self.prototypes_per_class,
                                       self.n_subclasses))

        self.subclass_to_class = []
        for class_id, n_prototypes in enumerate(self.prototypes_per_class):
            self.subclass_to_class.extend([class_id] * n_prototypes)

    @property
    def training_step(self):
        if self.n_updates_to_stepdrop is None:
            return self.step

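        # Interpolate linearly between ``step`` and ``minstep`` based
        # on the fraction of updates applied so far.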
        updates_ratio = (1 - self.n_updates / self.n_updates_to_stepdrop)
        return self.minstep + (self.step - self.minstep) * updates_ratio

    def predict(self, input_data):
        if not self.initialized:
            raise NotTrained("LVQ network hasn't been trained yet")

        input_data = format_data(input_data)
        subclass_to_class = self.subclass_to_class
        weight = self.weight

        predictions = []
        for input_row in input_data:
            output = euclid_distance(input_row, weight)
            winner_subclass = int(output.argmin(axis=1))

            predicted_class = subclass_to_class[winner_subclass]
            predictions.append(predicted_class)

        return np.array(predictions)

    def train(self, input_train, target_train, *args, **kwargs):
        input_train = format_data(input_train)
        target_train = format_data(target_train)

        n_input_samples = len(input_train)

        if n_input_samples <= self.n_subclasses:
            raise ValueError("Number of training input samples should be "
                             "greater than number of sublcasses. Training "
                             "method recived {} input samples."
                             "".format(n_input_samples))

        if not self.initialized:
            target_classes = sorted(np.unique(target_train).astype(int))
            expected_classes = list(range(self.n_classes))

            if target_classes != expected_classes:
                raise ValueError("All classes should be integers from the "
                                 "range [0, {}], but got the following "
                                 "classes instead {}".format(
                                    self.n_classes - 1, target_classes))

            weights = []
            iterator = zip(target_classes, self.prototypes_per_class)
            for target_class, n_prototypes in iterator:
                is_valid_class = (target_train[:, 0] == target_class)
                is_valid_class = is_valid_class.astype('float64')
                n_samples_per_class = sum(is_valid_class)
                is_valid_class /= n_samples_per_class

                if n_samples_per_class <= n_prototypes:
                    raise ValueError("Input data has {0} samples for class-{1}"
                                     ". Number of samples per specified "
                                     "class-{1} should be greater than {2}."
                                     "".format(n_samples_per_class,
                                               target_class, n_prototypes))

                class_weight_indices = np.random.choice(
                    np.arange(n_input_samples), n_prototypes,
                    replace=False, p=is_valid_class)

                class_weight = input_train[class_weight_indices]
                weights.extend(class_weight)

            self.weight = np.array(weights)
            self.initialized = True

        super(LVQ, self).train(input_train, target_train, *args, **kwargs)

    def train_epoch(self, input_train, target_train):
        weight = self.weight
        subclass_to_class = self.subclass_to_class

        n_correct_predictions = 0
        for input_row, target in zip(input_train, target_train):
            step = self.training_step
            output = euclid_distance(input_row, weight)
            winner_subclass = int(output.argmin())
            predicted_class = subclass_to_class[winner_subclass]

            weight_update = input_row - weight[winner_subclass, :]
            is_correct_prediction = (predicted_class == target)

            if is_correct_prediction:
                weight[winner_subclass, :] += step * weight_update
            else:
                weight[winner_subclass, :] -= step * weight_update

            n_correct_predictions += is_correct_prediction
            self.n_updates += 1

        n_samples = len(input_train)
        return 1 - n_correct_predictions / n_samples
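
The inner loop of ``train_epoch`` above is the plain LVQ1 rule: the winning
prototype moves toward the sample when its class matches the target, and away
from it otherwise. A self-contained NumPy sketch of a single update
(``lvq1_update`` is illustrative, not a neupy function):

.. code-block:: python

    import numpy as np

    def lvq1_update(sample, target, prototypes, prototype_classes, step):
        # Winner is the prototype closest to the sample.
        distances = np.linalg.norm(prototypes - sample, axis=1)
        winner = distances.argmin()

        direction = sample - prototypes[winner]
        if prototype_classes[winner] == target:
            prototypes[winner] += step * direction  # attract
        else:
            prototypes[winner] -= step * direction  # repel

        return prototypes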
Example 4
class GRU(BaseRNNLayer):
    """
    Gated Recurrent Unit (GRU) Layer.

    Parameters
    ----------
    {BaseRNNLayer.size}

    weights : dict or Initializer
        Weight parameters for different gates.
        Defaults to :class:`XavierUniform() <neupy.init.XavierUniform>`.

        - If the application requires the same initialization method
          for all weights, it's possible to specify a single
          initialization method that will be automatically applied to
          every weight parameter in the GRU layer.

          .. code-block:: python

              layers.GRU(2, weights=init.Normal(0.1))

        - If the application requires different initialization values
          for different weights, each weight can be specified
          by name.

          .. code-block:: python

              dict(
                  weight_in_to_updategate=init.XavierUniform(),
                  weight_hid_to_updategate=init.XavierUniform(),

                  weight_in_to_resetgate=init.XavierUniform(),
                  weight_hid_to_resetgate=init.XavierUniform(),

                  weight_in_to_hidden_update=init.XavierUniform(),
                  weight_hid_to_hidden_update=init.XavierUniform(),
              )

          If only one (or a few) parameters need to be modified, it's
          better to specify just those and omit the others

          .. code-block:: python

              dict(weight_in_to_updategate=init.Normal(0.1))

          Other parameters like ``weight_in_to_resetgate`` will be
          equal to their default values.

    biases : dict or Initializer
        Bias parameters for different gates.
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

        - If the application requires the same initialization method
          for all biases, it's possible to specify a single
          initialization method that will be automatically applied to
          every bias parameter in the GRU layer.

          .. code-block:: python

              layers.GRU(2, biases=init.Constant(1))

        - If the application requires different initialization values
          for different biases, each bias can be specified
          by name.

          .. code-block:: python

              dict(
                  bias_updategate=init.Constant(0),
                  bias_resetgate=init.Constant(0),
                  bias_hidden_update=init.Constant(0),
              )

          If only one (or a few) parameters need to be modified, it's
          better to specify just those and omit the others

          .. code-block:: python

              dict(bias_resetgate=init.Constant(1))

          Other parameters like ``bias_updategate`` will be
          equal to their default values.

    activation_functions : dict, callable
        Activation functions for different gates. Defaults to:

        .. code-block:: python

            # import theano.tensor as T
            dict(
                resetgate=T.nnet.sigmoid,
                updategate=T.nnet.sigmoid,
                hidden_update=T.tanh,
            )

        If only one parameter needs to be modified, it's better to
        specify just that one and omit the others

        .. code-block:: python

            dict(resetgate=T.tanh)

        Other parameters like ``updategate`` or ``hidden_update``
        will be equal to their default values.

    learn_init : bool
        If ``True``, make ``hid_init`` trainable variable.
        Defaults to ``False``.

    hid_init : array-like, Theano variable, scalar or Initializer
        Initializer for initial hidden state (:math:`h_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    {BaseRNNLayer.only_return_final}

    backwards : bool
        If ``True``, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`. Defaults to ``False``.

    precompute_input : bool
        If ``True``, precompute ``input_to_hid`` before iterating
        through the sequence. This can result in a speed up at the
        expense of an increase in memory usage.
        Defaults to ``True``.

    unroll_scan : bool
        If ``True`` the recursion is unrolled instead of using scan.
        For some graphs this gives a significant speed up but it
        might also consume more memory. When ``unroll_scan=True``,
        backpropagation always includes the full sequence, so
        ``n_gradient_steps`` must be set to ``-1`` and the input
        sequence length must be known at compile time (i.e.,
        cannot be given as ``None``). Defaults to ``False``.

    {BaseLayer.Parameters}

    Notes
    -----
    Code was adapted from the
    `Lasagne <https://github.com/Lasagne/Lasagne>`_ library.

    Examples
    --------

    Sequence classification

    .. code-block:: python

        from neupy import layers, algorithms

        n_time_steps = 40
        n_categories = 20
        embedded_size = 10

        network = algorithms.RMSProp(
            [
                layers.Input(n_time_steps),
                layers.Embedding(n_categories, embedded_size),
                layers.GRU(20),
                layers.Sigmoid(1),
            ]
        )
    """
    weights = MultiParameterProperty(
        default=dict(
            weight_in_to_updategate=init.XavierUniform(),
            weight_hid_to_updategate=init.XavierUniform(),

            weight_in_to_resetgate=init.XavierUniform(),
            weight_hid_to_resetgate=init.XavierUniform(),

            weight_in_to_hidden_update=init.XavierUniform(),
            weight_hid_to_hidden_update=init.XavierUniform(),
        ))
    biases = MultiParameterProperty(
        default=dict(
            bias_updategate=init.Constant(0),
            bias_resetgate=init.Constant(0),
            bias_hidden_update=init.Constant(0),
        ))
    activation_functions = MultiCallableProperty(
        default=dict(
            resetgate=T.nnet.sigmoid,
            updategate=T.nnet.sigmoid,
            hidden_update=T.tanh,
        ))

    learn_init = Property(default=False, expected_type=bool)
    hid_init = ParameterProperty(default=init.Constant(0))

    backwards = Property(default=False, expected_type=bool)
    unroll_scan = Property(default=False, expected_type=bool)
    precompute_input = Property(default=True, expected_type=bool)

    n_gradient_steps = IntProperty(default=-1)
    gradient_clipping = NumberProperty(default=0, minval=0)

    def initialize(self):
        super(GRU, self).initialize()

        n_inputs = np.prod(self.input_shape[1:])
        weights = self.weights
        biases = self.biases

        # Update gate parameters
        self.weight_in_to_updategate = self.add_parameter(
            value=weights.weight_in_to_updategate,
            name='weight_in_to_updategate',
            shape=(n_inputs, self.size))
        self.weight_hid_to_updategate = self.add_parameter(
            value=weights.weight_hid_to_updategate,
            name='weight_hid_to_updategate',
            shape=(self.size, self.size))
        self.bias_updategate = self.add_parameter(
            value=biases.bias_updategate, name='bias_updategate',
            shape=(self.size,))

        # Reset gate parameters
        self.weight_in_to_resetgate = self.add_parameter(
            value=weights.weight_in_to_resetgate,
            name='weight_in_to_resetgate',
            shape=(n_inputs, self.size))
        self.weight_hid_to_resetgate = self.add_parameter(
            value=weights.weight_hid_to_resetgate,
            name='weight_hid_to_resetgate',
            shape=(self.size, self.size))
        self.bias_resetgate = self.add_parameter(
            value=biases.bias_resetgate, name='bias_resetgate',
            shape=(self.size,))

        # Hidden update gate parameters
        self.weight_in_to_hidden_update = self.add_parameter(
            value=weights.weight_in_to_hidden_update,
            name='weight_in_to_hidden_update',
            shape=(n_inputs, self.size))
        self.weight_hid_to_hidden_update = self.add_parameter(
            value=weights.weight_hid_to_hidden_update,
            name='weight_hid_to_hidden_update',
            shape=(self.size, self.size))
        self.bias_hidden_update = self.add_parameter(
            value=biases.bias_hidden_update, name='bias_hidden_update',
            shape=(self.size,))

        self.add_parameter(value=self.hid_init, shape=(1, self.size),
                           name="hid_init", trainable=self.learn_init)

    def output(self, input_value):
        # Treat all dimensions after the second as flattened
        # feature dimensions
        if input_value.ndim > 3:
            input_value = T.flatten(input_value, 3)

        # Because scan iterates over the first dimension we
        # dimshuffle to (n_time_steps, n_batch, n_features)
        input_value = input_value.dimshuffle(1, 0, 2)
        seq_len, n_batch, _ = input_value.shape

        # Stack input weight matrices into a (num_inputs, 3 * num_units)
        # matrix, which speeds up computation
        weight_in_stacked = T.concatenate([
            self.weight_in_to_updategate,
            self.weight_in_to_resetgate,
            self.weight_in_to_hidden_update], axis=1)

        # Same for hidden weight matrices
        weight_hid_stacked = T.concatenate([
            self.weight_hid_to_updategate,
            self.weight_hid_to_resetgate,
            self.weight_hid_to_hidden_update], axis=1)

        # Stack biases into a (3 * num_units) vector
        bias_stacked = T.concatenate([
            self.bias_updategate,
            self.bias_resetgate,
            self.bias_hidden_update], axis=0)

        if self.precompute_input:
            # Because the input is given for all time steps, we can
            # precompute_input the inputs dot weight matrices before scanning.
            # weight_in_stacked is (n_features, 3 * num_units).
            # Input: (n_time_steps, n_batch, 3 * num_units).
            input_value = T.dot(input_value, weight_in_stacked) + bias_stacked

        # When theano.scan calls step, input_n will be
        # (n_batch, 3 * num_units). We define a slicing function
        # that extract the input to each GRU gate
        def slice_w(x, n):
            s = x[:, n * self.size:(n + 1) * self.size]
            if self.size == 1:
                s = T.addbroadcast(s, 1)  # Theano cannot infer this by itself
            return s

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def one_gru_step(input_n, hid_previous, *args):
            # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1},
            # and W_{hc} h_{t - 1}
            hid_input = T.dot(hid_previous, weight_hid_stacked)

            if self.gradient_clipping:
                input_n = theano.gradient.grad_clip(
                    input_n,
                    -self.gradient_clipping,
                    self.gradient_clipping)

                hid_input = theano.gradient.grad_clip(
                    hid_input,
                    -self.gradient_clipping,
                    self.gradient_clipping)

            if not self.precompute_input:
                # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u,
                # and W_{xc}x_t + b_c
                input_n = T.dot(input_n, weight_in_stacked) + bias_stacked

            # Reset and update gates
            resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
            resetgate = self.activation_functions.resetgate(resetgate)

            updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
            updategate = self.activation_functions.updategate(updategate)

            # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1})
            hidden_update_in = slice_w(input_n, 2)
            hidden_update_hid = slice_w(hid_input, 2)
            hidden_update = hidden_update_in + resetgate * hidden_update_hid

            if self.gradient_clipping:
                hidden_update = theano.gradient.grad_clip(
                    hidden_update,
                    -self.gradient_clipping,
                    self.gradient_clipping)

            hidden_update = self.activation_functions.hidden_update(
                hidden_update)

            # Compute (1 - u_t)h_{t - 1} + u_t c_t
            hid = (1 - updategate) * hid_previous + updategate * hidden_update
            return hid

        hid_init = T.dot(T.ones((n_batch, 1)), self.hid_init)

        # The hidden-to-hidden weight matrix is always used in step
        non_sequences = [weight_hid_stacked]

        # When we aren't precomputing the input outside of scan, we need to
        # provide the input weights and biases to the step function
        if not self.precompute_input:
            non_sequences += [weight_in_stacked, bias_stacked]

        if self.unroll_scan:
            # Retrieve the dimensionality of the incoming layer
            n_time_steps = self.input_shape[0]

            # Explicitly unroll the recurrence instead of using scan
            hid_out, = unroll_scan(
                fn=one_gru_step,
                sequences=[input_value],
                outputs_info=[hid_init],
                go_backwards=self.backwards,
                non_sequences=non_sequences,
                n_steps=n_time_steps)

        else:
            # Scan op iterates over first dimension of input and
            # repeatedly applies the step function
            hid_out, _ = theano.scan(
                fn=one_gru_step,
                sequences=[input_value],
                outputs_info=[hid_init],
                go_backwards=self.backwards,
                non_sequences=non_sequences,
                truncate_gradient=self.n_gradient_steps,
                strict=True)

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            return hid_out[-1]

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        hid_out = hid_out.dimshuffle(1, 0, 2)

        # if scan is backward reverse the output
        if self.backwards:
            hid_out = hid_out[:, ::-1]

        return hid_out
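
The math inside ``one_gru_step`` is the standard GRU cell. A NumPy sketch of
a single time step with unstacked weights, using sigmoid and tanh in place of
the configurable ``activation_functions``:

.. code-block:: python

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    def gru_step(x, h_prev,
                 W_xu, W_hu, b_u,    # update gate
                 W_xr, W_hr, b_r,    # reset gate
                 W_xc, W_hc, b_c):   # hidden update
        u = sigmoid(x @ W_xu + h_prev @ W_hu + b_u)
        r = sigmoid(x @ W_xr + h_prev @ W_hr + b_r)

        # The reset gate scales only the hidden-to-hidden contribution,
        # exactly as in the layer above.
        c = np.tanh(x @ W_xc + r * (h_prev @ W_hc) + b_c)

        # Interpolate between the previous and the candidate state.
        return (1 - u) * h_prev + u * c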
Example 5
class SOFM(Kohonen):
    """
    Self-Organizing Feature Map (SOFM).

    Parameters
    ----------
    {BaseAssociative.n_inputs}

    {BaseAssociative.n_outputs}

    learning_radius : int
        Learning radius.

    features_grid : list, tuple, None
        Feature grid defines the shape of the output neurons.
        The shape must be compatible with the number
        of outputs. Defaults to ``(n_outputs, 1)``.

    transform : {{``linear``, ``euclid``, ``cos``}}
        Defines the transformation applied between the input
        data and the network's weights.

        - ``linear`` means that the input data is multiplied
          by the weights in the usual way.

        - ``euclid`` identifies the weight vector closest
          to the input vector.

        - ``cos`` computes the cosine similarity between the
          input samples and the network's weights.

        Defaults to ``linear``.

    {BaseAssociative.weight}

    {BaseNetwork.step}

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.epoch_end_signal}

    {BaseNetwork.train_end_signal}

    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}

    {BaseAssociative.train}

    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms, environment
    >>>
    >>> environment.reproducible()
    >>>
    >>> data = np.array([
    ...     [0.1961, 0.9806],
    ...     [-0.1961, 0.9806],
    ...     [-0.5812, -0.8137],
    ...     [-0.8137, -0.5812],
    ... ])
    >>>
    >>> sofmnet = algorithms.SOFM(
    ...     n_inputs=2,
    ...     n_outputs=2,
    ...     step=0.1,
    ...     learning_radius=0,
    ...     features_grid=(2, 1),
    ... )
    >>> sofmnet.train(data, epochs=100)
    >>> sofmnet.predict(data)
    array([[0, 1],
           [0, 1],
           [1, 0],
           [1, 0]])
    """
    learning_radius = IntProperty(default=0, minval=0)
    features_grid = TypedListProperty(allow_none=True, default=None)
    transform = ChoiceProperty(default='linear',
                               choices={
                                   'linear': np.dot,
                                   'euclid': neg_euclid_distance,
                                   'cos': cosine_similarity,
                               })

    def __init__(self, **options):
        super(SOFM, self).__init__(**options)

        invalid_feature_grid = (self.features_grid is not None
                                and mul(*self.features_grid) != self.n_outputs)
        if invalid_feature_grid:
            raise ValueError(
                "Feature grid should contain the same number of elements as "
                "in the output layer: {0}, but found: {1} ({2}x{3})"
                "".format(self.n_outputs, mul(*self.features_grid),
                          self.features_grid[0], self.features_grid[1]))

        if self.features_grid is None:
            self.features_grid = (self.n_outputs, 1)

    def predict_raw(self, input_data):
        input_data = format_data(input_data)
        n_samples = input_data.shape[0]
        output = np.zeros((n_samples, self.n_outputs))

        for i, input_row in enumerate(input_data):
            output[i, :] = self.transform(input_row.reshape(1, -1),
                                          self.weight)

        return output

    def update_indexes(self, layer_output):
        neuron_winner = layer_output.argmax(axis=1)
        feature_bound = self.features_grid[1]

        output_with_neighbours = neuron_neighbours(
            np.reshape(layer_output, self.features_grid),
            (neuron_winner // feature_bound, neuron_winner % feature_bound),
            self.learning_radius)
        index_y, _ = np.nonzero(
            np.reshape(output_with_neighbours, (self.n_outputs, 1)))
        return index_y
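
``update_indexes`` converts the flat index of the winning neuron into 2D grid
coordinates with integer division and modulo by the grid width;
``np.unravel_index`` is the idiomatic equivalent:

.. code-block:: python

    import numpy as np

    features_grid = (3, 4)  # 12 output neurons arranged in a 3x4 grid
    winner = 7              # flat index of the winning neuron

    row, col = winner // features_grid[1], winner % features_grid[1]
    assert (row, col) == np.unravel_index(winner, features_grid)  # (1, 3)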
Example 6
class SOFM(Kohonen):
    """
    Self-Organizing Feature Map (SOFM or SOM).

    Notes
    -----
    - Training data samples should have normalized features.

    Parameters
    ----------
    {BaseAssociative.n_inputs}

    n_outputs : int or None
        Number of outputs. The parameter is optional if
        ``features_grid`` was specified.

        .. code-block:: python

            if n_outputs is None:
                n_outputs = np.prod(feature_grid)

    learning_radius : int
        Defines the radius within which all neurons are considered
        neighbours of the winning neuron. The bigger the value,
        the more neurons are updated in each iteration.

        A value of ``0`` means that neighbour neurons
        are not updated.

        Defaults to ``0``.

    std : int, float
        Controls the learning rate of each neighbour. The further
        a neighbour neuron is from the winning neuron, the smaller
        its learning rate. The learning rates scale with factors
        produced by a normal distribution centered on the winning
        neuron, with the standard deviation specified by this
        parameter. The learning rate of the winning neuron is
        always equal to the value specified in the ``step``
        parameter; for neighbour neurons it's always lower.

        The bigger this value, the larger the learning
        rate of the neighbour neurons.

        Defaults to ``1``.

    features_grid : list, tuple, None
        Feature grid defines the shape of the output neurons. The
        shape must be compatible with the number of outputs, which
        means that the following condition should hold:

        .. code-block:: python

            np.prod(features_grid) == n_outputs

        The SOFM implementation supports n-dimensional grids. For
        instance, in order to specify the grid as a cube instead of
        the regular rectangular shape, we can set up the options in
        the following way:

        .. code-block:: python

            SOFM(
                ...
                features_grid=(5, 5, 5),
                ...
            )

        Defaults to ``(n_outputs, 1)``.

    grid_type : {{``rect``, ``hexagon``}}
        Defines the connection type in the feature grid. The type
        determines which neurons are considered closest to the
        winning neuron during training.

        - ``rect`` - Connections between neurons are organized
          in a rectangular grid.

        - ``hexagon`` - Connections between neurons are organized
          in a hexagonal grid. It works only for 1d or 2d grids.

        Defaults to ``rect``.

    distance : {{``euclid``, ``dot_product``, ``cos``}}
        Defines the function used to find the weight vector
        closest to the input sample.

        - ``dot_product``: Just a regular dot product between
          data sample and network's weights

        - ``euclid``: Euclidean distance between data sample
          and network's weights

        - ``cos``: Cosine distance between data sample and
          network's weights

        Defaults to ``euclid``.

    reduce_radius_after : int or None
        Every ``reduce_radius_after`` epochs, the ``learning_radius``
        parameter is reduced by ``1``. The process continues until
        ``learning_radius`` reaches ``0``.

        The ``None`` value disables parameter reduction
        during the training.

        Defaults to ``100``.

    reduce_step_after : int or None
        Defines the rate at which the ``step`` parameter is
        reduced, using the following formula:

        .. code-block:: python

            step = step / (1 + current_epoch / reduce_step_after)

        The ``None`` value disables parameter reduction
        during the training.

        Defaults to ``100``.

    reduce_std_after : int or None
        Defines the rate at which the ``std`` parameter is
        reduced, using the following formula:

        .. code-block:: python

            std = std / (1 + current_epoch / reduce_std_after)

        The ``None`` value disables parameter reduction
        during the training.

        Defaults to ``100``.

    weight : array-like, Initializer or {{``init_pca``, ``sample_from_data``}}
        Neural network weights.
        A manually defined value should have the shape
        ``(n_inputs, n_outputs)``.

        It's also possible to initialize weights based on the
        training data. There are two options:

        - ``sample_from_data`` - Before training starts, randomly
          draws a number of training samples equal to the number
          of expected outputs.

        - ``init_pca`` - Before training starts, SOFM applies PCA
          to a covariance matrix built from the training samples.
          Weights are generated based on the two eigenvectors
          associated with the largest eigenvalues.

        Defaults to :class:`Normal() <neupy.init.Normal>`.

    {BaseNetwork.step}

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.signals}

    {Verbose.verbose}

    Methods
    -------
    init_weights(train_data)
        Initializes weights based on the input data. It works only
        for the `init_pca` and `sample_from_data` options; in other
        cases it throws an error.

    {BaseSkeleton.predict}

    {BaseAssociative.train}

    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms, utils
    >>>
    >>> utils.reproducible()
    >>>
    >>> data = np.array([
    ...     [0.1961, 0.9806],
    ...     [-0.1961, 0.9806],
    ...     [-0.5812, -0.8137],
    ...     [-0.8137, -0.5812],
    ... ])
    >>>
    >>> sofm = algorithms.SOFM(
    ...     n_inputs=2,
    ...     n_outputs=2,
    ...     step=0.1,
    ...     learning_radius=0
    ... )
    >>> sofm.train(data, epochs=100)
    >>> sofm.predict(data)
    array([[0, 1],
           [0, 1],
           [1, 0],
           [1, 0]])
    """
    n_outputs = IntProperty(minval=1, allow_none=True, default=None)
    weight = SOFMWeightParameter(default=init.Normal(),
                                 choices={
                                     'init_pca': linear_initialization,
                                     'sample_from_data': sample_data,
                                 })
    features_grid = TypedListProperty(allow_none=True, default=None)

    DistanceParameter = namedtuple('DistanceParameter', 'name func')
    distance = ChoiceProperty(default='euclid',
                              choices={
                                  'dot_product':
                                  DistanceParameter(name='dot_product',
                                                    func=np.dot),
                                  'euclid':
                                  DistanceParameter(name='euclid',
                                                    func=neg_euclid_distance),
                                  'cos':
                                  DistanceParameter(name='cosine',
                                                    func=cosine_similarity),
                              })

    GridTypeMethods = namedtuple('GridTypeMethods',
                                 'name find_neighbours find_step_scaler')

    grid_type = ChoiceProperty(
        default='rect',
        choices={
            'rect':
            GridTypeMethods(name='rectangle',
                            find_neighbours=find_neighbours_on_rect_grid,
                            find_step_scaler=find_step_scaler_on_rect_grid),
            'hexagon':
            GridTypeMethods(name='hexagon',
                            find_neighbours=find_neighbours_on_hexagon_grid,
                            find_step_scaler=find_step_scaler_on_hexagon_grid)
        })

    learning_radius = IntProperty(default=0, minval=0)
    std = NumberProperty(minval=0, default=1)

    reduce_radius_after = IntProperty(default=100, minval=1, allow_none=True)
    reduce_std_after = IntProperty(default=100, minval=1, allow_none=True)
    reduce_step_after = IntProperty(default=100, minval=1, allow_none=True)

    def __init__(self, **options):
        super(BaseAssociative, self).__init__(**options)

        if self.n_outputs is None and self.features_grid is None:
            raise ValueError("One of the following parameters has to be "
                             "specified: n_outputs, features_grid")

        elif self.n_outputs is None:
            self.n_outputs = np.prod(self.features_grid)

        n_grid_elements = np.prod(self.features_grid)
        invalid_feature_grid = (self.features_grid is not None
                                and n_grid_elements != self.n_outputs)

        if invalid_feature_grid:
            raise ValueError(
                "Feature grid should contain the same number of elements "
                "as in the output layer: {0}, but found: {1} (shape: {2})"
                "".format(self.n_outputs, n_grid_elements, self.features_grid))

        if self.features_grid is None:
            self.features_grid = (self.n_outputs, 1)

        if len(self.features_grid) > 2 and self.grid_type.name == 'hexagon':
            raise ValueError("SOFM with hexagon grid type should have "
                             "one or two dimensional feature grid, but got "
                             "{}d instead (shape: {!r})".format(
                                 len(self.features_grid), self.features_grid))

        is_pca_init = (isinstance(options.get('weight'), six.string_types)
                       and options.get('weight') == 'init_pca')

        self.initialized = False
        if not callable(self.weight):
            super(Kohonen, self).init_weights()
            self.initialized = True

            if self.distance.name == 'cosine':
                self.weight /= np.linalg.norm(self.weight, axis=0)

        elif is_pca_init and self.grid_type.name != 'rectangle':
            raise WeightInitializationError(
                "Cannot apply PCA weight initialization for non-rectangular "
                "grid. Grid type: {}".format(self.grid_type.name))

    def predict_raw(self, X):
        X = format_data(X, is_feature1d=(self.n_inputs == 1))

        if X.ndim != 2:
            raise ValueError("Only 2D inputs are allowed")

        n_samples = X.shape[0]
        output = np.zeros((n_samples, self.n_outputs))

        for i, input_row in enumerate(X):
            output[i, :] = self.distance.func(input_row.reshape(1, -1),
                                              self.weight)

        return output

    def update_indexes(self, layer_output):
        neuron_winner = layer_output.argmax(axis=1).item(0)
        winner_neuron_coords = np.unravel_index(neuron_winner,
                                                self.features_grid)

        learning_radius = self.learning_radius
        step = self.step
        std = self.std

        if self.reduce_radius_after is not None:
            learning_radius -= self.last_epoch // self.reduce_radius_after
            learning_radius = max(0, learning_radius)

        if self.reduce_step_after is not None:
            step = decay_function(step, self.last_epoch,
                                  self.reduce_step_after)

        if self.reduce_std_after is not None:
            std = decay_function(std, self.last_epoch, self.reduce_std_after)

        methods = self.grid_type
        output_grid = np.reshape(layer_output, self.features_grid)

        output_with_neighbours = methods.find_neighbours(
            grid=output_grid,
            center=winner_neuron_coords,
            radius=learning_radius)

        step_scaler = methods.find_step_scaler(grid=output_grid,
                                               center=winner_neuron_coords,
                                               std=std)

        index_y, = np.nonzero(output_with_neighbours.reshape(self.n_outputs))

        step_scaler = step_scaler.reshape(self.n_outputs)
        return index_y, step * step_scaler[index_y]

    def init_weights(self, X_train):
        if self.initialized:
            raise WeightInitializationError(
                "Weights have been already initialized")

        weight_initializer = self.weight
        self.weight = weight_initializer(X_train, self.features_grid)
        self.initialized = True

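        # Cosine similarity depends only on direction, so keep
        # every weight column unit-length.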
        if self.distance.name == 'cosine':
            self.weight /= np.linalg.norm(self.weight, axis=0)

    def train(self, X_train, epochs=100):
        if not self.initialized:
            self.init_weights(X_train)
        super(SOFM, self).train(X_train, epochs=epochs)

    def one_training_update(self, X_train, y_train=None):
        step = self.step
        predict = self.predict
        update_indexes = self.update_indexes

        error = 0
        for input_row in X_train:
            input_row = np.reshape(input_row, (1, input_row.size))
            layer_output = predict(input_row)

            index_y, step = update_indexes(layer_output)
            distance = input_row.T - self.weight[:, index_y]
            updated_weights = (self.weight[:, index_y] + step * distance)

            if self.distance.name == 'cosine':
                updated_weights /= np.linalg.norm(updated_weights, axis=0)

            self.weight[:, index_y] = updated_weights
            error += np.abs(distance).mean()

        return error / len(X_train)
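
The ``reduce_step_after`` and ``reduce_std_after`` schedules follow the
formula quoted in the docstring, ``value / (1 + epoch / reduce_after)``. A
small sketch of how the step shrinks over epochs (the ``decay`` helper below
mirrors that formula; it is not neupy's ``decay_function`` itself):

.. code-block:: python

    def decay(value, epoch, reduce_after):
        return value / (1.0 + epoch / reduce_after)

    step = 0.1
    for epoch in (0, 100, 200, 400):
        print(epoch, decay(step, epoch, reduce_after=100))
    # 0 0.1
    # 100 0.05
    # 200 0.0333...
    # 400 0.02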
Example 7
class CMAC(BaseNetwork):
    """
    Cerebellar Model Articulation Controller (CMAC) Network based on memory.

    Notes
    -----
    - Network always uses Mean Absolute Error (MAE).
    - Network works for multi-dimensional target values.

    Parameters
    ----------
    quantization : int
        Network transforms every input to a discrete value.
        The quantization value controls the number of total possible
        categories after quantization. Defaults to ``10``.

    associative_unit_size : int
        Number of associative blocks in memory, defaults to ``2``.

    {BaseNetwork.Parameters}

    Attributes
    ----------
    weight : dict
        Dictionary of network weights that contains the memorized patterns.

    Methods
    -------
    {BaseSkeleton.predict}

    train(input_train, target_train, input_test=None, target_test=None,\
    epochs=100, epsilon=None)
        Trains network. The training procedure can be controlled with
        the ``epochs`` and ``epsilon`` parameters.
        The ``input_test`` and ``target_test`` arguments have to be
        provided together when the network's training needs to be
        validated after each iteration.

    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy.algorithms import CMAC
    >>>
    >>> train_space = np.linspace(0, 2 * np.pi, 100)
    >>> test_space = np.linspace(np.pi, 2 * np.pi, 50)
    >>>
    >>> input_train = np.reshape(train_space, (100, 1))
    >>> input_test = np.reshape(test_space, (50, 1))
    >>>
    >>> target_train = np.sin(input_train)
    >>> target_test = np.sin(input_test)
    >>>
    >>> cmac = CMAC(
    ...     quantization=100,
    ...     associative_unit_size=32,
    ...     step=0.2,
    ... )
    ...
    >>> cmac.train(input_train, target_train, epochs=100)
    >>>
    >>> predicted_test = cmac.predict(input_test)
    >>> cmac.error(target_test, predicted_test)
    0.0023639417543036569
    """
    quantization = IntProperty(default=10, minval=1)
    associative_unit_size = IntProperty(default=2, minval=2)

    def __init__(self, **options):
        self.weight = {}
        super(CMAC, self).__init__(**options)

    def predict(self, input_data):
        input_data = format_data(input_data)

        get_memory_coords = self.get_memory_coords
        get_result_by_coords = self.get_result_by_coords
        predicted = []

        for input_sample in self.quantize(input_data):
            coords = get_memory_coords(input_sample)
            predicted.append(get_result_by_coords(coords))

        return np.array(predicted)

    def get_result_by_coords(self, coords):
        return sum(self.weight.setdefault(coord, 0)
                   for coord in coords) / self.associative_unit_size

    def get_memory_coords(self, quantized_value):
        assoc_unit_size = self.associative_unit_size

        for i in range(assoc_unit_size):
            point = ((quantized_value + i) / assoc_unit_size).astype(int)
            yield tuple(np.concatenate([point, [i]]))

    def quantize(self, input_data):
        return (input_data * self.quantization).astype(int)

    def train_epoch(self, input_train, target_train):
        get_memory_coords = self.get_memory_coords
        get_result_by_coords = self.get_result_by_coords
        weight = self.weight
        step = self.step

        n_samples = input_train.shape[0]
        quantized_input = self.quantize(input_train)
        errors = 0

        for input_sample, target_sample in zip(quantized_input, target_train):
            coords = list(get_memory_coords(input_sample))
            predicted = get_result_by_coords(coords)

            error = target_sample - predicted
            for coord in coords:
                weight[coord] += step * error

            errors += abs(error)

        return errors / n_samples

    def prediction_error(self, input_data, target_data):
        predicted = self.predict(input_data)
        return np.mean(np.abs(predicted - target_data))

    def train(self,
              input_train,
              target_train,
              input_test=None,
              target_test=None,
              epochs=100,
              epsilon=None,
              summary='table'):

        is_test_data_partially_missing = (
            (input_test is None and target_test is not None)
            or (input_test is not None and target_test is None))

        if is_test_data_partially_missing:
            raise ValueError("Input and target test samples must be "
                             "defined together or not at all.")

        input_train = format_data(input_train)
        target_train = format_data(target_train)

        if input_test is not None:
            input_test = format_data(input_test)

        if target_test is not None:
            target_test = format_data(target_test)

        return super(CMAC, self).train(input_train=input_train,
                                       target_train=target_train,
                                       input_test=input_test,
                                       target_test=target_test,
                                       epochs=epochs,
                                       epsilon=epsilon,
                                       summary=summary)
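
# A minimal sketch of how the CMAC class above addresses its memory:
# the input is quantized to an integer and each of the
# ``associative_unit_size`` shifted copies maps to one memory cell.
# It uses ``//`` where get_memory_coords uses float division followed
# by astype(int); the two are equivalent for non-negative values.
# Parameter values below are made up for illustration.
import numpy as np

quantization = 10
associative_unit_size = 2

def memory_coords(x):
    quantized = (np.atleast_1d(x) * quantization).astype(int)
    for i in range(associative_unit_size):
        point = (quantized + i) // associative_unit_size
        yield tuple(np.concatenate([point, [i]]))

# 0.37 quantizes to 3 and activates one memory cell per associative unit
print(list(memory_coords(0.37)))  # -> [(1, 0), (2, 1)]
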
Example n. 8
0
class RBFKMeans(StepSelectionBuiltIn, BaseNetwork):
    """
    Radial basis function K-means for clustering.

    Parameters
    ----------
    n_clusters : int
        Number of clusters.

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.epoch_end_signal}

    {BaseNetwork.train_end_signal}

    {Verbose.verbose}

    Attributes
    ----------
    centers : array-like with shape (n_clusters, n_features)
        Cluster centers.

    Methods
    -------
    train(input_train, epsilon=1e-5, epochs=100)
        Trains network.

    {BaseSkeleton.predict}

    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy.algorithms import RBFKMeans
    >>>
    >>> data = np.array([
    ...     [0.11, 0.20],
    ...     [0.25, 0.32],
    ...     [0.64, 0.60],
    ...     [0.12, 0.42],
    ...     [0.70, 0.73],
    ...     [0.30, 0.27],
    ...     [0.43, 0.81],
    ...     [0.44, 0.87],
    ...     [0.12, 0.92],
    ...     [0.56, 0.67],
    ...     [0.36, 0.35],
    ... ])
    >>> rbfk_net = RBFKMeans(n_clusters=2, verbose=False)
    >>> rbfk_net.train(data, epsilon=1e-5)
    >>> rbfk_net.centers
    array([[ 0.228     ,  0.312     ],
           [ 0.48166667,  0.76666667]])
    >>>
    >>> new_data = np.array([[0.1, 0.1], [0.9, 0.9]])
    >>> rbfk_net.predict(new_data)
    array([[ 0.],
           [ 1.]])
    """
    n_clusters = IntProperty(minval=2)
    step = WithdrawProperty()

    def __init__(self, **options):
        self.centers = None
        super(RBFKMeans, self).__init__(**options)

    def predict(self, input_data):
        input_data = format_data(input_data)

        centers = self.centers
        classes = np.zeros((input_data.shape[0], 1))

        for i, value in enumerate(input_data):
            classes[i] = np.argmin(norm(centers - value, axis=1))

        return classes

    def train_epoch(self, input_train, target_train):
        centers = self.centers
        old_centers = centers.copy()
        output_train = self.predict(input_train)

        for i, center in enumerate(centers):
            positions = np.argwhere(output_train[:, 0] == i)

            if not np.any(positions):
                continue

            class_data = np.take(input_train, positions, axis=0)
            centers[i, :] = (1 / len(class_data)) * np.sum(class_data, axis=0)

        return np.abs(old_centers - centers)

    def train(self, input_train, epsilon=1e-5, epochs=100):
        n_clusters = self.n_clusters
        input_train = format_data(input_train)
        n_samples = input_train.shape[0]

        if n_samples <= n_clusters:
            raise ValueError("Number of samples in the dataset must be "
                             "greater than the specified number of "
                             "clusters. Got {} samples, expected at least "
                             "{} (for {} clusters)"
                             "".format(n_samples, n_clusters + 1, n_clusters))

        self.centers = input_train[:n_clusters, :].copy()
        super(RBFKMeans, self).train(input_train,
                                     epsilon=epsilon,
                                     epochs=epochs)
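
# A minimal numpy sketch of the center update performed by train_epoch
# above: assign every sample to its nearest center, then move each
# center to the mean of its assigned samples. The data below is made up.
import numpy as np

data = np.array([[0.1, 0.2], [0.2, 0.3], [0.7, 0.8], [0.8, 0.9]])
centers = data[:2].copy()  # initialized from the first n_clusters samples

assignments = np.array([
    np.linalg.norm(centers - sample, axis=1).argmin()
    for sample in data
])

for i in range(len(centers)):
    members = data[assignments == i]
    if len(members):
        centers[i] = members.mean(axis=0)

print(centers)
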
Example n. 9
0
class BaseNetwork(BaseSkeleton):
    """
    Base class for Neural Network algorithms.

    Parameters
    ----------
    step : float
        Learning rate, defaults to ``0.1``.

    show_epoch : int
        This property controls how often the network will display
        information about training. It has to be defined as a positive
        integer. For instance, the number ``100`` means that the network
        shows a summary at the 1st, 100th, 200th, 300th, ... and last
        epochs.

        Defaults to ``1``.

    shuffle_data : bool
        If it's ``True``, then training data will be shuffled before
        the training. Defaults to ``False``.

    signals : dict, list or function
        Function or functions that will be triggered after certain
        events during the training.

    {Verbose.Parameters}

    Methods
    -------
    {BaseSkeleton.fit}

    predict(X)
        Propagates input ``X`` through the network and
        returns the produced output.

    plot_errors(logx=False, show=True, **figkwargs)
        Using errors collected during the training, this method
        generates a plot that can give additional insight into the
        performance reached during the training.

    Attributes
    ----------
    errors : list
        Information about errors. It has two main attributes, namely
        ``train`` and ``valid``. These attributes provide access to
        the training and validation errors respectively.

    last_epoch : int
        Value equal to the number of the last trained epoch. After
        initialization it is equal to ``0``.

    n_updates_made : int
        Number of training updates applied to the network.
    """
    step = NumberProperty(default=0.1, minval=0)
    show_epoch = IntProperty(minval=1, default=1)
    shuffle_data = Property(default=False, expected_type=bool)
    signals = Property(expected_type=object)

    def __init__(self, *args, **options):
        super(BaseNetwork, self).__init__(*args, **options)

        self.last_epoch = 0
        self.n_updates_made = 0
        self.errors = base_signals.ErrorCollector()

        signals = list(
            as_tuple(
                base_signals.ProgressbarSignal(),
                base_signals.PrintLastErrorSignal(),
                self.errors,
                self.signals,
            ))

        for i, signal in enumerate(signals):
            if inspect.isfunction(signal):
                signals[i] = base_signals.EpochEndSignal(signal)

            elif inspect.isclass(signal):
                signals[i] = signal()

        self.events = Events(network=self, signals=signals)

    def one_training_update(self, X_train, y_train=None):
        """
        Performs a single training update using one batch of data.

        Parameters
        ----------
        X_train : array-like
        y_train : array-like or None
        """
        raise NotImplementedError()

    def score(self, X_test, y_test):
        raise NotImplementedError()

    def plot_errors(self, logx=False, show=True, **figkwargs):
        return plot_optimizer_errors(optimizer=self,
                                     logx=logx,
                                     show=show,
                                     **figkwargs)

    def train(self,
              X_train,
              y_train=None,
              X_test=None,
              y_test=None,
              epochs=100,
              batch_size=None):
        """
        Trains the neural network.

        Parameters
        ----------
        X_train : array-like
        y_train : array-like or None
        X_test : array-like or None
        y_test : array-like or None

        epochs : int
            Defaults to ``100``.

        batch_size : int or None
            Defaults to ``None``.
        """
        if epochs <= 0:
            raise ValueError("Number of epochs needs to be a positive number")

        epochs = int(epochs)
        first_epoch = self.last_epoch + 1
        batch_size = batch_size or getattr(self, 'batch_size', None)

        self.events.trigger(
            name='train_start',
            X_train=X_train,
            y_train=y_train,
            epochs=epochs,
            batch_size=batch_size,
            store_data=False,
        )

        try:
            for epoch in range(first_epoch, first_epoch + epochs):
                self.events.trigger('epoch_start')

                self.last_epoch = epoch
                iterator = iters.minibatches(
                    (X_train, y_train),
                    batch_size,
                    self.shuffle_data,
                )

                for X_batch, y_batch in iterator:
                    self.events.trigger('update_start')
                    update_start_time = time.time()

                    train_error = self.one_training_update(X_batch, y_batch)
                    self.n_updates_made += 1

                    self.events.trigger(
                        name='train_error',
                        value=train_error,
                        eta=time.time() - update_start_time,
                        epoch=epoch,
                        n_updates=self.n_updates_made,
                        n_samples=iters.count_samples(X_batch),
                        store_data=True,
                    )
                    self.events.trigger('update_end')

                if X_test is not None:
                    test_start_time = time.time()
                    validation_error = self.score(X_test, y_test)
                    self.events.trigger(
                        name='valid_error',
                        value=validation_error,
                        eta=time.time() - test_start_time,
                        epoch=epoch,
                        n_updates=self.n_updates_made,
                        n_samples=iters.count_samples(X_test),
                        store_data=True,
                    )

                self.events.trigger('epoch_end')

        except StopTraining as err:
            self.logs.message(
                "TRAIN",
                "Epoch #{} was stopped. Message: {}".format(epoch, str(err)))

        self.events.trigger('train_end')
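
# A minimal sketch of the event-driven training loop implemented by
# BaseNetwork.train above: handlers subscribe to named events and the
# loop triggers them at fixed points. The Events class here is a
# simplified stand-in for illustration, not neupy's implementation.
class Events(object):
    def __init__(self):
        self.handlers = {}

    def on(self, name, handler):
        self.handlers.setdefault(name, []).append(handler)

    def trigger(self, name, **kwargs):
        for handler in self.handlers.get(name, []):
            handler(**kwargs)

def log_error(value, epoch):
    print(epoch, value)

events = Events()
events.on('train_error', log_error)

for epoch in range(1, 4):
    events.trigger('epoch_start')
    train_error = 1.0 / epoch  # placeholder for a real training update
    events.trigger('train_error', value=train_error, epoch=epoch)
    events.trigger('epoch_end')
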
Example n. 10
0
class RBM(BaseAlgorithm, BaseNetwork, MinibatchTrainingMixin, DumpableObject):
    """
    Boolean/Bernoulli Restricted Boltzmann Machine (RBM).
    Algorithm assumes that inputs are either binary
    values or values between 0 and 1.

    Parameters
    ----------
    n_visible : int
        Number of visible units. Number of features (columns)
        in the input data.

    n_hidden : int
        Number of hidden units. The larger the number, the more
        information the network can capture from the data, but it
        also means that the network is more likely to overfit.

    batch_size : int
        Size of the mini-batch. Defaults to ``10``.

    weight : array-like, TensorFlow variable, Initializer or scalar
        Default initialization methods
        you can find :ref:`here <init-methods>`.
        Defaults to :class:`Normal <neupy.init.Normal>`.

    hidden_bias : array-like, TensorFlow variable, Initializer or scalar
        Default initialization methods
        you can find :ref:`here <init-methods>`.
        Defaults to :class:`Constant(value=0) <neupy.init.Constant>`.

    visible_bias : array-like, TensorFlow variable, Initializer or scalar
        Default initialization methods
        you can find :ref:`here <init-methods>`.
        Defaults to :class:`Constant(value=0) <neupy.init.Constant>`.

    {BaseNetwork.Parameters}

    Methods
    -------
    train(input_train, epochs=100)
        Trains network.

    {BaseSkeleton.fit}

    visible_to_hidden(visible_input)
        Propagates data through the network and returns output
        from the hidden layer.

    hidden_to_visible(hidden_input)
        Propagates output from the hidden layer backward
        to the visible.

    gibbs_sampling(visible_input, n_iter=1)
        Performs Gibbs sampling ``n_iter`` times using the visible input.

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> data = np.array([
    ...     [1, 0, 1, 0],
    ...     [1, 0, 1, 0],
    ...     [1, 0, 0, 0],  # incomplete sample
    ...     [1, 0, 1, 0],
    ...
    ...     [0, 1, 0, 1],
    ...     [0, 0, 0, 1],  # incomplete sample
    ...     [0, 1, 0, 1],
    ...     [0, 1, 0, 1],
    ...     [0, 1, 0, 1],
    ...     [0, 1, 0, 1],
    ... ])
    >>>
    >>> rbm = algorithms.RBM(n_visible=4, n_hidden=1)
    >>> rbm.train(data, epochs=100)
    >>>
    >>> hidden_states = rbm.visible_to_hidden(data)
    >>> hidden_states.round(2)
    array([[ 0.99],
           [ 0.99],
           [ 0.95],
           [ 0.99],
           [ 0.  ],
           [ 0.01],
           [ 0.  ],
           [ 0.  ],
           [ 0.  ],
           [ 0.  ]])

    References
    ----------
    [1] G. Hinton, A Practical Guide to Training Restricted
        Boltzmann Machines, 2010.
        http://www.cs.toronto.edu/~hinton/absps/guideTR.pdf
    """
    n_visible = IntProperty(minval=1)
    n_hidden = IntProperty(minval=1)
    batch_size = IntProperty(minval=1, default=10)

    weight = ParameterProperty(default=init.Normal())
    hidden_bias = ParameterProperty(default=init.Constant(value=0))
    visible_bias = ParameterProperty(default=init.Constant(value=0))

    def __init__(self, n_visible, n_hidden, **options):
        options.update({'n_visible': n_visible, 'n_hidden': n_hidden})
        super(RBM, self).__init__(**options)

    def init_input_output_variables(self):
        with tf.variable_scope('rbm'):
            self.weight = create_shared_parameter(value=self.weight,
                                                  name='weight',
                                                  shape=(self.n_visible,
                                                         self.n_hidden))
            self.hidden_bias = create_shared_parameter(
                value=self.hidden_bias,
                name='hidden-bias',
                shape=(self.n_hidden, ),
            )
            self.visible_bias = create_shared_parameter(
                value=self.visible_bias,
                name='visible-bias',
                shape=(self.n_visible, ),
            )

            self.variables.update(
                network_input=tf.placeholder(
                    tf.float32,
                    (None, self.n_visible),
                    name="network-input",
                ),
                network_hidden_input=tf.placeholder(
                    tf.float32,
                    (None, self.n_hidden),
                    name="network-hidden-input",
                ),
            )

    def init_variables(self):
        with tf.variable_scope('rbm'):
            self.variables.update(
                h_samples=tf.Variable(
                    tf.zeros([self.batch_size, self.n_hidden]),
                    name="hidden-samples",
                    dtype=tf.float32,
                ),
            )

    def init_methods(self):
        def free_energy(visible_sample):
            with tf.name_scope('free-energy'):
                wx = tf.matmul(visible_sample, self.weight)
                wx_b = wx + self.hidden_bias

                visible_bias_term = dot(visible_sample, self.visible_bias)

                # We can get infinity when wx_b is a relatively large
                # number (around 100). Taking the exponent makes it even
                # larger and with float32 it can overflow to infinity.
                # But because the number is so large, the +1 inside the
                # logarithm makes no difference to the outcome, so we
                # can use the value as it is.
                hidden_terms = tf.where(
                    # exp(30) is such a big number that +1 won't
                    # make any difference in the outcome.
                    tf.greater(wx_b, 30),
                    wx_b,
                    tf.log1p(tf.exp(wx_b)),
                )

                hidden_term = tf.reduce_sum(hidden_terms, axis=1)
                return -(visible_bias_term + hidden_term)

        def visible_to_hidden(visible_sample):
            with tf.name_scope('visible-to-hidden'):
                wx = tf.matmul(visible_sample, self.weight)
                wx_b = wx + self.hidden_bias
                return tf.nn.sigmoid(wx_b)

        def hidden_to_visible(hidden_sample):
            with tf.name_scope('hidden-to-visible'):
                wx = tf.matmul(hidden_sample, self.weight, transpose_b=True)
                wx_b = wx + self.visible_bias
                return tf.nn.sigmoid(wx_b)

        def sample_hidden_from_visible(visible_sample):
            with tf.name_scope('sample-hidden-to-visible'):
                hidden_prob = visible_to_hidden(visible_sample)
                hidden_sample = random_binomial(hidden_prob)
                return hidden_sample

        def sample_visible_from_hidden(hidden_sample):
            with tf.name_scope('sample-visible-to-hidden'):
                visible_prob = hidden_to_visible(hidden_sample)
                visible_sample = random_binomial(visible_prob)
                return visible_sample

        network_input = self.variables.network_input
        network_hidden_input = self.variables.network_hidden_input
        input_shape = tf.shape(network_input)
        n_samples = input_shape[0]

        weight = self.weight
        h_bias = self.hidden_bias
        v_bias = self.visible_bias
        h_samples = self.variables.h_samples
        step = asfloat(self.step)

        with tf.name_scope('positive-values'):
            # We have to use `cond` instead of `where`, because
            # different if-else cases might have different shapes,
            # which triggers an exception in tensorflow.
            v_pos = tf.cond(
                tf.equal(n_samples, self.batch_size), lambda: network_input,
                lambda: random_sample(network_input, self.batch_size))
            h_pos = visible_to_hidden(v_pos)

        with tf.name_scope('negative-values'):
            v_neg = sample_visible_from_hidden(h_samples)
            h_neg = visible_to_hidden(v_neg)

        with tf.name_scope('weight-update'):
            weight_update = (
                tf.matmul(v_pos, h_pos, transpose_a=True) -
                tf.matmul(v_neg, h_neg, transpose_a=True)) / asfloat(n_samples)

        with tf.name_scope('hidden-bias-update'):
            h_bias_update = tf.reduce_mean(h_pos - h_neg, axis=0)

        with tf.name_scope('visible-bias-update'):
            v_bias_update = tf.reduce_mean(v_pos - v_neg, axis=0)

        with tf.name_scope('flipped-input-features'):
            # Each row will have random feature marked with number 1
            # Other values will be equal to 0
            possible_feature_corruptions = tf.eye(self.n_visible)
            corrupted_features = random_sample(possible_feature_corruptions,
                                               n_samples)

            rounded_input = tf.round(network_input)
            # If we scale input values from the [0, 1] range to [-1, 1],
            # then it will be easier to flip feature values with a
            # simple multiplication.
            scaled_rounded_input = 2 * rounded_input - 1
            scaled_flipped_rounded_input = (
                # In corrupted_features we convert 0 to 1 and 1 to -1,
                # so that after multiplication we flip the sign wherever
                # the transformed corrupted_features has -1.
                (-2 * corrupted_features + 1) * scaled_rounded_input)
            # Scale it back to the [0, 1] range
            flipped_rounded_input = (scaled_flipped_rounded_input + 1) / 2

        with tf.name_scope('pseudo-likelihood-loss'):
            # Stochastic pseudo-likelihood
            error = tf.reduce_mean(self.n_visible * tf.log_sigmoid(
                free_energy(flipped_rounded_input) -
                free_energy(rounded_input)))

        with tf.name_scope('gibbs-sampling'):
            gibbs_sampling = sample_visible_from_hidden(
                sample_hidden_from_visible(network_input))

        initialize_uninitialized_variables()
        self.methods.update(
            train_epoch=function(
                [network_input],
                error,
                name='rbm/train-epoch',
                updates=[
                    (weight, weight + step * weight_update),
                    (h_bias, h_bias + step * h_bias_update),
                    (v_bias, v_bias + step * v_bias_update),
                    (h_samples, random_binomial(p=h_neg)),
                ]),
            prediction_error=function(
                [network_input],
                error,
                name='rbm/prediction-error',
            ),
            diff1=function(
                [network_input],
                free_energy(flipped_rounded_input),
                name='rbm/diff1-error',
            ),
            diff2=function(
                [network_input],
                free_energy(rounded_input),
                name='rbm/diff2-error',
            ),
            visible_to_hidden=function(
                [network_input],
                visible_to_hidden(network_input),
                name='rbm/visible-to-hidden',
            ),
            hidden_to_visible=function(
                [network_hidden_input],
                hidden_to_visible(network_hidden_input),
                name='rbm/hidden-to-visible',
            ),
            gibbs_sampling=function(
                [network_input],
                gibbs_sampling,
                name='rbm/gibbs-sampling',
            ))

    def train(self, input_train, input_test=None, epochs=100, summary='table'):
        """
        Train RBM.

        Parameters
        ----------
        input_train : 1D or 2D array-like
        input_test : 1D or 2D array-like or None
            Defaults to ``None``.
        epochs : int
            Number of training epochs. Defaults to ``100``.
        summary : {'table', 'inline'}
            Training summary type. Defaults to ``'table'``.
        """
        return super(RBM, self).train(input_train=input_train,
                                      target_train=None,
                                      input_test=input_test,
                                      target_test=None,
                                      epochs=epochs,
                                      epsilon=None,
                                      summary=summary)

    def train_epoch(self, input_train, target_train=None):
        """
        Train one epoch.

        Parameters
        ----------
        input_train : array-like (n_samples, n_features)

        Returns
        -------
        float
        """
        errors = self.apply_batches(
            function=self.methods.train_epoch,
            input_data=input_train,
            description='Training batches',
            show_error_output=True,
        )

        n_samples = len(input_train)
        return average_batch_errors(errors, n_samples, self.batch_size)

    def visible_to_hidden(self, visible_input):
        """
        Propagates data through the network and returns output
        from the hidden layer.

        Parameters
        ----------
        visible_input : array-like (n_samples, n_visible_features)

        Returns
        -------
        array-like
        """
        is_input_feature1d = (self.n_visible == 1)
        visible_input = format_data(visible_input, is_input_feature1d)

        outputs = self.apply_batches(
            function=self.methods.visible_to_hidden,
            input_data=visible_input,
            description='Hidden from visible batches',
            show_progressbar=True,
            show_error_output=False,
            scalar_output=False,
        )
        return np.concatenate(outputs, axis=0)

    def hidden_to_visible(self, hidden_input):
        """
        Propagates output from the hidden layer backward
        to the visible.

        Parameters
        ----------
        hidden_input : array-like (n_samples, n_hidden_features)

        Returns
        -------
        array-like
        """
        is_input_feature1d = (self.n_hidden == 1)
        hidden_input = format_data(hidden_input, is_input_feature1d)

        outputs = self.apply_batches(
            function=self.methods.hidden_to_visible,
            input_data=hidden_input,
            description='Visible from hidden batches',
            show_progressbar=True,
            show_error_output=False,
            scalar_output=False,
        )
        return np.concatenate(outputs, axis=0)

    def prediction_error(self, input_data, target_data=None):
        """
        Compute the pseudo-likelihood of input samples.

        Parameters
        ----------
        input_data : array-like
            Values of the visible layer

        Returns
        -------
        float
            Value of the pseudo-likelihood.
        """
        is_input_feature1d = (self.n_visible == 1)
        input_data = format_data(input_data, is_input_feature1d)

        errors = self.apply_batches(
            function=self.methods.prediction_error,
            input_data=input_data,
            description='Validation batches',
            show_error_output=True,
        )
        return average_batch_errors(
            errors,
            n_samples=len(input_data),
            batch_size=self.batch_size,
        )

    def gibbs_sampling(self, visible_input, n_iter=1):
        """
        Performs Gibbs sampling n_iter times using the visible input.

        Parameters
        ----------
        visible_input : 1d or 2d array
        n_iter : int
            Number of Gibbs sampling iterations. Defaults to ``1``.

        Returns
        -------
        array-like
            Output from the visible units after performing n_iter
            Gibbs sampling iterations. Array will contain only binary
            units (0 and 1).
        """
        is_input_feature1d = (self.n_visible == 1)
        visible_input = format_data(visible_input, is_input_feature1d)

        gibbs_sampling = self.methods.gibbs_sampling

        input_ = visible_input
        for iteration in range(n_iter):
            input_ = gibbs_sampling(input_)

        return input_
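
# A minimal numpy sketch of the update built in init_methods above: a
# positive phase computed from the data, a negative phase computed from
# sampled hidden states, and the resulting parameter updates. The class
# above keeps persistent hidden samples between updates; this sketch
# resamples them from the positive phase (plain CD-1). Sizes and data
# below are made up.
import numpy as np

rng = np.random.RandomState(0)

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

n_visible, n_hidden, step = 4, 2, 0.1
v_pos = rng.binomial(1, 0.5, size=(10, n_visible)).astype(float)
weight = 0.01 * rng.randn(n_visible, n_hidden)
h_bias = np.zeros(n_hidden)
v_bias = np.zeros(n_visible)

# positive phase: hidden probabilities given the data
h_pos = sigmoid(v_pos.dot(weight) + h_bias)

# negative phase: sample hidden, reconstruct visible, recompute hidden
h_samples = rng.binomial(1, h_pos)
v_neg = rng.binomial(1, sigmoid(h_samples.dot(weight.T) + v_bias))
h_neg = sigmoid(v_neg.dot(weight) + h_bias)

# parameter updates averaged over the batch
n_samples = len(v_pos)
weight += step * (v_pos.T.dot(h_pos) - v_neg.T.dot(h_neg)) / n_samples
h_bias += step * (h_pos - h_neg).mean(axis=0)
v_bias += step * (v_pos - v_neg).mean(axis=0)
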
Example n. 11
0
class LocalResponseNorm(BaseLayer):
    """
    Local Response Normalization Layer.

    Aggregation is purely across channels, not within channels,
    and performed "pixelwise".

    If the value of the :math:`i` th channel is :math:`x_i`, the output is

    .. math::
        x_i = \\frac{{x_i}}{{ (k + ( \\alpha \\sum_j x_j^2 ))^\\beta }}

    where the summation is performed over this position on :math:`n`
    neighboring channels.

    Parameters
    ----------
    alpha : float
        coefficient, see equation above

    beta : float
        exponent, see equation above

    k : float
        offset, see equation above

    n : int
        Number of adjacent channels to normalize over, must be odd

    {BaseLayer.Parameters}

    Methods
    -------
    {BaseLayer.Methods}

    Attributes
    ----------
    {BaseLayer.Attributes}
    """
    alpha = NumberProperty(default=1e-4)
    beta = NumberProperty(default=0.75)
    k = NumberProperty(default=2)
    n = IntProperty(default=5)

    def __init__(self, **options):
        super(LocalResponseNorm, self).__init__(**options)

        if self.n % 2 == 0:
            raise ValueError("Only works with odd ``n``")

    def validate(self, input_shape):
        ndim = len(input_shape)

        if ndim != 3:
            raise LayerConnectionError(
                "Layer `{}` expected input with 3 dimensions, got {}"
                "".format(self, ndim))

    def output(self, input_value):
        if not self.input_shape:
            raise LayerConnectionError(
                "Layer `{}` doesn't have defined input shape. Probably "
                "it doesn't have an input layer.".format(self))

        half = self.n // 2
        squared_value = input_value ** 2

        n_samples = input_value.shape[0]
        channel = input_value.shape[1]
        height = input_value.shape[2]
        width = input_value.shape[3]

        zero = asfloat(0)
        extra_channels = T.alloc(zero, n_samples, channel + 2 * half,
                                 height, width)
        squared_value = T.set_subtensor(
            extra_channels[:, half:half + channel, :, :],
            squared_value
        )
        scale = self.k

        for i in range(self.n):
            scale += self.alpha * squared_value[:, i:i + channel, :, :]

        scale = scale ** self.beta
        return input_value / scale
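
# A minimal numpy sketch of the normalization computed by the layer
# above: for every position the squared activations of n neighbouring
# channels are summed and used to scale the value, following the
# equation in the docstring. Shapes and parameter values are made up.
import numpy as np

alpha, beta, k, n = 1e-4, 0.75, 2, 5
rng = np.random.RandomState(0)
x = rng.rand(1, 8, 5, 5)  # (samples, channels, height, width)

half = n // 2
n_channels = x.shape[1]
padded = np.zeros((1, n_channels + 2 * half, 5, 5))
padded[:, half:half + n_channels] = x ** 2

scale = k + alpha * sum(padded[:, i:i + n_channels] for i in range(n))
output = x / scale ** beta
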
Example n. 12
0
class LocalResponseNorm(BaseLayer):
    """
    Local Response Normalization Layer.

    Aggregation is purely across channels, not within channels,
    and performed "pixelwise".

    If the value of the :math:`i` th channel is :math:`x_i`, the output is

    .. math::
        x_i = \\frac{{x_i}}{{ (k + ( \\alpha \\sum_j x_j^2 ))^\\beta }}

    where the summation is performed over this position on :math:`n`
    neighboring channels.

    Parameters
    ----------
    alpha : float
        Coefficient, see equation above

    beta : float
        Exponent, see equation above

    k : float
        Offset, see equation above

    depth_radius : int
        Number of adjacent channels to normalize over, must be odd.

    {BaseLayer.Parameters}

    Methods
    -------
    {BaseLayer.Methods}

    Attributes
    ----------
    {BaseLayer.Attributes}
    """
    alpha = NumberProperty(default=1e-4)
    beta = NumberProperty(default=0.75)
    k = NumberProperty(default=2)
    depth_radius = IntProperty(default=5)

    def __init__(self, **options):
        super(LocalResponseNorm, self).__init__(**options)

        if self.depth_radius % 2 == 0:
            raise ValueError("Only works with odd ``n``")

    def validate(self, input_shape):
        ndim = len(input_shape)

        if ndim != 3:
            raise LayerConnectionError(
                "Layer `{}` expected input with 3 dimensions, got {}"
                "".format(self, ndim))

    def output(self, input_value):
        return tf.nn.local_response_normalization(
            input_value,
            depth_radius=self.depth_radius,
            bias=self.k,
            alpha=self.alpha,
            beta=self.beta,
        )
Example n. 13
0
class Embedding(BaseLayer):
    """
    Embedding layer accepts indices as an input and returns
    rows from the weight matrix associated with these indices.
    It's useful when inputs are categorical features or for
    word embedding tasks.

    Parameters
    ----------
    input_size : int
        Layer's input vector dimension. It's typically associated with
        the number of categories or unique words in the input vector.

    output_size : int
        Layer's output vector dimension.

    weight : array-like, TensorFlow variable, scalar or Initializer
        Defines layer's weights. Default initialization methods
        you can find :ref:`here <init-methods>`.
        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.

    {BaseLayer.name}

    Methods
    -------
    {BaseLayer.Methods}

    Attributes
    ----------
    {BaseLayer.Attributes}

    Examples
    --------

    This example converts a dataset that has only categorical
    variables into a format that is suitable for the Embedding layer.

    >>> import numpy as np
    >>> from neupy.layers import *
    >>>
    >>> dataset = np.array([
    ...     ['cold', 'high'],
    ...     ['hot',  'low'],
    ...     ['cold', 'low'],
    ...     ['hot',  'low'],
    ... ])
    >>>
    >>> unique_value, dataset_indices = np.unique(
    ...     dataset, return_inverse=True
    ... )
    >>> dataset_indices = dataset_indices.reshape((4, 2))
    >>> dataset_indices
    array([[0, 1],
           [2, 3],
           [0, 3],
           [2, 3]])
    >>>
    >>> n_features = dataset.shape[1]
    >>> n_unique_categories = len(unique_value)
    >>> embedded_size = 1
    >>>
    >>> network = join(
    ...     Input(n_features),
    ...     Embedding(n_unique_categories, embedded_size),
    ...     # Output from the embedding layer is 3D
    ...     # To make output 2D we need to reshape dimensions
    ...     Reshape(),
    ... )
    """
    input_size = IntProperty(minval=1)
    output_size = IntProperty(minval=1)
    weight = ParameterProperty()

    def __init__(self, input_size, output_size,
                 weight=init.HeNormal(), name=None):

        super(Embedding, self).__init__(name=name)

        self.input_size = input_size
        self.output_size = output_size
        self.weight = weight

    def get_output_shape(self, input_shape):
        input_shape = tf.TensorShape(input_shape)
        return input_shape.concatenate(self.output_size)

    def create_variables(self, input_shape):
        self.input_shape = input_shape
        self.weight = self.variable(
            value=self.weight, name='weight',
            shape=as_tuple(self.input_size, self.output_size))

    def output(self, input_value, **kwargs):
        input_value = tf.cast(input_value, tf.int32)
        return tf.gather(self.weight, input_value)

    def __repr__(self):
        return self._repr_arguments(
            self.input_size,
            self.output_size,
            name=self.name,
            weight=self.weight,
        )
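
# A minimal numpy sketch of what the Embedding layer above computes:
# tf.gather simply picks rows of the weight matrix by index, turning an
# integer input of shape (batch, n_features) into an output of shape
# (batch, n_features, output_size). The values below are made up.
import numpy as np

n_categories, output_size = 4, 3
rng = np.random.RandomState(0)
weight = rng.randn(n_categories, output_size)

indices = np.array([[0, 1], [2, 3]])  # (batch, n_features)
embedded = weight[indices]            # (batch, n_features, output_size)
print(embedded.shape)                 # -> (2, 2, 3)
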
Example n. 14
0
class ART1(BaseNetwork):
    """ Adaptive Resonance Theory (ART1) Network for binary
    data clustering.

    Notes
    -----
    * Weights are not random, so the result will always be reproducible.

    Parameters
    ----------
    rho : float
        Controls the reset action during the training process. Value
        must be between ``0`` and ``1``, defaults to ``0.5``.
    n_clusters : int
        Number of clusters, defaults to ``2``. Min value is also ``2``.
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}

    Methods
    -------
    train(input_data):
        Network will train until it clusters all samples.
    {BaseSkeleton.predict}
    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> data = np.array([
    ...     [0, 1, 0],
    ...     [1, 0, 0],
    ...     [1, 1, 0],
    ... ])
    >>>
    >>> artnet = algorithms.ART1(
    ...     step=2,
    ...     rho=0.7,
    ...     n_clusters=2,
    ...     verbose=False
    ... )
    >>> artnet.predict(data)
    array([ 0.,  1.,  1.])
    """
    rho = ProperFractionProperty(default=0.5)
    n_clusters = IntProperty(default=2, minval=2)

    def train(self, input_data):
        input_data = format_data(input_data)

        if input_data.ndim != 2:
            raise ValueError("Input value must be 2 dimentional, got "
                             "{0}".format(input_data.ndim))

        data_size = input_data.shape[1]
        n_clusters = self.n_clusters
        step = self.step
        rho = self.rho

        if list(sort(unique(input_data))) != [0, 1]:
            raise ValueError("ART1 Network works only with binary matrix, "
                             "all matix must contains only 0 and 1")

        if not hasattr(self, 'weight_21'):
            self.weight_21 = ones((data_size, n_clusters))

        if not hasattr(self, 'weight_12'):
            self.weight_12 = step / (step + n_clusters - 1) * self.weight_21.T

        weight_21 = self.weight_21
        weight_12 = self.weight_12

        if data_size != weight_21.shape[0]:
            raise ValueError(
                "Invalid data dimension. Got a dataset with {} columns, "
                "expected {} columns".format(data_size, weight_21.shape[0]))

        classes = zeros(input_data.shape[0])

        # Train network
        for i, p in enumerate(input_data):
            disabled_neurons = []
            reset_values = []
            reset = True

            while reset:
                output1 = p
                input2 = dot(weight_12, output1.T)

                output2 = zeros(input2.size)
                input2[disabled_neurons] = -inf
                winner_index = input2.argmax()
                output2[winner_index] = 1

                expectation = dot(weight_21, output2)
                output1 = logical_and(p, expectation).astype(int)

                reset_value = dot(output1.T, output1) / dot(p.T, p)
                reset = reset_value < rho

                if reset:
                    disabled_neurons.append(winner_index)
                    reset_values.append((reset_value, winner_index))

                if len(disabled_neurons) >= n_clusters:
                    # We only get here after testing all possible clusters
                    reset = False
                    winner_index = None

                if not reset:
                    if winner_index is not None:
                        weight_12[winner_index, :] = (step * output1) / (
                            step + dot(output1.T, output1) - 1)
                        weight_21[:, winner_index] = output1
                    else:
                        # Pick the cluster with the best reset value
                        winner_index = max(reset_values)[1]

                    classes[i] = winner_index

        return classes

    def predict(self, input_data):
        return self.train(input_data)
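
# A minimal numpy sketch of the vigilance test used in ART1.train above:
# the winning cluster is accepted only when the overlap between the
# input and the cluster's expectation, relative to the input's own
# activity, reaches the vigilance parameter rho. Values are made up.
import numpy as np

rho = 0.7
p = np.array([1, 1, 0, 1])             # binary input sample
expectation = np.array([1, 0, 0, 1])   # top-down weights of the winner

output1 = np.logical_and(p, expectation).astype(int)
reset_value = float(output1.dot(output1)) / p.dot(p)

print(reset_value)         # -> 0.666... (2 of 3 active bits match)
print(reset_value < rho)   # -> True: reset and try another cluster
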
Example n. 15
0
class Concatenate(BaseLayer):
    """
    Concatenate multiple input layers in one based on the
    specified axes.

    Parameters
    ----------
    axis : int
        The axis along which the inputs will be joined.
        Default is ``1``.

    {BaseLayer.Parameters}

    Methods
    -------
    {BaseLayer.Methods}

    Attributes
    ----------
    {BaseLayer.Attributes}

    Examples
    --------
    >>> from neupy import layers
    >>>
    >>> input_1 = layers.Input(10)
    >>> input_2 = layers.Input(20)
    >>>
    >>> network = [input_1, input_2] > layers.Concatenate()
    >>>
    >>> network.input_shape
    [(10,), (20,)]
    >>> network.output_shape
    (30,)
    """
    axis = IntProperty(default=1)

    def validate(self, input_shapes):
        valid_shape = as_tuple(None, input_shapes[0])

        for input_shape in input_shapes[1:]:
            for axis, axis_size in enumerate(input_shape, start=1):
                if axis != self.axis and valid_shape[axis] != axis_size:
                    raise LayerConnectionError(
                        "Cannot concatenate layers. Some of them don't "
                        "match over dimension #{} (0-based indeces)."
                        "".format(axis)
                    )

    @property
    def output_shape(self):
        if not self.input_shape:
            return

        axis = self.axis - 1  # because we do not include #0 dim
        input_shapes = copy.copy(self.input_shape)
        output_shape = list(input_shapes.pop(0))

        for input_shape in input_shapes:
            output_shape[axis] += input_shape[axis]

        return tuple(output_shape)

    def output(self, *input_values):
        return T.concatenate(input_values, axis=self.axis)
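
# A minimal numpy sketch of the shape arithmetic done by Concatenate
# above: all dimensions except the concatenation axis must match, and
# the output size along that axis is the sum of the input sizes.
import numpy as np

left = np.zeros((2, 10))
right = np.zeros((2, 20))

joined = np.concatenate([left, right], axis=1)
print(joined.shape)  # -> (2, 30)
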
Example n. 16
0
class Oja(BaseNetwork):
    """
    Oja is an unsupervised technique used for the
    dimensionality reduction tasks.

    Notes
    -----
    - In practice, use a very small value for the step.
      For instance, ``1e-7`` can be a good choice.

    - Normalize the input data before using the Oja algorithm.
      Input data shouldn't contain large values.

    - Set smaller values for the weights if the error during the
      first few iterations is big compared to the scale of the input
      values. For instance, if the input data has values between
      ``0`` and ``1``, an error equal to ``100`` is big.

    - During the training the network reports the mean
      absolute error (MAE).

    Parameters
    ----------
    minimized_data_size : int
        Expected number of features after minimization,
        defaults to ``1``.

    weight : array-like or ``None``
        Defines network's weights.
        Defaults to :class:`XavierNormal() <neupy.init.XavierNormal>`.

    {BaseNetwork.Parameters}

    Methods
    -------
    reconstruct(X)
        Reconstruct original dataset from the minimized input.

    train(X, epochs=100)
        Trains the network on the data ``X``. The network trains until
        the maximum number of ``epochs`` is reached.

    predict(X)
        Returns hidden representation of the input data ``X``. Basically,
        it applies dimensionality reduction.

    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> data = np.array([[2, 2], [1, 1], [4, 4], [5, 5]])
    >>>
    >>> ojanet = algorithms.Oja(
    ...     minimized_data_size=1,
    ...     step=0.01,
    ...     verbose=False
    ... )
    >>>
    >>> ojanet.train(data, epochs=100)
    >>> minimized = ojanet.predict(data)
    >>> minimized
    array([[-2.82843122],
           [-1.41421561],
           [-5.65686243],
           [-7.07107804]])
    >>> ojanet.reconstruct(minimized)
    array([[ 2.00000046,  2.00000046],
           [ 1.00000023,  1.00000023],
           [ 4.00000093,  4.00000093],
           [ 5.00000116,  5.00000116]])
    """
    minimized_data_size = IntProperty(minval=1)
    weight = ParameterProperty(default=init.XavierNormal())

    def one_training_update(self, X, y_train):
        weight = self.weight

        minimized = np.dot(X, weight)
        reconstruct = np.dot(minimized, weight.T)
        error = X - reconstruct

        weight += self.step * np.dot(error.T, minimized)
        mae = np.sum(np.abs(error)) / X.size

        # Clean objects from the memory
        del minimized
        del reconstruct
        del error

        return mae

    def train(self, X, epochs=100):
        X = format_data(X)
        n_input_features = X.shape[1]

        if isinstance(self.weight, init.Initializer):
            weight_shape = (n_input_features, self.minimized_data_size)
            self.weight = self.weight.sample(weight_shape, return_array=True)

        if n_input_features != self.weight.shape[0]:
            raise ValueError("Invalid number of features. Expected {}, got {}"
                             "".format(self.weight.shape[0], n_input_features))

        super(Oja, self).train(X, epochs=epochs)

    def reconstruct(self, X):
        if not isinstance(self.weight, np.ndarray):
            raise NotTrained("Network hasn't been trained yet")

        X = format_data(X)
        if X.shape[1] != self.minimized_data_size:
            raise ValueError("Invalid input data feature space, expected "
                             "{}, got {}.".format(X.shape[1],
                                                  self.minimized_data_size))

        return np.dot(X, self.weight.T)

    def predict(self, X):
        if not isinstance(self.weight, np.ndarray):
            raise NotTrained("Network hasn't been trained yet")

        X = format_data(X)
        return np.dot(X, self.weight)
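
# A minimal numpy sketch of the update performed by one_training_update
# above: project the data, reconstruct it from the projection, and move
# the weights along the reconstruction error. Data and the step value
# below are made up.
import numpy as np

X = np.array([[2.0, 2.0], [1.0, 1.0], [4.0, 4.0]])
weight = np.array([[0.3], [0.4]])
step = 0.01

minimized = X.dot(weight)              # (n_samples, 1) projection
reconstruct = minimized.dot(weight.T)  # back to the input space
error = X - reconstruct

weight += step * error.T.dot(minimized)
mae = np.abs(error).sum() / X.size
print(mae)
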
Example n. 17
0
class SOFM(Kohonen):
    """ Self-Organizing Feature Map.

    Parameters
    ----------
    learning_radius : int
        Learning radius.
    features_grid : tuple
        Shape of the output neuron grid. The number of grid elements
        must match the number of output neurons.
    transform : {{'linear', 'euclid', 'cos'}}
        Transformation operation applied to the input layer.
        The ``linear`` value means that input data will be multiplied
        by the weights in the typical way. The ``euclid`` method will
        identify the weight vector closest to the input one. The
        ``cos`` method does the same as ``euclid``, but uses cosine
        similarity instead of the euclidean distance.
        Defaults to ``linear``.
    {BaseAssociative.n_inputs}
    {BaseAssociative.n_outputs}
    {BaseAssociative.weight}
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}
    {BaseAssociative.train}
    {BaseSkeleton.fit}
    """

    learning_radius = IntProperty(default=0, minval=0)
    features_grid = TypedListProperty()
    transform = ChoiceProperty(default='linear',
                               choices={
                                   'linear': dot_product,
                                   'euclid': neg_euclid_distance,
                                   'cos': cosine_similarity,
                               })

    def __init__(self, **options):
        super(SOFM, self).__init__(**options)

        invalid_feature_grid = (self.features_grid is not None
                                and mul(*self.features_grid) != self.n_outputs)
        if invalid_feature_grid:
            raise ValueError(
                "Feature grid should contain the same number of elements as "
                "in the output layer: {0}, but found: {1} ({2}x{3})"
                "".format(self.n_outputs, mul(*self.features_grid),
                          self.features_grid[0], self.features_grid[1]))

    def init_properties(self):
        super(SOFM, self).init_properties()

        if self.features_grid is None:
            self.features_grid = (self.n_outputs, 1)

    def predict_raw(self, input_data):
        input_data = format_data(input_data)
        output = np.zeros((input_data.shape[0], self.n_outputs))
        for i, input_row in enumerate(input_data):
            output[i, :] = self.transform(input_row.reshape(1, -1),
                                          self.weight)
        return output

    def update_indexes(self, layer_output):
        neuron_winner = layer_output.argmax(axis=1)
        feature_bound = self.features_grid[1]

        output_with_neighbours = neuron_neighbours(
            np.reshape(layer_output, self.features_grid),
            (neuron_winner // feature_bound, neuron_winner % feature_bound),
            self.learning_radius)
        index_y, _ = np.nonzero(
            np.reshape(output_with_neighbours, (self.n_outputs, 1)))
        return index_y
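
# A minimal sketch of the index arithmetic in update_indexes above: the
# flat index of the winning neuron is converted into 2D grid coordinates
# with integer division and modulo, which is equivalent to using
# np.unravel_index. The grid size below is made up.
import numpy as np

features_grid = (3, 4)
neuron_winner = 7  # flat index of the winner in the output layer

feature_bound = features_grid[1]
coords = (neuron_winner // feature_bound, neuron_winner % feature_bound)
print(coords)                                          # -> (1, 3)
print(np.unravel_index(neuron_winner, features_grid))  # -> (1, 3)
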
Example n. 18
0
class PNN(BaseSkeleton):
    """
    Probabilistic Neural Network (PNN). Network applies only to
    the classification problems.

    Notes
    -----
    - PNN is sensitive to cases where one input feature has much
      larger values than another. Input data has to be normalized
      before training.

    - Standard deviation has to match the range of the input features.
      Check the ``std`` parameter description for more information.

    - The bigger the training dataset, the slower the prediction.
      The algorithm is much more efficient for small datasets.

    - Network uses lazy learning, which means that the network doesn't
      need iterative training. It just stores parameters
      and uses them to make predictions.

    Parameters
    ----------
    std : float
        Standard deviation for the Probability Density Function (PDF).
        If the input features have large values, then the standard
        deviation should also be large. For instance, if the input
        features are in the range ``[0, 20]``, then the standard
        deviation should also be a large value, like ``10`` or ``15``.
        Values that are too small will lead to bad predictions.

    batch_size : int or None
        Sets up the mini-batch size. The ``None`` value will ensure that all data
        samples will be propagated through the network at once.
        Defaults to ``128``.

    {Verbose.verbose}

    Methods
    -------
    train(X_train, y_train, copy=True)
        Network just stores all the information about the data and uses
        it for the prediction. The ``copy`` parameter copies the input
        data before saving it inside the network.

        The ``y_train`` argument should be a vector or
        matrix with one feature column.

    predict(X)
        Return classes associated with each sample in the ``X``.

    predict_proba(X)
        Predict probabilities for each class.

    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>>
    >>> from sklearn import datasets, metrics
    >>> from sklearn.model_selection import train_test_split
    >>> from neupy import algorithms
    >>>
    >>> dataset = datasets.load_digits()
    >>> x_train, x_test, y_train, y_test = train_test_split(
    ...     dataset.data, dataset.target, test_size=0.3
    ... )
    >>>
    >>> pnn = algorithms.PNN(std=10, verbose=False)
    >>> pnn.train(x_train, y_train)
    >>>
    >>> y_predicted = pnn.predict(x_test)
    >>> metrics.accuracy_score(y_test, y_predicted)
    0.98888888888888893
    """
    std = BoundedProperty(minval=0)
    batch_size = IntProperty(default=128, minval=0, allow_none=True)

    def __init__(self, std, batch_size=128, verbose=False):
        self.std = std
        self.batch_size = batch_size

        self.classes = None
        self.X_train = None
        self.y_train = None

        super(PNN, self).__init__(batch_size=batch_size, verbose=verbose)

    def train(self, X_train, y_train, copy=True):
        """
        Trains network. PNN doesn't actually train, it just stores
        input data and uses it for prediction.

        Parameters
        ----------
        X_train : array-like (n_samples, n_features)

        y_train : array-like (n_samples,)
            Target variable should be vector or matrix
            with one feature column.

        copy : bool
            If the value is ``True``, then input matrices will
            be copied. Defaults to ``True``.

        Raises
        ------
        ValueError
            In case something is wrong with the input data.
        """
        X_train = format_data(X_train, copy=copy)
        y_train = format_data(y_train, copy=copy, make_float=False)

        self.X_train = X_train
        self.y_train = y_train

        if X_train.shape[0] != y_train.shape[0]:
            raise ValueError(
                "Number of samples in the input and "
                "target datasets are different")

        if y_train.shape[1] != 1:
            raise ValueError(
                "Target value should be vector or "
                "matrix with only one column")

        classes = self.classes = np.unique(y_train)
        n_classes = classes.size
        n_samples = X_train.shape[0]

        class_ratios = self.class_ratios = np.zeros(n_classes)
        row_comb_matrix = self.row_comb_matrix = np.zeros(
            (n_classes, n_samples))

        for i, class_name in enumerate(classes):
            class_val_positions = (y_train == class_name)
            row_comb_matrix[i, class_val_positions.ravel()] = 1
            class_ratios[i] = np.sum(class_val_positions)

    def predict_proba(self, X):
        """
        Predict probabilities for each class.

        Parameters
        ----------
        X : array-like (n_samples, n_features)

        Returns
        -------
        array-like (n_samples, n_classes)
        """
        outputs = iters.apply_batches(
            function=self.predict_raw,
            inputs=format_data(X),
            batch_size=self.batch_size,
            show_progressbar=self.logs.enable,
        )
        raw_output = np.concatenate(outputs, axis=1)

        total_output_sum = raw_output.sum(axis=0).reshape((-1, 1))
        return raw_output.T / total_output_sum

    def predict_raw(self, X):
        """
        Raw prediction.

        Parameters
        ----------
        X : array-like (n_samples, n_features)

        Raises
        ------
        NotTrained
            If network hasn't been trained.

        ValueError
            In case something is wrong with the input data.

        Returns
        -------
        array-like (n_samples, n_classes)
        """
        if self.classes is None:
            raise NotTrained(
                "Cannot make a prediction. Network hasn't been trained yet")

        if X.shape[1] != self.X_train.shape[1]:
            raise ValueError(
                "Input data must contain {0} features, got {1}"
                "".format(self.X_train.shape[1],  X.shape[1]))

        class_ratios = self.class_ratios.reshape((-1, 1))
        pdf_outputs = pdf_between_data(self.X_train, X, self.std)

        return np.dot(self.row_comb_matrix, pdf_outputs) / class_ratios

    def predict(self, X):
        """
        Predicts class from the input data.

        Parameters
        ----------
        X : array-like (n_samples, n_features)

        Returns
        -------
        array-like (n_samples,)
        """
        outputs = iters.apply_batches(
            function=self.predict_raw,
            inputs=format_data(X),
            batch_size=self.batch_size,
            show_progressbar=self.logs.enable,
        )

        raw_output = np.concatenate(outputs, axis=1)
        return self.classes[raw_output.argmax(axis=0)]
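
A minimal usage sketch for the PNN methods above, assuming neupy's
``algorithms.PNN`` constructor with an ``std`` smoothing parameter:

import numpy as np
from neupy import algorithms

X_train = np.array([[0.1, 0.2], [0.3, 0.1], [0.8, 0.9], [0.9, 0.8]])
y_train = np.array([0, 0, 1, 1])

pnn = algorithms.PNN(std=0.1, verbose=False)
pnn.train(X_train, y_train)       # only stores the data, as noted above

X_test = np.array([[0.2, 0.2], [0.85, 0.85]])
print(pnn.predict(X_test))        # predicted class labels
print(pnn.predict_proba(X_test))  # (n_samples, n_classes) probabilities
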
Example n. 19
File: base.py Project: PranY/neupy
class BaseLayer(with_metaclass(LayerMeta, ChainConnection, BaseConfigurable)):
    """ Base class for all layers.

    Parameters
    ----------
    {layer_params}
    """
    __layer_params = """input_size : int
        Layer input size.
    weight : 2D array-like or None
        Defines layer weights. `None` means that the weights will be
        generated randomly based on the `init_method` property.
        `None` by default.
    init_method : {'gauss', 'bounded', 'ortho'}
        Weight initialization method.
        `gauss` generates random weights from the standard
        normal distribution.
        `bounded` generates uniform random weights within the given bounds.
        `ortho` generates a random orthogonal matrix.
    random_weight_bound : tuple of two int
        Available only when `init_method` is equal to `bounded`; defaults
        to `(0, 1)`.
    """
    shared_docs = {'layer_params': __layer_params}

    input_size = IntProperty()
    weight = ArrayProperty(default=None)
    random_weight_bound = NumberBoundProperty(default=(0, 1))
    init_method = ChoiceProperty(default=GAUSSIAN,
                                 choices=[GAUSSIAN, BOUNDED, ORTHOGONAL])

    def __init__(self, input_size, **options):
        super(BaseLayer, self).__init__()

        self.input_size = input_size
        self.use_bias = False

        # Default variables which will change after initialization
        self.relate_to_layer = None
        self.size = None

        # If a plain function is assigned as a class attribute, Python
        # treats it as a method and will call it with `self` as the
        # first parameter.
        if hasattr(self.__class__, 'activation_function'):
            self.activation_function = self.__class__.activation_function

        # Initialize default options
        BaseConfigurable.__init__(self, **options)

    def relate_to(self, right_layer):
        self.relate_to_layer = right_layer

    def initialize(self, with_bias=False):
        self.use_bias = with_bias
        size = self.input_size + self.use_bias
        self.size = (size, self.relate_to_layer.input_size)
        self.weight = self._init_weight()

    # --------------- Weights manipulations --------------- #

    def _init_weight(self):
        if self.weight is not None:
            return self.weight

        init_method = self.init_method

        if init_method == GAUSSIAN:
            return randn(*self.size)

        elif init_method == BOUNDED:
            return random_bounded(self.size, *self.random_weight_bound)

        elif init_method == ORTHOGONAL:
            return random_orthogonal(self.size)

    @property
    def weight_without_bias(self):
        if self.use_bias:
            return self.weight[1:, :]
        return self.weight

    # --------------- Layer operations --------------- #

    def summator(self, input_value):
        return dot(input_value, self.weight)

    def output(self, input_value):
        input_data = self.preformat_input(input_value)
        summated = self.summator(input_data)
        return self.activation_function(summated)

    def preformat_input(self, input_data):
        if self.use_bias:
            input_data = add_bias_column(input_data)
        return input_data

    def __repr__(self):
        return '{name}({size})'.format(name=self.__class__.__name__,
                                       size=self.input_size)
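
A standalone numpy sketch of the three initialization strategies that
``_init_weight`` dispatches above; ``init_weight`` here is an
illustrative stand-in, not the library's ``randn``/``random_bounded``/
``random_orthogonal`` helpers:

import numpy as np

def init_weight(size, method='gauss', bounds=(0, 1)):
    rows, cols = size
    if method == 'gauss':
        # Standard normal distribution
        return np.random.randn(rows, cols)
    if method == 'bounded':
        # Uniform distribution inside the given bounds
        low, high = bounds
        return np.random.uniform(low, high, size)
    if method == 'ortho':
        # Random orthogonal matrix via QR (assumes rows >= cols)
        q, _ = np.linalg.qr(np.random.randn(rows, cols))
        return q
    raise ValueError("Unknown init method: {}".format(method))

w = init_weight((4, 3), method='ortho')
print(np.allclose(w.T.dot(w), np.eye(3)))  # True: orthonormal columns
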
Example n. 20
class BaseAssociative(BaseNetwork):
    """
    Base class for associative learning.

    Parameters
    ----------
    n_inputs : int
        Number of features (columns) in the input data.

    n_outputs : int
        Number of outputs in the network.

    weight : array-like, Initializer
        Neural network weights.
        A manually defined value should have shape ``(n_inputs, n_outputs)``.
        Defaults to :class:`Normal() <neupy.init.Normal>`.

    {BaseNetwork.step}

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.epoch_end_signal}

    {BaseNetwork.train_end_signal}

    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}

    train(input_train, summary='table', epochs=100)
        Train neural network.

    {BaseSkeleton.fit}
    """
    n_inputs = IntProperty(minval=1, required=True)
    n_outputs = IntProperty(minval=1, required=True)
    weight = ParameterProperty(default=init.Normal())

    def __init__(self, **options):
        super(BaseAssociative, self).__init__(**options)
        self.init_layers()

    def init_layers(self):
        valid_weight_shape = (self.n_inputs, self.n_outputs)

        if isinstance(self.weight, init.Initializer):
            self.weight = self.weight.sample(
                valid_weight_shape, return_array=True)

        if self.weight.shape != valid_weight_shape:
            raise ValueError(
                "Weight matrix has invalid shape. Got {}, expected {}"
                "".format(self.weight.shape, valid_weight_shape))

        self.weight = self.weight.astype(float)

    def format_input_data(self, input_data):
        is_feature1d = self.n_inputs == 1
        input_data = format_data(input_data, is_feature1d)

        if input_data.ndim != 2:
            raise ValueError("Cannot make prediction, because input "
                             "data has more than 2 dimensions")

        n_samples, n_features = input_data.shape

        if n_features != self.n_inputs:
            raise ValueError("Input data expected to have {} features, "
                             "but got {}".format(self.n_inputs, n_features))

        return input_data

    def train(self, input_train, summary='table', epochs=100):
        input_train = self.format_input_data(input_train)

        return super(BaseAssociative, self).train(
            input_train=input_train, target_train=None,
            input_test=None, target_test=None,
            epochs=epochs, epsilon=None,
            summary=summary)
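
The ``weight`` option above accepts either a ready numpy array or an
``init.Initializer``; a small sketch of how ``init_layers`` resolves
the two cases, reusing the same calls that appear in the source:

import numpy as np
from neupy import init

n_inputs, n_outputs = 3, 2
weight = init.Normal()

# Sample only when an Initializer was given, then validate the
# final shape, mirroring `init_layers` above
if isinstance(weight, init.Initializer):
    weight = weight.sample((n_inputs, n_outputs), return_array=True)

assert weight.shape == (n_inputs, n_outputs)
print(weight.astype(float).shape)  # (3, 2)
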
Example n. 21
class LSTM(BaseRNNLayer):
    """
    Long Short Term Memory (LSTM) Layer.

    Parameters
    ----------
    {BaseRNNLayer.size}

    weights : dict or Initializer
        Weight parameters for different gates.
        Defaults to :class:`XavierUniform() <neupy.init.XavierUniform>`.

        - If the application requires the same initialization method
          for all weights, then it's possible to specify a single
          initialization method that will be automatically applied
          to all weight parameters in the LSTM layer.

          .. code-block:: python

              layers.LSTM(2, weights=init.Normal(0.1))

        - If the application requires different initialization
          values for different weights, then it's possible to specify
          each weight by name.

          .. code-block:: python

              dict(
                  weight_in_to_ingate=init.XavierUniform(),
                  weight_hid_to_ingate=init.XavierUniform(),
                  weight_cell_to_ingate=init.XavierUniform(),

                  weight_in_to_forgetgate=init.XavierUniform(),
                  weight_hid_to_forgetgate=init.XavierUniform(),
                  weight_cell_to_forgetgate=init.XavierUniform(),

                  weight_in_to_outgate=init.XavierUniform(),
                  weight_hid_to_outgate=init.XavierUniform(),
                  weight_cell_to_outgate=init.XavierUniform(),

                  weight_in_to_cell=init.XavierUniform(),
                  weight_hid_to_cell=init.XavierUniform(),
              )

          If the application requires modification of only one (or a
          few) parameters, it's better to specify only those that need
          to be modified and omit the others.

          .. code-block:: python

              dict(weight_in_to_ingate=init.Normal(0.1))

          Other parameters like ``weight_cell_to_outgate`` will be
          equal to their default values.

    biases : dict or Initializer
        Bias parameters for different gates.
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

        - If the application requires the same initialization method
          for all biases, then it's possible to specify a single
          initialization method that will be automatically applied
          to all bias parameters in the LSTM layer.

          .. code-block:: python

              layers.LSTM(2, biases=init.Constant(1))

        - If the application requires different initialization
          values for different biases, then it's possible to specify
          each bias by name.

          .. code-block:: python

              dict(
                  bias_ingate=init.Constant(0),
                  bias_forgetgate=init.Constant(0),
                  bias_cell=init.Constant(0),
                  bias_outgate=init.Constant(0),
              )

          If the application requires modification of only one (or a
          few) parameters, it's better to specify only those that need
          to be modified and omit the others.

          .. code-block:: python

              dict(bias_ingate=init.Constant(1))

          Other parameters like ``bias_cell`` will be
          equal to their default values.

    activation_functions : dict, callable
        Activation functions for different gates. Defaults to:

        .. code-block:: python

            # import theano.tensor as T
            dict(
                ingate=T.nnet.sigmoid,
                forgetgate=T.nnet.sigmoid,
                outgate=T.nnet.sigmoid,
                cell=T.tanh,
            )

        If the application requires modification of only one parameter,
        it's better to specify only the one that needs to be modified
        and omit the others.

        .. code-block:: python

            dict(ingate=T.tanh)

        Other parameters like ``forgetgate`` or ``outgate`` will be
        equal to their default values.

    learn_init : bool
        If ``True``, make ``cell_init`` and ``hid_init`` trainable
        variables. Defaults to ``False``.

    cell_init : array-like, Theano variable, scalar or Initializer
        Initializer for initial cell state (:math:`c_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    hid_init : array-like, Theano variable, scalar or Initializer
        Initializer for initial hidden state (:math:`h_0`).
        Defaults to :class:`Constant(0) <neupy.init.Constant>`.

    backwards : bool
        If ``True``, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`. Defaults to ``False``.

    {BaseRNNLayer.only_return_final}

    precompute_input : bool
        If ``True``, precompute ``input_to_hid`` before iterating
        through the sequence. This can result in a speed up at the
        expense of an increase in memory usage.
        Defaults to ``True``.

    peepholes : bool
        If ``True``, the LSTM uses peephole connections.
        When ``False``, the cell parameters are ignored.
        Defaults to ``False``.

    unroll_scan : bool
        If ``True`` the recursion is unrolled instead of using scan.
        For some graphs this gives a significant speed up but it
        might also consume more memory. When ``unroll_scan=True``,
        backpropagation always includes the full sequence, so
        ``n_gradient_steps`` must be set to ``-1`` and the input
        sequence length must be known at compile time (i.e.,
        cannot be given as ``None``). Defaults to ``False``.

    gradient_clipping : float or int
        If nonzero, the gradient messages are clipped to the
        given value during the backward pass. Defaults to ``0``.

    n_gradient_steps : int
        Number of timesteps to include in the backpropagated gradient.
        If ``-1``, backpropagate through the entire sequence.
        Defaults to ``-1``.

    {BaseLayer.Parameters}

    Notes
    -----
    Code was adapted from the
    `Lasagne <https://github.com/Lasagne/Lasagne>`_ library.

    Examples
    --------

    Sequence classification

    .. code-block:: python

        from neupy import layers, algorithms

        n_time_steps = 40
        n_categories = 20
        embedded_size = 10

        network = algorithms.RMSProp(
            [
                layers.Input(n_time_steps),
                layers.Embedding(n_categories, embedded_size),
                layers.LSTM(20),
                layers.Sigmoid(1),
            ]
        )
    """
    weights = MultiParameterProperty(
        default=dict(
            weight_in_to_ingate=init.XavierUniform(),
            weight_hid_to_ingate=init.XavierUniform(),
            weight_cell_to_ingate=init.XavierUniform(),

            weight_in_to_forgetgate=init.XavierUniform(),
            weight_hid_to_forgetgate=init.XavierUniform(),
            weight_cell_to_forgetgate=init.XavierUniform(),

            weight_in_to_outgate=init.XavierUniform(),
            weight_hid_to_outgate=init.XavierUniform(),
            weight_cell_to_outgate=init.XavierUniform(),

            weight_in_to_cell=init.XavierUniform(),
            weight_hid_to_cell=init.XavierUniform(),
        ))
    biases = MultiParameterProperty(
        default=dict(
            bias_ingate=init.Constant(0),
            bias_forgetgate=init.Constant(0),
            bias_cell=init.Constant(0),
            bias_outgate=init.Constant(0),
        ))
    activation_functions = MultiCallableProperty(
        default=dict(
            ingate=T.nnet.sigmoid,
            forgetgate=T.nnet.sigmoid,
            outgate=T.nnet.sigmoid,
            cell=T.tanh,
        ))

    learn_init = Property(default=False, expected_type=bool)
    cell_init = ParameterProperty(default=init.Constant(0))
    hid_init = ParameterProperty(default=init.Constant(0))

    unroll_scan = Property(default=False, expected_type=bool)
    backwards = Property(default=False, expected_type=bool)
    precompute_input = Property(default=True, expected_type=bool)
    peepholes = Property(default=False, expected_type=bool)

    n_gradient_steps = IntProperty(default=-1)
    gradient_clipping = NumberProperty(default=0, minval=0)

    def initialize(self):
        super(LSTM, self).initialize()

        n_inputs = np.prod(self.input_shape[1:])
        weights = self.weights
        biases = self.biases

        # Input gate parameters
        self.weight_in_to_ingate = self.add_parameter(
            value=weights.weight_in_to_ingate,
            name='weight_in_to_ingate',
            shape=(n_inputs, self.size))
        self.weight_hid_to_ingate = self.add_parameter(
            value=weights.weight_hid_to_ingate,
            name='weight_hid_to_ingate',
            shape=(self.size, self.size))
        self.bias_ingate = self.add_parameter(
            value=biases.bias_ingate, name='bias_ingate',
            shape=(self.size,))

        # Forget gate parameters
        self.weight_in_to_forgetgate = self.add_parameter(
            value=weights.weight_in_to_forgetgate,
            name='weight_in_to_forgetgate',
            shape=(n_inputs, self.size))
        self.weight_hid_to_forgetgate = self.add_parameter(
            value=weights.weight_hid_to_forgetgate,
            name='weight_hid_to_forgetgate',
            shape=(self.size, self.size))
        self.bias_forgetgate = self.add_parameter(
            value=biases.bias_forgetgate, name='bias_forgetgate',
            shape=(self.size,))

        # Cell parameters
        self.weight_in_to_cell = self.add_parameter(
            value=weights.weight_in_to_cell,
            name='weight_in_to_cell',
            shape=(n_inputs, self.size))
        self.weight_hid_to_cell = self.add_parameter(
            value=weights.weight_hid_to_cell,
            name='weight_hid_to_cell',
            shape=(self.size, self.size))
        self.bias_cell = self.add_parameter(
            value=biases.bias_cell, name='bias_cell',
            shape=(self.size,))

        # If peephole (cell to gate) connections were enabled, initialize
        # peephole connections.  These are elementwise products with the cell
        # state, so they are represented as vectors.
        if self.peepholes:
            self.weight_cell_to_ingate = self.add_parameter(
                value=weights.weight_cell_to_ingate,
                name='weight_cell_to_ingate',
                shape=(self.size,))
            self.weight_cell_to_forgetgate = self.add_parameter(
                value=weights.weight_cell_to_forgetgate,
                name='weight_cell_to_forgetgate',
                shape=(self.size,))
            self.weight_cell_to_outgate = self.add_parameter(
                value=weights.weight_cell_to_outgate,
                name='weight_cell_to_outgate',
                shape=(self.size,))

        # Output gate parameters
        self.weight_in_to_outgate = self.add_parameter(
            value=weights.weight_in_to_outgate,
            name='weight_in_to_outgate',
            shape=(n_inputs, self.size))
        self.weight_hid_to_outgate = self.add_parameter(
            value=weights.weight_hid_to_outgate,
            name='weight_hid_to_outgate',
            shape=(self.size, self.size))
        self.bias_outgate = self.add_parameter(
            value=biases.bias_outgate, name='bias_outgate',
            shape=(self.size,))

        # Initialization parameters
        self.add_parameter(value=self.cell_init, shape=(1, self.size),
                           name="cell_init", trainable=self.learn_init)
        self.add_parameter(value=self.hid_init, shape=(1, self.size),
                           name="hid_init", trainable=self.learn_init)

    def output(self, input_value):
        # Treat all dimensions after the second as flattened
        # feature dimensions
        if input_value.ndim > 3:
            input_value = T.flatten(input_value, 3)

        # Because scan iterates over the first dimension we
        # dimshuffle to (n_time_steps, n_batch, n_features)
        input_value = input_value.dimshuffle(1, 0, 2)
        seq_len, n_batch, _ = input_value.shape

        # Stack input weight matrices into a (num_inputs, 4 * num_units)
        # matrix, which speeds up computation
        weight_in_stacked = T.concatenate([
            self.weight_in_to_ingate,
            self.weight_in_to_forgetgate,
            self.weight_in_to_cell,
            self.weight_in_to_outgate], axis=1)

        # Same for hidden weight matrices
        weight_hid_stacked = T.concatenate([
            self.weight_hid_to_ingate,
            self.weight_hid_to_forgetgate,
            self.weight_hid_to_cell,
            self.weight_hid_to_outgate], axis=1)

        # Stack biases into a (4 * num_units) vector
        bias_stacked = T.concatenate([
            self.bias_ingate,
            self.bias_forgetgate,
            self.bias_cell,
            self.bias_outgate], axis=0)

        if self.precompute_input:
            # Because the input is given for all time steps, we can
            # precompute the product with the input weight matrix before
            # scanning. weight_in_stacked is (n_features, 4 * num_units),
            # so the result is (n_time_steps, n_batch, 4 * num_units).
            input_value = T.dot(input_value, weight_in_stacked) + bias_stacked

        # When theano.scan calls step, input_n will be
        # (n_batch, 4 * num_units). We define a slicing function
        # that extracts the input to each LSTM gate
        def slice_w(x, n):
            return x[:, n * self.size:(n + 1) * self.size]

        def one_lstm_step(input_n, cell_previous, hid_previous, *args):
            if not self.precompute_input:
                input_n = T.dot(input_n, weight_in_stacked) + bias_stacked

            # Calculate gates pre-activations and slice
            gates = input_n + T.dot(hid_previous, weight_hid_stacked)

            # Clip gradients
            if self.gradient_clipping:
                gates = theano.gradient.grad_clip(
                    gates, -self.gradient_clipping, self.gradient_clipping)

            # Extract the pre-activation gate values
            ingate = slice_w(gates, 0)
            forgetgate = slice_w(gates, 1)
            cell_input = slice_w(gates, 2)
            outgate = slice_w(gates, 3)

            if self.peepholes:
                # Compute peephole connections
                ingate += cell_previous * self.weight_cell_to_ingate
                forgetgate += cell_previous * self.weight_cell_to_forgetgate

            # Apply nonlinearities
            ingate = self.activation_functions.ingate(ingate)
            forgetgate = self.activation_functions.forgetgate(forgetgate)
            cell_input = self.activation_functions.cell(cell_input)

            # Compute new cell value
            cell = forgetgate * cell_previous + ingate * cell_input

            if self.peepholes:
                outgate += cell * self.weight_cell_to_outgate

            outgate = self.activation_functions.outgate(outgate)

            # Compute new hidden unit activation
            hid = outgate * T.tanh(cell)
            return [cell, hid]

        ones = T.ones((n_batch, 1))
        cell_init = T.dot(ones, self.cell_init)
        hid_init = T.dot(ones, self.hid_init)

        non_sequences = [weight_hid_stacked]
        # When we aren't precomputing the input outside of scan, we need to
        # provide the input weights and biases to the step function
        if not self.precompute_input:
            non_sequences += [weight_in_stacked, bias_stacked]

        # The "peephole" weight matrices are only used
        # when self.peepholes=True
        if self.peepholes:
            non_sequences += [self.weight_cell_to_ingate,
                              self.weight_cell_to_forgetgate,
                              self.weight_cell_to_outgate]

        if self.unroll_scan:
            # Retrieve the dimensionality of the incoming layer
            n_time_steps = self.input_shape[0]

            # Explicitly unroll the recurrence instead of using scan
            _, hid_out = unroll_scan(
                fn=one_lstm_step,
                sequences=[input_value],
                outputs_info=[cell_init, hid_init],
                go_backwards=self.backwards,
                non_sequences=non_sequences,
                n_steps=n_time_steps)

        else:
            (_, hid_out), _ = theano.scan(
                fn=one_lstm_step,
                sequences=input_value,
                outputs_info=[cell_init, hid_init],
                go_backwards=self.backwards,
                truncate_gradient=self.n_gradient_steps,
                non_sequences=non_sequences,
                strict=True)

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            return hid_out[-1]

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        hid_out = hid_out.dimshuffle(1, 0, 2)

        # If scan ran backwards, reverse the output
        if self.backwards:
            hid_out = hid_out[:, ::-1]

        return hid_out
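
A numpy sketch of the weight-stacking trick used in ``output`` above:
one matrix product against the stacked ``(n_features, 4 * n_units)``
matrix produces the pre-activations of all four gates at once, which
``slice_w`` then splits apart:

import numpy as np

n_batch, n_features, n_units = 5, 8, 3
rng = np.random.RandomState(0)

w_in, w_forget, w_cell, w_out = (
    rng.randn(n_features, n_units) for _ in range(4))
w_stacked = np.concatenate([w_in, w_forget, w_cell, w_out], axis=1)

x = rng.randn(n_batch, n_features)
gates = x.dot(w_stacked)  # (n_batch, 4 * n_units), single matmul

def slice_w(g, n):
    return g[:, n * n_units:(n + 1) * n_units]

# Slicing the single product matches the four separate products
assert np.allclose(slice_w(gates, 0), x.dot(w_in))
assert np.allclose(slice_w(gates, 3), x.dot(w_out))
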
Example n. 22
class BaseAssociative(BaseNetwork):
    """
    Base class for associative learning.

    Parameters
    ----------
    n_inputs : int
        Number of input units.

    n_outputs : int
        Number of output units.

    weight : array-like, Initializer
        Neural network weights.
        A manually defined value should have shape ``(n_inputs, n_outputs)``.
        Defaults to :class:`Normal() <neupy.init.Normal>`.

    {BaseNetwork.step}

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.epoch_end_signal}

    {BaseNetwork.train_end_signal}

    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}

    train(input_train, epochs=100)
        Train neural network.

    {BaseSkeleton.fit}
    """
    n_inputs = IntProperty(minval=1, required=True)
    n_outputs = IntProperty(minval=1, required=True)
    weight = ParameterProperty(default=init.Normal())

    def __init__(self, **options):
        super(BaseAssociative, self).__init__(**options)
        self.init_layers()

    def init_layers(self):
        valid_weight_shape = (self.n_inputs, self.n_outputs)

        if isinstance(self.weight, init.Initializer):
            self.weight = self.weight.sample(valid_weight_shape)

        if self.weight.shape != valid_weight_shape:
            raise ValueError("Weight matrix has invalid shape. Got {}, "
                             "expected {}".format(self.weight.shape,
                                                  valid_weight_shape))

        self.weight = self.weight.astype(float)

    def train(self, input_train, epochs=100):
        input_train = format_data(input_train, is_feature1d=True)
        return super(BaseAssociative, self).train(input_train=input_train,
                                                  target_train=None,
                                                  input_test=None,
                                                  target_test=None,
                                                  epochs=epochs,
                                                  epsilon=None,
                                                  summary='table')
Example n. 23
class WolfeLineSearchForStep(StepSelectionBuiltIn, Configurable):
    """
    Class that has all the functions required to apply a line search over
    the step parameter used during network training.

    Parameters
    ----------
    wolfe_maxiter : int
        Controls the maximum number of iterations during the line search
        that identifies the optimal step size during the weight update
        stage. Defaults to ``20``.

    wolfe_c1 : float
        Parameter for the Armijo condition rule. It's used during the line
        search that identifies the optimal step size during the weight
        update stage. Defaults to ``1e-4``.

    wolfe_c2 : float
        Parameter for the curvature condition rule. It's used during the
        line search that identifies the optimal step size during the
        weight update stage. Defaults to ``0.9``.
    """
    wolfe_maxiter = IntProperty(default=20, minval=0)
    wolfe_c1 = NumberProperty(default=1e-4, minval=0)
    wolfe_c2 = NumberProperty(default=0.9, minval=0)

    def find_optimal_step(self, parameter_vector, parameter_update):
        network_inputs = self.variables.network_inputs
        network_output = self.variables.network_output
        layers_and_parameters = list(iter_parameters(self.layers))

        def prediction(step):
            step = asfloat(step)
            updated_params = parameter_vector + step * parameter_update

            # This trick allows us to replace shared variables
            # with tensorflow variables and get output from the network
            start_pos = 0
            for layer, attrname, param in layers_and_parameters:
                end_pos = start_pos + get_variable_size(param)
                updated_param_value = tf.reshape(
                    updated_params[start_pos:end_pos], param.shape)
                setattr(layer, attrname, updated_param_value)
                start_pos = end_pos

            output = self.connection.output(*network_inputs)

            # Restore previous parameters
            for layer, attrname, param in layers_and_parameters:
                setattr(layer, attrname, param)

            return output

        def phi(step):
            return self.error(network_output, prediction(step))

        def derphi(step):
            error_func = self.error(network_output, prediction(step))
            gradient, = tf.gradients(error_func, step)
            return gradient

        return line_search(phi, derphi, self.wolfe_maxiter, self.wolfe_c1,
                           self.wolfe_c2)
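
A numeric sketch of the two (strong) Wolfe conditions that the line
search above enforces, on a toy quadratic ``phi``; the ``c1``/``c2``
values mirror the ``wolfe_c1``/``wolfe_c2`` defaults:

def phi(step):
    # Error along the search direction (toy quadratic, minimum at 2.0)
    return (step - 2.0) ** 2

def derphi(step):
    # Derivative of phi
    return 2.0 * (step - 2.0)

c1, c2 = 1e-4, 0.9
step = 1.5

# Armijo (sufficient decrease) condition
armijo = phi(step) <= phi(0) + c1 * step * derphi(0)
# Curvature condition: the slope has flattened enough
curvature = abs(derphi(step)) <= c2 * abs(derphi(0))

print(armijo, curvature)  # True True -- step=1.5 would be accepted
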
Example n. 24
class BaseStepAssociative(BaseAssociative):
    """
    Base class for associative algorithms that have two layers, where
    the first one uses a step function as the activation.

    Parameters
    ----------
    {BaseAssociative.n_inputs}

    {BaseAssociative.n_outputs}

    n_unconditioned : int
        Number of unconditioned units in the neural network. These units
        won't be updated during the training procedure. The unconditioned
        units should come first among the features in the dataset.

    weight : array-like
        Neural network weights.
        A manually defined value should have shape ``(n_inputs, n_outputs)``.
        Defaults to ``None``, which means that all unconditional
        weights will be equal to ``1`` and all other weights to ``0``.

    bias : array-like, Initializer
        Neural network bias units.
        Defaults to :class:`Constant(-0.5) <neupy.init.Constant>`.

    {BaseNetwork.step}

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.epoch_end_signal}

    {BaseNetwork.train_end_signal}

    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}

    {BaseAssociative.train}

    {BaseSkeleton.fit}
    """
    n_inputs = IntProperty(minval=2, required=True)
    n_unconditioned = IntProperty(minval=1, required=True)

    weight = ArrayProperty()
    bias = ParameterProperty(default=init.Constant(-0.5))

    def init_layers(self):
        if self.n_inputs <= self.n_unconditioned:
            raise ValueError(
                "Number of unconditioned features should be less than the "
                "total number of features. `n_inputs`={} and "
                "`n_unconditioned`={}".format(self.n_inputs,
                                              self.n_unconditioned))

        valid_weight_shape = (self.n_inputs, self.n_outputs)
        valid_bias_shape = (self.n_outputs, )

        if self.weight is None:
            self.weight = np.zeros(valid_weight_shape)
            self.weight[:self.n_unconditioned, :] = 1

        if isinstance(self.bias, init.Initializer):
            self.bias = self.bias.sample(valid_bias_shape)

        super(BaseStepAssociative, self).init_layers()

        if self.bias.shape != valid_bias_shape:
            raise ValueError("Bias vector has invalid shape. Got {}, "
                             "expected {}".format(self.bias.shape,
                                                  valid_bias_shape))

        self.bias = self.bias.astype(float)

    def predict(self, input_data):
        input_data = format_data(input_data, is_feature1d=False)
        raw_output = input_data.dot(self.weight) + self.bias
        return np.where(raw_output > 0, 1, 0)

    def train(self, input_train, *args, **kwargs):
        input_train = format_data(input_train, is_feature1d=False)
        return super(BaseStepAssociative, self).train(input_train, *args,
                                                      **kwargs)

    def train_epoch(self, input_train, target_train):
        weight = self.weight
        n_unconditioned = self.n_unconditioned
        predict = self.predict
        weight_delta = self.weight_delta

        for input_row in input_train:
            input_row = np.reshape(input_row, (1, input_row.size))
            layer_output = predict(input_row)
            weight[n_unconditioned:, :] += weight_delta(
                input_row, layer_output)
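
A sketch of the conditioned/unconditioned split in ``train_epoch``
above: only the rows below ``n_unconditioned`` get updated. The source
leaves ``weight_delta`` abstract, so the plain Hebbian product used
here is an assumption for illustration only:

import numpy as np

n_inputs, n_outputs, n_unconditioned = 3, 1, 1
weight = np.zeros((n_inputs, n_outputs))
weight[:n_unconditioned, :] = 1  # fixed, unconditioned connection

def weight_delta(input_row, layer_output):
    # Hypothetical Hebbian update over the conditioned inputs only
    return 0.1 * input_row[:, n_unconditioned:].T.dot(layer_output)

input_row = np.array([[1.0, 1.0, 0.0]])
layer_output = np.array([[1.0]])

weight[n_unconditioned:, :] += weight_delta(input_row, layer_output)
print(weight.ravel())  # [1.  0.1 0. ] -- only the conditioned row moved
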
Example n. 25
class CMAC(BaseNetwork):
    """
    Cerebellar Model Articulation Controller (CMAC) Network based on memory.

    Notes
    -----
    - The network always uses Mean Absolute Error (MAE).
    - The network works with multi-dimensional target values.

    Parameters
    ----------
    quantization : int
        The network transforms every input into a discrete value. The
        quantization value controls the total number of categories after
        quantization. Defaults to ``10``.

    associative_unit_size : int
        Number of associative blocks in memory, defaults to ``2``.

    {BaseNetwork.Parameters}

    Attributes
    ----------
    weight : dict
        Network's weight that contains memorized patterns.

    Methods
    -------
    {BaseSkeleton.predict}

    train(X_train, y_train, X_test=None, y_test=None, epochs=100)
        Trains the network on the data X. The network trains until the
        maximum number of ``epochs`` is reached.

    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy.algorithms import CMAC
    >>>
    >>> train_space = np.linspace(0, 2 * np.pi, 100)
    >>> test_space = np.linspace(np.pi, 2 * np.pi, 50)
    >>>
    >>> X_train = np.reshape(train_space, (100, 1))
    >>> X_test = np.reshape(test_space, (50, 1))
    >>>
    >>> y_train = np.sin(X_train)
    >>> y_test = np.sin(X_test)
    >>>
    >>> cmac = CMAC(
    ...     quantization=100,
    ...     associative_unit_size=32,
    ...     step=0.2,
    ... )
    ...
    >>> cmac.train(X_train, y_train, epochs=100)
    >>>
    >>> predicted_test = cmac.predict(X_test)
    >>> cmac.score(y_test, predicted_test)
    0.0023639417543036569
    """
    quantization = IntProperty(default=10, minval=1)
    associative_unit_size = IntProperty(default=2, minval=2)

    def __init__(self, **options):
        self.weight = {}
        super(CMAC, self).__init__(**options)

    def predict(self, X):
        X = format_data(X)

        get_memory_coords = self.get_memory_coords
        get_result_by_coords = self.get_result_by_coords
        predicted = []

        for input_sample in self.quantize(X):
            coords = get_memory_coords(input_sample)
            predicted.append(get_result_by_coords(coords))

        return np.array(predicted)

    def get_result_by_coords(self, coords):
        return sum(self.weight.setdefault(coord, 0)
                   for coord in coords) / self.associative_unit_size

    def get_memory_coords(self, quantized_value):
        assoc_unit_size = self.associative_unit_size

        for i in range(assoc_unit_size):
            point = ((quantized_value + i) / assoc_unit_size).astype(int)
            yield tuple(np.concatenate([point, [i]]))

    def quantize(self, X):
        return (X * self.quantization).astype(int)

    def one_training_update(self, X_train, y_train):
        get_memory_coords = self.get_memory_coords
        get_result_by_coords = self.get_result_by_coords
        weight = self.weight
        step = self.step

        n_samples = X_train.shape[0]
        quantized_input = self.quantize(X_train)
        errors = 0

        for input_sample, target_sample in zip(quantized_input, y_train):
            coords = list(get_memory_coords(input_sample))
            predicted = get_result_by_coords(coords)

            error = target_sample - predicted
            for coord in coords:
                weight[coord] += step * error

            errors += sum(abs(error))

        return errors / n_samples

    def score(self, X, y):
        predicted = self.predict(X)
        return np.mean(np.abs(predicted - y))

    def train(self, X_train, y_train, X_test=None, y_test=None, epochs=100):
        is_test_data_partially_missing = (
            (X_test is None and y_test is not None) or
            (X_test is not None and y_test is None))

        if is_test_data_partially_missing:
            raise ValueError("Invalid test data. `X_test` and `y_test` "
                             "must be defined together or not at all.")

        X_train = format_data(X_train)
        y_train = format_data(y_train)

        if X_test is not None:
            X_test = format_data(X_test)
            y_test = format_data(y_test)

        return super(CMAC, self).train(X_train,
                                       y_train,
                                       X_test,
                                       y_test,
                                       epochs=epochs)
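
A worked example of the CMAC addressing scheme above: quantize an
input sample, then list the ``associative_unit_size`` overlapping
memory cells that ``get_memory_coords`` yields for it (a standalone
re-implementation of the same arithmetic):

import numpy as np

quantization, assoc_unit_size = 10, 2

def quantize(x):
    return (x * quantization).astype(int)

def memory_coords(quantized_value):
    for i in range(assoc_unit_size):
        point = ((quantized_value + i) / assoc_unit_size).astype(int)
        yield tuple(np.concatenate([point, [i]]))

sample = np.array([0.73])  # one-dimensional input sample
print(list(memory_coords(quantize(sample))))
# [(3, 0), (4, 1)] -- nearby inputs share some of these cells
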
Example n. 26
class CMAC(SupervisedLearning, BaseNetwork):
    """ CMAC Network based on memory.

    Notes
    -----
    * The network always uses Mean Absolute Error (MAE).
    * Works with multi-dimensional target values.

    Parameters
    ----------
    quantization : int
        The network transforms every input into discrete values. The
        quantization value controls the total number of possible values
        after quantization. Defaults to ``10``.
    associative_unit_size : int
        Number of associative blocks in memory, defaults to ``2``.
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}

    Attributes
    ----------
    weights : dict
        Neural network weights that contain memorized patterns.

    Methods
    -------
    {BaseSkeleton.predict}
    {SupervisedLearning.train}
    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy.algorithms import CMAC
    >>>
    >>> train_space = np.linspace(0, 2 * np.pi, 100)
    >>> test_space = np.linspace(np.pi, 2 * np.pi, 50)
    >>>
    >>> input_train = np.reshape(train_space, (100, 1))
    >>> input_test = np.reshape(test_space, (50, 1))
    >>>
    >>> target_train = np.sin(input_train)
    >>> target_test = np.sin(input_test)
    >>>
    >>> cmac = CMAC(
    ...     quantization=100,
    ...     associative_unit_size=32,
    ...     step=0.2,
    ... )
    ...
    >>> cmac.train(input_train, target_train, epochs=100)
    >>> predicted_test = cmac.predict(input_test)
    >>> cmac.error(target_test, predicted_test)
    0.0023639417543036569
    """
    quantization = IntProperty(default=10, minval=1)
    associative_unit_size = IntProperty(default=2, minval=2)

    def __init__(self, **options):
        self.weights = {}
        super(CMAC, self).__init__(**options)

    def predict(self, input_data):
        input_data = format_data(input_data)

        get_memory_coords = self.get_memory_coords
        get_result_by_coords = self.get_result_by_coords
        predicted = []

        for input_sample in self.quantize(input_data):
            coords = get_memory_coords(input_sample)
            predicted.append(get_result_by_coords(coords))

        return array(predicted)

    def get_result_by_coords(self, coords):
        return sum(
            self.weights.setdefault(coord, 0) for coord in coords
        ) / self.associative_unit_size

    def get_memory_coords(self, quantized_value):
        assoc_unit_size = self.associative_unit_size

        for i in range(assoc_unit_size):
            point = ((quantized_value + i) / assoc_unit_size).astype(int)
            yield tuple(concatenate([point, [i]]))

    def quantize(self, input_data):
        return (input_data * self.quantization).astype(int)

    def train_epoch(self, input_train, target_train):
        get_memory_coords = self.get_memory_coords
        get_result_by_coords = self.get_result_by_coords
        weights = self.weights
        step = self.step

        quantized_input = self.quantize(input_train)
        errors = 0

        for input_sample, target_sample in zip(quantized_input, target_train):
            coords = list(get_memory_coords(input_sample))
            predicted = get_result_by_coords(coords)

            error = target_sample - predicted
            for coord in coords:
                weights[coord] += step * error

            errors += abs(error)
        return errors / input_train.shape[0]
Example n. 27
class ParameterBasedLayer(BaseLayer):
    """
    Layer that creates weight and bias parameters.

    Parameters
    ----------
    size : int
        Layer's output size.

    weight : array-like, Theano variable, scalar or Initializer
        Defines layer's weights. You can find the default initialization
        methods :ref:`here <init-methods>`.
        Defaults to :class:`XavierNormal() <neupy.init.XavierNormal>`.

    bias : 1D array-like, Theano variable, scalar, Initializer or None
        Defines layer's bias.
        You can find the default initialization methods
        :ref:`here <init-methods>`. Defaults to
        :class:`Constant(0) <neupy.init.Constant>`.
        The ``None`` value excludes bias from the calculations and
        does not add it to the parameters list.

    {BaseLayer.Parameters}

    Methods
    -------
    {BaseLayer.Methods}

    Attributes
    ----------
    {BaseLayer.Attributes}
    """
    size = IntProperty(minval=1)
    weight = ParameterProperty(default=init.XavierNormal())
    bias = ParameterProperty(default=init.Constant(value=0), allow_none=True)

    def __init__(self, size, **options):
        super(ParameterBasedLayer, self).__init__(size=size, **options)

    @property
    def weight_shape(self):
        return as_tuple(self.input_shape, self.output_shape)

    @property
    def bias_shape(self):
        if self.bias is not None:
            return as_tuple(self.output_shape)

    def initialize(self):
        super(ParameterBasedLayer, self).initialize()

        self.add_parameter(value=self.weight, name='weight',
                           shape=self.weight_shape, trainable=True)

        if self.bias is not None:
            self.add_parameter(value=self.bias, name='bias',
                               shape=self.bias_shape, trainable=True)

    def __repr__(self):
        classname = self.__class__.__name__
        return '{name}({size})'.format(name=classname, size=self.size)
Example n. 28
File: oja.py Project: disc5/neupy
class Oja(UnsupervisedLearning, BaseNetwork):
    """ Oja unsupervised algorithm that minimize input data feature
    space.

    Notes
    -----
    * In practice, use a very small step. For example ``1e-7``.
    * Normalize the input data before using the Oja algorithm. The input \
    data shouldn't contain large values.
    * Use smaller initial weights if the error during the first few \
    iterations is large compared to the scale of the input values. For \
    example, if your input data has values between 0 and 1, an error \
    value of 100 is large.

    Parameters
    ----------
    minimized_data_size : int
        Expected number of features after minimization. Defaults to ``1``.
    weights : array-like or ``None``
        Predefined weights that transform the data in both directions
        (minimization and reconstruction). If weights are ``None``, the
        algorithm generates random weights before training.
        Defaults to ``None``.
    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    reconstruct(input_data):
        Reconstruct your minimized data.
    {BaseSkeleton.predict}
    {UnsupervisedLearning.train}
    {BaseSkeleton.fit}
    {BaseNetwork.plot_errors}

    Raises
    ------
    ValueError
        * Reconstructing data before the network was trained.
        * Invalid number of input data features for the ``train`` and \
        ``reconstruct`` methods.

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> data = np.array([[2, 2], [1, 1], [4, 4], [5, 5]])
    >>>
    >>> ojanet = algorithms.Oja(
    ...     minimized_data_size=1,
    ...     step=0.01,
    ...     verbose=False
    ... )
    >>>
    >>> ojanet.train(data, epsilon=1e-5)
    >>> minimized = ojanet.predict(data)
    >>> minimized
    array([[-2.82843122],
           [-1.41421561],
           [-5.65686243],
           [-7.07107804]])
    >>> ojanet.reconstruct(minimized)
    array([[ 2.00000046,  2.00000046],
           [ 1.00000023,  1.00000023],
           [ 4.00000093,  4.00000093],
           [ 5.00000116,  5.00000116]])
    """
    minimized_data_size = IntProperty(minval=1)
    weights = ArrayProperty()

    def init_properties(self):
        del self.shuffle_data
        super(Oja, self).init_properties()

    def train_epoch(self, input_data, target_train):
        weights = self.weights

        minimized = dot(input_data, weights)
        reconstruct = dot(minimized, weights.T)
        error = input_data - reconstruct

        weights += self.step * dot(error.T, minimized)

        mae = np_sum(np_abs(error)) / input_data.size

        # Clear memory
        del minimized
        del reconstruct
        del error

        return mae

    def train(self, input_data, epsilon=1e-2, epochs=100):
        input_data = format_data(input_data)
        n_input_features = input_data.shape[1]

        if self.weights is None:
            self.weights = randn(n_input_features, self.minimized_data_size)

        if n_input_features != self.weights.shape[0]:
            raise ValueError(
                "Invalid number of features. Expected {}, got {}".format(
                    self.weights.shape[0], n_input_features))

        super(Oja, self).train(input_data, epsilon=epsilon, epochs=epochs)

    def reconstruct(self, input_data):
        if self.weights is None:
            raise ValueError("Train network before use reconstruct method.")

        input_data = format_data(input_data)
        if input_data.shape[1] != self.minimized_data_size:
            raise ValueError("Invalid input data feature space, expected "
                             "{}, got {}.".format(self.minimized_data_size,
                                                  input_data.shape[1]))

        return dot(input_data, self.weights.T)

    def predict(self, input_data):
        if self.weights is None:
            raise ValueError("Train network before use prediction method.")

        input_data = format_data(input_data)
        return dot(input_data, self.weights)
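
A standalone numpy sketch of the Oja update from ``train_epoch`` above;
with a small step the weight vector converges to the first principal
component of the data (synthetic data and step size chosen for
illustration):

import numpy as np

rng = np.random.RandomState(0)
# Variance is concentrated along the first axis
data = rng.randn(200, 2).dot(np.diag([3.0, 0.5]))

weights = rng.randn(2, 1)
step = 1e-5

for _ in range(500):
    minimized = data.dot(weights)             # project down
    reconstructed = minimized.dot(weights.T)  # project back
    error = data - reconstructed
    weights += step * error.T.dot(minimized)  # the same update as above

# The normalized weight vector aligns with the first principal component
print((weights / np.linalg.norm(weights)).ravel())
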
Example n. 29
class ParameterBasedLayer(BaseLayer):
    """ Layer that creates weight and bias parameters.

    Parameters
    ----------
    size : int
        Layer input size.
    weight : 2D array-like or None
        Defines layer weights. ``None`` means that the weights will be
        generated randomly based on the ``init_method`` property.
        ``None`` by default.
    bias : 1D array-like or None
        Defines layer bias. ``None`` means that the bias will be
        generated randomly based on the ``init_method`` property.
    init_method : {{'bounded', 'normal', 'ortho', 'xavier_normal',\
    'xavier_uniform', 'he_normal', 'he_uniform'}}
        Weight initialization method. Defaults to ``xavier_normal``.

        * ``normal`` generates random weights from the normal \
        distribution with standard deviation equal to ``0.01``.

        * ``bounded`` generates random weights from the uniform \
        distribution.

        * ``ortho`` generates a random orthogonal matrix.

        * ``xavier_normal`` generates a random matrix from the normal \
        distribution with variance equal to :math:`\\frac{{2}}{{\
        fan_{{in}} + fan_{{out}}}}`, where :math:`fan_{{in}}` is the \
        number of layer input units and :math:`fan_{{out}}` the number \
        of layer output units.

        * ``xavier_uniform`` generates a random matrix from the uniform \
        distribution where :math:`w_{{ij}} \\in \
        [-\\sqrt{{\\frac{{6}}{{fan_{{in}} + fan_{{out}}}}}}, \
        \\sqrt{{\\frac{{6}}{{fan_{{in}} + fan_{{out}}}}}}]`.

        * ``he_normal`` generates a random matrix from the normal \
        distribution with variance equal to \
        :math:`\\frac{{2}}{{fan_{{in}}}}`, where :math:`fan_{{in}}` is \
        the number of layer input units.

        * ``he_uniform`` generates a random matrix from the uniform \
        distribution where :math:`w_{{ij}} \\in \
        [-\\sqrt{{\\frac{{6}}{{fan_{{in}}}}}}, \
        \\sqrt{{\\frac{{6}}{{fan_{{in}}}}}}]`.

    bounds : tuple of two float
        Available only when ``init_method`` is equal to ``bounded``. The
        value identifies the minimum and maximum possible values in the
        random weights. Defaults to ``(0, 1)``.
    """
    size = IntProperty(minval=1)
    weight = SharedArrayProperty(default=None)
    bias = SharedArrayProperty(default=None)
    bounds = TypedListProperty(default=(0, 1), element_type=(int, float))
    init_method = ChoiceProperty(default=XAVIER_NORMAL,
                                 choices=VALID_INIT_METHODS)

    def __init__(self, size, **options):
        if size is not None:
            options['size'] = size
        super(ParameterBasedLayer, self).__init__(**options)

    def weight_shape(self):
        output_size = self.relate_to_layer.size
        return (self.size, output_size)

    def bias_shape(self):
        output_size = self.relate_to_layer.size
        return (output_size,)

    def initialize(self):
        super(ParameterBasedLayer, self).initialize()

        self.weight = create_shared_parameter(
            value=self.weight,
            name='weight_{}'.format(self.layer_id),
            shape=self.weight_shape(),
            bounds=self.bounds,
            init_method=self.init_method,
        )
        self.bias = create_shared_parameter(
            value=self.bias,
            name='bias_{}'.format(self.layer_id),
            shape=self.bias_shape(),
            bounds=self.bounds,
            init_method=self.init_method,
        )
        self.parameters = [self.weight, self.bias]

    def __repr__(self):
        classname = self.__class__.__name__
        return '{name}({size})'.format(name=classname, size=self.size)
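
A numpy sketch of the initialization formulas quoted in the docstring
above; these helpers are illustrative stand-ins, not the library's
``create_shared_parameter``:

import numpy as np

def xavier_normal(fan_in, fan_out):
    # Variance 2 / (fan_in + fan_out)
    std = np.sqrt(2.0 / (fan_in + fan_out))
    return np.random.normal(0, std, (fan_in, fan_out))

def xavier_uniform(fan_in, fan_out):
    # Bound sqrt(6 / (fan_in + fan_out))
    bound = np.sqrt(6.0 / (fan_in + fan_out))
    return np.random.uniform(-bound, bound, (fan_in, fan_out))

def he_normal(fan_in, fan_out):
    # Variance 2 / fan_in
    std = np.sqrt(2.0 / fan_in)
    return np.random.normal(0, std, (fan_in, fan_out))

w = xavier_normal(100, 50)
print(w.std())  # close to sqrt(2 / 150) ~= 0.115
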
Example n. 30
class Concatenate(BaseLayer):
    """
    Concatenate multiple inputs into one. Inputs will be concatenated over
    the specified axis (controlled with parameter ``axis``).

    Parameters
    ----------
    axis : int
        The axis along which the inputs will be concatenated.
        Default is ``-1``.

    {BaseLayer.name}

    Methods
    -------
    {BaseLayer.Methods}

    Attributes
    ----------
    {BaseLayer.Attributes}

    Examples
    --------
    >>> from neupy.layers import *
    >>> network = (Input(10) | Input(20)) >> Concatenate()
    [(?, 10), (?, 20)] -> [... 3 layers ...] -> (?, 30)
    """
    axis = IntProperty()

    def __init__(self, axis=-1, name=None):
        super(Concatenate, self).__init__(name=name)
        self.axis = axis

    def get_output_shape(self, *input_shapes):
        input_shapes = [tf.TensorShape(shape) for shape in input_shapes]
        # The axis value uses 0-based indices, where index 0 points to
        # the batch dimension of the input. Shapes in neupy do not store
        # information about the batch, so we need to put a None value
        # in the 0th position.
        valid_shape = input_shapes[0]

        if any(shape.ndims is None for shape in input_shapes):
            return tf.TensorShape(None)

        # Avoid using negative indices
        possible_axes = list(range(len(valid_shape)))
        concat_axis = possible_axes[self.axis]

        for input_shape in input_shapes[1:]:
            if len(valid_shape) != len(input_shape):
                raise LayerConnectionError(
                    "Cannot concatenate layers, because inputs have "
                    "different number of dimensions. Shapes: {} and {}"
                    "".format(valid_shape, input_shape))

            for axis, axis_size in enumerate(input_shape):
                if axis != concat_axis and valid_shape[axis] != axis_size:
                    raise LayerConnectionError(
                        "Cannot concatenate layers, because some of them "
                        "don't match over dimension #{} (0-based indeces). "
                        "Shapes: {} and {}"
                        "".format(axis, valid_shape, input_shape))

        output_shape = input_shapes.pop(0)
        output_shape = [dim.value for dim in output_shape.dims]

        for input_shape in input_shapes:
            output_shape[self.axis] += input_shape[self.axis]

        return tf.TensorShape(output_shape)

    def output(self, *inputs, **kwargs):
        return tf.concat(inputs, axis=self.axis)
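
A quick numpy analogue of the shape rules that ``get_output_shape``
enforces above: every dimension except the concatenation axis must
match, and the sizes along that axis add up:

import numpy as np

a = np.zeros((4, 10))
b = np.zeros((4, 20))

# Matching batch dimension: sizes along the concat axis add up
print(np.concatenate([a, b], axis=-1).shape)  # (4, 30)

c = np.zeros((5, 10))
try:
    np.concatenate([a, c], axis=-1)  # mismatch outside the concat axis
except ValueError as exc:
    print(exc)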