Example #1
    def test_stacked_lstm(self):
        x_train, x_test, y_train, y_test = self.data
        network = algorithms.RMSProp(
            [
                layers.Input(self.n_time_steps),
                layers.Embedding(self.n_categories, 10),
                layers.LSTM(
                    n_units=10,
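                    # Return the full sequence so that the second LSTM
                    # receives one output vector per time step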
                    only_return_final=False,
                    input_weights=init.Normal(0.1),
                    hidden_weights=init.Normal(0.1),
                ),
                layers.LSTM(
                    n_units=2,
                    input_weights=init.Normal(0.1),
                    hidden_weights=init.Normal(0.1),
                ),
                layers.Sigmoid(1),
            ],
            step=0.05,
            verbose=False,
            batch_size=1,
            loss='binary_crossentropy',
        )
        network.train(x_train, y_train, x_test, y_test, epochs=20)

        y_predicted = network.predict(x_test).round()
        accuracy = (y_predicted.T == y_test).mean()

        self.assertGreaterEqual(accuracy, 0.8)
Example #2
File: train_vin.py Project: degerli/neupy
def create_VIN(input_image_shape=(8, 8, 2), n_hidden_filters=150,
               n_state_filters=10, k=10):

    SamePadConvolution = partial(layers.Convolution, padding='SAME', bias=None)

    R = layers.join(
        layers.Input(input_image_shape, name='grid-input'),
        layers.Convolution((3, 3, n_hidden_filters),
                           padding='SAME',
                           weight=init.Normal(),
                           bias=init.Normal()),
        SamePadConvolution((1, 1, 1), weight=init.Normal()),
    )

    # Create shared weights
    q_weight = random_weight((3, 3, 1, n_state_filters))
    fb_weight = random_weight((3, 3, 1, n_state_filters))

    Q = R > SamePadConvolution((3, 3, n_state_filters), weight=q_weight)

    for i in range(k):
        V = Q > ChannelGlobalMaxPooling()
        Q = layers.join(
            # Convolve R and V separately and then add outputs together with
            # the Elementwise layer. This part of the code looks different
            # from the one that was used in the original VIN repo, but
            # it does the same operation.
            #
            # conv(x, w) == (conv(x1, w1) + conv(x2, w2))
            # where, x = concat(x1, x2)
            #        w = concat(w1, w2)
            #
            # See code sample from Github Gist: https://bit.ly/2zm3ntN
            [[
                R,
                SamePadConvolution((3, 3, n_state_filters), weight=q_weight)
            ], [
                V,
                SamePadConvolution((3, 3, n_state_filters), weight=fb_weight)
            ]],
            layers.Elementwise(merge_function=tf.add),
        )

    input_state_1 = layers.Input(UNKNOWN, name='state-1-input')
    input_state_2 = layers.Input(UNKNOWN, name='state-2-input')

    # Select the conv-net channels at the state position (S1, S2)
    VIN = [Q, input_state_1, input_state_2] > SelectValueAtStatePosition()

    # Set up softmax layer that predicts actions based on the (S1, S2)
    # position. Each action encodes a specific direction:
    # N, S, E, W, NE, NW, SE, SW (in the same order)
    VIN = VIN > layers.Softmax(8, bias=None, weight=init.Normal())

    return VIN
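
The comment inside the loop above relies on the identity conv(x, w) == conv(x1, w1) + conv(x2, w2) when x = concat(x1, x2) and w = concat(w1, w2) along the channel axis. A minimal NumPy sketch that checks this numerically (not from the repo; conv2d_valid is a hypothetical helper written just for this check):

import numpy as np

def conv2d_valid(x, w):
    # x: (H, W, C), w: (kH, kW, C) -> single-channel "valid" convolution
    kH, kW, _ = w.shape
    H, W, _ = x.shape
    out = np.zeros((H - kH + 1, W - kW + 1))
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            out[i, j] = np.sum(x[i:i + kH, j:j + kW, :] * w)
    return out

rng = np.random.RandomState(0)
x1, x2 = rng.randn(8, 8, 2), rng.randn(8, 8, 3)
w1, w2 = rng.randn(3, 3, 2), rng.randn(3, 3, 3)

x = np.concatenate([x1, x2], axis=-1)  # x = concat(x1, x2)
w = np.concatenate([w1, w2], axis=-1)  # w = concat(w1, w2)

# conv(x, w) == conv(x1, w1) + conv(x2, w2)
np.testing.assert_allclose(
    conv2d_valid(x, w),
    conv2d_valid(x1, w1) + conv2d_valid(x2, w2))
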
Example #3
    def test_variable_creation(self):
        weight = np.ones((3, 3))
        var1 = tf_utils.create_variable(weight, name='var1', shape=(3, 3))
        self.assertShapesEqual(var1.shape, (3, 3))

        var2 = tf_utils.create_variable(5, name='var2', shape=(4, 3))
        self.assertShapesEqual(var2.shape, (4, 3))
        np.testing.assert_array_almost_equal(
            self.eval(var2), 5 * np.ones((4, 3)))

        initializer = init.Normal()
        var3 = tf_utils.create_variable(initializer, name='var3', shape=(4, 7))
        self.assertShapesEqual(var3.shape, (4, 7))

        weight = tf.Variable(np.ones((3, 3)), dtype=tf.float32)
        var4 = tf_utils.create_variable(weight, name='var4', shape=(3, 3))
        self.assertShapesEqual(var4.shape, (3, 3))
        self.assertIs(var4, weight)

        weight = np.ones((3, 4))
        with self.assertRaisesRegexp(ValueError, "Cannot create variable"):
            tf_utils.create_variable(weight, name='var5', shape=(3, 3))

        weight = tf.Variable(np.ones((4, 3)), dtype=tf.float32)
        with self.assertRaisesRegexp(ValueError, "Cannot create variable"):
            tf_utils.create_variable(weight, name='var6', shape=(3, 3))
Example #4
    def test_reproducibility(self):
        normal = init.Normal(mean=0, std=0.01, seed=0)

        weight1 = normal.sample((10, 20), return_array=True)
        weight2 = normal.sample((10, 20), return_array=True)

        np.testing.assert_array_almost_equal(weight1, weight2)
Example #5
    def test_normal_initializer(self):
        norm = init.Normal(mean=0, std=0.01)
        weight = self.eval(norm.sample((30, 30)))
        self.assertNormalyDistributed(weight)

        weight = norm.sample((30, 30), return_array=True)
        self.assertNormalyDistributed(weight)
Example #6
def create_VIN(input_image_shape=(2, 8, 8), n_hidden_filters=150,
               n_state_filters=10, k=10):

    HalfPaddingConv = partial(layers.Convolution, padding='half', bias=None)

    R = layers.join(
        layers.Input(input_image_shape, name='grid-input'),
        layers.Convolution((n_hidden_filters, 3, 3),
                           padding='half',
                           weight=init.Normal(),
                           bias=init.Normal()),
        HalfPaddingConv((1, 1, 1), weight=init.Normal()),
    )

    # Create shared weights
    q_weight = random_weight((n_state_filters, 1, 3, 3))
    fb_weight = random_weight((n_state_filters, 1, 3, 3))

    Q = R > HalfPaddingConv((n_state_filters, 3, 3), weight=q_weight)

    for i in range(k):
        V = Q > GlobalMaxPooling()
        Q = layers.join(
            # Convolve R and V separately and then add
            # outputs together with the Elementwise layer
            [[
                R,
                HalfPaddingConv((n_state_filters, 3, 3), weight=q_weight)
            ], [
                V,
                HalfPaddingConv((n_state_filters, 3, 3), weight=fb_weight)
            ]],
            layers.Elementwise(merge_function=T.add),
        )

    input_state_1 = layers.Input(10, name='state-1-input')
    input_state_2 = layers.Input(10, name='state-2-input')

    # Select the conv-net channels at the state position (S1, S2)
    VIN = [Q, input_state_1, input_state_2] > SelectValueAtStatePosition()

    # Set up softmax layer that predicts actions based on the (S1, S2)
    # position. Each action encodes a specific direction:
    # N, S, E, W, NE, NW, SE, SW (in the same order)
    VIN = VIN > layers.Softmax(8, bias=None, weight=init.Normal())

    return VIN
Example #7
    def test_layer_copy(self):
        relu = layers.Relu(10, weight=init.Normal(), bias=None)
        copied_relu = copy.copy(relu)

        self.assertEqual(relu.name, 'relu-1')
        self.assertEqual(copied_relu.name, 'relu-2')

        self.assertIsInstance(relu.weight, init.Normal)
        self.assertIsNone(relu.bias)
Example #8
    def test_normal_reprodusible_with_outside_seed(self):
        norm = init.Normal(mean=0, std=0.01)

        np.random.seed(0)
        weight1 = norm.sample((10, 4), return_array=True)

        np.random.seed(0)
        weight2 = norm.sample((10, 4), return_array=True)

        np.testing.assert_array_almost_equal(weight1, weight2)
Example #9
def create_VIN(input_image_shape=(8, 8, 2),
               n_hidden_filters=150,
               n_state_filters=10,
               k=10):

    # Default initialization method
    normal = init.Normal()

    # Create shared weights
    q_weight = create_random_weight((3, 3, 1, n_state_filters))
    fb_weight = create_random_weight((3, 3, 1, n_state_filters))

    # Define basic layers
    SamePadConv = partial(Convolution, padding='SAME', bias=None)

    R = join(
        Input(input_image_shape, name='grid-input'),
        SamePadConv((3, 3, n_hidden_filters), weight=normal, bias=normal),
        SamePadConv((1, 1, 1), weight=normal),
    )
    Q = R >> SamePadConv((3, 3, n_state_filters), weight=q_weight)

    for i in range(k):
        V = Q >> ChannelGlobalMaxPooling()
        Q = join(
            # Convolve R and V separately and then add outputs together with
            # the Elementwise layer. This part of the code looks different
            # from the one that was used in the original VIN repo, but
            # it does the same operation.
            #
            # conv(x, w) == (conv(x1, w1) + conv(x2, w2))
            # where, x = concat(x1, x2)
            #        w = concat(w1, w2)
            #
            # See code sample from Github Gist: https://bit.ly/2zm3ntN
            parallel(
                R >> SamePadConv((3, 3, n_state_filters), weight=q_weight),
                V >> SamePadConv((3, 3, n_state_filters), weight=fb_weight),
            ),
            Elementwise('add'),
        )

    input_state_1 = Input(UNKNOWN, name='state-1-input')
    input_state_2 = Input(UNKNOWN, name='state-2-input')

    # Select the conv-net channels at the state position (S1, S2)
    VIN = (Q | input_state_1 | input_state_2) >> SelectValueAtStatePosition()

    # Set up softmax layer that predicts actions based on the (S1, S2)
    # position. Each action encodes a specific direction:
    # N, S, E, W, NE, NW, SE, SW (in the same order)
    VIN = VIN >> Softmax(8, bias=None, weight=normal)

    return VIN
Example #10
    def test_lvq_weight_initialization_state(self):
        lvqnet = algorithms.LVQ(n_inputs=2, n_classes=2)
        self.assertFalse(lvqnet.initialized)

        lvqnet.train(np.random.random((10, 2)), np.random.random(10).round(),
                     epochs=1)
        self.assertTrue(lvqnet.initialized)

        lvqnet = algorithms.LVQ(n_inputs=2, n_classes=3,
                                weight=np.random.random((2, 3)))
        self.assertTrue(lvqnet.initialized)

        lvqnet = algorithms.LVQ(n_inputs=2, n_classes=3,
                                weight=init.Normal())
        self.assertTrue(lvqnet.initialized)
        self.assertEqual(lvqnet.weight.shape, (2, 3))
Example #11
    def test_sofm_angle_distance(self):
        sn = algorithms.SOFM(n_inputs=2,
                             n_outputs=3,
                             transform='cos',
                             learning_radius=1,
                             features_grid=(3, 1),
                             weight=init.Normal(mean=0, std=1),
                             verbose=False)

        sn.train(input_data, epochs=6)

        answers = np.array([
            [1., 0., 0.],
            [1., 0., 0.],
            [0., 1., 0.],
            [0., 1., 0.],
            [0., 0., 1.],
            [0., 0., 1.],
        ])

        np.testing.assert_array_almost_equal(sn.predict(input_data), answers)
Example #12
class BaseAssociative(BaseNetwork):
    """
    Base class for associative learning.

    Parameters
    ----------
    n_inputs : int
        Number of features (columns) in the input data.

    n_outputs : int
        Number of outputs in the network.

    weight : array-like, Initializer
        Neural network weights.
        A manually defined value should have shape ``(n_inputs, n_outputs)``.
        Defaults to :class:`Normal() <neupy.init.Normal>`.

    {BaseNetwork.step}

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.epoch_end_signal}

    {BaseNetwork.train_end_signal}

    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}

    train(input_train, summary='table', epochs=100)
        Train neural network.

    {BaseSkeleton.fit}
    """
    n_inputs = IntProperty(minval=1, required=True)
    n_outputs = IntProperty(minval=1, required=True)
    weight = ParameterProperty(default=init.Normal())

    def __init__(self, **options):
        super(BaseAssociative, self).__init__(**options)
        self.init_layers()

    def init_layers(self):
        valid_weight_shape = (self.n_inputs, self.n_outputs)

        if isinstance(self.weight, init.Initializer):
            self.weight = self.weight.sample(
                valid_weight_shape, return_array=True)

        if self.weight.shape != valid_weight_shape:
            raise ValueError(
                "Weight matrix has invalid shape. Got {}, expected {}"
                "".format(self.weight.shape, valid_weight_shape))

        self.weight = self.weight.astype(float)

    def format_input_data(self, input_data):
        is_feature1d = self.n_inputs == 1
        input_data = format_data(input_data, is_feature1d)

        if input_data.ndim != 2:
            raise ValueError("Cannot make prediction, because input "
                             "data has more than 2 dimensions")

        n_samples, n_features = input_data.shape

        if n_features != self.n_inputs:
            raise ValueError("Input data expected to have {} features, "
                             "but got {}".format(self.n_inputs, n_features))

        return input_data

    def train(self, input_train, summary='table', epochs=100):
        input_train = self.format_input_data(input_train)

        return super(BaseAssociative, self).train(
            input_train=input_train, target_train=None,
            input_test=None, target_test=None,
            epochs=epochs, epsilon=None,
            summary=summary)
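
The init_layers method above relies on the Initializer contract: an init.Normal() instance only becomes a concrete array once sampled with a shape. A minimal usage sketch (shapes are illustrative; assumes neupy is installed):

import numpy as np
from neupy import init

# Initializer instances are lazy: no array exists until sample() is called
initializer = init.Normal(mean=0, std=0.01)

# return_array=True returns a NumPy array instead of a Tensorflow tensor
weight = initializer.sample((3, 2), return_array=True)

assert isinstance(weight, np.ndarray)
assert weight.shape == (3, 2)
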
Example #13
    def test_normal_initialize_repr(self):
        normal_initializer = init.Normal(mean=0, std=0.01)
        self.assertEqual("Normal(mean=0, std=0.01)", str(normal_initializer))
Example #14
class SOFM(Kohonen):
    """
    Self-Organizing Feature Map (SOFM or SOM).

    Notes
    -----
    - Training data samples should have normalized features.

    Parameters
    ----------
    {BaseAssociative.n_inputs}

    n_outputs : int or None
        Number of outputs. Parameter is optional when
        ``features_grid`` is specified.

        .. code-block:: python

            if n_outputs is None:
                n_outputs = np.prod(features_grid)

    learning_radius : int
        Parameter defines the radius within which we consider all
        neurons as neighbours of the winning neuron. The bigger
        the value, the more neurons will be updated after each
        iteration.

        A value of ``0`` means that we don't update
        neighbour neurons.

        Defaults to ``0``.

    std : int, float
        Parameter controls the learning rate for each neighbour.
        The further a neighbour neuron is from the winning neuron,
        the smaller its learning rate. Learning rates are
        scaled by factors produced by a normal distribution
        centred at the winning neuron with the standard
        deviation specified by this parameter. The learning rate
        for the winning neuron is always equal to the value
        specified in the ``step`` parameter, and for neighbour
        neurons it's always lower.

        The bigger the value of this parameter, the bigger the
        learning rate for the neighbour neurons.

        Defaults to ``1``.

    features_grid : list, tuple, None
        Feature grid defines the shape of the output neurons.
        The shape should be compatible with the number
        of outputs, which means that the following condition
        should be true:

        .. code-block:: python

            np.prod(features_grid) == n_outputs

        SOFM implementation supports n-dimensional grids.
        For instance, in order to specify the grid as a cube instead
        of the regular rectangular shape, we can set up the options
        as follows:

        .. code-block:: python

            SOFM(
                ...
                features_grid=(5, 5, 5),
                ...
            )

        Defaults to ``(n_outputs, 1)``.

    grid_type : {{``rect``, ``hexagon``}}
        Defines connection type in the feature grid. The type defines
        which neurons we will consider as closest to the winning
        neuron during the training.

        - ``rect`` - Connections between neurons will be organized
          in a rectangular grid.

        - ``hexagon`` - Connections between neurons will be organized
          in a hexagonal grid. It works only for 1d or 2d grids.

        Defaults to ``rect``.

    distance : {{``euclid``, ``dot_product``, ``cos``}}
        Defines the function that will be used to find the
        weight closest to the input sample.

        - ``dot_product``: Just a regular dot product between
          data sample and network's weights

        - ``euclid``: Euclidean distance between data sample
          and network's weights

        - ``cos``: Cosine distance between data sample and
          network's weights

        Defaults to ``euclid``.

    reduce_radius_after : int or None
        Every specified number of epochs the ``learning_radius``
        parameter will be reduced by ``1``. The process continues
        until ``learning_radius`` reaches ``0``.

        The ``None`` value disables parameter reduction
        during the training.

        Defaults to ``100``.

    reduce_step_after : int or None
        Defines reduction rate at which parameter ``step`` will
        be reduced using the following formula:

        .. code-block:: python

            step = step / (1 + current_epoch / reduce_step_after)

        The ``None`` value disables parameter reduction
        during the training.

        Defaults to ``100``.

    reduce_std_after : int or None
        Defines reduction rate at which parameter ``std`` will
        be reduced using the following formula:

        .. code-block:: python

            std = std / (1 + current_epoch / reduce_std_after)

        The ``None`` value disables parameter reduction
        during the training.

        Defaults to ``100``.

    weight : array-like, Initializer or {{``init_pca``, ``sample_from_data``}}
        Neural network weights.
        A manually defined value should have shape ``(n_inputs, n_outputs)``.

        Also, it's possible to initialize weights based on the
        training data. There are two options:

        - ``sample_from_data`` - Before training starts, the network
          will randomly take a number of training samples equal to the
          number of expected outputs.

        - ``init_pca`` - Before training starts, SOFM applies PCA
          to a covariance matrix built from the training samples.
          Weights will be generated based on the two eigenvectors
          associated with the largest eigenvalues.

        Defaults to :class:`Normal() <neupy.init.Normal>`.

    {BaseNetwork.step}

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.signals}

    {Verbose.verbose}

    Methods
    -------
    init_weights(train_data)
        Initializes weights based on the input data. It works only
        for the `init_pca` and `sample_from_data` options. For other
        cases it will throw an error.

    {BaseSkeleton.predict}

    {BaseAssociative.train}

    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms, utils
    >>>
    >>> utils.reproducible()
    >>>
    >>> data = np.array([
    ...     [0.1961, 0.9806],
    ...     [-0.1961, 0.9806],
    ...     [-0.5812, -0.8137],
    ...     [-0.8137, -0.5812],
    ... ])
    >>>
    >>> sofm = algorithms.SOFM(
    ...     n_inputs=2,
    ...     n_outputs=2,
    ...     step=0.1,
    ...     learning_radius=0
    ... )
    >>> sofm.train(data, epochs=100)
    >>> sofm.predict(data)
    array([[0, 1],
           [0, 1],
           [1, 0],
           [1, 0]])
    """
    n_outputs = IntProperty(minval=1, allow_none=True, default=None)
    weight = SOFMWeightParameter(default=init.Normal(),
                                 choices={
                                     'init_pca': linear_initialization,
                                     'sample_from_data': sample_data,
                                 })
    features_grid = TypedListProperty(allow_none=True, default=None)

    DistanceParameter = namedtuple('DistanceParameter', 'name func')
    distance = ChoiceProperty(
        default='euclid',
        choices={
            'dot_product': DistanceParameter(
                name='dot_product', func=np.dot),
            'euclid': DistanceParameter(
                name='euclid', func=neg_euclid_distance),
            'cos': DistanceParameter(
                name='cosine', func=cosine_similarity),
        })

    GridTypeMethods = namedtuple('GridTypeMethods',
                                 'name find_neighbours find_step_scaler')

    grid_type = ChoiceProperty(
        default='rect',
        choices={
            'rect': GridTypeMethods(
                name='rectangle',
                find_neighbours=find_neighbours_on_rect_grid,
                find_step_scaler=find_step_scaler_on_rect_grid),
            'hexagon': GridTypeMethods(
                name='hexagon',
                find_neighbours=find_neighbours_on_hexagon_grid,
                find_step_scaler=find_step_scaler_on_hexagon_grid),
        })

    learning_radius = IntProperty(default=0, minval=0)
    std = NumberProperty(minval=0, default=1)

    reduce_radius_after = IntProperty(default=100, minval=1, allow_none=True)
    reduce_std_after = IntProperty(default=100, minval=1, allow_none=True)
    reduce_step_after = IntProperty(default=100, minval=1, allow_none=True)

    def __init__(self, **options):
        super(BaseAssociative, self).__init__(**options)

        if self.n_outputs is None and self.features_grid is None:
            raise ValueError("One of the following parameters has to be "
                             "specified: n_outputs, features_grid")

        elif self.n_outputs is None:
            self.n_outputs = np.prod(self.features_grid)

        n_grid_elements = np.prod(self.features_grid)
        invalid_feature_grid = (self.features_grid is not None
                                and n_grid_elements != self.n_outputs)

        if invalid_feature_grid:
            raise ValueError(
                "Feature grid should contain the same number of elements "
                "as in the output layer: {0}, but found: {1} (shape: {2})"
                "".format(self.n_outputs, n_grid_elements, self.features_grid))

        if self.features_grid is None:
            self.features_grid = (self.n_outputs, 1)

        if len(self.features_grid) > 2 and self.grid_type.name == 'hexagon':
            raise ValueError("SOFM with hexagon grid type should have "
                             "one or two dimensional feature grid, but got "
                             "{}d instead (shape: {!r})".format(
                                 len(self.features_grid), self.features_grid))

        is_pca_init = (isinstance(options.get('weight'), six.string_types)
                       and options.get('weight') == 'init_pca')

        self.initialized = False
        if not callable(self.weight):
            super(Kohonen, self).init_weights()
            self.initialized = True

            if self.distance.name == 'cosine':
                self.weight /= np.linalg.norm(self.weight, axis=0)

        elif is_pca_init and self.grid_type.name != 'rectangle':
            raise WeightInitializationError(
                "Cannot apply PCA weight initialization for non-rectangular "
                "grid. Grid type: {}".format(self.grid_type.name))

    def predict_raw(self, X):
        X = format_data(X, is_feature1d=(self.n_inputs == 1))

        if X.ndim != 2:
            raise ValueError("Only 2D inputs are allowed")

        n_samples = X.shape[0]
        output = np.zeros((n_samples, self.n_outputs))

        for i, input_row in enumerate(X):
            output[i, :] = self.distance.func(input_row.reshape(1, -1),
                                              self.weight)

        return output

    def update_indexes(self, layer_output):
        neuron_winner = layer_output.argmax(axis=1).item(0)
        winner_neuron_coords = np.unravel_index(neuron_winner,
                                                self.features_grid)

        learning_radius = self.learning_radius
        step = self.step
        std = self.std

        if self.reduce_radius_after is not None:
            learning_radius -= self.last_epoch // self.reduce_radius_after
            learning_radius = max(0, learning_radius)

        if self.reduce_step_after is not None:
            step = decay_function(step, self.last_epoch,
                                  self.reduce_step_after)

        if self.reduce_std_after is not None:
            std = decay_function(std, self.last_epoch, self.reduce_std_after)

        methods = self.grid_type
        output_grid = np.reshape(layer_output, self.features_grid)

        output_with_neighbours = methods.find_neighbours(
            grid=output_grid,
            center=winner_neuron_coords,
            radius=learning_radius)

        step_scaler = methods.find_step_scaler(grid=output_grid,
                                               center=winner_neuron_coords,
                                               std=std)

        index_y, = np.nonzero(output_with_neighbours.reshape(self.n_outputs))

        step_scaler = step_scaler.reshape(self.n_outputs)
        return index_y, step * step_scaler[index_y]

    def init_weights(self, X_train):
        if self.initialized:
            raise WeightInitializationError(
                "Weights have been already initialized")

        weight_initializer = self.weight
        self.weight = weight_initializer(X_train, self.features_grid)
        self.initialized = True

        if self.distance.name == 'cosine':
            self.weight /= np.linalg.norm(self.weight, axis=0)

    def train(self, X_train, epochs=100):
        if not self.initialized:
            self.init_weights(X_train)
        super(SOFM, self).train(X_train, epochs=epochs)

    def one_training_update(self, X_train, y_train=None):
        step = self.step
        predict = self.predict
        update_indexes = self.update_indexes

        error = 0
        for input_row in X_train:
            input_row = np.reshape(input_row, (1, input_row.size))
            layer_output = predict(input_row)

            index_y, step = update_indexes(layer_output)
            distance = input_row.T - self.weight[:, index_y]
            updated_weights = (self.weight[:, index_y] + step * distance)

            if self.distance.name == 'cosine':
                updated_weights /= np.linalg.norm(updated_weights, axis=0)

            self.weight[:, index_y] = updated_weights
            error += np.abs(distance).mean()

        return error / len(X_train)
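
The std parameter documented above scales each neighbour's learning rate with a bell curve centred on the winning neuron. A minimal NumPy sketch of that idea (an illustration under assumptions: neupy's actual find_step_scaler functions may use a different kernel):

import numpy as np

def neighbour_step_scaler(grid_shape, winner, std=1.0):
    # Grid coordinates of every neuron, shape: grid_shape + (n_dims,)
    coords = np.stack(
        np.meshgrid(*[np.arange(s) for s in grid_shape], indexing='ij'),
        axis=-1)
    # Squared euclidean distance from the winning neuron
    sq_dist = ((coords - np.array(winner)) ** 2).sum(axis=-1)
    # Gaussian factor: 1 at the winner, smaller for distant neighbours
    return np.exp(-sq_dist / (2 * std ** 2))

scaler = neighbour_step_scaler((3, 1), winner=(1, 0), std=1.0)
print(scaler[1, 0])  # 1.0 -> the winner trains with the full step
print(scaler[0, 0])  # ~0.61 -> neighbours train with a reduced step
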
Example #15
File: train_vin.py Project: degerli/neupy
def random_weight(shape):
    initializer = init.Normal()
    weight = initializer.sample(shape)
    return tf.Variable(asfloat(weight), dtype=tf.float32)
Example #16
class RBM(BaseAlgorithm, BaseNetwork, MinibatchTrainingMixin, DumpableObject):
    """
    Boolean/Bernoulli Restricted Boltzmann Machine (RBM).
    The algorithm assumes that inputs are either binary
    values or values between 0 and 1.

    Parameters
    ----------
    n_visible : int
        Number of visible units. Number of features (columns)
        in the input data.

    n_hidden : int
        Number of hidden units. The larger the number, the more
        information the network can capture from the data, but it
        also means that the network is more likely to overfit.

    batch_size : int
        Size of the mini-batch. Defaults to ``10``.

    weight : array-like, Tensorflow variable, Initializer or scalar
        Default initialization methods can be
        found :ref:`here <init-methods>`.
        Defaults to :class:`Normal <neupy.init.Normal>`.

    hidden_bias : array-like, Tensorflow variable, Initializer or scalar
        Default initialization methods can be
        found :ref:`here <init-methods>`.
        Defaults to :class:`Constant(value=0) <neupy.init.Constant>`.

    visible_bias : array-like, Tensorflow variable, Initializer or scalar
        Default initialization methods can be
        found :ref:`here <init-methods>`.
        Defaults to :class:`Constant(value=0) <neupy.init.Constant>`.

    {BaseNetwork.Parameters}

    Methods
    -------
    train(input_train, epochs=100)
        Trains network.

    {BaseSkeleton.fit}

    visible_to_hidden(visible_input)
        Propagates data through the network and returns output
        from the hidden layer.

    hidden_to_visible(hidden_input)
        Propagates output from the hidden layer backward
        to the visible layer.

    gibbs_sampling(visible_input, n_iter=1)
        Performs Gibbs sampling ``n_iter`` times using the visible input.

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> data = np.array([
    ...     [1, 0, 1, 0],
    ...     [1, 0, 1, 0],
    ...     [1, 0, 0, 0],  # incomplete sample
    ...     [1, 0, 1, 0],
    ...
    ...     [0, 1, 0, 1],
    ...     [0, 0, 0, 1],  # incomplete sample
    ...     [0, 1, 0, 1],
    ...     [0, 1, 0, 1],
    ...     [0, 1, 0, 1],
    ...     [0, 1, 0, 1],
    ... ])
    >>>
    >>> rbm = algorithms.RBM(n_visible=4, n_hidden=1)
    >>> rbm.train(data, epochs=100)
    >>>
    >>> hidden_states = rbm.visible_to_hidden(data)
    >>> hidden_states.round(2)
    array([[ 0.99],
           [ 0.99],
           [ 0.95],
           [ 0.99],
           [ 0.  ],
           [ 0.01],
           [ 0.  ],
           [ 0.  ],
           [ 0.  ],
           [ 0.  ]])

    References
    ----------
    [1] G. Hinton, A Practical Guide to Training Restricted
        Boltzmann Machines, 2010.
        http://www.cs.toronto.edu/~hinton/absps/guideTR.pdf
    """
    n_visible = IntProperty(minval=1)
    n_hidden = IntProperty(minval=1)
    batch_size = IntProperty(minval=1, default=10)

    weight = ParameterProperty(default=init.Normal())
    hidden_bias = ParameterProperty(default=init.Constant(value=0))
    visible_bias = ParameterProperty(default=init.Constant(value=0))

    def __init__(self, n_visible, n_hidden, **options):
        options.update({'n_visible': n_visible, 'n_hidden': n_hidden})
        super(RBM, self).__init__(**options)

    def init_input_output_variables(self):
        with tf.variable_scope('rbm'):
            self.weight = create_shared_parameter(value=self.weight,
                                                  name='weight',
                                                  shape=(self.n_visible,
                                                         self.n_hidden))
            self.hidden_bias = create_shared_parameter(
                value=self.hidden_bias,
                name='hidden-bias',
                shape=(self.n_hidden, ),
            )
            self.visible_bias = create_shared_parameter(
                value=self.visible_bias,
                name='visible-bias',
                shape=(self.n_visible, ),
            )

            self.variables.update(
                network_input=tf.placeholder(
                    tf.float32,
                    (None, self.n_visible),
                    name="network-input",
                ),
                network_hidden_input=tf.placeholder(
                    tf.float32,
                    (None, self.n_hidden),
                    name="network-hidden-input",
                ),
            )

    def init_variables(self):
        with tf.variable_scope('rbm'):
            self.variables.update(
                h_samples=tf.Variable(
                    tf.zeros([self.batch_size, self.n_hidden]),
                    name="hidden-samples",
                    dtype=tf.float32,
                ),
            )

    def init_methods(self):
        def free_energy(visible_sample):
            with tf.name_scope('free-energy'):
                wx = tf.matmul(visible_sample, self.weight)
                wx_b = wx + self.hidden_bias

                visible_bias_term = dot(visible_sample, self.visible_bias)

                # We can get infinity when wx_b is a relatively large number
                # (maybe 100). Taking the exponent makes it even larger and
                # with float32 it can turn into infinity. But because the
                # number is so large, we don't care about the +1 term before
                # taking the logarithm and therefore we can just take the
                # value as it is, since the operation won't change anything.
                hidden_terms = tf.where(
                    # exp(30) is such a big number that +1 won't
                    # make any difference in the outcome.
                    tf.greater(wx_b, 30),
                    wx_b,
                    tf.log1p(tf.exp(wx_b)),
                )

                hidden_term = tf.reduce_sum(hidden_terms, axis=1)
                return -(visible_bias_term + hidden_term)

        def visible_to_hidden(visible_sample):
            with tf.name_scope('visible-to-hidden'):
                wx = tf.matmul(visible_sample, self.weight)
                wx_b = wx + self.hidden_bias
                return tf.nn.sigmoid(wx_b)

        def hidden_to_visible(hidden_sample):
            with tf.name_scope('hidden-to-visible'):
                wx = tf.matmul(hidden_sample, self.weight, transpose_b=True)
                wx_b = wx + self.visible_bias
                return tf.nn.sigmoid(wx_b)

        def sample_hidden_from_visible(visible_sample):
            with tf.name_scope('sample-hidden-to-visible'):
                hidden_prob = visible_to_hidden(visible_sample)
                hidden_sample = random_binomial(hidden_prob)
                return hidden_sample

        def sample_visible_from_hidden(hidden_sample):
            with tf.name_scope('sample-visible-to-hidden'):
                visible_prob = hidden_to_visible(hidden_sample)
                visible_sample = random_binomial(visible_prob)
                return visible_sample

        network_input = self.variables.network_input
        network_hidden_input = self.variables.network_hidden_input
        input_shape = tf.shape(network_input)
        n_samples = input_shape[0]

        weight = self.weight
        h_bias = self.hidden_bias
        v_bias = self.visible_bias
        h_samples = self.variables.h_samples
        step = asfloat(self.step)

        with tf.name_scope('positive-values'):
            # We have to use `cond` instead of `where`, because
            # different if-else cases might have different shapes,
            # which triggers an exception in tensorflow.
            v_pos = tf.cond(
                tf.equal(n_samples, self.batch_size), lambda: network_input,
                lambda: random_sample(network_input, self.batch_size))
            h_pos = visible_to_hidden(v_pos)

        with tf.name_scope('negative-values'):
            v_neg = sample_visible_from_hidden(h_samples)
            h_neg = visible_to_hidden(v_neg)

        with tf.name_scope('weight-update'):
            weight_update = (
                tf.matmul(v_pos, h_pos, transpose_a=True) -
                tf.matmul(v_neg, h_neg, transpose_a=True)) / asfloat(n_samples)

        with tf.name_scope('hidden-bias-update'):
            h_bias_update = tf.reduce_mean(h_pos - h_neg, axis=0)

        with tf.name_scope('visible-bias-update'):
            v_bias_update = tf.reduce_mean(v_pos - v_neg, axis=0)

        with tf.name_scope('flipped-input-features'):
            # Each row will have one random feature marked with 1;
            # all other values will be equal to 0
            possible_feature_corruptions = tf.eye(self.n_visible)
            corrupted_features = random_sample(possible_feature_corruptions,
                                               n_samples)

            rounded_input = tf.round(network_input)
            # If we scale input values from the [0, 1] range to [-1, 1]
            # then it will be easier to flip feature values with a
            # simple multiplication.
            scaled_rounded_input = 2 * rounded_input - 1
            scaled_flipped_rounded_input = (
                # For corrupted_features we convert 0 to 1 and 1 to -1.
                # This way, after the multiplication we flip signs
                # wherever the transformed corrupted_features equals -1.
                (-2 * corrupted_features + 1) * scaled_rounded_input)
            # Scale it back to the [0, 1] range
            flipped_rounded_input = (scaled_flipped_rounded_input + 1) / 2

        with tf.name_scope('pseudo-likelihood-loss'):
            # Stochastic pseudo-likelihood
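            # One randomly chosen visible unit per sample was flipped
            # above; log_sigmoid(F(flipped) - F(original)) estimates
            # the per-unit log pseudo-likelihood, scaled by n_visible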
            error = tf.reduce_mean(self.n_visible * tf.log_sigmoid(
                free_energy(flipped_rounded_input) -
                free_energy(rounded_input)))

        with tf.name_scope('gibbs-sampling'):
            gibbs_sampling = sample_visible_from_hidden(
                sample_hidden_from_visible(network_input))

        initialize_uninitialized_variables()
        self.methods.update(
            train_epoch=function(
                [network_input],
                error,
                name='rbm/train-epoch',
                updates=[
                    (weight, weight + step * weight_update),
                    (h_bias, h_bias + step * h_bias_update),
                    (v_bias, v_bias + step * v_bias_update),
                    (h_samples, random_binomial(p=h_neg)),
                ],
            ),
            prediction_error=function(
                [network_input],
                error,
                name='rbm/prediction-error',
            ),
            diff1=function(
                [network_input],
                free_energy(flipped_rounded_input),
                name='rbm/diff1-error',
            ),
            diff2=function(
                [network_input],
                free_energy(rounded_input),
                name='rbm/diff2-error',
            ),
            visible_to_hidden=function(
                [network_input],
                visible_to_hidden(network_input),
                name='rbm/visible-to-hidden',
            ),
            hidden_to_visible=function(
                [network_hidden_input],
                hidden_to_visible(network_hidden_input),
                name='rbm/hidden-to-visible',
            ),
            gibbs_sampling=function(
                [network_input],
                gibbs_sampling,
                name='rbm/gibbs-sampling',
            ),
        )

    def train(self, input_train, input_test=None, epochs=100, summary='table'):
        """
        Train RBM.

        Parameters
        ----------
        input_train : 1D or 2D array-like
        input_test : 1D or 2D array-like or None
            Defaults to ``None``.
        epochs : int
            Number of training epochs. Defaults to ``100``.
        summary : {'table', 'inline'}
            Training summary type. Defaults to ``'table'``.
        """
        return super(RBM, self).train(input_train=input_train,
                                      target_train=None,
                                      input_test=input_test,
                                      target_test=None,
                                      epochs=epochs,
                                      epsilon=None,
                                      summary=summary)

    def train_epoch(self, input_train, target_train=None):
        """
        Train one epoch.

        Parameters
        ----------
        input_train : array-like (n_samples, n_features)

        Returns
        -------
        float
        """
        errors = self.apply_batches(
            function=self.methods.train_epoch,
            input_data=input_train,
            description='Training batches',
            show_error_output=True,
        )

        n_samples = len(input_train)
        return average_batch_errors(errors, n_samples, self.batch_size)

    def visible_to_hidden(self, visible_input):
        """
        Propagates data through the network and returns output
        from the hidden layer.

        Parameters
        ----------
        visible_input : array-like (n_samples, n_visible_features)

        Returns
        -------
        array-like
        """
        is_input_feature1d = (self.n_visible == 1)
        visible_input = format_data(visible_input, is_input_feature1d)

        outputs = self.apply_batches(
            function=self.methods.visible_to_hidden,
            input_data=visible_input,
            description='Hidden from visible batches',
            show_progressbar=True,
            show_error_output=False,
            scalar_output=False,
        )
        return np.concatenate(outputs, axis=0)

    def hidden_to_visible(self, hidden_input):
        """
        Propagates output from the hidden layer backward
        to the visible layer.

        Parameters
        ----------
        hidden_input : array-like (n_samples, n_hidden_features)

        Returns
        -------
        array-like
        """
        is_input_feature1d = (self.n_hidden == 1)
        hidden_input = format_data(hidden_input, is_input_feature1d)

        outputs = self.apply_batches(
            function=self.methods.hidden_to_visible,
            input_data=hidden_input,
            description='Visible from hidden batches',
            show_progressbar=True,
            show_error_output=False,
            scalar_output=False,
        )
        return np.concatenate(outputs, axis=0)

    def prediction_error(self, input_data, target_data=None):
        """
        Compute the pseudo-likelihood of input samples.

        Parameters
        ----------
        input_data : array-like
            Values of the visible layer

        Returns
        -------
        float
            Value of the pseudo-likelihood.
        """
        is_input_feature1d = (self.n_visible == 1)
        input_data = format_data(input_data, is_input_feature1d)

        errors = self.apply_batches(
            function=self.methods.prediction_error,
            input_data=input_data,
            description='Validation batches',
            show_error_output=True,
        )
        return average_batch_errors(
            errors,
            n_samples=len(input_data),
            batch_size=self.batch_size,
        )

    def gibbs_sampling(self, visible_input, n_iter=1):
        """
        Performs Gibbs sampling n times using the visible input.

        Parameters
        ----------
        visible_input : 1d or 2d array
        n_iter : int
            Number of Gibbs sampling iterations. Defaults to ``1``.

        Returns
        -------
        array-like
            Output from the visible units after performing n
            Gibbs sampling iterations. The array will contain
            only binary values (0 and 1).
        """
        is_input_feature1d = (self.n_visible == 1)
        visible_input = format_data(visible_input, is_input_feature1d)

        gibbs_sampling = self.methods.gibbs_sampling

        input_ = visible_input
        for iteration in range(n_iter):
            input_ = gibbs_sampling(input_)

        return input_
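
The free_energy helper above guards log(1 + exp(wx_b)) against float32 overflow with tf.where. A minimal NumPy sketch of the same guard (illustrative only, not from neupy):

import numpy as np

def softplus_naive(x):
    # np.exp overflows float32 for x above roughly 88, producing inf
    return np.log1p(np.exp(x))

def softplus_guarded(x):
    # Mirrors the tf.where(...) above: for x > 30 the +1 inside
    # log1p(exp(x)) is negligible, so we can return x directly
    return np.where(x > 30, x, np.log1p(np.exp(np.minimum(x, 30))))

x = np.array([100.0], dtype=np.float32)
print(softplus_naive(x))    # [inf]
print(softplus_guarded(x))  # [100.]
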
Example #17
class BaseAssociative(BaseNetwork):
    """
    Base class for associative learning.

    Parameters
    ----------
    n_inputs : int
        Number of features (columns) in the input data.

    n_outputs : int
        Number of outputs in the network.

    weight : array-like, Initializer
        Neural network weights.
        A manually defined value should have shape ``(n_inputs, n_outputs)``.
        Defaults to :class:`Normal() <neupy.init.Normal>`.

    {BaseNetwork.Parameters}

    Methods
    -------
    {BaseSkeleton.predict}

    train(X_train, epochs=100)
        Train neural network.

    {BaseSkeleton.fit}
    """
    n_inputs = IntProperty(minval=1, required=True)
    n_outputs = IntProperty(minval=1, required=True)
    weight = ParameterProperty(default=init.Normal())

    def __init__(self, **options):
        super(BaseAssociative, self).__init__(**options)
        self.init_weights()

    def init_weights(self):
        valid_weight_shape = (self.n_inputs, self.n_outputs)

        if isinstance(self.weight, init.Initializer):
            self.weight = self.weight.sample(valid_weight_shape,
                                             return_array=True)

        if self.weight.shape != valid_weight_shape:
            raise ValueError(
                "Weight matrix has invalid shape. Got {}, expected {}"
                "".format(self.weight.shape, valid_weight_shape))

        self.weight = self.weight.astype(float)

    def format_input_data(self, X):
        X = format_data(X, is_feature1d=(self.n_inputs == 1))

        if X.ndim != 2:
            raise ValueError("Cannot make prediction, because input "
                             "data has more than 2 dimensions")

        if X.shape[1] != self.n_inputs:
            raise ValueError("Input data expected to have {} features, "
                             "but got {}".format(self.n_inputs, X.shape[1]))

        return X

    def train(self, X_train, epochs=100):
        X_train = self.format_input_data(X_train)
        return super(BaseAssociative, self).train(X_train=X_train,
                                                  epochs=epochs)
Example #18
def train_som(train_data, train_targets,
          grid_size=30, 
          n_epochs=100, 
          lrn_radius=2,
          init_mode=init.Normal(0, 1),
          pca_model=None, n_pca=None, 
          plot_flag=False, 
          extra_str='', dir_path='../Data/som_models/'):
    # Preprocess data if needed
    if isinstance(train_data, pd.DataFrame):
        train_data = train_data.to_numpy()
    if isinstance(train_targets, pd.DataFrame):
        train_targets = train_targets.to_numpy(dtype='int').squeeze()
    if pca_model is None and n_pca is not None:
        train_data, pca_model = preprocess_data(train_data, n_pca)
    
    # Create SOM structure
    GRID_HEIGHT = grid_size
    GRID_WIDTH = grid_size

    som = algorithms.SOFM(
        n_inputs=train_data.shape[1],
        features_grid=(GRID_HEIGHT, GRID_WIDTH),

        learning_radius=lrn_radius,
        weight=init_mode,
        reduce_radius_after=50,

        step=0.5,
        std=1,

        shuffle_data=True,
        verbose=True,
    )

    # Train SOM
    som.train(train_data, epochs=n_epochs)

    # Get model targets for future predictions
    trained_clusters = som.predict(train_data).argmax(axis=1)
    model_targets = np.zeros([GRID_HEIGHT*GRID_WIDTH,1])

    for row_id in range(GRID_HEIGHT):
        for col_id in range(GRID_WIDTH):
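            # Row-major index of the grid cell; GRID_HEIGHT equals
            # GRID_WIDTH here, so it also serves as the row stride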
            index = row_id * GRID_HEIGHT + col_id
            indices = np.argwhere(trained_clusters == index).ravel()
            clustered_targets = train_targets[indices]

            if len(clustered_targets) > 0:
                # Select the target mode
                target = stats.mode(clustered_targets).mode[0]
            else:
                # If no prediction, assume 0
                target = 0
            model_targets[index] = target

    # Compute training MSE
    som_predictions = model_targets[trained_clusters]
    som.mse = mean_squared_error(train_targets, som_predictions)
    accuracy = accuracy_score(train_targets, som_predictions)
    print('SOM train MSE: ', som.mse)
    print('SOM train Acc: ', accuracy)

    # Save model
    som.model_targets = model_targets.squeeze()
    som.pca_model = pca_model
    save_som_model(som, extra_str, dir_path)

    # Plot SOM map
    if plot_flag:
        plot_SOM(som, train_data, train_targets)

    return som
Example #19
class BaseAssociative(BaseNetwork):
    """
    Base class for associative learning.

    Parameters
    ----------
    n_inputs : int
        Number of input units.

    n_outputs : int
        Number of output units.

    weight : array-like, Initializer
        Neural network weights.
        A manually defined value should have shape ``(n_inputs, n_outputs)``.
        Defaults to :class:`Normal() <neupy.init.Normal>`.

    {BaseNetwork.step}

    {BaseNetwork.show_epoch}

    {BaseNetwork.shuffle_data}

    {BaseNetwork.epoch_end_signal}

    {BaseNetwork.train_end_signal}

    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}

    train(input_train, epochs=100)
        Train neural network.

    {BaseSkeleton.fit}
    """
    n_inputs = IntProperty(minval=1, required=True)
    n_outputs = IntProperty(minval=1, required=True)
    weight = ParameterProperty(default=init.Normal())

    def __init__(self, **options):
        super(BaseAssociative, self).__init__(**options)
        self.init_layers()

    def init_layers(self):
        valid_weight_shape = (self.n_inputs, self.n_outputs)

        if isinstance(self.weight, init.Initializer):
            self.weight = self.weight.sample(valid_weight_shape)

        if self.weight.shape != valid_weight_shape:
            raise ValueError("Weight matrix has invalid shape. Got {}, "
                             "expected {}".format(self.weight.shape,
                                                  valid_weight_shape))

        self.weight = self.weight.astype(float)

    def train(self, input_train, epochs=100):
        input_train = format_data(input_train, is_feature1d=True)
        return super(BaseAssociative, self).train(input_train=input_train,
                                                  target_train=None,
                                                  input_test=None,
                                                  target_test=None,
                                                  epochs=epochs,
                                                  epsilon=None,
                                                  summary='table')
Example #20
    def test_normal_initializer(self):
        norm = init.Normal(mean=0, std=0.01)
        weight = norm.sample((30, 30))

        self.assertNormalyDistributed(weight)