Example #1
    def update(self, samples, axis=0) -> None:
        """
        Update the online calculation of the mean and variance.

        Notes:
            Modifies the metrics in place.

        Args:
            samples: data samples
            axis (optional): the axis along which to compute the statistics

        Returns:
            None

        """
        # compute the sample size and sample mean
        n = len(samples)
        sample_mean = be.tsum(samples, axis=axis) / max(n, 1)
        sample_square = be.tsum(be.square(be.subtract(sample_mean, samples)),
                                axis=axis)

        # lazily initialize the running statistics on the first call
        if self.mean is None:
            self.mean = be.zeros_like(sample_mean)
            self.square = be.zeros_like(sample_square)
            self.var = be.zeros_like(sample_square)
            self.num = 0

        # merge the batch statistics into the running totals
        delta = sample_mean - self.mean
        new_num = self.num + n
        correction = n * self.num * be.square(delta) / max(new_num, 1)

        self.square += sample_square + correction
        self.var = self.square / max(new_num - 1, 1)
        self.mean = (self.num * self.mean + n * sample_mean) / max(new_num, 1)
        self.num = new_num
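The `correction` term above is the cross term from the pairwise variance-combination rule, so the running `square` always equals the sum of squared deviations over every sample seen so far. As a quick sanity check, a self-contained NumPy sketch of the same bookkeeping (independent of the `be` backend used above; all names below are chosen for illustration) can stream batches through the update and compare the result to a single-pass computation:

import numpy as np

def combine(num, mean, square, samples):
    """One streaming step of the update above, written with plain NumPy."""
    n = len(samples)
    sample_mean = samples.mean(axis=0)
    sample_square = np.square(samples - sample_mean).sum(axis=0)
    delta = sample_mean - mean
    new_num = num + n
    # cross term that corrects for the shift between the two means
    correction = n * num * np.square(delta) / max(new_num, 1)
    square = square + sample_square + correction
    mean = (num * mean + n * sample_mean) / max(new_num, 1)
    return new_num, mean, square

data = np.random.randn(100, 3)
num, mean, square = 0, np.zeros(3), np.zeros(3)
for batch in np.array_split(data, 4):
    num, mean, square = combine(num, mean, square, batch)

assert np.allclose(mean, data.mean(axis=0))
assert np.allclose(square / (num - 1), data.var(axis=0, ddof=1))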
Example #2
    def update(self, samples) -> None:
        """
        Update the online calculation of the mean and variance.

        Notes:
            Modifies the metrics in place.

        Args:
            samples: data samples

        Returns:
            None

        """
        n = len(samples)
        sample_mean = be.tsum(samples) / n
        sample_square = be.tsum(be.square(samples - sample_mean))

        delta = sample_mean - self.mean
        new_num = self.num + n
        correction = n * self.num * delta**2 / max(new_num, 1)

        self.square += sample_square + correction
        self.var = self.square / max(new_num - 1, 1)
        self.mean = (self.num * self.mean + n * sample_mean) / max(new_num, 1)
        self.num = new_num
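Compared with Example #1, this variant assumes that `self.num`, `self.mean`, and `self.square` have already been initialized (for instance to zero) and always reduces over the first axis, whereas the first variant lazily initializes its state and accepts an `axis` argument. Both implement the same pairwise combination: given running statistics (num, mean, M2) and a batch of n samples with mean m and sum of squared deviations S, the update is mean' = (num·mean + n·m)/(num + n) and M2' = M2 + S + num·n·(m − mean)²/(num + n), where the last term is exactly the `correction` in the code.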
Example #3
def test_gaussian_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units, ))
    b = be.randn((num_hidden_units, ))
    log_var_a = 0.1 * be.randn((num_visible_units, ))
    log_var_b = 0.1 * be.randn((num_hidden_units, ))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.layers[0].int_params.log_var[:] = log_var_a
    rbm.layers[1].int_params.log_var[:] = log_var_b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    visible_var = be.exp(log_var_a)
    vdata_scaled = vdata / be.broadcast(visible_var, vdata)

    # compute the mean of the hidden layer
    rbm.layers[1].update([vdata_scaled], [rbm.weights[0].W()])
    hidden_var = be.exp(log_var_b)
    hid_mean = rbm.layers[1].mean()
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_vis_loc = -be.mean(vdata_scaled, axis=0)
    d_vis_logvar = -0.5 * be.mean(be.square(be.subtract(a, vdata)), axis=0)
    d_vis_logvar += be.batch_dot(
        hid_mean_scaled, be.transpose(W), vdata, axis=0) / len(vdata)
    d_vis_logvar /= visible_var

    d_hid_loc = -be.mean(hid_mean_scaled, axis=0)

    d_hid_logvar = -0.5 * be.mean(
        be.square(hid_mean - be.broadcast(b, hid_mean)), axis=0)
    d_hid_logvar += be.batch_dot(vdata_scaled, W, hid_mean,
                                 axis=0) / len(hid_mean)
    d_hid_logvar /= hidden_var

    d_W = -be.batch_outer(vdata_scaled, hid_mean_scaled) / len(vdata_scaled)

    # compute the derivatives using the layer functions
    rbm.layers[1].update([vdata_scaled], [rbm.weights[0].W()])
    rbm.layers[0].update([hid_mean_scaled], [rbm.weights[0].W_T()])

    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                           [rbm.weights[0].W()])

    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.weights[0].W_T()])

    weight_derivs = rbm.weights[0].derivatives(vdata_scaled, hid_mean_scaled)

    assert be.allclose(d_vis_loc, vis_derivs.loc), \
    "derivative of visible loc wrong in gaussian-gaussian rbm"

    assert be.allclose(d_hid_loc, hid_derivs.loc), \
    "derivative of hidden loc wrong in gaussian-gaussian rbm"

    assert be.allclose(d_vis_logvar, vis_derivs.log_var, rtol=1e-05, atol=1e-01), \
    "derivative of visible log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(d_hid_logvar, hid_derivs.log_var, rtol=1e-05, atol=1e-01), \
    "derivative of hidden log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(d_W, weight_derivs.matrix), \
    "derivative of weights wrong in gaussian-gaussian rbm"
Example #4
def test_gaussian_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    log_var_a = 0.1 * be.randn((num_visible_units,))
    log_var_b = 0.1 * be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.layers[0].params.log_var[:] = log_var_a
    rbm.layers[1].params.log_var[:] = log_var_b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    visible_var = be.exp(log_var_a)
    vdata_scaled = vdata / visible_var

    # compute the mean of the hidden layer
    hid_mean = rbm.layers[1].conditional_mean(
        [vdata_scaled], [rbm.connections[0].W()])
    hidden_var = be.exp(log_var_b)
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_vis_loc = be.mean((a-vdata)/visible_var, axis=0)
    d_vis_logvar = -0.5 * be.mean(be.square(be.subtract(a, vdata)), axis=0)
    d_vis_logvar += be.batch_quadratic(hid_mean_scaled, be.transpose(W), vdata,
                                 axis=0) / len(vdata)
    d_vis_logvar /= visible_var

    d_hid_loc = be.mean((b-hid_mean)/hidden_var, axis=0)

    d_hid_logvar = -0.5 * be.mean(be.square(hid_mean - b), axis=0)
    d_hid_logvar += be.batch_quadratic(vdata_scaled, W, hid_mean,
                                 axis=0) / len(hid_mean)
    d_hid_logvar /= hidden_var

    d_W = -be.batch_outer(vdata_scaled, hid_mean_scaled) / len(vdata_scaled)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                            [rbm.connections[0].W(trans=True)])

    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.connections[0].W()])

    weight_derivs = rbm.connections[0].weights.derivatives(vdata_scaled, hid_mean_scaled)

    # compute simple weighted derivatives using the layer functions
    scale = 2
    scale_func = partial(be.multiply, be.float_scalar(scale))
    vis_derivs_scaled = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                        [rbm.connections[0].W(trans=True)], weighting_function=scale_func)

    hid_derivs_scaled = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                        [rbm.connections[0].W()], weighting_function=scale_func)

    weight_derivs_scaled = rbm.connections[0].weights.derivatives(vdata_scaled, hid_mean_scaled,
                                                weighting_function=scale_func)

    assert be.allclose(d_vis_loc, vis_derivs[0].loc), \
    "derivative of visible loc wrong in gaussian-gaussian rbm"

    assert be.allclose(d_hid_loc, hid_derivs[0].loc), \
    "derivative of hidden loc wrong in gaussian-gaussian rbm"

    assert be.allclose(d_vis_logvar, vis_derivs[0].log_var, rtol=1e-05, atol=1e-01), \
    "derivative of visible log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(d_hid_logvar, hid_derivs[0].log_var, rtol=1e-05, atol=1e-01), \
    "derivative of hidden log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(d_W, weight_derivs[0].matrix), \
    "derivative of weights wrong in gaussian-gaussian rbm"

    assert be.allclose(scale * d_vis_loc, vis_derivs_scaled[0].loc), \
    "weighted derivative of visible loc wrong in gaussian-gaussian rbm"

    assert be.allclose(scale * d_hid_loc, hid_derivs_scaled[0].loc), \
    "weighted derivative of hidden loc wrong in gaussian-gaussian rbm"

    assert be.allclose(scale * d_vis_logvar, vis_derivs_scaled[0].log_var, rtol=1e-05, atol=1e-01), \
    "weighted derivative of visible log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(scale * d_hid_logvar, hid_derivs_scaled[0].log_var, rtol=1e-05, atol=1e-01), \
    "weighted derivative of hidden log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(scale * d_W, weight_derivs_scaled[0].matrix), \
    "weighted derivative of weights wrong in gaussian-gaussian rbm"