Example #1
    def test_skip_linear_transform(self, BatchNormLayer):
        input_shape = (20, 30, 40)

        # random input tensor, beta, gamma
        input = (np.random.randn(*input_shape).astype(theano.config.floatX) +
                 np.random.randn(1, 30, 1).astype(theano.config.floatX))
        beta = np.random.randn(30).astype(theano.config.floatX)
        gamma = np.random.randn(30).astype(theano.config.floatX)

        # create layers without beta or gamma
        layer1 = BatchNormLayer(input_shape, beta=None, gamma=gamma)
        layer2 = BatchNormLayer(input_shape, beta=beta, gamma=None)

        # check that one of the four parameters (beta, gamma, mean, inv_std) is missing
        assert len(layer1.get_params()) == 3
        assert len(layer2.get_params()) == 3

        # call get_output_for()
        result1 = layer1.get_output_for(theano.tensor.constant(input),
                                        deterministic=False).eval()
        result2 = layer2.get_output_for(theano.tensor.constant(input),
                                        deterministic=False).eval()

        # compute expected results
        mean = input.mean(axis=(0, 2))
        std = np.sqrt(input.var(axis=(0, 2)) + layer1.epsilon)
        exp_result = (input - mean[None, :, None]) / std[None, :, None]
        exp_result1 = exp_result * gamma[None, :, None]  # no beta
        exp_result2 = exp_result + beta[None, :, None]  # no gamma

        # compare expected results to actual results
        tol = {'atol': 1e-5, 'rtol': 1e-6}
        assert np.allclose(result1, exp_result1, **tol)
        assert np.allclose(result2, exp_result2, **tol)
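The test method above is written as a pytest method that receives the layer class through a fixture argument named BatchNormLayer. A minimal sketch of scaffolding that would let it run as written, assuming Lasagne, Theano and NumPy are installed (the test class name and fixture body are an assumption, not taken from the example):

import numpy as np      # used by the test methods shown here
import pytest
import theano

class TestBatchNormLayer:
    @pytest.fixture
    def BatchNormLayer(self):
        # Hand the layer class to each test method via a fixture,
        # so the tests stay decoupled from the import path.
        from lasagne.layers import BatchNormLayer
        return BatchNormLayer

With this in place, the method above can be pasted into the class body and collected and run by pytest.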
Example #2
    def test_get_output_for(self, BatchNormLayer, deterministic, use_averages,
                            update_averages):
        input_shape = (20, 30, 40)

        # random input tensor, beta, gamma, mean, inv_std and alpha
        input = (np.random.randn(*input_shape).astype(theano.config.floatX) +
                 np.random.randn(1, 30, 1).astype(theano.config.floatX))
        beta = np.random.randn(30).astype(theano.config.floatX)
        gamma = np.random.randn(30).astype(theano.config.floatX)
        mean = np.random.randn(30).astype(theano.config.floatX)
        inv_std = np.random.rand(30).astype(theano.config.floatX)
        alpha = np.random.rand()

        # create layer (with default axes: normalize over all but second axis)
        layer = BatchNormLayer(input_shape,
                               beta=beta,
                               gamma=gamma,
                               mean=mean,
                               inv_std=inv_std,
                               alpha=alpha)

        # call get_output_for()
        kwargs = {'deterministic': deterministic}
        if use_averages is not None:
            kwargs['batch_norm_use_averages'] = use_averages
        else:
            use_averages = deterministic
        if update_averages is not None:
            kwargs['batch_norm_update_averages'] = update_averages
        else:
            update_averages = not deterministic
        result = layer.get_output_for(theano.tensor.constant(input),
                                      **kwargs).eval()

        # compute expected results and expected updated parameters
        input_mean = input.mean(axis=(0, 2))
        input_inv_std = 1 / np.sqrt(input.var(axis=(0, 2)) + layer.epsilon)
        if use_averages:
            use_mean, use_inv_std = mean, inv_std
        else:
            use_mean, use_inv_std = input_mean, input_inv_std
        bcast = (np.newaxis, slice(None), np.newaxis)
        exp_result = (input - use_mean[bcast]) * use_inv_std[bcast]
        exp_result = exp_result * gamma[bcast] + beta[bcast]
        if update_averages:
            new_mean = (1 - alpha) * mean + alpha * input_mean
            new_inv_std = (1 - alpha) * inv_std + alpha * input_inv_std
        else:
            new_mean, new_inv_std = mean, inv_std

        # compare expected results to actual results
        tol = {'atol': 1e-5, 'rtol': 1e-6}
        assert np.allclose(layer.mean.get_value(), new_mean, **tol)
        assert np.allclose(layer.inv_std.get_value(), new_inv_std, **tol)
        assert np.allclose(result, exp_result, **tol)
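The test_get_output_for method above additionally receives deterministic, use_averages and update_averages, which together cover every combination of inference mode and running-average handling; passing None exercises the defaults, where use_averages follows deterministic and update_averages is its negation. One plausible wiring is stacked pytest parametrization on the same test class sketched above; the exact markers are an assumption, not taken from the example:

    # Assumed parametrization: None means "rely on the default derived
    # from `deterministic` inside the test body".
    @pytest.mark.parametrize('update_averages', [None, False, True])
    @pytest.mark.parametrize('use_averages', [None, False, True])
    @pytest.mark.parametrize('deterministic', [False, True])
    def test_get_output_for(self, BatchNormLayer, deterministic,
                            use_averages, update_averages):
        ...  # body as shown above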