Example 1
    def serial_fn_kernel(k: Kernel, *args, **kwargs) -> Kernel:
        n1, n2 = k.nngp.shape[:2]
        (n1_batches, n1_batch_size, n2_batches,
         n2_batch_size) = _get_n_batches_and_batch_sizes(
             n1, n2, batch_size, device_count)

        # Leading index of each batch along the two kernel axes.
        n1s = np.arange(0, n1, n1_batch_size)
        n2s = np.arange(0, n2, n2_batch_size)

        kwargs_np1 = {}
        kwargs_np2 = {}

        kwargs_other = {}
        # Split array-valued keyword arguments into per-batch slices and pass
        # everything else through unchanged.
        for key, v in kwargs.items():
            if _is_np_ndarray(v):
                assert isinstance(v, tuple) and len(v) == 2
                v1 = np.reshape(v[0], (
                    n1_batches,
                    n1_batch_size,
                ) + v[0].shape[1:])
                v2 = np.reshape(v[1], (
                    n2_batches,
                    n2_batch_size,
                ) + v[1].shape[1:])
                kwargs_np1[key] = v1
                kwargs_np2[key] = v2
            else:
                kwargs_other[key] = v

        def row_fn(_, n1):
            return _, _scan(col_fn, n1, (n2s, kwargs_np2))[1]

        def col_fn(n1, n2):
            # NOTE(schsam): If we end up wanting to enable jit-of-batch then we will
            # probably have to change this to dynamic slicing.
            n1, kwargs1 = n1
            n2, kwargs2 = n2
            kwargs_merge = {
                **kwargs_other,
                **dict((key, (kwargs1[key], kwargs2[key])) for key in kwargs1)
            }
            n1_slice = slice(n1, n1 + n1_batch_size)
            n2_slice = slice(n2, n2 + n2_batch_size)
            in_kernel = k.slice(n1_slice, n2_slice)
            return (n1, kwargs1), kernel_fn(in_kernel, *args, **kwargs_merge)

        cov2_is_none = k.cov2 is None
        _, k = _scan(row_fn, 0, (n1s, kwargs_np1))
        if cov2_is_none:
            k = k.replace(cov2=None)
        return flatten(k, cov2_is_none)
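For intuition, a minimal NumPy sketch of the same row-batch / column-batch pattern, using a hypothetical pair_fn in place of kernel_fn and ignoring the Kernel bookkeeping: each (row batch, column batch) block is computed separately and the blocks are reassembled into the full matrix.

import numpy as np

def batched_gram(x1, x2, batch_size, pair_fn=lambda a, b: a @ b.T):
    # Split both inputs into equally sized batches along the leading axis.
    n1_batches = x1.shape[0] // batch_size
    n2_batches = x2.shape[0] // batch_size
    x1s = x1.reshape((n1_batches, batch_size) + x1.shape[1:])
    x2s = x2.reshape((n2_batches, batch_size) + x2.shape[1:])
    # Evaluate one block per (row batch, column batch) pair and stitch the
    # blocks back together.
    rows = [np.concatenate([pair_fn(a, b) for b in x2s], axis=1) for a in x1s]
    return np.concatenate(rows, axis=0)

x1, x2 = np.ones((6, 3)), np.ones((4, 3))
assert batched_gram(x1, x2, 2).shape == (6, 4)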
Example 2
def _reshape_kernel_for_pmap(k: Kernel, device_count: int,
                             n1_per_device: int) -> Kernel:
    # The second input is replicated on every device, so cov2 (falling back to
    # cov1 when it is absent) is broadcast along a new device axis rather than
    # split.
    cov2 = k.cov2
    if cov2 is None:
        cov2 = k.cov1
    cov2 = np.broadcast_to(cov2, (device_count, ) + cov2.shape)

    mask2 = k.mask2
    if mask2 is None and k.mask1 is not None:
        mask2 = k.mask1
    if mask2 is not None:
        mask2 = np.broadcast_to(mask2, (device_count, ) + mask2.shape)

    x1_is_x2 = np.broadcast_to(k.x1_is_x2, (device_count, ) + k.x1_is_x2.shape)

    nngp, ntk, cov1 = [
        np.reshape(x, (
            device_count,
            n1_per_device,
        ) + x.shape[1:]) for x in (k.nngp, k.ntk, k.cov1)
    ]

    return k.replace(nngp=nngp,
                     ntk=ntk,
                     cov1=cov1,
                     cov2=cov2,
                     x1_is_x2=x1_is_x2,
                     shape1=(n1_per_device, ) + k.shape1[1:],
                     mask2=mask2)
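A small standalone NumPy sketch (illustrative shapes, not the library's API) of the two layout operations used here: batched quantities gain a leading device axis via reshape, while quantities shared by all devices are broadcast along that axis.

import numpy as np

device_count, n1_per_device = 4, 8
nngp = np.ones((device_count * n1_per_device, 16))  # one row block per example
cov2 = np.ones((16,))                                # shared by every device

# The leading axis becomes the device axis: one slice of rows per device.
nngp = np.reshape(nngp, (device_count, n1_per_device) + nngp.shape[1:])
# Shared quantities are broadcast instead of split.
cov2 = np.broadcast_to(cov2, (device_count,) + cov2.shape)

assert nngp.shape == (4, 8, 16) and cov2.shape == (4, 16)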
Example 3
    def train(self, x, y):
        """Runs a single training pass.

        Args:
            x: 2-d array of size batch_size x image_size.
            y: 2-d array of size batch_size x num_classes in one-hot notation.

        Returns:
            The loss of this training pass.
        """
        len_dim_0 = x.shape[0]
        x = np.reshape(x, (len_dim_0, 784))
        with tf.GradientTape() as tape:
            tape.watch(self.layers)
            y_out = self.forward(x)
            loss = self.mean_squared_error(y_out, y)
        # Take gradients after leaving the tape context, then apply a plain
        # SGD update to every layer's weights and biases.
        grads = tape.gradient(loss, self.layers)
        temp_layers = []
        for layer, grad in zip(self.layers, grads):
            new_w = layer.weights - (self.learning_rate * grad.weights)
            new_b = layer.biases - (self.learning_rate * grad.biases)
            temp_layers.append(self.Layer(new_w, new_b))
        self.layers = temp_layers

        return loss
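For comparison, a self-contained TensorFlow 2 sketch of the same manual SGD step on a single dense layer; the shapes and learning rate are illustrative and not taken from the class above.

import tensorflow as tf

w = tf.Variable(tf.random.normal((784, 10)))
b = tf.Variable(tf.zeros((10,)))
learning_rate = 0.1

def train_step(x, y):
    with tf.GradientTape() as tape:
        y_out = tf.matmul(x, w) + b
        loss = tf.reduce_mean(tf.square(y_out - y))
    # Take gradients after leaving the tape context, then apply plain SGD.
    dw, db = tape.gradient(loss, [w, b])
    w.assign_sub(learning_rate * dw)
    b.assign_sub(learning_rate * db)
    return loss

loss = train_step(tf.random.normal((32, 784)),
                  tf.one_hot(tf.zeros(32, tf.int32), 10))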
Example 4
    def parallel_fn_x1(x1, x2=None, *args, **kwargs):
        if 'key' in kwargs:
            raise NotImplementedError(
                'Batching for the empirical kernel with dropout'
                ' is not implemented.')
        x2_is_none = x2 is None
        if x2_is_none:
            # TODO(schsam): Only compute the upper triangular part of the kernel.
            x2 = x1

        n1 = x1.shape[0]

        assert x1.shape[1:] == x2.shape[1:]
        input_shape = x1.shape[1:]

        _device_count = device_count

        n1_per_device, ragged = divmod(n1, device_count)
        if n1_per_device and ragged:
            raise ValueError(
                ('Dataset size ({}) must be divisible by the number of '
                 'physical devices ({}).').format(n1, device_count))
        elif not n1_per_device:
            _device_count = ragged
            n1_per_device = 1

        x1 = np.reshape(x1, (
            _device_count,
            n1_per_device,
        ) + input_shape)
        kernel = kernel_fn(x1, x2, *args, **kwargs)
        return _flatten_kernel(kernel, x2_is_none, True)
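A hedged sketch of the divmod-based device split above, written against plain NumPy: when the dataset divides evenly, each device receives n1 // device_count rows; when it is smaller than the device count, every row gets its own device.

import numpy as np

def split_for_devices(x, device_count):
    n_per_device, ragged = divmod(x.shape[0], device_count)
    if n_per_device and ragged:
        raise ValueError('Dataset size must be divisible by the device count.')
    if not n_per_device:
        device_count, n_per_device = ragged, 1
    return np.reshape(x, (device_count, n_per_device) + x.shape[1:])

assert split_for_devices(np.ones((8, 3)), 4).shape == (4, 2, 3)
assert split_for_devices(np.ones((3, 5)), 4).shape == (3, 1, 5)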
Example 5
def outer_prod(x, y, start_axis, end_axis, prod_op):
  if y is None:
    y = x
  # interleave_ones inserts singleton axes so that x and y broadcast into an
  # outer product over the [start_axis, end_axis) dimensions.
  x = interleave_ones(x, start_axis, end_axis, True)
  y = interleave_ones(y, start_axis, end_axis, False)
  if x.ndim <= 5:
    return prod_op(x, y)
  elif x.ndim == 6:
    # Rank-6 case: tile both operands to a common leading shape, merge the two
    # leading axes, apply prod_op, then split the leading axis back out.
    x = np.tile(x, (1, y.shape[1], 1, 1, 1, 1))
    y = np.tile(y, (x.shape[0], 1, 1, 1, 1, 1))
    x_merged = np.reshape(x, (x.shape[0] * x.shape[1],) + x.shape[2:])
    y_merged = np.reshape(y, (y.shape[0] * y.shape[1],) + y.shape[2:])
    result = prod_op(x_merged, y_merged)
    return np.reshape(result, (x.shape[0], x.shape[1]) + result.shape[1:])
  else:
    raise ValueError('outer_prod does not support arrays of rank greater than 6.')
Example 6
def _get_inputs_and_model(width=1, n_classes=2, use_conv=True):
    key = stateless_uniform(shape=[2],
                            seed=[1, 1],
                            minval=None,
                            maxval=None,
                            dtype=tf.int32)
    keys = tf_random_split(key)
    key = keys[0]
    split = keys[1]
    x1 = np.asarray(normal((8, 4, 3, 2), seed=key))
    x2 = np.asarray(normal((4, 4, 3, 2), seed=split))

    if not use_conv:
        x1 = np.reshape(x1, (x1.shape[0], -1))
        x2 = np.reshape(x2, (x2.shape[0], -1))

    init_fn, apply_fn, kernel_fn = stax.serial(
        stax.Conv(width, (3, 3)) if use_conv else stax.Dense(width),
        stax.Relu(), stax.Flatten(), stax.Dense(n_classes, 2., 0.5))
    return x1, x2, init_fn, apply_fn, kernel_fn, key
Example 7
def _flatten_batch_dimensions(k: np.ndarray,
                              discard_axis: int = None) -> np.ndarray:
    """Takes a kernel that has been evaluated in batches and flattens."""
    if discard_axis is not None:
        if k.ndim % 2:
            k = np.take(k, 0, axis=discard_axis)
            return np.reshape(k, (-1, ) + k.shape[2:])

        if discard_axis == 1:
            return np.reshape(k, (k.shape[0] * k.shape[1], ) + k.shape[2:])

        return k[0]

    else:
        if k.ndim % 2:
            return np.reshape(k, (k.shape[0] * k.shape[1], ) + k.shape[2:])

        k = np.transpose(k, (0, 2, 1, 3) + tuple(range(4, k.ndim)))
        return np.reshape(k,
                          (k.shape[0] * k.shape[1], k.shape[2] * k.shape[3]) +
                          k.shape[4:])
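A small NumPy sketch of the transpose-then-reshape at the end of _flatten_batch_dimensions: a kernel stored as (row batches, column batches, rows per batch, columns per batch) blocks becomes a single (n1, n2) matrix.

import numpy as np

n1_batches, n2_batches, b1, b2 = 2, 3, 4, 5
k = np.arange(n1_batches * n2_batches * b1 * b2, dtype=float).reshape(
    n1_batches, n2_batches, b1, b2)

# Bring the two "row" axes and the two "column" axes next to each other, then
# merge each pair into one axis.
flat = np.transpose(k, (0, 2, 1, 3)).reshape(n1_batches * b1, n2_batches * b2)
assert flat.shape == (8, 15)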
Example 8
    def parallel_fn_x1(x1, x2=None, *args, **kwargs):
        x2_is_none = x2 is None
        if x2_is_none:
            # TODO(schsam): Only compute the upper triangular part of the kernel.
            x2 = x1

        n1 = x1.shape[0]

        assert x1.shape[1:] == x2.shape[1:]
        input_shape = x1.shape[1:]

        _device_count = device_count

        n1_per_device, ragged = divmod(n1, device_count)
        if n1_per_device and ragged:
            raise ValueError(
                ('Dataset size ({}) must be divisible by the number of '
                 'physical devices ({}).').format(n1, device_count))
        elif not n1_per_device:
            _device_count = ragged
            n1_per_device = 1

        for k, v in kwargs.items():

            if _is_np_ndarray(v):
                assert isinstance(v, tuple) and len(v) == 2
                v0 = np.reshape(v[0], (
                    _device_count,
                    n1_per_device,
                ) + v[0].shape[1:])
                kwargs[k] = (v0, v[1])

        x1 = np.reshape(x1, (
            _device_count,
            n1_per_device,
        ) + input_shape)
        kernel = kernel_fn(x1, x2, *args, **kwargs)
        return _flatten_kernel(kernel, x2_is_none, True)
Example 9
    def serial_fn_x1(x1: np.ndarray,
                     x2: np.ndarray = None,
                     *args,
                     **kwargs) -> _KernelType:
        # TODO(xlc): Make batch + dropout work reasonably well.
        if 'key' in kwargs:
            raise NotImplementedError(
                'Batching for the empirical kernel with dropout'
                ' is not implemented.')
        x2_is_none = x2 is None
        if x2_is_none:
            # TODO(schsam): Only compute the upper triangular part of the kernel.
            x2 = x1

        n1, n2 = x1.shape[0], x2.shape[0]
        (n1_batches, n1_batch_size, n2_batches,
         n2_batch_size) = _get_n_batches_and_batch_sizes(
             n1, n2, batch_size, device_count)

        input_shape = x1.shape[1:]
        x1s = np.reshape(x1, (
            n1_batches,
            n1_batch_size,
        ) + input_shape)
        x2s = np.reshape(x2, (
            n2_batches,
            n2_batch_size,
        ) + input_shape)

        def row_fn(_, x1):
            return _, _scan(col_fn, x1, x2s)[1]

        def col_fn(x1: np.ndarray, x2: np.ndarray):
            return x1, kernel_fn(x1, x2, *args, **kwargs)

        _, kernel = _scan(row_fn, 0, x1s)
        return flatten(kernel, x2_is_none)
Example 10
def apply_fun(params, inputs, **kwargs):
    return tfnp.reshape(inputs, (inputs.shape[0], -1))
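This is the usual flatten layer: every non-batch dimension is collapsed into a single feature axis. A quick NumPy check of the same reshape (illustrative shapes):

import numpy as np

inputs = np.ones((32, 4, 4, 3))
flat = np.reshape(inputs, (inputs.shape[0], -1))
assert flat.shape == (32, 48)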
Example 11
    def serial_fn_x1(x1: np.ndarray,
                     x2: np.ndarray = None,
                     *args,
                     **kwargs) -> _KernelType:

        x2_is_none = x2 is None
        if x2_is_none:
            # TODO(schsam): Only compute the upper triangular part of the kernel.
            x2 = x1

        n1, n2 = x1.shape[0], x2.shape[0]
        (n1_batches, n1_batch_size, n2_batches,
         n2_batch_size) = _get_n_batches_and_batch_sizes(
             n1, n2, batch_size, device_count)
        kwargs_np1 = {}
        kwargs_np2 = {}
        kwargs_other = {}
        for k, v in kwargs.items():
            if _is_np_ndarray(v):
                if k == 'rng':
                    key1, key2 = random.split(v)
                    v1 = random.split(key1, n1_batches)
                    v2 = random.split(key2, n2_batches)
                else:
                    assert isinstance(v, tuple) and len(v) == 2
                    v1 = np.reshape(v[0], (
                        n1_batches,
                        n1_batch_size,
                    ) + v[0].shape[1:])
                    v2 = np.reshape(v[1], (
                        n2_batches,
                        n2_batch_size,
                    ) + v[1].shape[1:])
                kwargs_np1[k] = v1
                kwargs_np2[k] = v2
            else:
                kwargs_other[k] = v
        input_shape = x1.shape[1:]
        x1s = np.reshape(x1, (
            n1_batches,
            n1_batch_size,
        ) + input_shape)
        x2s = np.reshape(x2, (
            n2_batches,
            n2_batch_size,
        ) + input_shape)

        def row_fn(_, x1):
            return _, _scan(col_fn, x1, (x2s, kwargs_np2))[1]

        def col_fn(x1, x2):
            x1, kwargs1 = x1
            x2, kwargs2 = x2
            kwargs_merge = {
                **kwargs_other,
                **dict((k, (kwargs1[k], kwargs2[k])) for k in kwargs1)
            }
            return (x1, kwargs1), kernel_fn(x1, x2, *args, **kwargs_merge)

        _, kernel = _scan(row_fn, 0, (x1s, kwargs_np1))
        return flatten(kernel, x2_is_none)