Esempio n. 1
0
    def traverse(
        self,
        latent_index: Union[int, str],
        min_val: float = -2.0,
        max_val: float = 2.0,
        num: int = 11,
        n_samples: int = 1,
        mode: Literal['linear', 'quantile', 'gaussian'] = 'linear',
        seed: int = 1,
    ) -> VariationalPosterior:
        """Create data for latents' traverse experiments.

        A random subset of the stored latent samples is selected, each selected
        sample is repeated `num` times, and the column `latent_index` of every
        repetition is overwritten with one value of the traverse range. The
        traversed latents are then decoded with `self.model.decode`.

        Parameters
        ----------
        latent_index : Union[int, str]
            index of the latent dimension to traverse, or its name (a name is
            resolved through `self.latent_names`)
        min_val : float, optional
            minimum value of the traverse (only used in 'linear' mode),
            by default -2.0
        max_val : float, optional
            maximum value of the traverse (only used in 'linear' mode),
            by default 2.0
        num : int, optional
            number of points in the traverse, must be an odd number greater
            than 1, by default 11
        n_samples : int, optional
            number of samples selected (without replacement) for the traverse,
            by default 1
        mode : {'linear', 'quantile', 'gaussian'}, optional
            'linear' mode takes a linear interpolation between `min_val` and
            `max_val`.
            'quantile' mode takes `num` evenly spaced points between the min
            and max values of the selected latent dimension in the data.
            'gaussian' mode takes `num` quantiles of a Normal distribution
            whose loc/scale are the average posterior mean/stddev of the
            selected latent dimension. By default 'linear'
        seed : int, optional
            seed of the random state used for selecting the samples,
            by default 1

        Returns
        -------
        VariationalPosterior
            a copy of this posterior (created by `self.copy`) with the new
            traversed latents and their decoded outputs,
            the total number of returned samples is: `n_samples * num`

        Raises
        ------
        AssertionError
            if `num` is even, `num <= 1`, `n_samples <= 0` or `mode` is not
            one of the supported modes.

        Example
        --------
        For `n_samples=2`, `num=3`, `latent_index=0` and `n_latents=2`
        in 'linear' mode, the returned latents are:
        ```
        [[-2., 0.47],
         [ 0., 0.47],
         [ 2., 0.47],
         [-2., 0.31],
         [ 0., 0.31],
         [ 2., 0.31]]
        ```
        """
        num = int(num)
        assert num % 2 == 1, f'num must be odd number, i.e. centerred at 0, given {num}'
        n_samples = int(n_samples)
        assert num > 1 and n_samples > 0, \
          ("num > 1 and n_samples > 0, "
           f"but given: num={num} n_samples={n_samples}")
        ### factor index
        if isinstance(latent_index, string_types):
            latent_index = self.latent_names.index(latent_index)
        latent_index = int(latent_index)
        ### check the mode
        all_mode = ('quantile', 'linear', 'gaussian')
        mode = str(mode).strip().lower()
        assert mode in all_mode, \
          f"Only support mode:{all_mode}, but given mode='{mode}'"
        ### sample
        random_state = np.random.RandomState(seed=seed)
        # sampling without replacement: raises if n_samples > self.n_samples
        indices = random_state.choice(self.n_samples,
                                      size=n_samples,
                                      replace=False)
        Z_org = self.dist_to_tensor(self.latents).numpy()
        Z = Z_org[indices]
        ### ranges
        # z_range is a vector of `num` values for the traversed latent
        # linear range
        if mode == 'linear':
            z_range = np.linspace(min_val, max_val, num=num)
        # min-max quantile
        elif mode == 'quantile':
            column = Z_org[:, latent_index]
            z_range = np.linspace(np.min(column), np.max(column), num=num)
        # gaussian quantile
        elif mode == 'gaussian':
            dist = Normal(
                loc=tf.reduce_mean(self.latents.mean()[:, latent_index]),
                scale=tf.reduce_mean(self.latents.stddev()[:, latent_index]),
            )
            # stay strictly inside (0, 1): the quantiles at exactly 0 and 1
            # of a Normal distribution are infinite
            levels = np.linspace(1e-5, 1.0 - 1e-5, num=num, dtype=np.float32)
            z_range = np.array([dist.quantile(q) for q in levels])
        ### traverse
        n_points = len(z_range)
        # each selected sample occupies `n_points` consecutive rows
        Z = np.repeat(np.array(Z), n_points, axis=0)
        Z_indices = np.repeat(indices, n_points, axis=0)
        # every block of `n_points` rows receives the full traverse range
        Z[:, latent_index] = np.tile(z_range, n_samples)
        ### create the new posterior
        # NOTE: this might not work for multi-latents
        outputs = list(as_tuple(self.model.decode(Z, training=False)))
        obj = self.copy(Z_indices,
                        latents=VectorDeterministic(Z, name="Latents"),
                        outputs=outputs,
                        suffix='traverse')
        return obj
Esempio n. 2
0
def traverse_dims(
        x: Union[np.ndarray, tf.Tensor, Distribution],
        feature_indices: Optional[Union[int, Sequence[int]]] = None,
        min_val: Union[float, np.ndarray] = -2.0,
        max_val: Union[float, np.ndarray] = 2.0,
        n_traverse_points: int = 11,
        mode: Literal['linear', 'quantile',
                      'gaussian'] = 'linear') -> np.ndarray:
    """Traverse one or more dimensions of a matrix over a range of values.

    Every row of `x` is repeated `n_traverse_points` times and the traverse
    range is *added* to the selected column of each repetition (the existing
    values are shifted, not overwritten).

    Parameters
    ----------
    x : Union[np.ndarray, tf.Tensor, Distribution]
        the 2-D array for performing dimension traverse; when a Distribution
        is given, its mean is used as the array
    feature_indices : Union[int, Sequence[int]], optional
        a single index or list of indices for traverse (i.e. which columns in
        the last dimension are for traverse). If more than one index is
        given, the traverse is performed separately for each index and the
        results are concatenated along the first axis. By default None, i.e.
        all columns.
    min_val : Union[float, np.ndarray], optional
        minimum value of the traverse in 'linear' mode; a number, or one value
        per column of `x`, by default -2.0
    max_val : Union[float, np.ndarray], optional
        maximum value of the traverse in 'linear' mode; a number, or one value
        per column of `x`, by default 2.0
    n_traverse_points : int, optional
        number of points in the traverse, must be an odd number greater
        than 1, by default 11
    mode : {'linear', 'quantile', 'gaussian'}, optional
        'linear' mode takes a linear interpolation between `min_val` and
        `max_val`.
        'quantile' mode takes `n_traverse_points` evenly spaced points between
        the min and max of the selected column (or of the whole row when `x`
        has a single row).
        'gaussian' mode takes `n_traverse_points` quantiles of a Normal
        distribution built from the mean and stddev of `x`, which then must be
        a Distribution. By default 'linear'

    Returns
    -------
    np.ndarray
        the ndarray with traversed axes, with
        `x.shape[0] * n_traverse_points` rows per traversed index

    Raises
    ------
    AssertionError
        if `n_traverse_points` is even or not greater than 1, `mode` is not
        supported, `x` is not 2-D, or an array-like `min_val`/`max_val` does
        not have one value per column of `x`.
    ValueError
        if `mode='gaussian'` and `x` is not a Distribution.

    Example
    --------
    For `x=[[0., 0.47]]`, `n_traverse_points=3`, and `feature_indices=[0]`,
    the return latents are:
    ```
    [[-2., 0.47],
     [ 0., 0.47],
     [ 2., 0.47]]
    ```
    """
    if feature_indices is None:
        feature_indices = list(
            range(x.event_shape[-1] if isinstance(x, Distribution) else x.
                  shape[-1]))
    if hasattr(feature_indices, 'numpy'):
        feature_indices = feature_indices.numpy()
    if isinstance(feature_indices, np.ndarray):
        feature_indices = feature_indices.tolist()
    feature_indices = as_tuple(feature_indices, t=int)
    # === 0. list of indices, repeat for each index
    if len(feature_indices) > 1:
        arr = [
            traverse_dims(x,
                          feature_indices=i,
                          min_val=min_val,
                          max_val=max_val,
                          n_traverse_points=n_traverse_points,
                          mode=mode) for i in feature_indices
        ]
        return np.concatenate(arr, axis=0)
    # === 1. single index
    feature_index = feature_indices[0]
    n_traverse_points = int(n_traverse_points)
    assert n_traverse_points % 2 == 1, \
      ('n_traverse_points must be odd number, '
       f'i.e. centered at 0, given {n_traverse_points}')
    assert n_traverse_points > 1, \
      f'n_traverse_points > 1 but given: n_traverse_points={n_traverse_points}.'
    ### check the mode
    all_mode = ('quantile', 'linear', 'gaussian')
    mode = str(mode).strip().lower()
    assert mode in all_mode, \
      f"Only support traverse mode:{all_mode}, but given '{mode}'"
    px = None
    if isinstance(x, Distribution):
        px = x
        x = px.mean()
    elif mode == 'gaussian':
        raise ValueError('A distribution must be provided for mean and stddev '
                         'in Gaussian mode.')
    ### sample
    x = np.array(x)
    assert len(
        x.shape) == 2, f'input arrays x must be 2D-array, given: {x.shape}'
    # per-feature bounds are resolved only now that `x` is a plain 2-D array,
    # so the number of columns is well defined for Distribution inputs too,
    # and they are indexed by the scalar index so plain lists work as well
    if not isinstance(min_val, Number):
        assert len(min_val) == x.shape[-1]
        min_val = min_val[feature_index]
    if not isinstance(max_val, Number):
        assert len(max_val) == x.shape[-1]
        max_val = max_val[feature_index]
    ### ranges
    # x_range is a vector of `n_traverse_points` values
    # linear range
    if mode == 'linear':
        x_range = np.linspace(min_val, max_val, num=n_traverse_points)
    # min-max quantile
    elif mode == 'quantile':
        if x.shape[0] == 1:
            # a single row has no per-column spread: use the whole row
            vmin, vmax = np.min(x), np.max(x)
        else:
            vmin = np.min(x[:, feature_index])
            vmax = np.max(x[:, feature_index])
        x_range = np.linspace(vmin, vmax, num=n_traverse_points)
    # gaussian quantile
    elif mode == 'gaussian':
        dist = Normal(loc=tf.reduce_mean(px.mean(), 0)[feature_index],
                      scale=tf.reduce_max(px.stddev(), 0)[feature_index])
        # stay strictly inside (0, 1): the quantiles at exactly 0 and 1 of a
        # Normal distribution are infinite
        levels = np.linspace(1e-6,
                             1.0 - 1e-6,
                             num=n_traverse_points,
                             dtype=np.float32)
        x_range = np.array([dist.quantile(q) for q in levels])
    else:
        # only reachable when assertions are disabled (python -O)
        raise ValueError(f'Unknown mode="{mode}"')
    ### traverse
    # each row of x occupies `len(x_range)` consecutive rows of X
    X = np.repeat(x, len(x_range), axis=0)
    # note, this should be added not simple assignment: every block of
    # `len(x_range)` rows is shifted by the full traverse range
    X[:, feature_index] += np.tile(x_range.astype(X.dtype), x.shape[0])
    return X