from typing import Callable, Generator, Iterator, Optional, Union
import warnings

import numpy as np
import tensorflow as tf
from tensorflow_probability import distributions as tfd
from tqdm import tqdm


def discrete_entropy(labels):
    r"""Iteratively compute the discrete entropy of integer samples,
    along each column for a 2-D array.

    Arguments:
        labels : 1-D or 2-D array

    Returns:
        entropy : a scalar, or an array `[n_factors]`
    """
    labels = np.atleast_1d(labels)
    if labels.ndim == 1:
        return entropy1D(labels.ravel())
    elif labels.ndim > 2:
        raise ValueError("Only support 1-D or 2-D array for labels entropy.")
    num_factors = labels.shape[1]
    h = np.zeros(num_factors)
    for j in range(num_factors):
        h[j] = entropy1D(labels[:, j])
    return h
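
# `entropy1D` is used above but not defined in this module; a minimal sketch
# is provided below under the assumption that it computes the Shannon entropy
# (in nats) of a 1-D array of discrete labels. The original helper may differ
# (e.g. in log base or probability smoothing).
def entropy1D(labels):
    """Shannon entropy (in nats) of a 1-D array of discrete labels."""
    _, counts = np.unique(np.asarray(labels), return_counts=True)
    probs = counts / counts.sum()
    return -np.sum(probs * np.log(probs))


# Example (hypothetical data): a constant column has zero entropy, a uniform
# binary column has entropy log(2).
# >>> discrete_entropy(np.array([[0, 0], [0, 1], [0, 0], [0, 1]]))
# # -> approximately [0.0, 0.693]
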
def giga(X_y: Union[tf.data.Dataset, Generator, Iterator],
         fn_qz_x: Callable[[tf.Tensor], tfd.Distribution],
         fn_px_z: Callable[[tf.Tensor], tfd.Distribution],
         pz: Optional[tfd.Distribution] = None,
         n_samples: int = 10000,
         n_mcmc: int = 100,
         batch_size: int = 32,
         adjusted: bool = True,
         verbose: bool = True):
    """Monte Carlo estimate of the GIGA score: the mutual information
    I(X; Z) plus a per-factor, entropy-normalized I(Z; Y) term.

    Arguments:
        X_y : a `tf.data.Dataset`, generator, iterator, or `(X, y)` pair.
        fn_qz_x : callable mapping inputs `X` to the encoder distribution q(z|x).
        fn_px_z : callable mapping latents `z` to the decoder distribution p(x|z).
        pz : optional prior p(z); if not given, it is taken from the encoder's
            `KL_divergence.prior` attribute, or falls back to a standard Normal.
        n_samples : number of data samples drawn from q(x, y).
        n_mcmc : number of MCMC samples of z per data point.
        batch_size : mini-batch size for both the data and the MCMC sampling.
        adjusted : currently unused in this implementation.
        verbose : show a progress bar.
    """
    C_mcmc = tf.math.log(tf.constant(n_mcmc, dtype=tf.float32))
    ## process the data into mini-batches
    if not isinstance(X_y, (tf.data.Dataset, Generator, Iterator)):
        X, y = X_y
        if not isinstance(X, tf.data.Dataset):
            X = tf.data.Dataset.from_tensor_slices(X).batch(batch_size)
        if not isinstance(y, tf.data.Dataset):
            y = tf.data.Dataset.from_tensor_slices(y).batch(batch_size)
        X_y = tf.data.Dataset.zip((X, y))
    if isinstance(X_y, tf.data.Dataset):
        X_y = X_y.repeat(-1).shuffle(1000)
    X_y = iter(X_y)
    ## iterate the dataset until n_samples are collected
    count = 0
    log_qz_x = []
    log_px_z = []
    log_pz = []
    qy = []
    prog = tqdm(desc='GIGA',
                total=n_samples * n_mcmc,
                unit='samples',
                disable=not verbose)
    while count < n_samples:
        ## step 1: sample q(x, y)
        try:
            X, y = next(X_y)
        except StopIteration:
            warnings.warn(f'Not enough data for {n_samples} samples.')
            break
        batch_x = min(X.shape[0], n_samples - count)
        X = X[:batch_x]
        y = y[:batch_x]
        qy.append(y)
        qz_x = fn_qz_x(X)
        # infer the prior of z if not provided
        if pz is None:
            if hasattr(qz_x, 'KL_divergence'):
                pz = qz_x.KL_divergence.prior
            else:
                # wrap in Independent so log_prob sums over the event dims,
                # matching the shape of qz_x.log_prob
                pz = tfd.Independent(
                    tfd.Normal(tf.zeros(qz_x.event_shape, dtype=qz_x.dtype), 1.),
                    reinterpreted_batch_ndims=len(qz_x.event_shape))
        ## step 2: sample q(z|x)
        batch_llk_px = []
        batch_llk_qz = []
        batch_llk_pz = []
        for start in range(0, n_mcmc, batch_size):
            batch_z = min(n_mcmc - start, batch_size)
            z = qz_x.sample(batch_z)
            llk_qz = qz_x.log_prob(z)
            batch_llk_qz.append(llk_qz)
            llk_pz = pz.log_prob(z)
            batch_llk_pz.append(llk_pz)
            z = tf.reshape(z, (-1, z.shape[-1]))
            ## step 3: calculate log(p(x|z))
            px_z = fn_px_z(z)
            llk_x = px_z.log_prob(px_z.sample())
            llk_x = tf.reshape(llk_x, (batch_z, -1))
            batch_llk_px.append(llk_x)
            ## update progress
            prog.update(batch_z * batch_x)
        ## step 4: aggregate the log-likelihoods
        log_qz_x.append(tf.concat(batch_llk_qz, axis=0))
        log_pz.append(tf.concat(batch_llk_pz, axis=0))
        log_px_z.append(tf.concat(batch_llk_px, axis=0))
        count += batch_x
    ## finalizing results
    prog.clear()
    prog.close()
    log_px_z = tf.concat(log_px_z, axis=1)  # [n_mcmc, n_samples]
    log_qz_x = tf.concat(log_qz_x, axis=1)  # [n_mcmc, n_samples]
    log_pz = tf.concat(log_pz, axis=1)  # [n_mcmc, n_samples]
    qy = tf.concat(qy, axis=0)
    n_factors = qy.shape[1]
    ## calculate I(X; Z)
    log_pxz = log_px_z + log_pz - log_qz_x
    log_px = tf.reduce_logsumexp(log_pxz, axis=0, keepdims=True) - C_mcmc
    log_qx = tf.math.log(1. / n_samples)
    pxz = tf.math.exp(log_pxz - log_qx)
    I_xz = pxz * (log_px_z - log_px)  # [n_mcmc, n_samples]
    # entropy of x (only used by the disabled normalization below)
    H_x = tf.reduce_mean(-pxz * log_px)
    # entropy of z (only used by the disabled normalization below)
    H_z = tf.reduce_mean(-tf.math.exp(log_pz - log_qz_x) * log_pz)
    I_xz = tf.reduce_mean(I_xz, axis=0)  # [n_samples]
    # I_xz = I_xz / tf.math.sqrt(H_x * H_z)
    I_xz = tf.reduce_mean(I_xz)
    ## calculate I(Z; Y) - H(Z)
    I_zy = {}
    # for each factor
    for i in range(n_factors):
        y = np.asarray(qy[:, i], dtype=np.int32)
        I_zyi = {}
        labels = np.unique(y)
        for yk in labels:
            ids = (y == yk)
            K = np.sum(ids)
            log_qz_xk = tf.boolean_mask(log_qz_x, ids, axis=1)
            log_qz_xk = tf.reduce_logsumexp(log_qz_xk, axis=1) - tf.math.log(
                tf.constant(K, dtype=tf.float32))
            I_zyi[yk] = tf.reduce_mean(log_qz_xk, axis=0)
        # average among labels within a factor
        I_zy[i] = sum(I_zyi.values()) / len(labels)
    # average among all factors, normalized by each factor's entropy
    H_y = np.array([entropy1D(qy[:, i]) for i in range(n_factors)])
    I_zy = np.array(list(I_zy.values()))
    I_zy = np.mean(I_zy / H_y)
    ## giga score (cast so the float64 numpy term matches the tensor dtype)
    return I_xz + tf.cast(I_zy, I_xz.dtype)
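
# A minimal usage sketch, not from the original source: `giga` only needs two
# callables mapping tensors to distributions, so a toy Gaussian encoder and
# decoder are enough to exercise it. The layer sizes, random data, and sample
# counts below are illustrative assumptions.
if __name__ == '__main__':
    zdim, xdim = 4, 8
    enc = tf.keras.layers.Dense(zdim * 2)  # hypothetical encoder network
    dec = tf.keras.layers.Dense(xdim * 2)  # hypothetical decoder network

    def fn_qz_x(x):
        # q(z|x): diagonal Gaussian with predicted mean and scale
        loc, scale = tf.split(enc(x), 2, axis=-1)
        return tfd.MultivariateNormalDiag(loc, tf.nn.softplus(scale) + 1e-5)

    def fn_px_z(z):
        # p(x|z): factorized Gaussian over the observation dimensions
        loc, scale = tf.split(dec(z), 2, axis=-1)
        return tfd.Independent(tfd.Normal(loc, tf.nn.softplus(scale) + 1e-5),
                               reinterpreted_batch_ndims=1)

    X = np.random.randn(256, xdim).astype(np.float32)
    y = np.random.randint(0, 3, size=(256, 2))  # 2 discrete factors
    score = giga((X, y), fn_qz_x, fn_px_z,
                 n_samples=128, n_mcmc=16, batch_size=32, verbose=False)
    print('GIGA score:', float(score))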