def compute_mig(ground_truth_data,
                Model,
                random_state,
                num_train,
                batch_size=16):
    """Return the best Mutual Information Gap (MIG) over several bin sizes.

    Codes and factors are sampled once via
    ``utils.generate_batch_factor_code``. The codes are then discretized
    with each bin size in ``range(2, 42, 4)``, a MIG score is computed per
    bin size, and the largest score is returned.

    Args:
        ground_truth_data: Forwarded to ``utils.generate_batch_factor_code``.
        Model: Forwarded to ``utils.generate_batch_factor_code``.
        random_state: Forwarded to ``utils.generate_batch_factor_code``.
        num_train: Forwarded to ``utils.generate_batch_factor_code``.
        batch_size: Forwarded to ``utils.generate_batch_factor_code``.

    Returns:
        The maximum, over all tried bin sizes, of the mean normalized gap
        between the two highest mutual-information values per factor.
    """
    mus_train, ys_train = utils.generate_batch_factor_code(
        ground_truth_data, Model, num_train, random_state, batch_size)

    # The factor entropies do not depend on the bin size, so compute them
    # once (assumes utils.discrete_entropy has no side effects).
    # NOTE(review): the first factor is excluded from the score, exactly as
    # in the previous implementation; the reason is not visible here.
    entropy = np.delete(utils.discrete_entropy(ys_train), 0, 0)

    mig_scores = []
    for binsize in range(2, 42, 4):
        discretized_mus = _histogram_discretize(mus_train, num_bins=binsize)
        m = utils.discrete_mutual_info(discretized_mus, ys_train)
        # m is [num_latents, num_factors]
        assert m.shape[0] == mus_train.shape[0]
        assert m.shape[1] == ys_train.shape[0]

        # Sort every column in descending order so that rows 0 and 1 hold
        # the two largest mutual-information values of each factor.
        sorted_m = np.sort(m, axis=0)[::-1]
        gap = np.delete(sorted_m[0, :] - sorted_m[1, :], 0, 0)

        # Record the score of *every* bin size; appending after the loop
        # would keep only the last one and make the max() below a no-op.
        mig_scores.append(np.mean(np.divide(gap, entropy)))

    return max(mig_scores)
def aggregation_mig(m, ys_train):
  """Aggregates a mutual-information matrix into MIG scores.

  Args:
    m: Mutual-information matrix; its columns are matched against the
      entries of the entropy returned by `utils.discrete_entropy(ys_train)`.
    ys_train: Factor values passed to `utils.discrete_entropy`.

  Returns:
    Dict with the mean score under "mig" and one "mig.factor_<i>" entry per
    column of `m`.
  """
  factor_entropy = utils.discrete_entropy(ys_train)
  # Descending sort along axis 0: rows 0 and 1 are the two largest values
  # of every column.
  descending = np.sort(m, axis=0)[::-1]
  top_two_gap = descending[0, :] - descending[1, :]
  per_factor = np.divide(top_two_gap, factor_entropy[:])

  result = {"mig": np.mean(per_factor)}
  assert len(per_factor) == m.shape[1], "Wrong length."
  result.update(
      ("mig.factor_{}".format(index), value)
      for index, value in enumerate(per_factor))
  return result
Esempio n. 3
0
def _compute_mig(mus_train, ys_train):
  """Computes the discrete MIG score for the given codes and factors.

  Args:
    mus_train: Codes; discretized with `utils.make_discretizer` before the
      mutual information is computed.
    ys_train: Factor values.

  Returns:
    Dict with a single key, "discrete_mig", holding the mean over factors of
    the entropy-normalized gap between the two largest mutual-information
    values.
  """
  # The discretizer returns a pair; only the first element is used here.
  codes, _ = utils.make_discretizer(mus_train)
  mutual_info = utils.discrete_mutual_info(codes, ys_train)
  # mutual_info is [num_latents, num_factors]
  assert mutual_info.shape[0] == mus_train.shape[0]
  assert mutual_info.shape[1] == ys_train.shape[0]

  factor_entropy = utils.discrete_entropy(ys_train)
  # Descending sort per column puts the two largest values in rows 0 and 1.
  ranked = np.sort(mutual_info, axis=0)[::-1]
  normalized_gap = np.divide(ranked[0, :] - ranked[1, :], factor_entropy[:])
  return {"discrete_mig": np.mean(normalized_gap)}
Esempio n. 4
0
    def compute_mi_matrix(mus_train,
                          ys_train,
                          need_discretized_1=False,
                          need_discretized_2=False):
        """Return the mutual-information matrix and the factor entropies.

        Args:
            mus_train: Codes; run through ``utils.make_discretizer`` first
                when ``need_discretized_1`` is true.
            ys_train: Factors; run through ``utils.make_discretizer`` first
                when ``need_discretized_2`` is true.
            need_discretized_1: Whether to discretize ``mus_train``.
            need_discretized_2: Whether to discretize ``ys_train``.

        Returns:
            Tuple ``(m, entropy)`` where ``m`` is the result of
            ``utils.discrete_mutual_info`` and ``entropy`` is the result of
            ``utils.discrete_entropy`` on the (possibly discretized) factors.
        """
        codes = (utils.make_discretizer(mus_train)
                 if need_discretized_1 else mus_train)
        factors = (utils.make_discretizer(ys_train)
                   if need_discretized_2 else ys_train)

        mi_matrix = utils.discrete_mutual_info(codes, factors)
        # mi_matrix is [num_latents, num_factors]
        assert mi_matrix.shape[0] == codes.shape[0]
        assert mi_matrix.shape[1] == factors.shape[0]

        return mi_matrix, utils.discrete_entropy(factors)
Esempio n. 5
0
def _compute_mig(mus_train, ys_train):
    """Compute the discrete MIG score, ignoring factors with zero entropy.

    Args:
        mus_train: Codes; discretized with ``utils.make_discretizer`` before
            the mutual information is computed.
        ys_train: Factor values.

    Returns:
        Dict with a single key, ``"discrete_mig"``, holding the mean over
        factors of the entropy-normalized gap between the two largest
        mutual-information values. Factors whose entropy is zero are left
        out of the mean; if every factor has zero entropy the result is NaN.
    """
    discretized_mus = utils.make_discretizer(mus_train)
    m = utils.discrete_mutual_info(discretized_mus, ys_train)
    # m is [num_latents, num_factors]
    assert m.shape[0] == mus_train.shape[0]
    assert m.shape[1] == ys_train.shape[0]

    entropy = np.asarray(utils.discrete_entropy(ys_train))
    # Descending sort per column puts the two largest values in rows 0 and 1.
    sorted_m = np.sort(m, axis=0)[::-1]
    gap = sorted_m[0, :] - sorted_m[1, :]

    # With local sampling some factors are never varied, so their entropy is
    # zero. Pre-fill the output with NaN and divide only where the entropy is
    # non-zero; nanmean then skips the untouched entries.
    # ndarray.fill() works in place and returns None, so the buffer must be
    # built first and passed as `out`, never chained as `.fill(...)`.
    normalized_gap = np.full_like(entropy, np.nan, dtype=float)
    np.divide(gap, entropy, out=normalized_gap, where=entropy != 0)

    return {"discrete_mig": np.nanmean(normalized_gap)}
Esempio n. 6
0
 def test_discrete_entropy(self):
     """Checks `utils.discrete_entropy` on two rows of three equally frequent values."""
     factors = np.array([[1, 1, 2, 2, 3, 3], [3, 3, 2, 2, 1, 1]])
     # Each row holds three distinct values that appear twice each, so the
     # expected entropy of every row is log(3).
     expected = np.log(3)
     entropies = utils.discrete_entropy(factors)
     np.testing.assert_allclose(entropies, [expected] * 2)