コード例 #1
0
    def compute_mi_matrix(mus_train,
                          ys_train,
                          need_discretized_1=False,
                          need_discretized_2=False):
        """Computes the discrete mutual information matrix and factor entropy.

        Args:
          mus_train: Codes; the first axis is checked against the rows of the
            mutual information matrix.
          ys_train: Factors; the first axis is checked against the columns of
            the mutual information matrix.
          need_discretized_1: If true, `mus_train` is passed through
            `utils.make_discretizer` before the mutual information is computed.
          need_discretized_2: If true, `ys_train` is passed through
            `utils.make_discretizer` before the mutual information and the
            entropy are computed.

        Returns:
          Tuple `(m, entropy)`: the result of `utils.discrete_mutual_info` on
          the (optionally discretized) inputs, and the result of
          `utils.discrete_entropy` on the (optionally discretized) `ys_train`.
        """
        if need_discretized_1:
            mus_train = utils.make_discretizer(mus_train)
        if need_discretized_2:
            ys_train = utils.make_discretizer(ys_train)
        m = utils.discrete_mutual_info(mus_train, ys_train)
        # m is [num_latents, num_factors]
        assert m.shape[0] == mus_train.shape[0]
        assert m.shape[1] == ys_train.shape[0]
        entropy = utils.discrete_entropy(ys_train)

        return m, entropy
コード例 #2
0
def _compute_mig(mus_train, ys_train):
  """Computes the discrete mutual information gap from codes and factors.

  Args:
    mus_train: Codes; the first axis is checked against the rows of the mutual
      information matrix.
    ys_train: Factors; the first axis is checked against the columns of the
      mutual information matrix.

  Returns:
    Dictionary with the single key "discrete_mig".
  """
  # The discretizer used here returns a pair; only the first item is needed.
  binned_codes, _ = utils.make_discretizer(mus_train)
  mi = utils.discrete_mutual_info(binned_codes, ys_train)
  # mi is [num_latents, num_factors]
  assert mi.shape[0] == mus_train.shape[0]
  assert mi.shape[1] == ys_train.shape[0]
  factor_entropy = utils.discrete_entropy(ys_train)
  # Sort each column in descending order so rows 0 and 1 hold the two largest
  # mutual information values per factor.
  mi_descending = np.sort(mi, axis=0)[::-1]
  top_gap = mi_descending[0, :] - mi_descending[1, :]
  normalized_gap = np.divide(top_gap, factor_entropy[:])
  return {"discrete_mig": np.mean(normalized_gap)}
コード例 #3
0
def compute_modularity_explicitness(ground_truth_data,
                                    representation_function,
                                    random_state,
                                    artifact_dir=None,
                                    num_train=gin.REQUIRED,
                                    num_test=gin.REQUIRED,
                                    batch_size=16):
    """Computes modularity and per-split explicitness scores.

    Args:
      ground_truth_data: GroundTruthData to be sampled from.
      representation_function: Function that takes observations as input and
        outputs a dim_representation sized representation for each
        observation.
      random_state: Numpy random state used for randomness.
      artifact_dir: Optional path to directory where artifacts can be saved
        (unused here).
      num_train: Number of points used for training.
      num_test: Number of points used for testing.
      batch_size: Batch size for sampling.

    Returns:
      Dictionary with keys "modularity_score", "explicitness_score_train" and
      "explicitness_score_test"; the explicitness entries are means over the
      per-factor scores.
    """
    del artifact_dir
    # Sample the training split first, then the test split, from the same
    # random state.
    mus_train, ys_train = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_train, random_state,
        batch_size)
    mus_test, ys_test = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_test, random_state,
        batch_size)

    # Modularity is derived from the code/factor mutual information, which
    # should have shape [num_codes, num_factors].
    mi_matrix = utils.discrete_mutual_info(
        utils.make_discretizer(mus_train), ys_train)
    assert mi_matrix.shape[0] == mus_train.shape[0]
    assert mi_matrix.shape[1] == ys_train.shape[0]
    scores = {"modularity_score": modularity(mi_matrix)}

    # Explicitness is evaluated per factor on codes normalized with the
    # training-split statistics.
    num_factors = ys_train.shape[0]
    per_factor_train = np.zeros([num_factors, 1])
    per_factor_test = np.zeros([ys_test.shape[0], 1])
    codes_train, train_mean, train_stddev = utils.normalize_data(mus_train)
    codes_test, _, _ = utils.normalize_data(mus_test, train_mean,
                                            train_stddev)
    for factor_idx in range(num_factors):
        train_and_test = explicitness_per_factor(
            codes_train, ys_train[factor_idx, :],
            codes_test, ys_test[factor_idx, :])
        per_factor_train[factor_idx], per_factor_test[factor_idx] = \
            train_and_test

    scores["explicitness_score_train"] = np.mean(per_factor_train)
    scores["explicitness_score_test"] = np.mean(per_factor_test)
    return scores
コード例 #4
0
def unsupervised_metrics(ground_truth_data,
                         representation_function,
                         random_state,
                         artifact_dir=None,
                         num_train=gin.REQUIRED,
                         batch_size=16):
    """Computes unsupervised scores from code covariance and mutual information.

    Args:
      ground_truth_data: GroundTruthData to be sampled from.
      representation_function: Function that takes observations as input and
        outputs a dim_representation sized representation for each
        observation.
      random_state: Numpy random state used for randomness.
      artifact_dir: Optional path to directory where artifacts can be saved
        (unused here).
      num_train: Number of points used for training.
      batch_size: Batch size for sampling.

    Returns:
      Dictionary with keys "gaussian_total_correlation",
      "gaussian_wasserstein_correlation",
      "gaussian_wasserstein_correlation_norm" and "mutual_info_score".
    """
    del artifact_dir
    logging.info("Generating training set.")
    # Only the codes are needed; the sampled factors are discarded.
    mus_train, _ = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_train, random_state,
        batch_size)
    num_codes = mus_train.shape[0]
    cov_mus = np.cov(mus_train)
    assert num_codes == cov_mus.shape[0]

    scores = {}

    # Gaussian total correlation.
    scores["gaussian_total_correlation"] = gaussian_total_correlation(cov_mus)

    # Gaussian Wasserstein correlation, raw and normalized by the sum of the
    # covariance diagonal.
    wasserstein = gaussian_wasserstein_correlation(cov_mus)
    scores["gaussian_wasserstein_correlation"] = wasserstein
    scores["gaussian_wasserstein_correlation_norm"] = (
        wasserstein / np.sum(np.diag(cov_mus)))

    # Average pairwise mutual information between distinct codes: zero the
    # diagonal (self-information) and divide by the number of off-diagonal
    # entries.
    discrete_codes = utils.make_discretizer(mus_train)
    pairwise_mi = utils.discrete_mutual_info(discrete_codes, discrete_codes)
    np.fill_diagonal(pairwise_mi, 0)
    off_diagonal_count = num_codes * (num_codes - 1)
    scores["mutual_info_score"] = np.sum(pairwise_mi) / off_diagonal_count
    return scores
コード例 #5
0
def compute_local_modularity(ground_truth_data,
                             representation_function,
                             random_state,
                             artifact_dir=None,
                             num_train=gin.REQUIRED,
                             num_local_clusters=gin.REQUIRED,
                             batch_size=16):
    """Computes modularity over repeated locally sampled batches.

    Args:
      ground_truth_data: GroundTruthData to be sampled from.
      representation_function: Function that takes observations as input and
        outputs a dim_representation sized representation for each
        observation.
      random_state: Numpy random state used for randomness.
      artifact_dir: Optional path to directory where artifacts can be saved
        (unused here).
      num_train: Number of points used for training in each run.
      num_local_clusters: How many local batches to sample and score.
      batch_size: Batch size for sampling.

    Returns:
      Dictionary with "modularity_score" (mean over all runs) and
      "local_modularity_scores_samples" (list of the per-run scores).
    """
    del artifact_dir
    per_run_scores = []
    for _ in range(num_local_clusters):
        mus_train, ys_train = utils.generate_local_batch_factor_code(
            ground_truth_data, representation_function, num_train,
            random_state, batch_size)
        mi_matrix = utils.discrete_mutual_info(
            utils.make_discretizer(mus_train), ys_train)
        # Mutual information should have shape [num_codes, num_factors].
        assert mi_matrix.shape[0] == mus_train.shape[0]
        assert mi_matrix.shape[1] == ys_train.shape[0]
        per_run_scores.append(modularity_explicitness.modularity(mi_matrix))

    per_run_scores = np.array(per_run_scores)
    return {
        "modularity_score": np.mean(per_run_scores),
        "local_modularity_scores_samples": per_run_scores.tolist(),
    }
コード例 #6
0
def compute_irs(ground_truth_data,
                representation_function,
                random_state,
                artifact_dir=None,
                diff_quantile=0.99,
                num_train=gin.REQUIRED,
                batch_size=gin.REQUIRED):
    """Computes the Interventional Robustness Score (IRS).

    Args:
      ground_truth_data: GroundTruthData to be sampled from.
      representation_function: Function that takes observations as input and
        outputs a dim_representation sized representation for each
        observation.
      random_state: Numpy random state used for randomness.
      artifact_dir: Optional path to directory where artifacts can be saved
        (unused here).
      diff_quantile: Float value between 0 and 1 to decide what quantile of
        diffs to select (use 1.0 for the version in the paper).
      num_train: Number of points used for training.
      batch_size: Batch size for sampling.

    Returns:
      Dictionary with keys "IRS" and "num_active_dims".
    """
    del artifact_dir
    logging.info("Generating training set.")
    mus, ys = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_train, random_state,
        batch_size)
    # Samples are expected along the second axis of the codes.
    assert mus.shape[1] == num_train

    ys_discrete = utils.make_discretizer(ys)
    active_mus = _drop_constant_dims(mus)

    if active_mus.any():
        # The scorer is handed the transposed factors and codes.
        irs_score = scalable_disentanglement_score(
            ys_discrete.T, active_mus.T, diff_quantile)["avg_score"]
    else:
        # Nothing active to score.
        irs_score = 0.0

    # NOTE(review): this sums the entries of `active_mus`; if
    # `_drop_constant_dims` returns the retained code values rather than a
    # boolean mask, the result is not a count of dimensions -- confirm.
    return {"IRS": irs_score, "num_active_dims": np.sum(active_mus)}
コード例 #7
0
def mutual_information_matrix(mus_train, ys_train, mus_test, ys_test):
  """Returns the mutual information matrix between codes and factors.

  This matrix is the one used to compute the MIG and Modularity scores.

  Args:
    mus_train: Batch of learned representations to be used for training.
    ys_train: Observed factors of variation corresponding to the
      representations in mus_train.
    mus_test: Unused.
    ys_test: Unused.

  Returns:
    The result of `utils.discrete_mutual_info` applied to the discretized
    codes and `ys_train`.
  """
  # The test split is accepted only for signature compatibility.
  del mus_test, ys_test
  return utils.discrete_mutual_info(
      utils.make_discretizer(mus_train), ys_train)
コード例 #8
0
def _compute_mig(mus_train, ys_train):
    """Computes the discrete mutual information gap, skipping zero-entropy factors.

    Args:
      mus_train: Codes; the first axis is checked against the rows of the
        mutual information matrix.
      ys_train: Factors; the first axis is checked against the columns of the
        mutual information matrix.

    Returns:
      Dictionary with the single key "discrete_mig": the mean, over factors
      with non-zero entropy, of the gap between the two largest mutual
      information values divided by the factor entropy.
    """
    score_dict = {}
    discretized_mus = utils.make_discretizer(mus_train)
    m = utils.discrete_mutual_info(discretized_mus, ys_train)
    assert m.shape[0] == mus_train.shape[0]
    assert m.shape[1] == ys_train.shape[0]
    # m is [num_latents, num_factors]
    entropy = utils.discrete_entropy(ys_train)
    # Sort each column in descending order so rows 0 and 1 hold the two
    # largest mutual information values per factor.
    sorted_m = np.sort(m, axis=0)[::-1]
    gap = sorted_m[0, :] - sorted_m[1, :]
    # With local sampling some factors are never varied, so their entropy is
    # 0. Those slots are skipped by `where` and must therefore be pre-filled
    # with NaN so that nanmean ignores them. `ndarray.fill` returns None, so
    # the buffer is built with np.full instead of chaining
    # `zeros_like(...).fill(...)`, which would pass out=None and leave the
    # skipped slots uninitialized.
    nan_buffer = np.full(np.shape(entropy), np.nan, dtype=float)
    normalized_gap = np.divide(gap,
                               entropy,
                               out=nan_buffer,
                               where=entropy != 0)
    score_dict["discrete_mig"] = np.nanmean(normalized_gap)
    return score_dict