Example #1
def compute_dci(ground_truth_data,
                representation_function,
                random_state,
                num_train=gin.REQUIRED,
                num_test=gin.REQUIRED,
                batch_size=16):
    """Computes the DCI scores according to Sec 2.

  Args:
    ground_truth_data: GroundTruthData to be sampled from.
    representation_function: Function that takes observations as input and
      outputs a dim_representation sized representation for each observation.
    random_state: Numpy random state used for randomness.
    num_train: Number of points used for training.
    num_test: Number of points used for testing.
    batch_size: Batch size for sampling.

  Returns:
    Dictionary with average disentanglement score, completeness and
      informativeness (train and test).
  """
    logging.info("Generating training set.")
    # mus_train are of shape [num_codes, num_train], while ys_train are of shape
    # [num_factors, num_train].
    mus_train, ys_train = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_train, random_state,
        batch_size)
    assert mus_train.shape[1] == num_train
    assert ys_train.shape[1] == num_train
    mus_test, ys_test = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_test, random_state,
        batch_size)
    scores = _compute_dci(mus_train, ys_train, mus_test, ys_test)
    return scores
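Every example here funnels through utils.generate_batch_factor_code. A minimal sketch of what it plausibly does, assuming GroundTruthData exposes a sample(num, random_state) method returning (factors, observations); the output shapes match the comments above ([num_codes, num_points] and [num_factors, num_points]):

import numpy as np

def generate_batch_factor_code_sketch(ground_truth_data, representation_function,
                                      num_points, random_state, batch_size):
    """Sketch, not the library's verbatim source: sample observations in
    chunks of at most batch_size, encode them, and stack the results."""
    representations, factors, i = None, None, 0
    while i < num_points:
        num_points_iter = min(num_points - i, batch_size)
        # Assumed interface: sample() returns (factors, observations).
        current_factors, current_observations = ground_truth_data.sample(
            num_points_iter, random_state)
        if i == 0:
            factors = current_factors
            representations = representation_function(current_observations)
        else:
            factors = np.vstack((factors, current_factors))
            representations = np.vstack(
                (representations, representation_function(current_observations)))
        i += num_points_iter
    # Transpose so codes come out as [num_codes, num_points] and factors as
    # [num_factors, num_points], matching the shape comments above.
    return np.transpose(representations), np.transpose(factors)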
Example #2
def compute_sap(ground_truth_data,
                representation_function,
                random_state,
                artifact_dir=None,
                num_train=gin.REQUIRED,
                num_test=gin.REQUIRED,
                batch_size=16,
                continuous_factors=gin.REQUIRED):
    """Computes the SAP score.

  Args:
    ground_truth_data: GroundTruthData to be sampled from.
    representation_function: Function that takes observations as input and
      outputs a dim_representation sized representation for each observation.
    random_state: Numpy random state used for randomness.
    artifact_dir: Optional path to directory where artifacts can be saved.
    num_train: Number of points used for training.
    num_test: Number of points used for testing discrete variables.
    batch_size: Batch size for sampling.
    continuous_factors: Whether the factors are continuous (True) or discrete
      (False).

  Returns:
    Dictionary with SAP score.
  """
    del artifact_dir
    logging.info("Generating training set.")
    mus, ys = utils.generate_batch_factor_code(ground_truth_data,
                                               representation_function,
                                               num_train, random_state,
                                               batch_size)
    mus_test, ys_test = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_test, random_state,
        batch_size)
    logging.info("Computing score matrix.")
    return _compute_sap(mus, ys, mus_test, ys_test, continuous_factors)
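The gin.REQUIRED defaults mean these metrics raise unless the missing values are bound through gin or passed explicitly. A direct call sidesteps gin entirely; this usage sketch assumes disentanglement_lib's dummy_data module (used in Example #10) is available:

import numpy as np
from disentanglement_lib.data.ground_truth import dummy_data  # assumed install

random_state = np.random.RandomState(0)
ground_truth_data = dummy_data.IdentityObservationsData()
# Identity encoder: observations already are the codes.
representation_function = lambda x: np.array(x, dtype=np.float64)
scores = compute_sap(ground_truth_data,
                     representation_function,
                     random_state,
                     num_train=5000,
                     num_test=2000,
                     continuous_factors=False)
print(scores["SAP_score"])  # Example #8 shows the returned key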
Example #3
def compute_downstream_task(ground_truth_data,
                            representation_function,
                            random_state,
                            artifact_dir=None,
                            num_train=gin.REQUIRED,
                            num_test=gin.REQUIRED,
                            batch_size=16):
    """Computes loss of downstream task.

  Args:
    ground_truth_data: GroundTruthData to be sampled from.
    representation_function: Function that takes observations as input and
      outputs a dim_representation sized representation for each observation.
    random_state: Numpy random state used for randomness.
    artifact_dir: Optional path to directory where artifacts can be saved.
    num_train: List with numbers of points used for training (one evaluation
      per size).
    num_test: Number of points used for testing.
    batch_size: Batch size for sampling.

  Returns:
    Dictionary with scores.
  """
    del artifact_dir
    scores = {}
    for train_size in num_train:
        mus_train, ys_train = utils.generate_batch_factor_code(
            ground_truth_data, representation_function, train_size,
            random_state, batch_size)
        mus_test, ys_test = utils.generate_batch_factor_code(
            ground_truth_data, representation_function, num_test, random_state,
            batch_size)
        predictor_model = utils.make_predictor_fn()

        train_err, test_err = _compute_loss(np.transpose(mus_train), ys_train,
                                            np.transpose(mus_test), ys_test,
                                            predictor_model)
        size_string = str(train_size)
        scores[size_string + ":mean_train_accuracy"] = np.mean(train_err)
        scores[size_string + ":mean_test_accuracy"] = np.mean(test_err)
        scores[size_string + ":min_train_accuracy"] = np.min(train_err)
        scores[size_string + ":min_test_accuracy"] = np.min(test_err)
        for i in range(len(train_err)):
            scores[size_string +
                   ":train_accuracy_factor_{}".format(i)] = train_err[i]
            scores[size_string +
                   ":test_accuracy_factor_{}".format(i)] = test_err[i]

    return scores
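utils.make_predictor_fn() returns a zero-argument factory for a fresh predictor (in the library this is gin-configurable). A sketch assuming a scikit-learn classifier; anything with fit/predict works:

from sklearn import linear_model

def make_predictor_fn_sketch():
    """Sketch: factory producing a fresh classifier per factor, mirroring
    how compute_fairness calls predictor_model_fn() in Example #14."""
    return lambda: linear_model.LogisticRegression(max_iter=1000)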
Example #4
def compute_modularity_explicitness(ground_truth_data,
                                    representation_function,
                                    random_state,
                                    artifact_dir=None,
                                    num_train=gin.REQUIRED,
                                    num_test=gin.REQUIRED,
                                    batch_size=16):
    """Computes the modularity metric according to Sec 3.

  Args:
    ground_truth_data: GroundTruthData to be sampled from.
    representation_function: Function that takes observations as input and
      outputs a dim_representation sized representation for each observation.
    random_state: Numpy random state used for randomness.
    artifact_dir: Optional path to directory where artifacts can be saved.
    num_train: Number of points used for training.
    num_test: Number of points used for testing.
    batch_size: Batch size for sampling.

  Returns:
    Dictionary with average modularity score and average explicitness
      (train and test).
  """
    del artifact_dir
    scores = {}
    mus_train, ys_train = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_train, random_state,
        batch_size)
    mus_test, ys_test = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_test, random_state,
        batch_size)
    discretized_mus = utils.make_discretizer(mus_train)
    mutual_information = utils.discrete_mutual_info(discretized_mus, ys_train)
    # Mutual information should have shape [num_codes, num_factors].
    assert mutual_information.shape[0] == mus_train.shape[0]
    assert mutual_information.shape[1] == ys_train.shape[0]
    scores["modularity_score"] = modularity(mutual_information)
    explicitness_score_train = np.zeros([ys_train.shape[0], 1])
    explicitness_score_test = np.zeros([ys_test.shape[0], 1])
    mus_train_norm, mean_mus, stddev_mus = utils.normalize_data(mus_train)
    mus_test_norm, _, _ = utils.normalize_data(mus_test, mean_mus, stddev_mus)
    for i in range(ys_train.shape[0]):
        explicitness_score_train[i], explicitness_score_test[i] = \
            explicitness_per_factor(mus_train_norm, ys_train[i, :],
                                    mus_test_norm, ys_test[i, :])
    scores["explicitness_score_train"] = np.mean(explicitness_score_train)
    scores["explicitness_score_test"] = np.mean(explicitness_score_test)
    return scores
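modularity() reduces the [num_codes, num_factors] mutual-information matrix asserted above to a scalar. A sketch of my reading of the definition (hedged, not verbatim source): each code is scored by how far its squared-MI profile deviates from an ideal template that puts all mass on a single factor.

import numpy as np

def modularity_sketch(mutual_information):
    """Sketch: 1 minus the normalized deviation from a one-hot MI profile,
    averaged over codes; 1 means perfectly modular."""
    squared_mi = mutual_information**2
    max_squared_mi = np.max(squared_mi, axis=1)
    numerator = np.sum(squared_mi, axis=1) - max_squared_mi
    denominator = max_squared_mi * (squared_mi.shape[1] - 1.)
    with np.errstate(divide="ignore", invalid="ignore"):
        delta = numerator / denominator
    modularity_score = 1. - delta
    # Codes with no mutual information to any factor get score 0.
    modularity_score[max_squared_mi == 0.] = 0.
    return np.mean(modularity_score)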
Example #5
def compute_mcc(ground_truth_data,
                representation_function,
                random_state,
                artifact_dir=None,
                num_train=gin.REQUIRED,
                correlation_fn=gin.REQUIRED,
                batch_size=16):
  """Computes the mean correlation coefficient.

  Args:
    ground_truth_data: GroundTruthData to be sampled from.
    representation_function: Function that takes observations as input and
      outputs a dim_representation sized representation for each observation.
    random_state: Numpy random state used for randomness.
    artifact_dir: Optional path to directory where artifacts can be saved.
    num_train: Number of points used for training.
    correlation_fn: Function used to compute correlations between codes and
      factors.
    batch_size: Batch size for sampling.

  Returns:
    Dict with MCC statistics.
  """
  del artifact_dir
  logging.info("Generating training set.")
  mus_train, ys_train = utils.generate_batch_factor_code(
      ground_truth_data, representation_function, num_train,
      random_state, batch_size)
  assert mus_train.shape[1] == num_train
  return _compute_mcc(mus_train, ys_train, correlation_fn, random_state)
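_compute_mcc is not shown. The usual mean-correlation-coefficient recipe, which this presumably follows: correlate every code with every factor, then pick the one-to-one code/factor matching that maximizes the mean absolute correlation. A self-contained sketch under that assumption:

import numpy as np
from scipy.optimize import linear_sum_assignment

def mcc_sketch(mus, ys):
    """Sketch: mean |Pearson correlation| between codes [num_codes, n] and
    factors [num_factors, n] under the best one-to-one matching."""
    d = min(mus.shape[0], ys.shape[0])
    # Correlations between every (code, factor) pair.
    corr = np.corrcoef(mus, ys)[:mus.shape[0], mus.shape[0]:]
    # Maximize total |correlation| via the Hungarian algorithm.
    row, col = linear_sum_assignment(-np.abs(corr))
    return np.abs(corr[row, col]).sum() / d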
Example #6
def compute_custom_metric(ground_truth_data,
                          representation_function,
                          random_state,
                          num_train=gin.REQUIRED,
                          batch_size=16):
    """Example of a custom (dummy) metric.

  Preimplemented metrics can be found in disentanglement_lib.evaluation.metrics.

  Args:
    ground_truth_data: GroundTruthData to be sampled from.
    representation_function: Function that takes observations as input and
      outputs a dim_representation sized representation for each observation.
    random_state: Numpy random state used for randomness.
    num_train: Number of points used for training.
    batch_size: Batch size for sampling.

  Returns:
    Dict with disentanglement score.
  """
    score_dict = {}

    # This is how to obtain the representations of num_train points along with the
    # ground-truth factors of variation.
    representation, factors_of_variations = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_train, random_state,
        batch_size)
    # We could now compute a metric based on representation and
    # factors_of_variations. However, for the sake of brevity, we just return 1.
    del representation, factors_of_variations
    score_dict["custom_metric"] = 1.
    return score_dict
Example #7
def compute_mig(ground_truth_data,
                Model,
                random_state,
                num_train,
                batch_size=16):
    """Computes a MIG variant that sweeps the histogram discretization over
    several bin sizes and keeps the best score (cf. Example #13)."""
    mus_train, ys_train = utils.generate_batch_factor_code(
        ground_truth_data, Model, num_train, random_state, batch_size)
    mig_score = []
    for binsize in range(2, 42, 4):
        discretized_mus = _histogram_discretize(mus_train, num_bins=binsize)
        m = utils.discrete_mutual_info(discretized_mus, ys_train)
        assert m.shape[0] == mus_train.shape[0]
        assert m.shape[1] == ys_train.shape[0]
        # m is [num_latents, num_factors]

        entropy = utils.discrete_entropy(ys_train)
        sorted_m = np.sort(m, axis=0)[::-1]
        a = sorted_m[0, :] - sorted_m[1, :]
        # Drop the first factor before averaging (presumably a constant or
        # dummy factor in this dataset), then normalize each gap by entropy.
        a = np.delete(a, 0, 0)
        entropy = np.delete(entropy, 0, 0)
        mig = np.mean(np.divide(a, entropy))
        mig_score.append(mig)  # one score per bin size
    # Report the best MIG across the bin-size sweep.
    return max(mig_score)
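_histogram_discretize is not shown; in the library it digitizes each code against its own histogram bin edges. A sketch:

import numpy as np

def _histogram_discretize_sketch(target, num_bins):
    """Sketch: discretizes each row of target into num_bins histogram bins."""
    discretized = np.zeros_like(target)
    for i in range(target.shape[0]):
        # Bin edges from the row's own histogram; dropping the last edge makes
        # np.digitize return bins 1..num_bins.
        discretized[i, :] = np.digitize(
            target[i, :], np.histogram(target[i, :], num_bins)[1][:-1])
    return discretized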
Example #8
def compute_sap(ground_truth_data,
                representation_function,
                random_state,
                num_train=gin.REQUIRED,
                num_test=gin.REQUIRED,
                batch_size=16,
                continuous_factors=gin.REQUIRED):
    """Computes the SAP score.

  Args:
    ground_truth_data: GroundTruthData to be sampled from.
    representation_function: Function that takes observations as input and
      outputs a dim_representation sized representation for each observation.
    random_state: Numpy random state used for randomness.
    num_train: Number of points used for training.
    num_test: Number of points used for testing discrete variables.
    batch_size: Batch size for sampling.
    continuous_factors: Whether the factors are continuous (True) or discrete
      (False).

  Returns:
    Dictionary with SAP score.
  """
    logging.info("Generating training set.")
    mus, ys = utils.generate_batch_factor_code(ground_truth_data,
                                               representation_function,
                                               num_train, random_state,
                                               batch_size)
    mus_test, ys_test = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_test, random_state,
        batch_size)
    logging.info("Computing score matrix.")
    score_matrix = _compute_score_matrix(mus, ys, mus_test, ys_test,
                                         continuous_factors)
    # Score matrix should have shape [num_latents, num_factors].
    assert score_matrix.shape[0] == mus.shape[0]
    assert score_matrix.shape[1] == ys.shape[0]
    scores_dict = {}
    scores_dict["SAP_score"] = _compute_avg_diff_top_two(score_matrix)
    logging.info("SAP score: %.2g", scores_dict["SAP_score"])

    return scores_dict
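_compute_avg_diff_top_two collapses the [num_latents, num_factors] score matrix asserted above into the SAP score. A sketch consistent with the SAP definition (my reconstruction, hedged): for each factor, take the gap between the best and second-best latent score, then average.

import numpy as np

def _compute_avg_diff_top_two_sketch(matrix):
    """Sketch: mean gap between the top-two latent scores per factor."""
    sorted_matrix = np.sort(matrix, axis=0)
    return np.mean(sorted_matrix[-1, :] - sorted_matrix[-2, :])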
Example #9
def compute_unified_scores(ground_truth_data,
                           representation_function,
                           random_state,
                           artifact_dir=None,
                           num_train=gin.REQUIRED,
                           num_test=gin.REQUIRED,
                           matrix_fns=gin.REQUIRED,
                           batch_size=16):
    """Computes the unified disentanglement scores.

  Args:
    ground_truth_data: GroundTruthData to be sampled from.
    representation_function: Function that takes observations as input and
      outputs a dim_representation sized representation for each observation.
    random_state: Numpy random state used for randomness.
    artifact_dir: Optional path to directory where artifacts can be saved.
    num_train: Number of points used for training.
    num_test: Number of points used for testing.
    matrix_fns: List of functions to relate factors of variations and codes.
    batch_size: Batch size for sampling.

  Returns:
    Unified scores.
  """
    logging.info("Generating training set.")
    # mus_train are of shape [num_codes, num_train], while ys_train are of shape
    # [num_factors, num_train].
    mus_train, ys_train = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_train, random_state,
        batch_size)
    assert mus_train.shape[1] == num_train
    assert ys_train.shape[1] == num_train
    mus_test, ys_test = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_test, random_state,
        batch_size)

    return unified_scores(mus_train, ys_train, mus_test, ys_test, matrix_fns,
                          artifact_dir, ground_truth_data.factor_names)
Example #10
def test_generate_batch_factor_code(self):
    ground_truth_data = dummy_data.IdentityObservationsData()
    representation_function = lambda x: np.array(x, dtype=np.float64)
    num_points = 100
    random_state = np.random.RandomState(3)
    batch_size = 192
    represents, factors = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_points,
        random_state, batch_size)
    # The representation is the identity, so codes and factors must agree:
    # both come back as [10, num_points] with values in {0, ..., 9}.
    for batch in [represents, factors]:
        np.testing.assert_equal(batch.shape, [10, num_points])
        for inx in range(10):
            self.assertEqual(np.min(batch[inx, :]), 0)
            self.assertEqual(np.max(batch[inx, :]), 10 - 1)
Example #11
def unsupervised_metrics(ground_truth_data,
                         representation_function,
                         random_state,
                         artifact_dir=None,
                         num_train=gin.REQUIRED,
                         batch_size=16):
    """Computes unsupervised scores based on covariance and mutual information.

  Args:
    ground_truth_data: GroundTruthData to be sampled from.
    representation_function: Function that takes observations as input and
      outputs a dim_representation sized representation for each observation.
    random_state: Numpy random state used for randomness.
    artifact_dir: Optional path to directory where artifacts can be saved.
    num_train: Number of points used for training.
    batch_size: Batch size for sampling.

  Returns:
    Dictionary with scores.
  """
    del artifact_dir
    scores = {}
    logging.info("Generating training set.")
    mus_train, _ = utils.generate_batch_factor_code(ground_truth_data,
                                                    representation_function,
                                                    num_train, random_state,
                                                    batch_size)
    num_codes = mus_train.shape[0]
    cov_mus = np.cov(mus_train)
    assert num_codes == cov_mus.shape[0]

    # Gaussian total correlation.
    scores["gaussian_total_correlation"] = gaussian_total_correlation(cov_mus)

    # Gaussian Wasserstein correlation.
    scores["gaussian_wasserstein_correlation"] = (
        gaussian_wasserstein_correlation(cov_mus))
    scores["gaussian_wasserstein_correlation_norm"] = (
        scores["gaussian_wasserstein_correlation"] / np.sum(np.diag(cov_mus)))

    # Compute average mutual information between different factors.
    mus_discrete = utils.make_discretizer(mus_train)
    mutual_info_matrix = utils.discrete_mutual_info(mus_discrete, mus_discrete)
    np.fill_diagonal(mutual_info_matrix, 0)
    mutual_info_score = np.sum(mutual_info_matrix) / (num_codes**2 - num_codes)
    scores["mutual_info_score"] = mutual_info_score
    return scores
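Both Gaussian scores are closed-form functions of the code covariance. Sketches consistent with the standard definitions (my reconstruction, hedged): the total correlation of a Gaussian fit is half the difference between the sum of log marginal variances and the log determinant; the Wasserstein variant compares traces through a matrix square root.

import numpy as np
import scipy.linalg

def gaussian_total_correlation_sketch(cov):
    """Sketch: KL between N(0, cov) and the product of its marginals,
    0.5 * (sum_i log cov_ii - log det cov)."""
    return 0.5 * (np.sum(np.log(np.diag(cov))) - np.linalg.slogdet(cov)[1])

def gaussian_wasserstein_correlation_sketch(cov):
    """Sketch: 2-Wasserstein-based dependence between N(0, cov) and a
    factorized Gaussian, up to terms dropped in the library's definition."""
    sqrtm = scipy.linalg.sqrtm(cov * np.expand_dims(np.diag(cov), axis=1))
    return 2 * np.trace(cov) - 2 * np.trace(sqrtm)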
Example #12
def compute_irs(ground_truth_data,
                representation_function,
                random_state,
                artifact_dir=None,
                diff_quantile=0.99,
                num_train=gin.REQUIRED,
                batch_size=gin.REQUIRED):
    """Computes the Interventional Robustness Score.

  Args:
    ground_truth_data: GroundTruthData to be sampled from.
    representation_function: Function that takes observations as input and
      outputs a dim_representation sized representation for each observation.
    random_state: Numpy random state used for randomness.
    artifact_dir: Optional path to directory where artifacts can be saved.
    diff_quantile: Float value between 0 and 1 to decide what quantile of diffs
      to select (use 1.0 for the version in the paper).
    num_train: Number of points used for training.
    batch_size: Batch size for sampling.

  Returns:
    Dict with IRS and number of active dimensions.
  """
    del artifact_dir
    logging.info("Generating training set.")
    mus, ys = utils.generate_batch_factor_code(ground_truth_data,
                                               representation_function,
                                               num_train, random_state,
                                               batch_size)
    assert mus.shape[1] == num_train

    ys_discrete = utils.make_discretizer(ys)
    active_mus = _drop_constant_dims(mus)

    if not active_mus.any():
        irs_score = 0.0
    else:
        irs_score = scalable_disentanglement_score(ys_discrete.T, active_mus.T,
                                                   diff_quantile)["avg_score"]

    score_dict = {}
    score_dict["IRS"] = irs_score
    score_dict["num_active_dims"] = np.sum(active_mus)
    return score_dict
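_drop_constant_dims removes latent dimensions with zero variance before scoring (scalable_disentanglement_score itself is defined elsewhere). A sketch of that behavior:

import numpy as np

def _drop_constant_dims_sketch(ys):
    """Sketch: keeps only rows (latent dims) with non-zero variance."""
    ys = np.asarray(ys)
    if ys.ndim != 2:
        raise ValueError("Expecting a matrix.")
    return ys[ys.var(axis=1) > 0., :]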
Example #13
def compute_mig(ground_truth_data,
                representation_function,
                random_state,
                num_train=gin.REQUIRED,
                batch_size=16):
    """Computes the mutual information gap.

  Args:
    ground_truth_data: GroundTruthData to be sampled from.
    representation_function: Function that takes observations as input and
      outputs a dim_representation sized representation for each observation.
    random_state: Numpy random state used for randomness.
    num_train: Number of points used for training.
    batch_size: Batch size for sampling.

  Returns:
    Dict with average mutual information gap.
  """
    logging.info("Generating training set.")
    mus_train, ys_train = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_train, random_state,
        batch_size)
    assert mus_train.shape[1] == num_train
    return _compute_mig(mus_train, ys_train)
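_compute_mig is not shown, but Example #7 inlines essentially the same computation for a single discretizer. A sketch consistent with that, reusing the discretizer sketch from the note under Example #7; the utils import path and the 20-bin default are assumptions:

import numpy as np
from disentanglement_lib.evaluation.metrics import utils  # assumed install

def _compute_mig_sketch(mus_train, ys_train, num_bins=20):
    """Sketch: normalized gap between the top-two mutual informations per
    factor, averaged over factors."""
    discretized_mus = _histogram_discretize_sketch(mus_train, num_bins)
    m = utils.discrete_mutual_info(discretized_mus, ys_train)  # [codes, factors]
    entropy = utils.discrete_entropy(ys_train)
    sorted_m = np.sort(m, axis=0)[::-1]
    return {"discrete_mig": np.mean(
        np.divide(sorted_m[0, :] - sorted_m[1, :], entropy))}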
Example #14
def compute_fairness(ground_truth_data,
                     representation_function,
                     random_state,
                     artifact_dir=None,
                     num_train=gin.REQUIRED,
                     num_test_points_per_class=gin.REQUIRED,
                     batch_size=16):
    """Computes unfairness scores.

  We first compute either the mean or maximum total variation for a given
  sensitive and target variable. Then, we either average or take the maximum
  with respect to target and sensitive variable. For convenience, we compute and
  save all combinations. The score used in Section 4 of the paper is here called
  mean_fairness:mean_pred:mean_sens.

  Args:
    ground_truth_data: GroundTruthData to be sampled from.
    representation_function: Function that takes observations as input and
      outputs a dim_representation sized representation for each observation.
    random_state: Numpy random state used for randomness.
    artifact_dir: Optional path to directory where artifacts can be saved.
    num_train: Number of points used for training.
    num_test_points_per_class: Number of points used for testing.
    batch_size: Batch size for sampling.

  Returns:
    Dictionary with scores.
  """
    del artifact_dir
    factor_counts = ground_truth_data.factors_num_values
    num_factors = len(factor_counts)

    scores = {}
    # Training a predictive model.
    mus_train, ys_train = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_train, random_state,
        batch_size)
    predictor_model_fn = utils.make_predictor_fn()

    # For each factor train a single predictive model.
    mean_fairness = np.zeros((num_factors, num_factors), dtype=np.float64)
    max_fairness = np.zeros((num_factors, num_factors), dtype=np.float64)
    for i in range(num_factors):
        model = predictor_model_fn()
        model.fit(np.transpose(mus_train), ys_train[i, :])

        for j in range(num_factors):
            if i == j:
                continue
            # Sample a random set of factors once.
            original_factors = ground_truth_data.sample_factors(
                num_test_points_per_class, random_state)
            counts = np.zeros((factor_counts[i], factor_counts[j]),
                              dtype=np.int64)
            for c in range(factor_counts[j]):
                # Intervene on the sensitive attribute.
                intervened_factors = np.copy(original_factors)
                intervened_factors[:, j] = c
                # Obtain the batched observations.
                observations = ground_truth_data.sample_observations_from_factors(
                    intervened_factors, random_state)
                representations = utils.obtain_representation(
                    observations, representation_function, batch_size)
                # Get the predictions.
                predictions = model.predict(np.transpose(representations))
                # Update the counts.
                counts[:, c] = np.bincount(predictions,
                                           minlength=factor_counts[i])
            mean_fairness[i, j], max_fairness[i, j] = inter_group_fairness(counts)

    # Report the scores.
    scores.update(compute_scores_dict(mean_fairness, "mean_fairness"))
    scores.update(compute_scores_dict(max_fairness, "max_fairness"))
    return scores
Example #15
def compute_reduced_downstream_task(ground_truth_data,
                                    representation_function,
                                    random_state,
                                    artifact_dir=None,
                                    num_factors_to_remove=gin.REQUIRED,
                                    num_train=gin.REQUIRED,
                                    num_test=gin.REQUIRED,
                                    batch_size=16):
  """Computes loss of a reduced downstream task.

  Measures the information leakage in each latent component after removing the
  k ("num_factors_to_remove") most informative features for the prediction
  task.

  Args:
    ground_truth_data: GroundTruthData to be sampled from.
    representation_function: Function that takes observations as input and
      outputs a dim_representation sized representation for each observation.
    random_state: Numpy random state used for randomness.
    artifact_dir: Optional path to directory where artifacts can be saved.
    num_factors_to_remove: Number of factors to remove from the latent
      representation.
    num_train: List with numbers of points used for training (one run per
      size).
    num_test: Number of points used for testing.
    batch_size: Batch size for sampling.

  Returns:
    Dictionary with scores.
  """
  del artifact_dir
  scores = {}
  # Loop on different sizes of the training 'batch', as specified with gin.
  for train_size in num_train:
    size_string = str(train_size)
    mus_train, ys_train = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, train_size, random_state,
        batch_size)
    mus_test, ys_test = utils.generate_batch_factor_code(
        ground_truth_data, representation_function, num_test, random_state,
        batch_size)
    # Create variables for aggregated scores.
    reduced_factor_train_scores = []
    other_factors_train_scores = []
    reduced_factor_test_scores = []
    other_factors_test_scores = []
    # Compute the reduced representation and test it for each factor of
    # variation.
    for factor_of_interest in range(ground_truth_data.num_factors):
      # Copy the training data and eliminate the k most informative factors.
      reduced_mus_train = mus_train.copy()
      reduced_mus_test = mus_test.copy()
      for _ in range(num_factors_to_remove):
        reduced_mus_train, reduced_mus_test =\
          compute_reduced_representation(reduced_mus_train, ys_train,
                                         reduced_mus_test, ys_test,
                                         factor_of_interest)
      predictor_model = utils.make_predictor_fn()
      train_acc, test_acc = compute_predictive_accuracy(
          np.transpose(reduced_mus_train), ys_train,
          np.transpose(reduced_mus_test), ys_test, predictor_model)
      # Save scores for reduced factor.
      scores[size_string +
             ":reduced_factor_{}:mean_train_accuracy_reduced_factor".format(
                 factor_of_interest)] = train_acc[factor_of_interest]
      scores[size_string +
             ":reduced_factor_{}:mean_test_accuracy_reduced_factor".format(
                 factor_of_interest)] = test_acc[factor_of_interest]
      reduced_factor_train_scores.append(train_acc[factor_of_interest])
      reduced_factor_test_scores.append(test_acc[factor_of_interest])

      # Save the scores (accuracies) in the score dictionary.
      local_other_factors_train_scores = []
      local_other_factors_test_scores = []
      for i in range(len(train_acc)):
        scores[size_string +
               ":reduced_factor_{}:mean_train_accuracy_factor_{}".format(
                   factor_of_interest, i)] = train_acc[i]
        scores[size_string +
               ":reduced_factor_{}:mean_test_accuracy_factor_{}".format(
                   factor_of_interest, i)] = test_acc[i]
        if i != factor_of_interest:
          local_other_factors_train_scores.append(train_acc[i])
          local_other_factors_test_scores.append(test_acc[i])
      # Save mean score for non-reduced factors.
      scores[size_string +
             ":reduced_factor_{}:mean_train_accuracy_non_reduced_factor".format(
                 factor_of_interest)] = np.mean(
                     local_other_factors_train_scores)
      scores[size_string +
             ":reduced_factor_{}:mean_test_accuracy_non_reduced_factor".format(
                 factor_of_interest)] = np.mean(local_other_factors_test_scores)
      other_factors_train_scores.append(
          np.mean(local_other_factors_train_scores))
      other_factors_test_scores.append(np.mean(local_other_factors_test_scores))

    # Compute the aggregate scores.
    scores[size_string + ":mean_train_accuracy_reduced_factor"] = np.mean(
        reduced_factor_train_scores)
    scores[size_string + ":mean_test_accuracy_reduced_factor"] = np.mean(
        reduced_factor_test_scores)
    scores[size_string + ":mean_train_accuracy_other_factors"] = np.mean(
        other_factors_train_scores)
    scores[size_string + ":mean_test_accuracy_other_factors"] = np.mean(
        other_factors_test_scores)
  return scores