Code example #1
0
 def setUp(self):
     """Creates shared test fixtures: hyperparameters, scratch dirs, toy dataset and fairness metrics."""
     super(FairnessMetricsTest, self).setUp()
     # Metric and training hyperparameters used across the test cases.
     self.num_thresholds = 5
     self.primary_hidden_units = [16, 4]
     self.batch_size = 8
     self.train_steps = 10
     self.test_steps = 5
     self.pretrain_steps = 5
     # Fresh scratch directories so runs never collide.
     self.model_dir = tempfile.mkdtemp()
     self.print_dir = tempfile.mkdtemp()
     # Dataset / fairness configuration for the toy UCI-Adult data
     # bundled next to this test file.
     self.label_column_name = 'income'
     self.protected_groups = ['sex', 'race']
     self.subgroups = [0, 1, 2, 3]
     data_root = os.path.dirname(__file__)
     self.dataset_base_dir = os.path.join(data_root, 'data/toy_data')
     self.train_file = [os.path.join(data_root, 'data/toy_data/train.csv')]
     self.test_file = [os.path.join(data_root, 'data/toy_data/test.csv')]
     self.load_dataset = UCIAdultInput(
         dataset_base_dir=self.dataset_base_dir,
         train_file=self.train_file,
         test_file=self.test_file)
     self.fairness_metrics = RobustFairnessMetrics(
         label_column_name=self.label_column_name,
         protected_groups=self.protected_groups,
         subgroups=self.subgroups)
     # Metric names the tests expect to find in evaluation results.
     self.eval_metric_keys = [
         'accuracy', 'recall', 'precision', 'tp', 'tn', 'fp', 'fn', 'fpr',
         'fnr'
     ]
Code example #2
0
 def setUp(self):
     """Creates shared test fixtures: hyperparameters, toy dataset input and fairness metrics."""
     super(BaselineModelTest, self).setUp()
     # Fresh scratch directory for model checkpoints.
     self.model_dir = tempfile.mkdtemp()
     # Model and training hyperparameters.
     self.hidden_units = [16, 4]
     self.batch_size = 8
     self.train_steps = 20
     self.test_steps = 5
     # Toy UCI-Adult dataset shipped alongside this test file.
     data_root = os.path.dirname(__file__)
     self.dataset_base_dir = os.path.join(data_root, 'data/toy_data')
     self.train_file = [os.path.join(data_root, 'data/toy_data/train.csv')]
     self.test_file = [os.path.join(data_root, 'data/toy_data/test.csv')]
     self.load_dataset = UCIAdultInput(
         dataset_base_dir=self.dataset_base_dir,
         train_file=self.train_file,
         test_file=self.test_file)
     # Fairness-metric configuration (two binary protected groups give
     # four subgroups, enumerated as [0, 1, 2, 3]).
     self.label_column_name = 'income'
     self.protected_groups = ['sex', 'race']
     self.subgroups = [0, 1, 2, 3]
     self.fairness_metrics = RobustFairnessMetrics(
         label_column_name=self.label_column_name,
         protected_groups=self.protected_groups,
         subgroups=self.subgroups)
 def setUp(self):
   """Creates shared test fixtures: hyperparameters, scratch dir and toy dataset input."""
   super(AdversarialReweightingModelTest, self).setUp()
   # Fresh scratch directory for model checkpoints.
   self.model_dir = tempfile.mkdtemp()
   # Model and training hyperparameters.
   self.primary_hidden_units = [16, 4]
   self.batch_size = 8
   self.train_steps = 20
   self.test_steps = 5
   self.pretrain_steps = 5
   # Toy UCI-Adult dataset shipped alongside this test file.
   data_root = os.path.dirname(__file__)
   self.dataset_base_dir = os.path.join(data_root, 'data/toy_data')
   self.train_file = [os.path.join(data_root, 'data/toy_data/train.csv')]
   self.test_file = [os.path.join(data_root, 'data/toy_data/test.csv')]
   self.load_dataset = UCIAdultInput(
       dataset_base_dir=self.dataset_base_dir,
       train_file=self.train_file,
       test_file=self.test_file)
   # NOTE(review): siblings call this `label_column_name`; kept as-is since
   # other tests in this class may read `target_column_name`.
   self.target_column_name = 'income'
Code example #4
0
def run_model():
    """Instantiates, trains and evaluates the model selected by FLAGS.

    All configuration (model name, dataset, file paths, batch size, step
    counts) is read from FLAGS. Final evaluation results are written to
    FLAGS.output_file_name inside the model directory.

    Raises:
      ValueError: if FLAGS.model_name is not implemented.
      ValueError: if FLAGS.dataset is not implemented.
    """
    if FLAGS.model_name not in MODEL_KEYS:
        raise ValueError("Model {} is not implemented.".format(
            FLAGS.model_name))
    model_dir, model_name, print_dir = _initialize_model_dir()

    tf.logging.info(
        "Creating experiment, storing model files in {}".format(model_dir))

    # Instantiates the dataset wrapper that provides the input_fn.
    if FLAGS.dataset == "law_school":
        load_dataset = LawSchoolInput(dataset_base_dir=FLAGS.dataset_base_dir,
                                      train_file=FLAGS.train_file,
                                      test_file=FLAGS.test_file)
    elif FLAGS.dataset == "compas":
        load_dataset = CompasInput(dataset_base_dir=FLAGS.dataset_base_dir,
                                   train_file=FLAGS.train_file,
                                   test_file=FLAGS.test_file)
    elif FLAGS.dataset == "uci_adult":
        load_dataset = UCIAdultInput(dataset_base_dir=FLAGS.dataset_base_dir,
                                     train_file=FLAGS.train_file,
                                     test_file=FLAGS.test_file)
    else:
        raise ValueError("Input_fn for {} dataset is not implemented.".format(
            FLAGS.dataset))

    train_input_fn = load_dataset.get_input_fn(
        mode=tf.estimator.ModeKeys.TRAIN, batch_size=FLAGS.batch_size)
    test_input_fn = load_dataset.get_input_fn(mode=tf.estimator.ModeKeys.EVAL,
                                              batch_size=FLAGS.batch_size)

    feature_columns, _, protected_groups, label_column_name = load_dataset.get_feature_columns(
        embedding_dimension=FLAGS.embedding_dimension,
        include_sensitive_columns=FLAGS.include_sensitive_columns)

    # Constructs an int list enumerating the subgroups formed by the cartesian
    # product of the (assumed binary) protected groups. For example, with two
    # protected features ["race", "sex"] cast as binary features
    # race=["White"(0), "Black"(1)] and sex=["Male"(0), "Female"(1)], the
    # 2^2 = 4 subgroups ["White Male"(00), "White Female"(01),
    # "Black Male"(10), "Black Female"(11)] are enumerated as [0, 1, 2, 3].
    # BUGFIX: was `np.arange(len(protected_groups))`, which yields only [0, 1]
    # for two protected groups — contradicting this comment and the tests,
    # which pass subgroups=[0, 1, 2, 3].
    subgroups = np.arange(2 ** len(protected_groups))

    # Instantiates the tf.estimator.Estimator object.
    estimator = get_estimator(model_dir,
                              model_name,
                              feature_columns=feature_columns,
                              protected_groups=protected_groups,
                              label_column_name=label_column_name)

    # Adds additional fairness metrics to the estimator's eval output.
    fairness_metrics = RobustFairnessMetrics(
        label_column_name=label_column_name,
        protected_groups=protected_groups,
        subgroups=subgroups,
        print_dir=print_dir)
    eval_metrics_fn = fairness_metrics.create_fairness_metrics_fn()
    estimator = tf.estimator.add_metrics(estimator, eval_metrics_fn)

    # Creates training and evaluation specifications.
    train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                        max_steps=FLAGS.train_steps)
    eval_spec = tf.estimator.EvalSpec(input_fn=test_input_fn,
                                      steps=FLAGS.test_steps)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    tf.logging.info("Training completed.")

    # Runs a final standalone evaluation and persists the results.
    eval_results = estimator.evaluate(input_fn=test_input_fn,
                                      steps=FLAGS.test_steps)

    eval_results_path = os.path.join(model_dir, FLAGS.output_file_name)
    write_to_output_file(eval_results, eval_results_path)