Example #1
0
 def testAggregateH5FormatSaveLoad(self):
     """Tests that an AggregateFunction model survives an H5 save/load cycle.

     Builds, compiles and briefly fits a premade AggregateFunction model,
     writes it to HDF5 (without the optimizer), reloads it with the library's
     custom objects, and checks that the reloaded model reproduces the
     original model's predictions.
     """
     model_config = configs.AggregateFunctionConfig(
         feature_configs=feature_configs,
         regularizer_configs=[
             configs.RegularizerConfig('calib_hessian', l2=1e-4),
             configs.RegularizerConfig('torsion', l2=1e-3),
         ],
         middle_calibration=True,
         middle_monotonicity='increasing',
         output_min=0.0,
         output_max=1.0,
         output_calibration=True,
         output_calibration_num_keypoints=8,
         output_initialization=[0.0, 1.0])
     model = premade.AggregateFunction(model_config)
     # Compile and fit model.
     model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(0.1))
     model.fit(fake_data['train_xs'], fake_data['train_ys'])
     # Save model using H5 format.
     with tempfile.NamedTemporaryFile(suffix='.h5') as f:
         # Note: because of naming clashes in the optimizer, we cannot include it
         # when saving in HDF5. The keras team has informed us that we should not
         # push to support this since SavedModel format is the new default and no
         # new HDF5 functionality is desired.
         tf.keras.models.save_model(model, f.name, include_optimizer=False)
         loaded_model = tf.keras.models.load_model(
             f.name, custom_objects=premade.get_custom_objects())
         # Reloaded model must reproduce the original predictions.
         self.assertAllClose(model.predict(fake_data['eval_xs']),
                             loaded_model.predict(fake_data['eval_xs']))
Example #2
0
 def testCalibratedLatticeRegressor(self, feature_names, output_calibration,
                                    average_loss):
   """Trains a canned calibrated-linear regressor and bounds its eval loss.

   Args:
     feature_names: Boston feature names the model should consume.
     output_calibration: whether the model config enables output calibration.
     average_loss: upper bound that the evaluated average_loss must stay below.
   """
   self._ResetAllBackends()
   # Keep only the columns/configs whose names were requested.
   wanted = set(feature_names)
   feature_columns = [
       column for column in self.boston_feature_columns
       if column.name in wanted
   ]
   feature_configs = [
       config for config in self.boston_feature_configs
       if config.name in wanted
   ]
   model_config = configs.CalibratedLinearConfig(
       feature_configs=feature_configs,
       regularizer_configs=[
           configs.RegularizerConfig(name='torsion', l2=1e-4),
           configs.RegularizerConfig(name='output_calib_hessian', l2=1e-4),
       ],
       output_calibration=output_calibration)
   estimator = estimators.CannedRegressor(
       feature_columns=feature_columns,
       model_config=model_config,
       feature_analysis_input_fn=self._GetBostonTrainInputFn(num_epochs=1),
       optimizer=tf.keras.optimizers.Adam(0.01))
   estimator.train(input_fn=self._GetBostonTrainInputFn(num_epochs=200))
   results = estimator.evaluate(input_fn=self._GetBostonTestInputFn())
   logging.info('Calibrated lattice regressor results:')
   logging.info(results)
   self.assertLess(results['average_loss'], average_loss)
Example #3
0
 def testLatticeEnsembleFromConfig(self):
     """Round-trips a CalibratedLatticeEnsemble through get_config/from_config.

     Comparing canonical JSON serializations makes the equality check
     independent of dict ordering.
     """
     config = configs.CalibratedLatticeEnsembleConfig(
         feature_configs=copy.deepcopy(feature_configs),
         lattices=[['numerical_1', 'categorical'],
                   ['numerical_2', 'categorical']],
         num_lattices=2,
         lattice_rank=2,
         separate_calibrators=True,
         regularizer_configs=[
             configs.RegularizerConfig('calib_hessian', l2=1e-3),
             configs.RegularizerConfig('torsion', l2=1e-4),
         ],
         output_min=-1.0,
         output_max=1.0,
         output_calibration=True,
         output_calibration_num_keypoints=5,
         output_initialization=[-1.0, 1.0])
     original = premade.CalibratedLatticeEnsemble(config)
     rebuilt = premade.CalibratedLatticeEnsemble.from_config(
         original.get_config(), custom_objects=premade.get_custom_objects())

     def canonical_json(model):
         # sort_keys gives a deterministic serialization for comparison.
         return json.dumps(model.get_config(), sort_keys=True, cls=self.Encoder)

     self.assertEqual(canonical_json(original), canonical_json(rebuilt))
Example #4
0
 def testLatticeEnsembleH5FormatSaveLoad(self):
     """Tests that a CalibratedLatticeEnsemble survives an H5 save/load cycle.

     Builds, compiles and briefly fits the ensemble model, writes it to HDF5,
     reloads it with the library's custom objects, and checks that the
     reloaded model reproduces the original model's predictions.
     """
     model_config = configs.CalibratedLatticeEnsembleConfig(
         feature_configs=copy.deepcopy(feature_configs),
         lattices=[['numerical_1', 'categorical'],
                   ['numerical_2', 'categorical']],
         num_lattices=2,
         lattice_rank=2,
         separate_calibrators=True,
         regularizer_configs=[
             configs.RegularizerConfig('calib_hessian', l2=1e-3),
             configs.RegularizerConfig('torsion', l2=1e-4),
         ],
         output_min=-1.0,
         output_max=1.0,
         output_calibration=True,
         output_calibration_num_keypoints=5,
         output_initialization=[-1.0, 1.0])
     model = premade.CalibratedLatticeEnsemble(model_config)
     # Compile and fit model.
     model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(0.1))
     model.fit(fake_data['train_xs'], fake_data['train_ys'])
     # Save model using H5 format.
     with tempfile.NamedTemporaryFile(suffix='.h5') as f:
         tf.keras.models.save_model(model, f.name)
         loaded_model = tf.keras.models.load_model(
             f.name, custom_objects=premade.get_custom_objects())
         # Reloaded model must reproduce the original predictions.
         self.assertAllClose(model.predict(fake_data['eval_xs']),
                             loaded_model.predict(fake_data['eval_xs']))
Example #5
0
 def testLatticeH5FormatSaveLoad(self, parameterization, num_terms):
   """Tests H5 save/load for CalibratedLattice under a given parameterization.

   Args:
     parameterization: lattice parameterization to put in the model config.
     num_terms: number of terms passed through to the model config.
   """
   model_config = configs.CalibratedLatticeConfig(
       feature_configs=copy.deepcopy(feature_configs),
       parameterization=parameterization,
       num_terms=num_terms,
       regularizer_configs=[
           configs.RegularizerConfig('calib_wrinkle', l2=1e-3),
           configs.RegularizerConfig('torsion', l2=1e-3),
       ],
       output_min=0.0,
       output_max=1.0,
       output_calibration=True,
       output_calibration_num_keypoints=6,
       output_initialization=[0.0, 1.0])
   # 'kronecker_factored' gets regularizers and per-feature constraints
   # stripped — presumably unsupported by that parameterization; the sibling
   # RTL test states regularization must be removed for it.
   if parameterization == 'kronecker_factored':
     model_config.regularizer_configs = None
     for feature_config in model_config.feature_configs:
       feature_config.lattice_size = 2
       feature_config.unimodality = 'none'
       feature_config.reflects_trust_in = None
       feature_config.dominates = None
       feature_config.regularizer_configs = None
   model = premade.CalibratedLattice(model_config)
   # Compile and fit model.
   model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(0.1))
   model.fit(fake_data['train_xs'], fake_data['train_ys'])
   # Save model using H5 format.
   with tempfile.NamedTemporaryFile(suffix='.h5') as f:
     tf.keras.models.save_model(model, f.name)
     loaded_model = tf.keras.models.load_model(
         f.name, custom_objects=premade.get_custom_objects())
     # Reloaded model must reproduce the original predictions.
     self.assertAllClose(
         model.predict(fake_data['eval_xs']),
         loaded_model.predict(fake_data['eval_xs']))
Example #6
0
 def testCalibratedLatticeEnsembleCrystals(self):
   """Trains a crystals-based lattice ensemble end to end and checks its AUC.

   Flow: build the ensemble config with lattices='crystals', train a
   prefitting model, use it to set the crystals lattice arrangement on the
   config, then train the final model and require AUC (results[1]) > 0.85.
   """
   # Construct model.
   self._ResetAllBackends()
   model_config = configs.CalibratedLatticeEnsembleConfig(
       regularizer_configs=[
           configs.RegularizerConfig(name='torsion', l2=1e-4),
           configs.RegularizerConfig(name='output_calib_hessian', l2=1e-4),
       ],
       feature_configs=self.heart_feature_configs,
       lattices='crystals',
       num_lattices=6,
       lattice_rank=5,
       separate_calibrators=True,
       output_calibration=False,
       output_min=self.heart_min_label,
       # Subtract epsilon so numerical error cannot push predictions past the
       # label maximum.
       output_max=self.heart_max_label - self.numerical_error_epsilon,
       output_initialization=[self.heart_min_label, self.heart_max_label],
   )
   # Perform prefitting steps.
   prefitting_model_config = premade_lib.construct_prefitting_model_config(
       model_config)
   prefitting_model = premade.CalibratedLatticeEnsemble(
       prefitting_model_config)
   prefitting_model.compile(
       loss=tf.keras.losses.BinaryCrossentropy(),
       optimizer=tf.keras.optimizers.Adam(0.01))
   prefitting_model.fit(
       self.heart_train_x,
       self.heart_train_y,
       batch_size=100,
       epochs=50,
       verbose=False)
   # The trained prefitting model determines the crystals lattice feature
   # arrangement written into model_config.
   premade_lib.set_crystals_lattice_ensemble(model_config,
                                             prefitting_model_config,
                                             prefitting_model)
   # Construct and train final model
   model = premade.CalibratedLatticeEnsemble(model_config)
   model.compile(
       loss=tf.keras.losses.BinaryCrossentropy(),
       metrics=tf.keras.metrics.AUC(),
       optimizer=tf.keras.optimizers.Adam(0.01))
   model.fit(
       self.heart_train_x,
       self.heart_train_y,
       batch_size=100,
       epochs=200,
       verbose=False)
   results = model.evaluate(
       self.heart_test_x, self.heart_test_y, verbose=False)
   logging.info('Calibrated lattice ensemble classifier results:')
   logging.info(results)
   # results[0] is the loss; results[1] is the AUC metric.
   self.assertGreater(results[1], 0.85)
Example #7
0
 def testCalibratedLatticeEnsembleRegressor(self, feature_names, lattices,
                                            num_lattices, lattice_rank,
                                            separate_calibrators,
                                            output_calibration,
                                            average_loss):
     """Trains a canned lattice-ensemble regressor and bounds its eval loss.

     Args:
       feature_names: Boston feature names the model should consume.
       lattices: lattice arrangement ('rtl_layer' or an explicit/algorithmic
           choice) forwarded to the ensemble config.
       num_lattices: number of lattices in the ensemble.
       lattice_rank: number of features per lattice.
       separate_calibrators: whether each lattice gets its own calibrators.
       output_calibration: whether output calibration is enabled.
       average_loss: upper bound the evaluated average_loss must stay below.
     """
     self._ResetAllBackends()
     # Restrict columns and configs to the requested feature subset.
     feature_columns = [
         feature_column for feature_column in self.boston_feature_columns
         if feature_column.name in feature_names
     ]
     feature_configs = [
         feature_config for feature_config in self.boston_feature_configs
         if feature_config.name in feature_names
     ]
     if lattices == 'rtl_layer':
         # RTL Layer only supports monotonicity and bound constraints.
         feature_configs = copy.deepcopy(feature_configs)
         for feature_config in feature_configs:
             feature_config.lattice_size = 2
             feature_config.unimodality = 'none'
             feature_config.reflects_trust_in = None
             feature_config.dominates = None
             feature_config.regularizer_configs = None
     model_config = configs.CalibratedLatticeEnsembleConfig(
         regularizer_configs=[
             configs.RegularizerConfig(name='torsion', l2=1e-5),
             configs.RegularizerConfig(name='output_calib_hessian',
                                       l2=1e-5),
         ],
         feature_configs=feature_configs,
         lattices=lattices,
         num_lattices=num_lattices,
         lattice_rank=lattice_rank,
         separate_calibrators=separate_calibrators,
         output_calibration=output_calibration,
     )
     estimator = estimators.CannedRegressor(
         feature_columns=feature_columns,
         model_config=model_config,
         feature_analysis_input_fn=self._GetBostonTrainInputFn(
             num_epochs=1),
         prefitting_input_fn=self._GetBostonTrainInputFn(num_epochs=50),
         optimizer=tf.keras.optimizers.Adam(0.05),
         prefitting_optimizer=tf.keras.optimizers.Adam(0.05))
     estimator.train(input_fn=self._GetBostonTrainInputFn(num_epochs=200))
     results = estimator.evaluate(input_fn=self._GetBostonTestInputFn())
     logging.info('Calibrated lattice ensemble regressor results:')
     logging.info(results)
     self.assertLess(results['average_loss'], average_loss)
Example #8
0
 def testCalibratedLatticeEnsembleRTL(self, interpolation, parameterization,
                                      num_terms, expected_minimum_auc):
   """Trains an RTL-layer lattice ensemble classifier and bounds its AUC.

   Args:
     interpolation: lattice interpolation mode for the ensemble config.
     parameterization: lattice parameterization; 'kronecker_factored'
         requires all regularization to be removed.
     num_terms: number of terms forwarded to the ensemble config.
     expected_minimum_auc: lower bound the evaluated AUC must exceed.
   """
   # Construct model.
   self._ResetAllBackends()
   # RTL layer only supports monotonicity/bound constraints, so strip the
   # unsupported per-feature settings from a copy of the shared configs.
   rtl_feature_configs = copy.deepcopy(self.heart_feature_configs)
   for feature_config in rtl_feature_configs:
     feature_config.lattice_size = 2
     feature_config.unimodality = 'none'
     feature_config.reflects_trust_in = None
     feature_config.dominates = None
     feature_config.regularizer_configs = None
   model_config = configs.CalibratedLatticeEnsembleConfig(
       regularizer_configs=[
           configs.RegularizerConfig(name='torsion', l2=1e-4),
           configs.RegularizerConfig(name='output_calib_hessian', l2=1e-4),
       ],
       feature_configs=rtl_feature_configs,
       lattices='rtl_layer',
       num_lattices=6,
       lattice_rank=5,
       interpolation=interpolation,
       parameterization=parameterization,
       num_terms=num_terms,
       separate_calibrators=True,
       output_calibration=False,
       output_min=self.heart_min_label,
       # Subtract epsilon so numerical error cannot push predictions past the
       # label maximum.
       output_max=self.heart_max_label - self.numerical_error_epsilon,
       output_initialization=[self.heart_min_label, self.heart_max_label],
   )
   # We must remove all regularization if using 'kronecker_factored'.
   if parameterization == 'kronecker_factored':
     model_config.regularizer_configs = None
   # Construct and train final model
   model = premade.CalibratedLatticeEnsemble(model_config)
   model.compile(
       loss=tf.keras.losses.BinaryCrossentropy(),
       metrics=tf.keras.metrics.AUC(),
       optimizer=tf.keras.optimizers.Adam(0.01))
   model.fit(
       self.heart_train_x,
       self.heart_train_y,
       batch_size=100,
       epochs=200,
       verbose=False)
   results = model.evaluate(
       self.heart_test_x, self.heart_test_y, verbose=False)
   logging.info('Calibrated lattice ensemble rtl classifier results:')
   logging.info(results)
   # results[0] is the loss; results[1] is the AUC metric.
   self.assertGreater(results[1], expected_minimum_auc)
Example #9
0
 def testCalibratedLinearClassifier(self, feature_names, output_calibration,
                                    use_bias, auc):
   """Trains a canned calibrated-linear classifier and bounds its AUC.

   Args:
     feature_names: heart feature names the model should consume.
     output_calibration: whether the model config enables output calibration.
     use_bias: whether the linear model uses a bias term.
     auc: lower bound that the evaluated AUC must exceed.
   """
   self._ResetAllBackends()
   # Keep only the columns/configs whose names were requested.
   wanted = set(feature_names)
   feature_columns = [
       column for column in self.heart_feature_columns
       if column.name in wanted
   ]
   feature_configs = [
       config for config in self.heart_feature_configs
       if config.name in wanted
   ]
   model_config = configs.CalibratedLinearConfig(
       use_bias=use_bias,
       regularizer_configs=[
           configs.RegularizerConfig(name='output_calib_hessian', l2=1e-4),
       ],
       output_calibration=output_calibration,
       feature_configs=feature_configs)
   estimator = estimators.CannedClassifier(
       feature_columns=feature_columns,
       model_config=model_config,
       feature_analysis_input_fn=self._GetHeartTrainInputFn(num_epochs=1),
       optimizer=tf.keras.optimizers.Adam(0.01))
   estimator.train(input_fn=self._GetHeartTrainInputFn(num_epochs=200))
   results = estimator.evaluate(input_fn=self._GetHeartTestInputFn())
   logging.info('Calibrated linear classifier results:')
   logging.info(results)
   self.assertGreater(results['auc'], auc)
Example #10
0
 def testLatticeFromConfig(self):
   """Checks that CalibratedLattice serialization round-trips losslessly."""
   config = configs.CalibratedLatticeConfig(
       feature_configs=copy.deepcopy(feature_configs),
       regularizer_configs=[
           configs.RegularizerConfig('calib_wrinkle', l2=1e-3),
           configs.RegularizerConfig('torsion', l2=1e-3),
       ],
       output_min=0.0,
       output_max=1.0,
       output_calibration=True,
       output_calibration_num_keypoints=6,
       output_initialization=[0.0, 1.0])
   original = premade.CalibratedLattice(config)
   rebuilt = premade.CalibratedLattice.from_config(original.get_config())

   def canonical_json(model):
     # Deterministic JSON form so dict ordering cannot affect the comparison.
     return json.dumps(model.get_config(), sort_keys=True, cls=self.Encoder)

   self.assertEqual(canonical_json(original), canonical_json(rebuilt))
Example #11
0
 def testCalibratedLattice(self, interpolation, parameterization, num_terms,
                           expected_minimum_auc):
   """Trains a CalibratedLattice classifier on 5 features and bounds its AUC.

   Args:
     interpolation: lattice interpolation mode for the model config.
     parameterization: lattice parameterization; 'kronecker_factored' gets
         all regularization and per-feature constraints removed.
     num_terms: number of terms forwarded to the model config.
     expected_minimum_auc: lower bound the evaluated AUC must exceed.
   """
   # Construct model configuration.
   self._ResetAllBackends()
   # Only the first five heart features are used by this model.
   lattice_feature_configs = copy.deepcopy(self.heart_feature_configs[:5])
   model_config = configs.CalibratedLatticeConfig(
       feature_configs=lattice_feature_configs,
       interpolation=interpolation,
       parameterization=parameterization,
       num_terms=num_terms,
       regularizer_configs=[
           configs.RegularizerConfig(name='torsion', l2=1e-4),
           configs.RegularizerConfig(name='output_calib_hessian', l2=1e-4),
       ],
       output_min=self.heart_min_label,
       output_max=self.heart_max_label,
       output_calibration=False,
       output_initialization=[self.heart_min_label, self.heart_max_label],
   )
   # Strip regularizers and per-feature constraints for 'kronecker_factored'
   # — presumably unsupported by that parameterization; TODO confirm.
   if parameterization == 'kronecker_factored':
     model_config.regularizer_configs = None
     for feature_config in model_config.feature_configs:
       feature_config.lattice_size = 2
       feature_config.unimodality = 'none'
       feature_config.reflects_trust_in = None
       feature_config.dominates = None
       feature_config.regularizer_configs = None
   # Construct and train final model
   model = premade.CalibratedLattice(model_config)
   model.compile(
       loss=tf.keras.losses.BinaryCrossentropy(),
       metrics=tf.keras.metrics.AUC(),
       optimizer=tf.keras.optimizers.Adam(0.01))
   # NOTE(review): heart_train_x[:5] matches the [:5] feature-config slice —
   # looks like the inputs are a per-feature sequence; confirm against setUp.
   model.fit(
       self.heart_train_x[:5],
       self.heart_train_y,
       batch_size=100,
       epochs=200,
       verbose=False)
   results = model.evaluate(
       self.heart_test_x[:5], self.heart_test_y, verbose=False)
   logging.info('Calibrated lattice classifier results:')
   logging.info(results)
   # results[0] is the loss; results[1] is the AUC metric.
   self.assertGreater(results[1], expected_minimum_auc)
Example #12
0
 def testAggregateFromConfig(self):
   """Checks that AggregateFunction serialization round-trips losslessly."""
   config = configs.AggregateFunctionConfig(
       feature_configs=feature_configs,
       regularizer_configs=[
           configs.RegularizerConfig('calib_hessian', l2=1e-4),
           configs.RegularizerConfig('torsion', l2=1e-3),
       ],
       middle_calibration=True,
       middle_monotonicity='increasing',
       output_min=0.0,
       output_max=1.0,
       output_calibration=True,
       output_calibration_num_keypoints=8,
       output_initialization=[0.0, 1.0])
   original = premade.AggregateFunction(config)
   rebuilt = premade.AggregateFunction.from_config(original.get_config())

   def canonical_json(model):
     # Deterministic JSON form so dict ordering cannot affect the comparison.
     return json.dumps(model.get_config(), sort_keys=True, cls=self.Encoder)

   self.assertEqual(canonical_json(original), canonical_json(rebuilt))
Example #13
0
 def testLinearFromConfig(self):
   """Checks that CalibratedLinear serialization round-trips losslessly."""
   config = configs.CalibratedLinearConfig(
       feature_configs=copy.deepcopy(feature_configs),
       regularizer_configs=[
           configs.RegularizerConfig('calib_hessian', l2=1e-4),
           configs.RegularizerConfig('torsion', l2=1e-3),
       ],
       use_bias=True,
       output_min=0.0,
       output_max=1.0,
       output_calibration=True,
       output_calibration_num_keypoints=6,
       output_initialization=[0.0, 1.0])
   original = premade.CalibratedLinear(config)
   rebuilt = premade.CalibratedLinear.from_config(
       original.get_config(), custom_objects=premade.get_custom_objects())

   def canonical_json(model):
     # Deterministic JSON form so dict ordering cannot affect the comparison.
     return json.dumps(model.get_config(), sort_keys=True, cls=self.Encoder)

   self.assertEqual(canonical_json(original), canonical_json(rebuilt))
Example #14
0
 def testCalibratedLatticeEnsembleClassifier(self, feature_names, lattices,
                                             num_lattices, lattice_rank,
                                             separate_calibrators,
                                             output_calibration, auc):
     """Trains a canned lattice-ensemble classifier and bounds its AUC.

     Args:
       feature_names: heart feature names the model should consume.
       lattices: lattice arrangement forwarded to the ensemble config.
       num_lattices: number of lattices in the ensemble.
       lattice_rank: number of features per lattice.
       separate_calibrators: whether each lattice gets its own calibrators.
       output_calibration: whether output calibration is enabled.
       auc: lower bound that the evaluated AUC must exceed.
     """
     self._ResetAllBackends()
     # Restrict columns and configs to the requested feature subset.
     feature_columns = [
         feature_column for feature_column in self.heart_feature_columns
         if feature_column.name in feature_names
     ]
     feature_configs = [
         feature_config for feature_config in self.heart_feature_configs
         if feature_config.name in feature_names
     ]
     model_config = configs.CalibratedLatticeEnsembleConfig(
         regularizer_configs=[
             configs.RegularizerConfig(name='torsion', l2=1e-4),
             configs.RegularizerConfig(name='output_calib_hessian',
                                       l2=1e-4),
         ],
         feature_configs=feature_configs,
         lattices=lattices,
         num_lattices=num_lattices,
         lattice_rank=lattice_rank,
         separate_calibrators=separate_calibrators,
         output_calibration=output_calibration,
     )
     estimator = estimators.CannedClassifier(
         feature_columns=feature_columns,
         model_config=model_config,
         feature_analysis_input_fn=self._GetHeartTrainInputFn(num_epochs=1),
         prefitting_input_fn=self._GetHeartTrainInputFn(num_epochs=50),
         optimizer=tf.keras.optimizers.Adam(0.01),
         prefitting_optimizer=tf.keras.optimizers.Adam(0.01))
     estimator.train(input_fn=self._GetHeartTrainInputFn(num_epochs=200))
     results = estimator.evaluate(input_fn=self._GetHeartTestInputFn())
     logging.info('Calibrated lattice ensemble classifier results:')
     logging.info(results)
     self.assertGreater(results['auc'], auc)
Example #15
0
  def setUp(self):
    """Loads the Heart and Boston datasets and builds shared test fixtures.

    Populates train/test splits, feature columns, and feature configs for
    both datasets; individual tests select subsets of these fixtures.
    """
    super(CannedEstimatorsTest, self).setUp()
    # Tolerance used by tests for approximate comparisons.
    self.eps = 0.001

    # UCI Statlog (Heart) dataset.
    heart_csv_file = tf.keras.utils.get_file(
        'heart.csv',
        'http://storage.googleapis.com/download.tensorflow.org/data/heart.csv')
    heart_df = pd.read_csv(heart_csv_file)
    heart_target = heart_df.pop('target')
    # 80/20 train/test split, taken in file order.
    heart_train_size = int(len(heart_df) * 0.8)
    self.heart_train_x = heart_df[:heart_train_size]
    self.heart_train_y = heart_target[:heart_train_size]
    self.heart_test_x = heart_df[heart_train_size:]
    self.heart_test_y = heart_target[heart_train_size:]

    # Feature columns.
    # - age
    # - sex
    # - cp        chest pain type (4 values)
    # - trestbps  resting blood pressure
    # - chol      serum cholestoral in mg/dl
    # - fbs       fasting blood sugar > 120 mg/dl
    # - restecg   resting electrocardiographic results (values 0,1,2)
    # - thalach   maximum heart rate achieved
    # - exang     exercise induced angina
    # - oldpeak   ST depression induced by exercise relative to rest
    # - slope     the slope of the peak exercise ST segment
    # - ca        number of major vessels (0-3) colored by flourosopy
    # - thal      3 = normal; 6 = fixed defect; 7 = reversable defect
    self.heart_feature_columns = [
        fc.numeric_column('age', default_value=-1),
        fc.categorical_column_with_vocabulary_list('sex', [0, 1]),
        fc.numeric_column('cp'),
        fc.numeric_column('trestbps', default_value=-1),
        fc.numeric_column('chol'),
        fc.categorical_column_with_vocabulary_list('fbs', [0, 1]),
        fc.categorical_column_with_vocabulary_list('restecg', [0, 1, 2]),
        fc.numeric_column('thalach'),
        fc.categorical_column_with_vocabulary_list('exang', [0, 1]),
        fc.numeric_column('oldpeak'),
        fc.categorical_column_with_vocabulary_list('slope', [0, 1, 2]),
        fc.numeric_column('ca'),
        fc.categorical_column_with_vocabulary_list(
            'thal', ['normal', 'fixed', 'reversible']),
    ]

    # Feature configs. Each model can pick and choose which features to use.
    self.heart_feature_configs = [
        configs.FeatureConfig(
            name='age',
            lattice_size=3,
            pwl_calibration_num_keypoints=5,
            monotonicity=1,
            pwl_calibration_clip_max=100,
        ),
        configs.FeatureConfig(
            name='cp',
            pwl_calibration_num_keypoints=4,
            pwl_calibration_input_keypoints='uniform',
            monotonicity='increasing',
        ),
        configs.FeatureConfig(
            name='chol',
            pwl_calibration_input_keypoints=[126.0, 210.0, 247.0, 286.0, 564.0],
            monotonicity=1,
            pwl_calibration_clip_min=130,
            pwl_calibration_clamp_min=True,
            pwl_calibration_clamp_max=True,
            regularizer_configs=[
                configs.RegularizerConfig(name='calib_hessian', l2=1e-4),
            ],
        ),
        configs.FeatureConfig(
            name='fbs',
            # Partial monotonicity: output(0) <= output(1)
            monotonicity=[(0, 1)],
        ),
        configs.FeatureConfig(
            name='trestbps',
            pwl_calibration_num_keypoints=5,
            monotonicity='decreasing',
        ),
        configs.FeatureConfig(
            name='thalach',
            pwl_calibration_num_keypoints=5,
            monotonicity=-1,
        ),
        configs.FeatureConfig(
            name='restecg',
            monotonicity=[(0, 1), (0, 2)],
        ),
        configs.FeatureConfig(
            name='exang',
            monotonicity=[(0, 1)],
        ),
        configs.FeatureConfig(
            name='oldpeak',
            pwl_calibration_num_keypoints=5,
            monotonicity=1,
        ),
        configs.FeatureConfig(
            name='slope',
            monotonicity=[(0, 1), (1, 2)],
        ),
        configs.FeatureConfig(
            name='ca',
            pwl_calibration_num_keypoints=4,
            monotonicity='increasing',
        ),
        configs.FeatureConfig(
            name='thal',
            monotonicity=[('normal', 'fixed'), ('normal', 'reversible')],
        ),
    ]

    # UCI Boston dataset.
    # NOTE(review): sklearn's load_boston was deprecated and removed in
    # scikit-learn 1.2 — this will fail on newer versions; confirm the pinned
    # scikit-learn version.
    boston_dataset = load_boston()
    boston_df = pd.DataFrame(
        boston_dataset.data, columns=boston_dataset.feature_names)
    boston_df['CHAS'] = boston_df['CHAS'].astype(np.int32)
    boston_target = pd.Series(boston_dataset.target)
    # 80/20 train/test split, taken in file order.
    boston_train_size = int(len(boston_df) * 0.8)
    self.boston_train_x = boston_df[:boston_train_size]
    self.boston_train_y = boston_target[:boston_train_size]
    self.boston_test_x = boston_df[boston_train_size:]
    self.boston_test_y = boston_target[boston_train_size:]

    # Feature columns.
    # - CRIM     per capita crime rate by town
    # - ZN       proportion of residential land zoned for lots over 25,000 sq.ft
    # - INDUS    proportion of non-retail business acres per town
    # - CHAS     Charles River dummy variable (= 1 if tract bounds river)
    # - NOX      nitric oxides concentration (parts per 10 million)
    # - RM       average number of rooms per dwelling
    # - AGE      proportion of owner-occupied units built prior to 1940
    # - DIS      weighted distances to five Boston employment centres
    # - RAD      index of accessibility to radial highways
    # - TAX      full-value property-tax rate per $10,000
    # - PTRATIO  pupil-teacher ratio by town
    # - B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
    # - LSTAT    % lower status of the population
    # - Target   Median value of owner-occupied homes in $1000's
    self.boston_feature_columns = [
        fc.numeric_column('CRIM'),
        fc.numeric_column('ZN'),
        fc.numeric_column('INDUS'),
        fc.categorical_column_with_vocabulary_list('CHAS', [0, 1]),
        fc.numeric_column('NOX'),
        fc.numeric_column('RM'),
        fc.numeric_column('AGE'),
        fc.numeric_column('DIS'),
        fc.numeric_column('RAD'),
        fc.numeric_column('TAX'),
        fc.numeric_column('PTRATIO'),
        fc.numeric_column('B'),
        fc.numeric_column('LSTAT'),
    ]

    # Feature configs. Each model can pick and choose which features to use.
    self.boston_feature_configs = [
        configs.FeatureConfig(
            name='CRIM',
            lattice_size=3,
            monotonicity=-1,
            pwl_calibration_convexity=1,
        ),
        configs.FeatureConfig(
            name='ZN',
            pwl_calibration_input_keypoints=[0.0, 25.0, 50.0, 75.0, 100.0],
            monotonicity=1,
            reflects_trust_in=[
                configs.TrustConfig(feature_name='RM', trust_type='trapezoid'),
            ],
        ),
        configs.FeatureConfig(
            name='INDUS',
            pwl_calibration_input_keypoints='uniform',
            pwl_calibration_always_monotonic=False,
            reflects_trust_in=[
                configs.TrustConfig(
                    feature_name='RM',
                    trust_type='edgeworth',
                    direction='negative'),
            ],
            regularizer_configs=[
                configs.RegularizerConfig(name='calib_wrinkle', l2=1e-4),
            ],
        ),
        configs.FeatureConfig(name='CHAS',),
        configs.FeatureConfig(name='NOX',),
        configs.FeatureConfig(
            name='RM',
            monotonicity='increasing',
            pwl_calibration_convexity='concave',
        ),
        configs.FeatureConfig(
            name='AGE',
            monotonicity=-1,
        ),
        configs.FeatureConfig(
            name='DIS',
            lattice_size=3,
            unimodality=1,
        ),
        configs.FeatureConfig(name='RAD',),
        configs.FeatureConfig(name='TAX',),
        configs.FeatureConfig(
            name='PTRATIO',
            monotonicity='decreasing',
        ),
        configs.FeatureConfig(name='B',),
        configs.FeatureConfig(
            name='LSTAT',
            monotonicity=-1,
            dominates=[
                configs.DominanceConfig(
                    feature_name='AGE', dominance_type='monotonic'),
            ],
        ),
    ]
Example #16
0
  def setUp(self):
    super(PremadeTest, self).setUp()

    # UCI Statlog (Heart) dataset.
    heart_csv_file = tf.keras.utils.get_file(
        'heart.csv', 'http://storage.googleapis.com/applied-dl/heart.csv')
    heart_df = pd.read_csv(heart_csv_file)
    heart_train_size = int(len(heart_df) * 0.8)
    heart_train_dataframe = heart_df[:heart_train_size]
    heart_test_dataframe = heart_df[heart_train_size:]

    # Features:
    # - age
    # - sex
    # - cp        chest pain type (4 values)
    # - trestbps  resting blood pressure
    # - chol      serum cholestoral in mg/dl
    # - fbs       fasting blood sugar > 120 mg/dl
    # - restecg   resting electrocardiographic results (values 0,1,2)
    # - thalach   maximum heart rate achieved
    # - exang     exercise induced angina
    # - oldpeak   ST depression induced by exercise relative to rest
    # - slope     the slope of the peak exercise ST segment
    # - ca        number of major vessels (0-3) colored by flourosopy
    # - thal      3 = normal; 6 = fixed defect; 7 = reversable defect
    #
    # This ordering of feature names will be the exact same order that we
    # construct our model to expect.
    self.heart_feature_names = [
        'age', 'sex', 'cp', 'chol', 'fbs', 'trestbps', 'thalach', 'restecg',
        'exang', 'oldpeak', 'slope', 'ca', 'thal'
    ]
    feature_name_indices = {
        name: index for index, name in enumerate(self.heart_feature_names)
    }
    # This is the vocab list and mapping we will use for the 'thal' categorical
    # feature.
    thal_vocab_list = ['normal', 'fixed', 'reversible']
    thal_map = {category: i for i, category in enumerate(thal_vocab_list)}

    # Custom function for converting thal categories to buckets
    def convert_thal_features(thal_features):
      # Note that two examples in the test set are already converted.
      return np.array([
          thal_map[feature] if feature in thal_vocab_list else feature
          for feature in thal_features
      ])

    # Custom function for extracting each feature.
    def extract_features(dataframe, label_name='target'):
      features = []
      for feature_name in self.heart_feature_names:
        if feature_name == 'thal':
          features.append(
              convert_thal_features(
                  dataframe[feature_name].values).astype(float))
        else:
          features.append(dataframe[feature_name].values.astype(float))
      labels = dataframe[label_name].values.astype(float)
      return features, labels

    self.heart_train_x, self.heart_train_y = extract_features(
        heart_train_dataframe)
    self.heart_test_x, self.heart_test_y = extract_features(
        heart_test_dataframe)

    # Let's define our label minimum and maximum.
    self.heart_min_label = float(np.min(self.heart_train_y))
    self.heart_max_label = float(np.max(self.heart_train_y))
    # Our lattice models may have predictions above 1.0 due to numerical errors.
    # We can subtract this small epsilon value from our output_max to make sure
    # we do not predict values outside of our label bound.
    self.numerical_error_epsilon = 1e-5

    def compute_quantiles(features,
                          num_keypoints=10,
                          clip_min=None,
                          clip_max=None,
                          missing_value=None):
      """Computes calibration input keypoints as quantiles of feature values.

      Args:
        features: 1-d array-like of raw feature values.
        num_keypoints: Number of quantile keypoints to return.
        clip_min: If set, values are clipped below at this bound; the bound is
          also appended so it becomes the smallest keypoint.
        clip_max: If set, values are clipped above at this bound; the bound is
          also appended so it becomes the largest keypoint.
        missing_value: If set, this value is excluded before computing
          quantiles.

      Returns:
        Float array of `num_keypoints` quantiles over the unique, non-missing
        feature values.
      """
      # Clip min and max if desired.
      if clip_min is not None:
        features = np.maximum(features, clip_min)
        features = np.append(features, clip_min)
      if clip_max is not None:
        features = np.minimum(features, clip_max)
        features = np.append(features, clip_max)
      # Make features unique.
      unique_features = np.unique(features)
      # Remove missing values if specified.
      if missing_value is not None:
        unique_features = np.delete(unique_features,
                                    np.where(unique_features == missing_value))
      # Compute and return quantiles over unique non-missing feature values.
      # NOTE: `method=` replaces the `interpolation=` keyword, which was
      # deprecated in NumPy 1.22 and removed in NumPy 2.0.
      return np.quantile(
          unique_features,
          np.linspace(0., 1., num=num_keypoints),
          method='nearest').astype(float)

    self.heart_feature_configs = [
        configs.FeatureConfig(
            name='age',
            lattice_size=3,
            monotonicity='increasing',
            # We must set the keypoints manually.
            pwl_calibration_num_keypoints=5,
            pwl_calibration_input_keypoints=compute_quantiles(
                self.heart_train_x[feature_name_indices['age']],
                num_keypoints=5,
                clip_max=100),
            # Per feature regularization.
            regularizer_configs=[
                configs.RegularizerConfig(name='calib_wrinkle', l2=0.1),
            ],
        ),
        configs.FeatureConfig(
            name='sex',
            num_buckets=2,
        ),
        configs.FeatureConfig(
            name='cp',
            monotonicity='increasing',
            # Keypoints that are uniformly spaced.
            pwl_calibration_num_keypoints=4,
            pwl_calibration_input_keypoints=np.linspace(
                np.min(self.heart_train_x[feature_name_indices['cp']]),
                np.max(self.heart_train_x[feature_name_indices['cp']]),
                num=4),
        ),
        configs.FeatureConfig(
            name='chol',
            monotonicity='increasing',
            # Explicit input keypoints initialization.
            pwl_calibration_input_keypoints=[126.0, 210.0, 247.0, 286.0, 564.0],
            # Calibration can be forced to span the full output range
            # by clamping.
            pwl_calibration_clamp_min=True,
            pwl_calibration_clamp_max=True,
            # Per feature regularization.
            regularizer_configs=[
                configs.RegularizerConfig(name='calib_hessian', l2=1e-4),
            ],
        ),
        configs.FeatureConfig(
            name='fbs',
            # Partial monotonicity: output(0) <= output(1)
            monotonicity=[(0, 1)],
            num_buckets=2,
        ),
        configs.FeatureConfig(
            name='trestbps',
            monotonicity='decreasing',
            pwl_calibration_num_keypoints=5,
            pwl_calibration_input_keypoints=compute_quantiles(
                self.heart_train_x[feature_name_indices['trestbps']],
                num_keypoints=5),
        ),
        configs.FeatureConfig(
            name='thalach',
            monotonicity='decreasing',
            pwl_calibration_num_keypoints=5,
            pwl_calibration_input_keypoints=compute_quantiles(
                self.heart_train_x[feature_name_indices['thalach']],
                num_keypoints=5),
        ),
        configs.FeatureConfig(
            name='restecg',
            # Partial monotonicity:
            # output(0) <= output(1), output(0) <= output(2)
            monotonicity=[(0, 1), (0, 2)],
            num_buckets=3,
        ),
        configs.FeatureConfig(
            name='exang',
            # Partial monotonicity: output(0) <= output(1)
            monotonicity=[(0, 1)],
            num_buckets=2,
        ),
        configs.FeatureConfig(
            name='oldpeak',
            monotonicity='increasing',
            pwl_calibration_num_keypoints=5,
            pwl_calibration_input_keypoints=compute_quantiles(
                self.heart_train_x[feature_name_indices['oldpeak']],
                num_keypoints=5),
        ),
        configs.FeatureConfig(
            name='slope',
            # Partial monotonicity:
            # output(0) <= output(1), output(1) <= output(2)
            monotonicity=[(0, 1), (1, 2)],
            num_buckets=3,
        ),
        configs.FeatureConfig(
            name='ca',
            monotonicity='increasing',
            pwl_calibration_num_keypoints=4,
            pwl_calibration_input_keypoints=compute_quantiles(
                self.heart_train_x[feature_name_indices['ca']],
                num_keypoints=4),
        ),
        configs.FeatureConfig(
            name='thal',
            # Partial monotonicity:
            # output(normal) <= output(fixed)
            # output(normal) <= output(reversible)
            monotonicity=[('normal', 'fixed'), ('normal', 'reversible')],
            num_buckets=3,
            # We must specify the vocabulary list in order to later set the
            # monotonicities since we used names and not indices.
            vocabulary_list=thal_vocab_list,
        ),
    ]
    premade_lib.set_categorical_monotonicities(self.heart_feature_configs)
Example #17
0
    def test_updates(self):
        """Checks config mutation via apply_updates and direct attribute sets."""
        model_config = configs.CalibratedLatticeConfig(
            output_min=0,
            regularizer_configs=[
                configs.RegularizerConfig(name='torsion', l2=2e-3),
            ],
            feature_configs=[
                configs.FeatureConfig(
                    name='feature_a',
                    pwl_calibration_input_keypoints='quantiles',
                    pwl_calibration_num_keypoints=8,
                    monotonicity=1,
                    pwl_calibration_clip_max=100,
                ),
                configs.FeatureConfig(
                    name='feature_b',
                    lattice_size=3,
                    unimodality='valley',
                    pwl_calibration_input_keypoints='uniform',
                    pwl_calibration_num_keypoints=5,
                    pwl_calibration_clip_min=130,
                    pwl_calibration_convexity='convex',
                    regularizer_configs=[
                        configs.RegularizerConfig(name='calib_hessian',
                                                  l2=3e-3),
                    ],
                ),
                configs.FeatureConfig(
                    name='feature_c',
                    pwl_calibration_input_keypoints=[0.0, 0.5, 1.0],
                    reflects_trust_in=[
                        configs.TrustConfig(feature_name='feature_a'),
                        configs.TrustConfig(feature_name='feature_b',
                                            direction=-1),
                    ],
                ),
                configs.FeatureConfig(
                    name='feature_d',
                    num_buckets=3,
                    vocabulary_list=['a', 'b', 'c'],
                    default_value=-1,
                ),
            ])

        # Exactly seven of these entries touch the config; keys that do not
        # name a config field are ignored by apply_updates. Values may be
        # numbers or strings (strings get parsed).
        config_updates = [
            ('output_max', 1.0),  # updates existing field
            ('regularizer__torsion__l2', 0.004),  # updates existing regularizer
            ('regularizer__calib_hessian__l1', 0.005),  # inserts regularizer
            ('feature__feature_a__lattice_size', 3),  # updates existing feature
            ('feature__feature_e__lattice_size', 4),  # inserts a new feature
            ('unrelated_hparams_not_affecting_config', 'unrelated'),
            ('feature__feature_a__regularizer__calib_wrinkle__l1',
             '0.6'),  # inserts per-feature regularizer
            ('feature__feature_b__regularizer__calib_hessian__l1',
             '0.7'),  # updates per-feature regularizer
            ('yet__another__unrelated_config', '4'),
        ]
        self.assertEqual(configs.apply_updates(model_config, config_updates), 7)

        # Direct mutation; looking up an unknown feature name inserts it.
        feature_of = model_config.feature_config_by_name
        feature_of('feature_a').monotonicity = 'none'
        feature_of('feature_f').num_buckets = 4  # insert

        self.assertCountEqual(
            [fc.name for fc in model_config.feature_configs],
            ['feature_a', 'feature_b', 'feature_c', 'feature_d', 'feature_e',
             'feature_f'])
        self.assertCountEqual(
            [rc.name for rc in model_config.regularizer_configs],
            ['torsion', 'calib_hessian'])

        self.assertEqual(model_config.output_max, 1.0)
        self.assertEqual(feature_of('feature_a').lattice_size, 3)
        self.assertEqual(feature_of('feature_b').pwl_calibration_convexity,
                         'convex')
        self.assertEqual(feature_of('feature_e').lattice_size, 4)
        self.assertEqual(
            model_config.regularizer_config_by_name('torsion').l2, 0.004)
        self.assertEqual(
            model_config.regularizer_config_by_name('calib_hessian').l1, 0.005)
        self.assertEqual(
            feature_of('feature_a').regularizer_config_by_name(
                'calib_wrinkle').l1, 0.6)
        self.assertEqual(
            feature_of('feature_b').regularizer_config_by_name(
                'calib_hessian').l1, 0.7)
Example #18
0
 def test_from_config(self):
   """Verifies premade configs survive a get_config/from_config round trip."""
   feature_configs = [
       configs.FeatureConfig(
           name='feature_a',
           pwl_calibration_input_keypoints='quantiles',
           pwl_calibration_num_keypoints=8,
           monotonicity=1,
           pwl_calibration_clip_max=100,
       ),
       configs.FeatureConfig(
           name='feature_b',
           lattice_size=3,
           unimodality='valley',
           pwl_calibration_input_keypoints='uniform',
           pwl_calibration_num_keypoints=5,
           pwl_calibration_clip_min=130,
           pwl_calibration_convexity='convex',
           regularizer_configs=[
               # Fixed typo 'calib_hesian' -> 'calib_hessian' to match the
               # regularizer name used throughout the rest of these tests.
               configs.RegularizerConfig(name='calib_hessian', l2=3e-3),
           ],
       ),
       configs.FeatureConfig(
           name='feature_c',
           pwl_calibration_input_keypoints=[0.0, 0.5, 1.0],
           reflects_trust_in=[
               configs.TrustConfig(feature_name='feature_a'),
               configs.TrustConfig(feature_name='feature_b', direction=-1),
           ],
           dominates=[
               configs.DominanceConfig(
                   feature_name='feature_d', dominance_type='monotonic'),
           ],
       ),
       configs.FeatureConfig(
           name='feature_d',
           num_buckets=3,
           vocabulary_list=['a', 'b', 'c'],
           default_value=-1,
       ),
   ]
   # First we test CalibratedLatticeEnsembleConfig.
   model_config = configs.CalibratedLatticeEnsembleConfig(
       feature_configs=feature_configs,
       lattices=[['feature_a', 'feature_b'], ['feature_c', 'feature_d']],
       separate_calibrators=True,
       regularizer_configs=[
           configs.RegularizerConfig('torsion', l2=1e-4),
       ],
       output_min=0.0,
       output_max=1.0,
       output_calibration=True,
       output_calibration_num_keypoints=5,
       output_initialization=[0.0, 1.0])
   # Round-tripping through get_config/from_config must preserve every field.
   model_config_copy = configs.CalibratedLatticeEnsembleConfig.from_config(
       model_config.get_config(), tfl_custom_objects)
   self.assertDictEqual(model_config.get_config(),
                        model_config_copy.get_config())
   # Next we test CalibratedLatticeConfig.
   model_config = configs.CalibratedLatticeConfig(
       feature_configs=feature_configs,
       regularizer_configs=[
           configs.RegularizerConfig('torsion', l2=1e-4),
       ],
       output_min=0.0,
       output_max=1.0,
       output_calibration=True,
       output_calibration_num_keypoints=8,
       output_initialization='quantiles')
   model_config_copy = configs.CalibratedLatticeConfig.from_config(
       model_config.get_config(), tfl_custom_objects)
   self.assertDictEqual(model_config.get_config(),
                        model_config_copy.get_config())
   # Last we test CalibratedLinearConfig.
   model_config = configs.CalibratedLinearConfig(
       feature_configs=feature_configs,
       regularizer_configs=[
           configs.RegularizerConfig('calib_hessian', l2=1e-4),
       ],
       use_bias=True,
       output_min=0.0,
       output_max=None,
       output_calibration=True,
       output_initialization='uniform')
   model_config_copy = configs.CalibratedLinearConfig.from_config(
       model_config.get_config(), tfl_custom_objects)
   self.assertDictEqual(model_config.get_config(),
                        model_config_copy.get_config())