Esempio n. 1
0
    def testInputCalibrationLayerMultiDimensional(self):
        """Checks input calibration when feature columns are 2-dimensional."""
        x0 = [[0.1, 0.9], [0.2, 0.8], [0.3, 0.7]]
        x1 = [[0.9, 1.2], [0.8, 1.1], [0.7, 0.2]]
        input_fn, feature_names, feature_columns = self._BuildInputs(x0, x1)
        num_keypoints = 10

        # Test case where feature columns are multi-dimensional.
        with ops.Graph().as_default():
            uniform_init = self._UniformKeypoints(num_keypoints)
            tensors = input_fn()
            # x1 gets 0 keypoints, which requests no calibration for it.
            keypoints_per_feature = {'x0': num_keypoints, 'x1': 0}
            calibrated, feature_names, projection_ops, regularization = (
                pwl_calibration_layers.input_calibration_layer(
                    columns_to_tensors=tensors,
                    feature_columns=feature_columns,
                    num_keypoints=keypoints_per_feature,
                    keypoints_initializers=uniform_init))
            self.assertEqual(projection_ops, [])
            # A 2-dimensional feature contributes its name once per dimension.
            self.assertEqual(feature_names, ['x0', 'x0', 'x1', 'x1'])
            self.assertEqual(regularization, None)
            got = keypoints_initialization._materialize_locally(
                calibrated, num_steps=1)
            self.assertAllClose(
                got, [[210., 290., 0.9, 1.2], [220., 280., 0.8, 1.1],
                      [230., 270., 0.7, 0.2]])
Esempio n. 2
0
    def testInputCalibrationLayerNonCalibrated(self):
        """Checks the layer when one feature is left uncalibrated."""
        x0 = [[0.1], [0.2], [0.3], [0.3], [-1.]]
        x1 = [[0.9], [0.8], [0.7], [-1.], [0.7]]
        input_fn, feature_names, feature_columns = self._BuildInputs(x0, x1)
        num_keypoints = 10

        # Test case where one feature is not calibrated.
        with ops.Graph().as_default():
            uniform_init = self._UniformKeypoints(num_keypoints)
            tensors = input_fn()

            # Missing-value substitution is requested for x0 only; x1 uses the
            # default (None), so its values pass through untouched.
            missing_inputs = {'x0': -1., tools.DEFAULT_NAME: None}
            missing_outputs = {'x0': 7., tools.DEFAULT_NAME: None}
            calibrated, feature_names, projection_ops, regularization = (
                pwl_calibration_layers.input_calibration_layer(
                    columns_to_tensors=tensors,
                    feature_columns=feature_columns,
                    num_keypoints={'x0': num_keypoints, 'x1': 0},
                    keypoints_initializers=uniform_init,
                    missing_input_values=missing_inputs,
                    missing_output_values=missing_outputs))
            self.assertEqual(projection_ops, [])
            self.assertEqual(feature_names, ['x0', 'x1'])
            self.assertEqual(regularization, None)
            got = keypoints_initialization._materialize_locally(
                calibrated, num_steps=1)
            # x0 == -1. maps to the missing output 7.; x1 keeps its raw -1.
            self.assertAllClose(got, [[210., 0.9], [220., 0.8], [230., 0.7],
                                      [230., -1.], [7., 0.7]])
Esempio n. 3
0
    def testInputCalibrationLayer(self):
        """Checks calibration of two features with shared missing-value args."""
        x0 = [[0.1], [0.2], [0.3], [0.3], [-1.]]
        x1 = [[0.9], [0.8], [0.7], [-1.], [0.7]]
        input_fn, feature_names, feature_columns = self._BuildInputs(x0, x1)
        num_keypoints = 10

        # Test calibration of two features.
        with ops.Graph().as_default():
            uniform_init = self._UniformKeypoints(num_keypoints)
            tensors = input_fn()
            calibrated, feature_names, projection_ops, regularization = (
                pwl_calibration_layers.input_calibration_layer(
                    columns_to_tensors=tensors,
                    feature_columns=feature_columns,
                    num_keypoints=num_keypoints,
                    keypoints_initializers=uniform_init,
                    missing_input_values=-1.,
                    missing_output_values=7.))
            self.assertEqual(feature_names, ['x0', 'x1'])
            self.assertEqual(projection_ops, [])
            self.assertEqual(regularization, None)
            got = keypoints_initialization._materialize_locally(
                calibrated, num_steps=1)
            # Every input equal to -1. is replaced by the missing output 7.
            self.assertAllClose(got, [[210., 290.], [220., 280.], [230., 270.],
                                      [230., 7.], [7., 270.]])
Esempio n. 4
0
    def testInputCalibrationLayerRegularization(self):
        """Checks the combined per-feature regularization loss value."""
        x0 = [0.1, 0.2, 0.7]
        x1 = [0.9, 0.8, 0.7]
        input_fn, _, feature_columns = self._BuildInputs(x0, x1)
        num_keypoints = 10

        with ops.Graph().as_default():
            uniform_init = self._UniformKeypoints(num_keypoints)
            tensors = input_fn()
            # Mix of L1/L2 and Laplacian regularizers; None disables a term
            # for that feature.
            l1_amounts = {'x0': 1.0, 'x1': 2.0}
            l2_amounts = {'x0': 0.5, 'x1': None}
            l1_laplacian_amounts = {'x0': None, 'x1': 3.0}
            l2_laplacian_amounts = {'x0': None, 'x1': 5.0}
            _, _, _, regularization = (
                pwl_calibration_layers.input_calibration_layer(
                    columns_to_tensors=tensors,
                    feature_columns=feature_columns,
                    num_keypoints={'x0': num_keypoints, 'x1': num_keypoints},
                    l1_reg=l1_amounts,
                    l2_reg=l2_amounts,
                    l1_laplacian_reg=l1_laplacian_amounts,
                    l2_laplacian_reg=l2_laplacian_amounts,
                    keypoints_initializers=uniform_init))
            with self.test_session() as sess:
                sess.run(variables.global_variables_initializer())
                got = sess.run(regularization)
                self.assertAlmostEqual(got, 330948.12, delta=1e-1)
Esempio n. 5
0
def input_calibration_layer_from_hparams(columns_to_tensors,
                                         feature_columns,
                                         hparams,
                                         quantiles_dir=None,
                                         keypoints_initializers=None,
                                         name=None,
                                         dtype=dtypes.float32):
    """Creates a calibration layer for the input using hyper-parameters.

  Similar to `input_calibration_layer` but reads its parameters from a
  `CalibratedHParams` object.

  Args:
    columns_to_tensors: A mapping from feature name to tensors. 'string' key
      means a base feature (not-transformed). If feature_columns is not set
      these are the features calibrated. Otherwise the transformed
      feature_columns are the ones calibrated.
    feature_columns: An iterable containing all the feature columns used by the
      model. Optional, if not set the model will use all features given in
      columns_to_tensors. All items in the set should be instances of
      classes derived from `FeatureColumn`.
    hparams: Hyper-parameters, need to inherit from `CalibratedHParams`.
      See `CalibratedHParams` and `input_calibration_layer` for descriptions of
      how these hyper-parameters work.
    quantiles_dir: location where quantiles for the data was saved. Typically
      the same directory as the training data. These quantiles can be
      generated with `pwl_calibration_layers.calculate_quantiles_for_keypoints`,
      maybe in a separate invocation of your program. Different models that
      share the same quantiles information -- so this needs to be generated only
      once when hyper-parameter tuning. If you don't want to use quantiles, you
      can set `keypoints_initializers` instead.
    keypoints_initializers: if you know the distribution of your
      input features you can provide that directly instead of `quantiles_dir`.
      See `pwl_calibrators_layers.uniform_keypoints_for_signal`. It must be
      a pair of tensors with keypoints inputs and outputs to use for
      initialization (must match `num_keypoints` configured in `hparams`).
      Alternatively can be given as a dict mapping feature name to pairs,
      for initialization per feature. If `quantiles_dir` and
      `keypoints_initializers` are set, the latter takes precedence, and the
      features for which `keypoints_initializers` are not defined fallback to
      using the quantiles found in `quantiles_dir`.
    name: Name scope for layer.
    dtype: If any of the scalars are not given as tensors, they are converted
      to tensors with this dtype.

  Returns:
    A tuple of:
    * calibrated tensor of shape [batch_size, sum(features dimensions)].
    * list of the feature names in the order they appear in the calibrated
      tensor. A name may appear more than once if the feature is
      multi-dimension (for instance a multi-dimension embedding)
    * list of projection ops, that must be applied at each step (or every so
      many steps) to project the model to a feasible space: used for bounding
      the outputs or for imposing monotonicity. Empty if none are requested.
    * None or tensor with regularization loss.

  Raises:
    ValueError: if dtypes are incompatible.
  """
    with ops.name_scope(name or "input_calibration_layer_from_hparams"):

        # Sort out list of feature names.
        unique_feature_names = tools.get_sorted_feature_names(
            columns_to_tensors=columns_to_tensors,
            feature_columns=feature_columns)

        # Get per-feature parameters.
        num_keypoints = _get_per_feature_dict(hparams, "num_keypoints")
        calibration_output_min = _get_per_feature_dict(
            hparams, "calibration_output_min")
        calibration_output_max = _get_per_feature_dict(
            hparams, "calibration_output_max")
        calibration_bound = _get_per_feature_dict(hparams, "calibration_bound")
        monotonicity = _get_per_feature_dict(hparams, "monotonicity")
        missing_input_values = _get_per_feature_dict(hparams,
                                                     "missing_input_value")
        missing_output_values = _get_per_feature_dict(hparams,
                                                      "missing_output_value")

        # Normalize keypoints_initializers into a per-feature dict. A single
        # (inputs, outputs) pair applies to every feature; a caller-provided
        # dict is copied so it is never mutated.
        if keypoints_initializers is None:
            keypoints_initializers = {}
        elif not isinstance(keypoints_initializers, dict):
            keypoints_initializers = {
                feature_name: keypoints_initializers
                for feature_name in unique_feature_names
            }
        else:
            keypoints_initializers = keypoints_initializers.copy()

        # If quantiles_dir is given, fill in initializers only for features
        # without an explicit one: explicit initializers take precedence, as
        # documented. (Previously a non-dict keypoints_initializers was
        # silently discarded whenever quantiles_dir was set, and passing
        # neither argument crashed below with a None subscript.)
        if quantiles_dir is not None:
            quantiles_feature_names = [
                feature_name for feature_name in unique_feature_names
                if feature_name not in keypoints_initializers
            ]

            # Reverse initial output keypoints for decreasing monotonic features.
            reversed_dict = {
                feature_name: monotonicity[feature_name] == -1
                for feature_name in quantiles_feature_names
            }

            # Read initializers from quantiles_dir, for those not already
            # defined.
            #
            # Notice that output_min and output_max won't matter much if
            # they are not bounded, since they will be adjusted during training.
            quantiles_init = keypoints_initialization.load_keypoints_from_quantiles(
                feature_names=quantiles_feature_names,
                save_dir=quantiles_dir,
                num_keypoints=num_keypoints,
                output_min=calibration_output_min,
                output_max=calibration_output_max,
                reversed_dict=reversed_dict,
                missing_input_values_dict=missing_input_values,
                dtype=dtype)

            # Merge with explicit initializers.
            keypoints_initializers.update(quantiles_init)

        # Update num_keypoints according to keypoints actually used by the
        # initialization functions: some initialization functions may change
        # them, for instance if there are not enough unique values.
        for (feature_name,
             initializers) in six.iteritems(keypoints_initializers):
            # initializers is a (keypoints inputs, outputs) pair; its leading
            # dimension is the number of keypoints actually materialized.
            kp_init_keypoints = initializers[0].shape.as_list()[0]
            num_keypoints[feature_name] = _update_keypoints(
                feature_name, num_keypoints[feature_name], kp_init_keypoints)

        # Setup the regularization.
        calibration_l1_regs = _get_per_feature_dict(hparams,
                                                    "calibration_l1_reg")
        calibration_l2_regs = _get_per_feature_dict(hparams,
                                                    "calibration_l2_reg")
        calibration_l1_laplacian_regs = _get_per_feature_dict(
            hparams, "calibration_l1_laplacian_reg")
        calibration_l2_laplacian_regs = _get_per_feature_dict(
            hparams, "calibration_l2_laplacian_reg")

        return pwl_calibration_layers.input_calibration_layer(
            columns_to_tensors=columns_to_tensors,
            feature_columns=feature_columns,
            num_keypoints=num_keypoints,
            keypoints_initializers=keypoints_initializers,
            bound=calibration_bound,
            monotonic=monotonicity,
            missing_input_values=missing_input_values,
            missing_output_values=missing_output_values,
            l1_reg=calibration_l1_regs,
            l2_reg=calibration_l2_regs,
            l1_laplacian_reg=calibration_l1_laplacian_regs,
            l2_laplacian_reg=calibration_l2_laplacian_regs)
Esempio n. 6
0
def input_calibration_layer_from_hparams(columns_to_tensors,
                                         hparams,
                                         quantiles_dir=None,
                                         keypoints_initializers=None,
                                         name=None,
                                         dtype=dtypes.float32):
    """Creates a calibration layer for the input using hyper-parameters.

  Similar to `input_calibration_layer` but reads its parameters from a
  `CalibratedHParams` object.

  Args:
    columns_to_tensors: A mapping from feature name to tensors.
    hparams: Hyper-parameters, need to inherit from `CalibratedHParams`.
      See `CalibratedHParams` and `input_calibration_layer` for descriptions of
      how these hyper-parameters work.
    quantiles_dir: location where quantiles for the data was saved. Typically
      the same directory as the training data. These quantiles can be
      generated with `pwl_calibration_layers.calculate_quantiles_for_keypoints`,
      maybe in a separate invocation of your program. Different models that
      share the same quantiles information -- so this needs to be generated only
      once when hyper-parameter tuning. If you don't want to use quantiles, you
      can set `keypoints_initializers` instead.
    keypoints_initializers: if you know the distribution of your
      input features you can provide that directly instead of `quantiles_dir`.
      See `pwl_calibrators_layers.uniform_keypoints_for_signal`. It must be
      a pair of tensors with keypoints inputs and outputs to use for
      initialization (must match `num_keypoints` configured in `hparams`).
      Alternatively can be given as a dict mapping feature name to pairs,
      for initialization per feature. If `quantiles_dir` and
      `keypoints_initializer` are set, the latter takes precedence, and the
      features for which `keypoints_initializers` are not defined fallback to
      using the quantiles found in `quantiles_dir`.
    name: Name scope for layer.
    dtype: If any of the scalars are not given as tensors, they are converted
      to tensors with this dtype.

  Returns:
    A tuple of:
    * calibrated tensor of shape [batch_size, sum(features dimensions)].
    * list of the feature names in the order they appear in the calibrated
      tensor. A name may appear more than once if the feature is
      multi-dimension (for instance a multi-dimension embedding)
    * list of projection ops, that must be applied at each step (or every so
      many steps) to project the model to a feasible space: used for bounding
      the outputs or for imposing monotonicity. Empty if none are requested.
    * tensor with regularization loss, or None for no regularization.

  Raises:
    ValueError: if dtypes are incompatible.


  """
    with ops.name_scope(name or "input_calibration_layer_from_hparams"):

        # Sort out list of feature names.
        unique_feature_names = tools.get_sorted_feature_names(
            columns_to_tensors=columns_to_tensors)

        # Get per-feature parameters.
        num_keypoints = _get_per_feature_dict(hparams, "num_keypoints")
        calibration_output_min = _get_per_feature_dict(
            hparams, "calibration_output_min")
        calibration_output_max = _get_per_feature_dict(
            hparams, "calibration_output_max")
        calibration_bound = _get_per_feature_dict(hparams, "calibration_bound")
        monotonicity = _get_per_feature_dict(hparams, "monotonicity")
        missing_input_values = _get_per_feature_dict(hparams,
                                                     "missing_input_value")
        missing_output_values = _get_per_feature_dict(hparams,
                                                      "missing_output_value")

        # Convert keypoints_initializers to a dict if needed, or otherwise make a
        # copy of the original keypoints_initializers dict.
        if keypoints_initializers is None:
            keypoints_initializers = {}
        elif not isinstance(keypoints_initializers, dict):
            # A single (inputs, outputs) pair: broadcast it to every feature.
            keypoints_initializers = {
                name: keypoints_initializers
                for name in unique_feature_names
            }
        else:
            # Copy so the caller's dict is not mutated by the update below.
            keypoints_initializers = keypoints_initializers.copy()

        # If quantiles_dir is given, add any missing keypoint initializers with
        # keypoints based on quantiles.
        if quantiles_dir is not None:
            # Explicit initializers take precedence; quantiles fill the rest.
            quantiles_feature_names = [
                name for name in unique_feature_names
                if name not in keypoints_initializers
            ]

            # Reverse initial output keypoints for decreasing monotonic features.
            reversed_dict = {
                feature_name: (monotonicity[feature_name] == -1)
                for feature_name in quantiles_feature_names
            }

            # Read initializers from quantiles_dir, for those not already
            # defined.
            #
            # Notice that output_min and output_max won't matter much if
            # they are not bounded, since they will be adjusted during training.
            quantiles_init = keypoints_initialization.load_keypoints_from_quantiles(
                feature_names=quantiles_feature_names,
                save_dir=quantiles_dir,
                num_keypoints=num_keypoints,
                output_min=calibration_output_min,
                output_max=calibration_output_max,
                reversed_dict=reversed_dict,
                missing_input_values_dict=missing_input_values,
                dtype=dtype)

            # Merge with explicit initializers.
            keypoints_initializers.update(quantiles_init)

        # Update num_keypoints according to keypoints actually used by the
        # initialization functions: some initialization functions may change
        # them, for instance if there are not enough unique values.
        for (feature_name,
             initializers) in six.iteritems(keypoints_initializers):
            # initializers is a (keypoints inputs, outputs) pair; the leading
            # dimension of the inputs tensor is the materialized keypoint count.
            kp_init_keypoints = initializers[0].shape.as_list()[0]
            num_keypoints[feature_name] = _update_keypoints(
                feature_name, num_keypoints[feature_name], kp_init_keypoints)

        # Setup the regularization.
        regularizer_amounts = {}
        for regularizer_name in regularizers.CALIBRATOR_REGULARIZERS:
            # hparams names are prefixed with "calibration_", e.g.
            # "calibration_l1_reg"; the layer kwargs drop the prefix.
            regularizer_amounts[regularizer_name] = _get_per_feature_dict(
                hparams, "calibration_{}".format(regularizer_name))

        return pwl_calibration_layers.input_calibration_layer(
            columns_to_tensors=columns_to_tensors,
            num_keypoints=num_keypoints,
            keypoints_initializers=keypoints_initializers,
            bound=calibration_bound,
            monotonic=monotonicity,
            missing_input_values=missing_input_values,
            missing_output_values=missing_output_values,
            **regularizer_amounts)