# Example 1 (Esempio n. 1) — score: 0
def make_custom_export_strategy(name, convert_fn, feature_columns,
                                export_input_fn):
    """Builds an `ExportStrategy` that exports GTFlow trees in a custom format.

    Args:
      name: A string, the name of the export strategy.
      convert_fn: A function that converts the tree proto to the desired
        format and saves it to the desired location. May be None to skip
        conversion.
      feature_columns: A list of feature columns.
      export_input_fn: A zero-argument function returning an `InputFnOps`.

    Returns:
      An `ExportStrategy`.
    """
    saved_model_strategy = saved_model_export_utils.make_export_strategy(
        serving_input_fn=export_input_fn)
    serving_input = export_input_fn()
    # Only the sorted feature names and the per-kind counts are needed below;
    # the remaining outputs of extract_features are unused here.
    unpacked = gbdt_batch.extract_features(serving_input.features,
                                           feature_columns)
    feature_names = unpacked[0]
    num_dense = len(unpacked[1])
    num_sparse_float = len(unpacked[2])
    num_sparse_int = len(unpacked[5])

    def export_fn(estimator, export_dir, checkpoint_path=None,
                  eval_result=None):
        """Exports a SavedModel, then converts it to other formats in place."""
        result_dir = saved_model_strategy.export(estimator, export_dir,
                                                 checkpoint_path, eval_result)
        graph = ops.Graph()
        with graph.as_default(), tf_session.Session(graph=graph) as sess:
            saved_model_loader.load(sess, [tag_constants.SERVING], result_dir)
            # Note: This is GTFlow internal API and might change.
            serialize_op = graph.get_operation_by_name(
                "ensemble_model/TreeEnsembleSerialize")
            _, serialized_ensemble = sess.run(serialize_op.outputs)
            dtec = tree_config_pb2.DecisionTreeEnsembleConfig()
            dtec.ParseFromString(serialized_ensemble)
            # Export the converted result in the same folder as the saved
            # model; a None convert_fn skips conversion entirely.
            if convert_fn:
                convert_fn(dtec, feature_names, num_dense, num_sparse_float,
                           num_sparse_int, result_dir, eval_result)
            importances = _get_feature_importances(
                dtec, feature_names, num_dense, num_sparse_float,
                num_sparse_int)
            # Descending by importance; sorted() is stable either way.
            ranked = sorted(importances.items(), key=lambda kv: kv[1],
                            reverse=True)
            assets_dir = os.path.join(result_dir, "assets.extra")
            gfile.MakeDirs(assets_dir)
            importances_path = os.path.join(assets_dir, "feature_importances")
            with gfile.GFile(importances_path, "w") as f:
                f.write("\n".join("%s, %f" % (k, v) for k, v in ranked))
        return result_dir

    return export_strategy.ExportStrategy(name, export_fn)
def make_custom_export_strategy(name,
                                convert_fn,
                                feature_columns,
                                export_input_fn):
  """Makes custom exporter of GTFlow tree format.

  Args:
    name: A string, for the name of the export strategy.
    convert_fn: A function that converts the tree proto to desired format and
      saves it to the desired location. Can be None to skip conversion.
    feature_columns: A list of feature columns.
    export_input_fn: A function that takes no arguments and returns an
      `InputFnOps`.

  Returns:
    An `ExportStrategy`.
  """
  base_strategy = saved_model_export_utils.make_export_strategy(
      serving_input_fn=export_input_fn)
  # Run the serving input_fn once to discover which features the model
  # consumes; only the sorted names and per-kind counts are used below.
  input_fn = export_input_fn()
  (sorted_feature_names, dense_floats, sparse_float_indices, _, _,
   sparse_int_indices, _, _) = gbdt_batch.extract_features(
       input_fn.features, feature_columns)

  def export_fn(estimator, export_dir, checkpoint_path=None, eval_result=None):
    """A wrapper to export to SavedModel, and convert it to other formats."""
    # First export a regular SavedModel via the base strategy.
    result_dir = base_strategy.export(estimator, export_dir,
                                      checkpoint_path,
                                      eval_result)
    # Re-load the exported model in a fresh graph to pull out the serialized
    # tree ensemble proto.
    with ops.Graph().as_default() as graph:
      with tf_session.Session(graph=graph) as sess:
        saved_model_loader.load(
            sess, [tag_constants.SERVING], result_dir)
        # Note: This is GTFlow internal API and might change.
        ensemble_model = graph.get_operation_by_name(
            "ensemble_model/TreeEnsembleSerialize")
        _, dfec_str = sess.run(ensemble_model.outputs)
        dtec = tree_config_pb2.DecisionTreeEnsembleConfig()
        dtec.ParseFromString(dfec_str)
        # Export the result in the same folder as the saved model.
        if convert_fn:
          convert_fn(dtec, sorted_feature_names,
                     len(dense_floats),
                     len(sparse_float_indices),
                     len(sparse_int_indices), result_dir, eval_result)
        feature_importances = _get_feature_importances(
            dtec, sorted_feature_names,
            len(dense_floats),
            len(sparse_float_indices), len(sparse_int_indices))
        # Highest importance first (key negates the value).
        sorted_by_importance = sorted(
            feature_importances.items(), key=lambda x: -x[1])
        # Write importances as "name, value" lines into assets.extra so they
        # travel with the SavedModel.
        assets_dir = os.path.join(result_dir, "assets.extra")
        gfile.MakeDirs(assets_dir)
        with gfile.GFile(os.path.join(assets_dir, "feature_importances"),
                         "w") as f:
          f.write("\n".join("%s, %f" % (k, v) for k, v in sorted_by_importance))
    return result_dir

  return export_strategy.ExportStrategy(
      name, export_fn, strip_default_attrs=True)
# Example 3 (Esempio n. 3) — score: 0
 def testExtractFeaturesWithTransformation(self):
     """Tests extract_features when explicit feature columns are supplied."""
     with self.test_session():
         # One feature of each kind: dense float, sparse float, and a sparse
         # categorical (string) column that gets hash-bucketed to ints.
         features = {}
         features["dense_float"] = array_ops.zeros([2, 1], dtypes.float32)
         features["sparse_float"] = sparse_tensor.SparseTensor(
             array_ops.zeros([2, 2], dtypes.int64),
             array_ops.zeros([2], dtypes.float32),
             array_ops.zeros([2], dtypes.int64))
         features["sparse_categorical"] = sparse_tensor.SparseTensor(
             array_ops.zeros([2, 2], dtypes.int64),
             array_ops.zeros([2], dtypes.string),
             array_ops.zeros([2], dtypes.int64))
         feature_columns = set()
         feature_columns.add(layers.real_valued_column("dense_float"))
         feature_columns.add(
             layers.feature_column._real_valued_var_len_column(
                 "sparse_float", is_sparse=True))
         feature_columns.add(
             feature_column_lib.sparse_column_with_hash_bucket(
                 "sparse_categorical", hash_bucket_size=1000000))
         (fc_names, dense_floats, sparse_float_indices, sparse_float_values,
          sparse_float_shapes, sparse_int_indices, sparse_int_values,
          sparse_int_shapes) = (gbdt_batch.extract_features(
              features, feature_columns))
         self.assertEqual(len(fc_names), 3)
         self.assertAllEqual(
             fc_names,
             ["dense_float", "sparse_float", "sparse_categorical"])
         # Exactly one extracted tensor per feature kind.
         self.assertEqual(len(dense_floats), 1)
         self.assertEqual(len(sparse_float_indices), 1)
         self.assertEqual(len(sparse_float_values), 1)
         self.assertEqual(len(sparse_float_shapes), 1)
         self.assertEqual(len(sparse_int_indices), 1)
         self.assertEqual(len(sparse_int_values), 1)
         self.assertEqual(len(sparse_int_shapes), 1)
         # Dense and sparse-float features pass through unchanged.
         self.assertAllEqual(dense_floats[0].eval(),
                             features["dense_float"].eval())
         self.assertAllEqual(sparse_float_indices[0].eval(),
                             features["sparse_float"].indices.eval())
         self.assertAllEqual(sparse_float_values[0].eval(),
                             features["sparse_float"].values.eval())
         self.assertAllEqual(sparse_float_shapes[0].eval(),
                             features["sparse_float"].dense_shape.eval())
         self.assertAllEqual(sparse_int_indices[0].eval(),
                             features["sparse_categorical"].indices.eval())
         # Hash-bucketed string values map to deterministic bucket ids.
         self.assertAllEqual(sparse_int_values[0].eval(), [397263, 397263])
         self.assertAllEqual(
             sparse_int_shapes[0].eval(),
             features["sparse_categorical"].dense_shape.eval())
# Example 4 (Esempio n. 4) — score: 0
 def testExtractFeaturesWithTransformation(self):
   """Verifies extract_features output when feature columns are supplied."""
   with self.test_session():
     # One feature of each kind: dense float, sparse float, and a sparse
     # categorical string column that is hash-bucketed to integer ids.
     features = {
         "dense_float": array_ops.zeros([2, 1], dtypes.float32),
         "sparse_float": sparse_tensor.SparseTensor(
             array_ops.zeros([2, 2], dtypes.int64),
             array_ops.zeros([2], dtypes.float32),
             array_ops.zeros([2], dtypes.int64)),
         "sparse_categorical": sparse_tensor.SparseTensor(
             array_ops.zeros([2, 2], dtypes.int64),
             array_ops.zeros([2], dtypes.string),
             array_ops.zeros([2], dtypes.int64)),
     }
     feature_columns = {
         layers.real_valued_column("dense_float"),
         layers.feature_column._real_valued_var_len_column(
             "sparse_float", is_sparse=True),
         feature_column_lib.sparse_column_with_hash_bucket(
             "sparse_categorical", hash_bucket_size=1000000),
     }
     (fc_names, dense_floats, sparse_float_indices, sparse_float_values,
      sparse_float_shapes, sparse_int_indices, sparse_int_values,
      sparse_int_shapes) = gbdt_batch.extract_features(
          features, feature_columns)
     self.assertEqual(len(fc_names), 3)
     self.assertAllEqual(fc_names,
                         ["dense_float", "sparse_float", "sparse_categorical"])
     # Exactly one extracted tensor per feature kind.
     for extracted in (dense_floats, sparse_float_indices,
                       sparse_float_values, sparse_float_shapes,
                       sparse_int_indices, sparse_int_values,
                       sparse_int_shapes):
       self.assertEqual(len(extracted), 1)
     # Dense and sparse-float features pass through unchanged.
     self.assertAllEqual(dense_floats[0].eval(),
                         features["dense_float"].eval())
     sparse_float = features["sparse_float"]
     self.assertAllEqual(sparse_float_indices[0].eval(),
                         sparse_float.indices.eval())
     self.assertAllEqual(sparse_float_values[0].eval(),
                         sparse_float.values.eval())
     self.assertAllEqual(sparse_float_shapes[0].eval(),
                         sparse_float.dense_shape.eval())
     self.assertAllEqual(sparse_int_indices[0].eval(),
                         features["sparse_categorical"].indices.eval())
     # Hash-bucketed string values map to deterministic bucket ids.
     self.assertAllEqual(sparse_int_values[0].eval(), [397263, 397263])
     self.assertAllEqual(sparse_int_shapes[0].eval(),
                         features["sparse_categorical"].dense_shape.eval())
def make_custom_export_strategy(name, convert_fn, feature_columns,
                                export_input_fn):
  """Makes custom exporter of GTFlow tree format.

  Args:
    name: A string, for the name of the export strategy.
    convert_fn: A function that converts the tree proto to desired format and
      saves it to the desired location. Can be None to skip conversion.
    feature_columns: A list of feature columns.
    export_input_fn: A function that takes no arguments and returns an
      `InputFnOps`.

  Returns:
    An `ExportStrategy`.
  """
  base_strategy = saved_model_export_utils.make_export_strategy(
      serving_input_fn=export_input_fn)
  # Run the serving input_fn once to discover which features the model
  # consumes; only the sorted names and per-kind counts are used below.
  input_fn = export_input_fn()
  (sorted_feature_names, dense_floats, sparse_float_indices, _, _,
   sparse_int_indices, _, _) = gbdt_batch.extract_features(
       input_fn.features, feature_columns)

  def export_fn(estimator, export_dir, checkpoint_path=None, eval_result=None):
    """A wrapper to export to SavedModel, and convert it to other formats."""
    result_dir = base_strategy.export(estimator, export_dir,
                                      checkpoint_path,
                                      eval_result)
    # Re-load the exported model in a fresh graph to pull out the serialized
    # tree ensemble proto.
    with ops.Graph().as_default() as graph:
      with tf_session.Session(graph=graph) as sess:
        saved_model_loader.load(
            sess, [tag_constants.SERVING], result_dir)
        # Note: This is GTFlow internal API and might change.
        ensemble_model = graph.get_operation_by_name(
            "ensemble_model/TreeEnsembleSerialize")
        _, dfec_str = sess.run(ensemble_model.outputs)
        dtec = tree_config_pb2.DecisionTreeEnsembleConfig()
        dtec.ParseFromString(dfec_str)
        # Export the result in the same folder as the saved model. Guard
        # against a None convert_fn so callers may skip the conversion step.
        if convert_fn:
          convert_fn(dtec, sorted_feature_names, len(dense_floats),
                     len(sparse_float_indices), len(sparse_int_indices),
                     result_dir, eval_result)
    return result_dir
  return export_strategy.ExportStrategy(name, export_fn)
# Example 6 (Esempio n. 6) — score: 0
 def testExtractFeatures(self):
   """Verifies extract_features output when no feature columns are given."""
   with self.test_session():
     # One feature of each kind: dense float, sparse float, sparse int.
     features = {
         "dense_float": array_ops.zeros([2, 1], dtypes.float32),
         "sparse_float": sparse_tensor.SparseTensor(
             array_ops.zeros([2, 2], dtypes.int64),
             array_ops.zeros([2], dtypes.float32),
             array_ops.zeros([2], dtypes.int64)),
         "sparse_int": sparse_tensor.SparseTensor(
             array_ops.zeros([2, 2], dtypes.int64),
             array_ops.zeros([2], dtypes.int64),
             array_ops.zeros([2], dtypes.int64)),
     }
     (fc_names, dense_floats, sparse_float_indices, sparse_float_values,
      sparse_float_shapes, sparse_int_indices, sparse_int_values,
      sparse_int_shapes) = gbdt_batch.extract_features(features, None)
     self.assertEqual(len(fc_names), 3)
     self.assertAllEqual(fc_names,
                         ["dense_float", "sparse_float", "sparse_int"])
     # Exactly one extracted tensor per feature kind.
     for extracted in (dense_floats, sparse_float_indices,
                       sparse_float_values, sparse_float_shapes,
                       sparse_int_indices, sparse_int_values,
                       sparse_int_shapes):
       self.assertEqual(len(extracted), 1)
     # Without feature columns, every feature passes through unchanged.
     self.assertAllEqual(dense_floats[0].eval(),
                         features["dense_float"].eval())
     sparse_float = features["sparse_float"]
     self.assertAllEqual(sparse_float_indices[0].eval(),
                         sparse_float.indices.eval())
     self.assertAllEqual(sparse_float_values[0].eval(),
                         sparse_float.values.eval())
     self.assertAllEqual(sparse_float_shapes[0].eval(),
                         sparse_float.dense_shape.eval())
     sparse_int = features["sparse_int"]
     self.assertAllEqual(sparse_int_indices[0].eval(),
                         sparse_int.indices.eval())
     self.assertAllEqual(sparse_int_values[0].eval(),
                         sparse_int.values.eval())
     self.assertAllEqual(sparse_int_shapes[0].eval(),
                         sparse_int.dense_shape.eval())
# Example 7 (Esempio n. 7) — score: 0
 def testExtractFeatures(self):
     """Tests extract_features pass-through when no feature columns are given."""
     with self.test_session():
         # One feature of each kind: dense float, sparse float, sparse int.
         features = {}
         features["dense_float"] = array_ops.zeros([2, 1], dtypes.float32)
         features["sparse_float"] = sparse_tensor.SparseTensor(
             array_ops.zeros([2, 2], dtypes.int64),
             array_ops.zeros([2], dtypes.float32),
             array_ops.zeros([2], dtypes.int64))
         features["sparse_int"] = sparse_tensor.SparseTensor(
             array_ops.zeros([2, 2], dtypes.int64),
             array_ops.zeros([2], dtypes.int64),
             array_ops.zeros([2], dtypes.int64))
         (fc_names, dense_floats, sparse_float_indices, sparse_float_values,
          sparse_float_shapes, sparse_int_indices, sparse_int_values,
          sparse_int_shapes) = (gbdt_batch.extract_features(features, None))
         self.assertEqual(len(fc_names), 3)
         self.assertAllEqual(fc_names,
                             ["dense_float", "sparse_float", "sparse_int"])
         # Exactly one extracted tensor per feature kind.
         self.assertEqual(len(dense_floats), 1)
         self.assertEqual(len(sparse_float_indices), 1)
         self.assertEqual(len(sparse_float_values), 1)
         self.assertEqual(len(sparse_float_shapes), 1)
         self.assertEqual(len(sparse_int_indices), 1)
         self.assertEqual(len(sparse_int_values), 1)
         self.assertEqual(len(sparse_int_shapes), 1)
         # Without feature columns, every feature passes through unchanged.
         self.assertAllEqual(dense_floats[0].eval(),
                             features["dense_float"].eval())
         self.assertAllEqual(sparse_float_indices[0].eval(),
                             features["sparse_float"].indices.eval())
         self.assertAllEqual(sparse_float_values[0].eval(),
                             features["sparse_float"].values.eval())
         self.assertAllEqual(sparse_float_shapes[0].eval(),
                             features["sparse_float"].dense_shape.eval())
         self.assertAllEqual(sparse_int_indices[0].eval(),
                             features["sparse_int"].indices.eval())
         self.assertAllEqual(sparse_int_values[0].eval(),
                             features["sparse_int"].values.eval())
         self.assertAllEqual(sparse_int_shapes[0].eval(),
                             features["sparse_int"].dense_shape.eval())
def make_custom_export_strategy(name,
                                convert_fn,
                                feature_columns,
                                export_input_fn,
                                use_core_columns=False,
                                feature_engineering_fn=None,
                                default_output_alternative_key=None):
    """Makes custom exporter of GTFlow tree format.

    Args:
      name: A string, for the name of the export strategy.
      convert_fn: A function that converts the tree proto to desired format and
        saves it to the desired location. Can be None to skip conversion.
      feature_columns: A list of feature columns.
      export_input_fn: A function that takes no arguments and returns an
        `InputFnOps`.
      use_core_columns: A boolean, whether core feature columns were used.
      feature_engineering_fn: Feature eng function to be called on the input.
      default_output_alternative_key: the name of the head to serve when an
        incoming serving request does not explicitly request a specific head.
        Not needed for single-headed models.

    Returns:
      An `ExportStrategy`.
    """
    base_strategy = saved_model_export_utils.make_export_strategy(
        serving_input_fn=export_input_fn,
        strip_default_attrs=True,
        default_output_alternative_key=default_output_alternative_key)
    # Run the serving input_fn once to discover which features the model
    # consumes; only the sorted names and per-kind counts are used below.
    input_fn = export_input_fn()
    features = input_fn.features
    # Apply the feature engineering function to the serving features first —
    # presumably to mirror training-time preprocessing (confirm with callers).
    if feature_engineering_fn is not None:
        features, _ = feature_engineering_fn(features, labels=None)
    (sorted_feature_names, dense_floats, sparse_float_indices, _, _,
     sparse_int_indices, _,
     _) = gbdt_batch.extract_features(features, feature_columns,
                                      use_core_columns)

    def export_fn(estimator,
                  export_dir,
                  checkpoint_path=None,
                  eval_result=None):
        """A wrapper to export to SavedModel, and convert it to other formats."""
        result_dir = base_strategy.export(estimator, export_dir,
                                          checkpoint_path, eval_result)
        # Re-load the exported model in a fresh graph to pull out the
        # serialized tree ensemble proto.
        with ops.Graph().as_default() as graph:
            with tf_session.Session(graph=graph) as sess:
                saved_model_loader.load(sess, [tag_constants.SERVING],
                                        result_dir)
                # Note: This is GTFlow internal API and might change.
                ensemble_model = graph.get_operation_by_name(
                    "ensemble_model/TreeEnsembleSerialize")
                _, dfec_str = sess.run(ensemble_model.outputs)
                dtec = tree_config_pb2.DecisionTreeEnsembleConfig()
                dtec.ParseFromString(dfec_str)
                # Export the result in the same folder as the saved model.
                if convert_fn:
                    convert_fn(dtec, sorted_feature_names, len(dense_floats),
                               len(sparse_float_indices),
                               len(sparse_int_indices), result_dir,
                               eval_result)
                feature_importances = _get_feature_importances(
                    dtec, sorted_feature_names, len(dense_floats),
                    len(sparse_float_indices), len(sparse_int_indices))
                # Highest importance first (key negates the value).
                sorted_by_importance = sorted(feature_importances.items(),
                                              key=lambda x: -x[1])
                # NOTE(review): the as_bytes conversions suggest result_dir
                # may be returned as bytes — confirm before simplifying.
                assets_dir = os.path.join(compat.as_bytes(result_dir),
                                          compat.as_bytes("assets.extra"))
                gfile.MakeDirs(assets_dir)
                with gfile.GFile(
                        os.path.join(compat.as_bytes(assets_dir),
                                     compat.as_bytes("feature_importances")),
                        "w") as f:
                    f.write("\n".join("%s, %f" % (k, v)
                                      for k, v in sorted_by_importance))
        return result_dir

    return export_strategy.ExportStrategy(name,
                                          export_fn,
                                          strip_default_attrs=True)