Code example #1
0
  def _test_ckpt_converter(self, train_input_fn, eval_input_fn,
                           predict_input_fn, input_dimension, label_dimension,
                           batch_size, dnn_optimizer, linear_optimizer):
    """Round-trips a v1 DNNLinearCombined checkpoint through the converter.

    Trains a v1 canned estimator, converts its checkpoint to the v2 layout,
    then verifies a v2 estimator restores from the converted checkpoint and
    continues counting global steps from where v1 stopped.
    """
    num_steps = 10
    extra_steps = 10

    # Stage 1: train the v1 estimator so a v1-format checkpoint exists.
    # Linear and DNN towers each get their own numeric column over 'x'.
    linear_cols_v1 = [
        feature_column._numeric_column('x', shape=(input_dimension,))
    ]
    dnn_cols_v1 = [
        feature_column._numeric_column('x', shape=(input_dimension,))
    ]
    est_v1 = dnn_linear_combined.DNNLinearCombinedEstimator(
        head=head_lib._regression_head(label_dimension=label_dimension),
        linear_feature_columns=linear_cols_v1,
        dnn_feature_columns=dnn_cols_v1,
        dnn_hidden_units=(2, 2),
        model_dir=self._old_ckpt_dir,
        dnn_optimizer=dnn_optimizer,
        linear_optimizer=linear_optimizer)
    est_v1.train(train_input_fn, steps=num_steps)
    ckpt_v1 = est_v1.latest_checkpoint()
    self.assertIsNotNone(ckpt_v1)
    self.assertTrue(ckpt_v1.startswith(self._old_ckpt_dir))

    # Stage 2: convert the v1 checkpoint into the v2 variable layout.
    source_checkpoint = os.path.join(self._old_ckpt_dir, 'model.ckpt-10')
    source_graph = os.path.join(self._old_ckpt_dir, 'graph.pbtxt')
    target_checkpoint = os.path.join(self._new_ckpt_dir, 'model.ckpt-10')
    checkpoint_converter.convert_checkpoint('combined', source_checkpoint,
                                            source_graph, target_checkpoint)

    # Stage 3: a v2 estimator pointed at the converted directory should
    # restore the converted weights and keep training.
    linear_cols_v2 = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_cols_v2 = [
        tf.feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    est_v2 = dnn_linear_combined.DNNLinearCombinedEstimatorV2(
        head=regression_head.RegressionHead(label_dimension=label_dimension),
        linear_feature_columns=linear_cols_v2,
        dnn_feature_columns=dnn_cols_v2,
        dnn_hidden_units=(2, 2),
        model_dir=self._new_ckpt_dir,
        dnn_optimizer=dnn_optimizer,
        linear_optimizer=linear_optimizer)
    est_v2.train(train_input_fn, steps=extra_steps)
    ckpt_v2 = est_v2.latest_checkpoint()
    self.assertIsNotNone(ckpt_v2)
    self.assertTrue(ckpt_v2.startswith(self._new_ckpt_dir))
    # The global step equals v1 steps + v2 steps only if the converted
    # checkpoint was actually restored rather than training from scratch.
    self.assertEqual(num_steps + extra_steps,
                     est_v2.get_variable_value(tf.compat.v1.GraphKeys.GLOBAL_STEP))
Code example #2
0
  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                          input_dimension, n_classes, batch_size):
    """Runs train -> evaluate -> predict -> export for the annotated DNN."""
    feature_columns = [
        feature_column._numeric_column('x', shape=(input_dimension,))
    ]
    est = dnn_with_layer_annotations.DNNClassifierWithLayerAnnotations(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        n_classes=n_classes,
        model_dir=self._model_dir)

    # Train for a fixed number of steps.
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # Evaluate: the global step should match training, and a loss is reported.
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # Predict: class probabilities come back with shape (batch, n_classes).
    probabilities = np.array([
        pred[prediction_keys.PredictionKeys.PROBABILITIES]
        for pred in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, n_classes), probabilities.shape)

    # Export a SavedModel and verify the export directory was created.
    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                       serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Code example #3
0
  def _testCheckpointCompatibleWithNonAnnotatedEstimator(
      self, train_input_fn, predict_input_fn, non_annotated_class,
      annotated_class, prediction_key, estimator_args):
    """Checks the annotated estimator can warm-start from a plain estimator's
    checkpoint and reproduce its predictions."""
    input_dimension = 2
    feature_columns = [
        feature_column._numeric_column('x', shape=(input_dimension,))
    ]

    # Train the plain (non-annotated) estimator to produce a checkpoint.
    estimator = non_annotated_class(
        model_dir=self._model_dir,
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        **estimator_args)
    estimator.train(train_input_fn, steps=10)
    predictions = np.array(
        [p[prediction_key] for p in estimator.predict(predict_input_fn)])

    # Build the annotated variant, warm-starting from the same directory.
    annotated_estimator = annotated_class(
        model_dir=self._model_dir,
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        warm_start_from=self._model_dir,
        **estimator_args)
    annotated_predictions = np.array([
        p[prediction_key]
        for p in annotated_estimator.predict(predict_input_fn)
    ])

    # Element-wise near-equality: annotations must not alter the model math.
    self.assertAllEqual(predictions.shape, annotated_predictions.shape)
    for index, (expected, actual) in enumerate(
        zip(predictions.flatten(), annotated_predictions.flatten())):
      self.assertAlmostEqual(expected, actual, msg='index=%d' % index)
Code example #4
0
    def _test_parsed_sequence_example(self, col_name, col_fn, col_arg, shape,
                                      values):
        """Helper function to check that each FeatureColumn parses correctly.

        Args:
          col_name: string, name to give to the feature column. Should match
            the name that the column will parse out of the features dict.
          col_fn: function used to create the feature column. For example,
            sequence_numeric_column.
          col_arg: second arg that the target feature column is expecting.
          shape: the expected dense_shape of the feature after parsing into
            a SparseTensor.
          values: the expected values at index [0, 2, 6] of the feature
            after parsing into a SparseTensor.
        """
        example = _make_sequence_example()
        # First two columns parse context features; the column under test
        # (built by col_fn) parses the sequence feature.
        columns = [
            fc._categorical_column_with_identity('int_ctx', num_buckets=100),
            fc._numeric_column('float_ctx'),
            col_fn(col_name, col_arg)
        ]
        context, seq_features = parsing_ops.parse_single_sequence_example(
            example.SerializeToString(),
            context_features=fc.make_parse_example_spec(columns[:2]),
            sequence_features=fc.make_parse_example_spec(columns[2:]))

        with self.cached_session() as sess:
            ctx_result, seq_result = sess.run([context, seq_features])
            # Sequence feature: verify shape and spot-check parsed values.
            self.assertEqual(list(seq_result[col_name].dense_shape), shape)
            self.assertEqual(list(seq_result[col_name].values[[0, 2, 6]]),
                             values)
            # Context features: fixed expectations matching the example that
            # _make_sequence_example builds.
            self.assertEqual(list(ctx_result['int_ctx'].dense_shape), [1])
            self.assertEqual(ctx_result['int_ctx'].values[0], 5)
            self.assertEqual(list(ctx_result['float_ctx'].shape), [1])
            self.assertAlmostEqual(ctx_result['float_ctx'][0], 123.6, places=1)
Code example #5
0
  def _test_parsed_sequence_example(
      self, col_name, col_fn, col_arg, shape, values):
    """Verifies that a FeatureColumn parses a SequenceExample correctly.

    Args:
      col_name: string name for the column; must match the feature key the
        column reads out of the parsed features dict.
      col_fn: factory that builds the feature column under test, e.g.
        sequence_numeric_column.
      col_arg: second positional argument forwarded to col_fn.
      shape: expected dense_shape of the parsed sequence feature.
      values: expected values at indices [0, 2, 6] of the parsed feature.
    """
    example = _make_sequence_example()
    context_columns = [
        fc._categorical_column_with_identity('int_ctx', num_buckets=100),
        fc._numeric_column('float_ctx'),
    ]
    sequence_columns = [col_fn(col_name, col_arg)]
    context, seq_features = parsing_ops.parse_single_sequence_example(
        example.SerializeToString(),
        context_features=fc.make_parse_example_spec(context_columns),
        sequence_features=fc.make_parse_example_spec(sequence_columns))

    with self.cached_session() as sess:
      parsed_context, parsed_seq = sess.run([context, seq_features])
      # The column under test: check shape and spot-check values.
      target = parsed_seq[col_name]
      self.assertEqual(list(target.dense_shape), shape)
      self.assertEqual(list(target.values[[0, 2, 6]]), values)
      # Context features carry fixed expectations from the example builder.
      self.assertEqual(list(parsed_context['int_ctx'].dense_shape), [1])
      self.assertEqual(parsed_context['int_ctx'].values[0], 5)
      self.assertEqual(list(parsed_context['float_ctx'].shape), [1])
      self.assertAlmostEqual(parsed_context['float_ctx'][0], 123.6, places=1)
Code example #6
0
 def _test_complete_flow_linear_fc_v1(self, train_input_fn, eval_input_fn,
                                      predict_input_fn, input_dimension,
                                      label_dimension, batch_size, fc_impl):
   """Runs the complete-flow helper with a v1 linear column and a v2 DNN
   column, exercising the mixed v1/v2 feature-column code path.
   """
   # This variant pins the column implementations explicitly, so the
   # fc_impl argument is deliberately unused (del makes that explicit).
   del fc_impl
   linear_feature_columns = [
       feature_column._numeric_column('x', shape=(input_dimension,))
   ]
   dnn_feature_columns = [
       feature_column_v2.numeric_column('x', shape=(input_dimension,))
   ]
   feature_columns = linear_feature_columns + dnn_feature_columns
   # Build the parse spec from both column sets so exported models accept
   # serialized examples covering all input features.
   feature_spec = feature_column.make_parse_example_spec(feature_columns)
   self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
                                   feature_spec, train_input_fn, eval_input_fn,
                                   predict_input_fn, input_dimension,
                                   label_dimension, batch_size)
Code example #7
0
    def _build_feature_columns(self):
        """Builds the (context_columns, sequence_columns) pair for tests."""
        # Context side: an embedded identity-categorical plus a raw numeric.
        int_ctx = fc._categorical_column_with_identity('int_ctx',
                                                       num_buckets=100)
        ctx_cols = [
            fc._embedding_column(int_ctx, dimension=10),
            fc._numeric_column('float_ctx'),
        ]

        # Sequence side: two embedded sequence-categorical columns.
        seq_cols = [
            fc._embedding_column(
                sfc.sequence_categorical_column_with_identity(
                    'int_list', num_buckets=10),
                dimension=10),
            fc._embedding_column(
                sfc.sequence_categorical_column_with_hash_bucket(
                    'bytes_list', hash_bucket_size=100),
                dimension=20),
        ]

        return ctx_cols, seq_cols
Code example #8
0
  def _build_feature_columns(self):
    """Returns the (context, sequence) feature-column lists used by tests."""
    # Context columns: identity-categorical embedded to 10 dims, plus a
    # plain numeric column.
    context_categorical = fc._categorical_column_with_identity(
        'int_ctx', num_buckets=100)
    context_columns = [
        fc._embedding_column(context_categorical, dimension=10),
        fc._numeric_column('float_ctx'),
    ]

    # Sequence columns: each sequence-categorical wrapped in an embedding.
    int_seq = sfc.sequence_categorical_column_with_identity(
        'int_list', num_buckets=10)
    bytes_seq = sfc.sequence_categorical_column_with_hash_bucket(
        'bytes_list', hash_bucket_size=100)
    sequence_columns = [
        fc._embedding_column(int_seq, dimension=10),
        fc._embedding_column(bytes_seq, dimension=20),
    ]

    return context_columns, sequence_columns
Code example #9
0
  def _testAnnotationsPresentForEstimator(self, estimator_class):
    """Runs one PREDICT-mode model_fn call and checks that the layer
    annotation collections were populated in the graph."""
    feature_columns = [
        feature_column._numeric_column('x', shape=(1,)),
        feature_column._embedding_column(
            feature_column._categorical_column_with_vocabulary_list(
                'y', vocabulary_list=['a', 'b', 'c']),
            dimension=3)
    ]
    estimator = estimator_class(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        model_dir=self._model_dir)
    model_fn = estimator.model_fn

    graph = ops.Graph()
    with graph.as_default():
      # Invoke the model function once so it registers its annotations in
      # the graph's collections.
      features = {
          'x': array_ops.constant([1.0]),
          'y': array_ops.constant(['a'])
      }
      model_fn(features, {}, model_fn_lib.ModeKeys.PREDICT, config=None)

      names = dnn_with_layer_annotations.LayerAnnotationsCollectionNames
      unprocessed_features = self._getLayerAnnotationCollection(
          graph, names.UNPROCESSED_FEATURES)
      processed_features = self._getLayerAnnotationCollection(
          graph, names.PROCESSED_FEATURES)
      annotated_columns = graph.get_collection(names.FEATURE_COLUMNS)

      # Both input features and both feature columns must be annotated.
      self.assertItemsEqual(unprocessed_features.keys(), ['x', 'y'])
      self.assertEqual(2, len(processed_features.keys()))
      self.assertEqual(2, len(annotated_columns))