def _test_ckpt_converter(self, train_input_fn, eval_input_fn, predict_input_fn,
                         input_dimension, label_dimension, batch_size,
                         dnn_optimizer, linear_optimizer):
  # Create checkpoint in CannedEstimator v1.
  linear_feature_columns_v1 = [
      feature_column._numeric_column('x', shape=(input_dimension,))
  ]
  dnn_feature_columns_v1 = [
      feature_column._numeric_column('x', shape=(input_dimension,))
  ]
  est_v1 = dnn_linear_combined.DNNLinearCombinedEstimator(
      head=head_lib._regression_head(label_dimension=label_dimension),
      linear_feature_columns=linear_feature_columns_v1,
      dnn_feature_columns=dnn_feature_columns_v1,
      dnn_hidden_units=(2, 2),
      model_dir=self._old_ckpt_dir,
      dnn_optimizer=dnn_optimizer,
      linear_optimizer=linear_optimizer)
  # Train.
  num_steps = 10
  est_v1.train(train_input_fn, steps=num_steps)
  self.assertIsNotNone(est_v1.latest_checkpoint())
  self.assertTrue(est_v1.latest_checkpoint().startswith(self._old_ckpt_dir))

  # Convert checkpoint from v1 to v2.
  source_checkpoint = os.path.join(self._old_ckpt_dir, 'model.ckpt-10')
  source_graph = os.path.join(self._old_ckpt_dir, 'graph.pbtxt')
  target_checkpoint = os.path.join(self._new_ckpt_dir, 'model.ckpt-10')
  checkpoint_converter.convert_checkpoint('combined', source_checkpoint,
                                          source_graph, target_checkpoint)

  # Create CannedEstimator V2 and restore from the converted checkpoint.
  linear_feature_columns_v2 = [
      tf.feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  dnn_feature_columns_v2 = [
      tf.feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  est_v2 = dnn_linear_combined.DNNLinearCombinedEstimatorV2(
      head=regression_head.RegressionHead(label_dimension=label_dimension),
      linear_feature_columns=linear_feature_columns_v2,
      dnn_feature_columns=dnn_feature_columns_v2,
      dnn_hidden_units=(2, 2),
      model_dir=self._new_ckpt_dir,
      dnn_optimizer=dnn_optimizer,
      linear_optimizer=linear_optimizer)
  # Train for extra steps.
  extra_steps = 10
  est_v2.train(train_input_fn, steps=extra_steps)
  self.assertIsNotNone(est_v2.latest_checkpoint())
  self.assertTrue(est_v2.latest_checkpoint().startswith(self._new_ckpt_dir))

  # Make sure estimator v2 restores from the converted checkpoint, and
  # continues training extra steps.
  self.assertEqual(
      num_steps + extra_steps,
      est_v2.get_variable_value(tf.compat.v1.GraphKeys.GLOBAL_STEP))

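# Hedged usage sketch (not part of the original file): one way a concrete test
# case might drive _test_ckpt_converter. The data values, the use of
# tf.compat.v1.estimator.inputs.numpy_input_fn, and the 'Adagrad' optimizer
# string are illustrative assumptions; the real tests may build their input
# functions and optimizers differently.
def test_ckpt_converter_numpy_input_fn_sketch(self):
  input_dimension = 2
  label_dimension = 1
  batch_size = 10
  # Simple synthetic regression data of shape [batch_size, input_dimension].
  data = np.linspace(
      0., 2., batch_size * input_dimension,
      dtype=np.float32).reshape(batch_size, input_dimension)
  labels = np.zeros((batch_size, label_dimension), dtype=np.float32)
  train_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
      x={'x': data}, y=labels, batch_size=batch_size, num_epochs=None,
      shuffle=True)
  eval_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
      x={'x': data}, y=labels, batch_size=batch_size, shuffle=False)
  predict_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
      x={'x': data}, batch_size=batch_size, shuffle=False)
  self._test_ckpt_converter(
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      predict_input_fn=predict_input_fn,
      input_dimension=input_dimension,
      label_dimension=label_dimension,
      batch_size=batch_size,
      dnn_optimizer='Adagrad',
      linear_optimizer='Adagrad')
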
def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                        input_dimension, n_classes, batch_size):
  feature_columns = [
      feature_column._numeric_column('x', shape=(input_dimension,))
  ]
  est = dnn_with_layer_annotations.DNNClassifierWithLayerAnnotations(
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      n_classes=n_classes,
      model_dir=self._model_dir)

  # TRAIN
  num_steps = 10
  est.train(train_input_fn, steps=num_steps)

  # EVALUATE
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(scores))

  # PREDICT
  predicted_proba = np.array([
      x[prediction_keys.PredictionKeys.PROBABILITIES]
      for x in est.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

  # EXPORT
  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))

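# Hedged usage sketch (not part of the original file): one way a test case
# might exercise _test_complete_flow for classification. It assumes tensorflow
# is imported as tf in this module and uses
# tf.compat.v1.estimator.inputs.numpy_input_fn; the data and n_classes=3 are
# illustrative only.
def test_complete_flow_numpy_input_fn_sketch(self):
  input_dimension = 2
  n_classes = 3
  batch_size = 10
  data = np.linspace(
      0., n_classes - 1., batch_size * input_dimension,
      dtype=np.float32).reshape(batch_size, input_dimension)
  # Integer class labels in [0, n_classes), shaped [batch_size, 1].
  labels = np.mod(np.arange(batch_size), n_classes).reshape(batch_size, 1)
  train_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
      x={'x': data}, y=labels, batch_size=batch_size, num_epochs=None,
      shuffle=True)
  eval_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
      x={'x': data}, y=labels, batch_size=batch_size, shuffle=False)
  predict_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
      x={'x': data}, batch_size=batch_size, shuffle=False)
  self._test_complete_flow(
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      predict_input_fn=predict_input_fn,
      input_dimension=input_dimension,
      n_classes=n_classes,
      batch_size=batch_size)
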
def _testCheckpointCompatibleWithNonAnnotatedEstimator(
    self, train_input_fn, predict_input_fn, non_annotated_class,
    annotated_class, prediction_key, estimator_args):
  input_dimension = 2
  feature_columns = [
      feature_column._numeric_column('x', shape=(input_dimension,))
  ]
  estimator = non_annotated_class(
      model_dir=self._model_dir,
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      **estimator_args)
  estimator.train(train_input_fn, steps=10)
  predictions = np.array(
      [x[prediction_key] for x in estimator.predict(predict_input_fn)])

  annotated_estimator = annotated_class(
      model_dir=self._model_dir,
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      warm_start_from=self._model_dir,
      **estimator_args)
  annotated_predictions = np.array([
      x[prediction_key]
      for x in annotated_estimator.predict(predict_input_fn)
  ])

  self.assertAllEqual(predictions.shape, annotated_predictions.shape)
  for i, (a, b) in enumerate(
      zip(predictions.flatten(), annotated_predictions.flatten())):
    self.assertAlmostEqual(a, b, msg='index=%d' % i)

def _test_parsed_sequence_example(self, col_name, col_fn, col_arg, shape,
                                  values):
  """Helper function to check that each FeatureColumn parses correctly.

  Args:
    col_name: string, name to give to the feature column. Should match the
      name that the column will parse out of the features dict.
    col_fn: function used to create the feature column. For example,
      sequence_numeric_column.
    col_arg: second arg that the target feature column is expecting.
    shape: the expected dense_shape of the feature after parsing into a
      SparseTensor.
    values: the expected values at index [0, 2, 6] of the feature after
      parsing into a SparseTensor.
  """
  example = _make_sequence_example()
  columns = [
      fc._categorical_column_with_identity('int_ctx', num_buckets=100),
      fc._numeric_column('float_ctx'),
      col_fn(col_name, col_arg)
  ]
  context, seq_features = parsing_ops.parse_single_sequence_example(
      example.SerializeToString(),
      context_features=fc.make_parse_example_spec(columns[:2]),
      sequence_features=fc.make_parse_example_spec(columns[2:]))

  with self.cached_session() as sess:
    ctx_result, seq_result = sess.run([context, seq_features])
    self.assertEqual(list(seq_result[col_name].dense_shape), shape)
    self.assertEqual(list(seq_result[col_name].values[[0, 2, 6]]), values)
    self.assertEqual(list(ctx_result['int_ctx'].dense_shape), [1])
    self.assertEqual(ctx_result['int_ctx'].values[0], 5)
    self.assertEqual(list(ctx_result['float_ctx'].shape), [1])
    self.assertAlmostEqual(ctx_result['float_ctx'][0], 123.6, places=1)

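# Hedged sketch (not the original helper): an illustration of how a
# _make_sequence_example-style helper could build the tf.train.SequenceExample
# consumed above. It assumes tensorflow is available as tf and uses the
# 'int_list'/'bytes_list' sequence feature names seen elsewhere in these
# tests; the exact values and list lengths in the real helper are assumptions,
# apart from the context values the assertions above check
# (int_ctx == 5, float_ctx ~= 123.6).
def _make_sequence_example_sketch():
  example = tf.train.SequenceExample()
  # Context features checked by _test_parsed_sequence_example.
  example.context.feature['int_ctx'].int64_list.value.append(5)
  example.context.feature['float_ctx'].float_list.value.append(123.6)
  # Sequence features; seven steps so that index 6 exists for the [0, 2, 6]
  # value check.
  for i in range(7):
    int_feature = example.feature_lists.feature_list['int_list'].feature.add()
    int_feature.int64_list.value.append(i)
    bytes_feature = (
        example.feature_lists.feature_list['bytes_list'].feature.add())
    bytes_feature.bytes_list.value.append(('token_%d' % i).encode('utf-8'))
  return example
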
def _test_complete_flow_linear_fc_v1(self, train_input_fn, eval_input_fn,
                                     predict_input_fn, input_dimension,
                                     label_dimension, batch_size, fc_impl):
  del fc_impl
  linear_feature_columns = [
      feature_column._numeric_column('x', shape=(input_dimension,))
  ]
  dnn_feature_columns = [
      feature_column_v2.numeric_column('x', shape=(input_dimension,))
  ]
  feature_columns = linear_feature_columns + dnn_feature_columns
  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
                                  feature_spec, train_input_fn, eval_input_fn,
                                  predict_input_fn, input_dimension,
                                  label_dimension, batch_size)

def _build_feature_columns(self):
  col = fc._categorical_column_with_identity('int_ctx', num_buckets=100)
  ctx_cols = [
      fc._embedding_column(col, dimension=10),
      fc._numeric_column('float_ctx')
  ]
  identity_col = sfc.sequence_categorical_column_with_identity(
      'int_list', num_buckets=10)
  bucket_col = sfc.sequence_categorical_column_with_hash_bucket(
      'bytes_list', hash_bucket_size=100)
  seq_cols = [
      fc._embedding_column(identity_col, dimension=10),
      fc._embedding_column(bucket_col, dimension=20)
  ]
  return ctx_cols, seq_cols

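# Hedged sketch (not part of the original file): one way the columns returned
# by _build_feature_columns might be turned into dense tensors, assuming this
# module's `fc` alias exposes input_layer and `sfc` exposes
# sequence_input_layer (as the core and contrib feature_column modules do).
# `features` stands in for a parsed features dict keyed by
# 'int_ctx', 'float_ctx', 'int_list', and 'bytes_list'.
def _build_input_layers_sketch(self, features):
  ctx_cols, seq_cols = self._build_feature_columns()
  # Dense context input: concatenated context embeddings and numeric values.
  ctx_input = fc.input_layer(features, ctx_cols)
  # Dense sequence input plus the per-example sequence lengths.
  seq_input, seq_lengths = sfc.sequence_input_layer(features, seq_cols)
  return ctx_input, seq_input, seq_lengths
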
def _testAnnotationsPresentForEstimator(self, estimator_class):
  feature_columns = [
      feature_column._numeric_column('x', shape=(1,)),
      feature_column._embedding_column(
          feature_column._categorical_column_with_vocabulary_list(
              'y', vocabulary_list=['a', 'b', 'c']),
          dimension=3)
  ]
  estimator = estimator_class(
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      model_dir=self._model_dir)
  model_fn = estimator.model_fn

  graph = ops.Graph()
  with graph.as_default():
    model_fn({
        'x': array_ops.constant([1.0]),
        'y': array_ops.constant(['a'])
    }, {}, model_fn_lib.ModeKeys.PREDICT, config=None)

    unprocessed_features = self._getLayerAnnotationCollection(
        graph, dnn_with_layer_annotations.LayerAnnotationsCollectionNames
        .UNPROCESSED_FEATURES)
    processed_features = self._getLayerAnnotationCollection(
        graph, dnn_with_layer_annotations.LayerAnnotationsCollectionNames
        .PROCESSED_FEATURES)
    feature_columns = graph.get_collection(
        dnn_with_layer_annotations.LayerAnnotationsCollectionNames
        .FEATURE_COLUMNS)

    self.assertItemsEqual(unprocessed_features.keys(), ['x', 'y'])
    self.assertEqual(2, len(processed_features.keys()))
    self.assertEqual(2, len(feature_columns))