def _get_estimator(self,
                   train_distribute,
                   eval_distribute,
                   remote_cluster=None):
  input_dimension = LABEL_DIMENSION
  linear_feature_columns = [
      feature_column.numeric_column("x", shape=(input_dimension,))
  ]
  dnn_feature_columns = [
      feature_column.numeric_column("x", shape=(input_dimension,))
  ]

  return dnn_linear_combined.DNNLinearCombinedRegressor(
      linear_feature_columns=linear_feature_columns,
      dnn_hidden_units=(2, 2),
      dnn_feature_columns=dnn_feature_columns,
      label_dimension=LABEL_DIMENSION,
      model_dir=self._model_dir,
      dnn_optimizer=adagrad.AdagradOptimizer(0.001),
      linear_optimizer=adagrad.AdagradOptimizer(0.001),
      config=run_config_lib.RunConfig(
          experimental_distribute=DistributeConfig(
              train_distribute=train_distribute,
              eval_distribute=eval_distribute,
              remote_cluster=remote_cluster)))
def test_parse_features(self):
  """Tests the various behaviours of kmeans._parse_features_if_necessary."""

  # No-op if a tensor is passed in.
  features = constant_op.constant(self.points)
  parsed_features = kmeans_lib._parse_features_if_necessary(features, None)
  self.assertAllEqual(features, parsed_features)

  # All values from a feature dict are transformed into a tensor.
  feature_dict = {
      'x': [[point[0]] for point in self.points],
      'y': [[point[1]] for point in self.points]
  }
  parsed_feature_dict = kmeans_lib._parse_features_if_necessary(
      feature_dict, None)
  self._parse_feature_dict_helper(features, parsed_feature_dict)

  # Only the feature_columns of a feature dict are transformed into a tensor.
  feature_dict_with_extras = {
      'foo': 'bar',
      'x': [[point[0]] for point in self.points],
      'baz': {'fizz': 'buzz'},
      'y': [[point[1]] for point in self.points]
  }
  feature_columns = [fc.numeric_column(key='x'), fc.numeric_column(key='y')]
  parsed_feature_dict = kmeans_lib._parse_features_if_necessary(
      feature_dict_with_extras, feature_columns)
  self._parse_feature_dict_helper(features, parsed_feature_dict)
def testTrainEvaluateInferDoesNotThrowErrorWithNoDnnInput(self):
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 3
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  est = estimator.CoreDNNBoostedTreeCombinedEstimator(
      head=head_fn,
      dnn_hidden_units=[1],
      dnn_feature_columns=[core_feature_column.numeric_column("x")],
      tree_learner_config=learner_config,
      num_trees=1,
      tree_examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      dnn_steps_to_train=10,
      dnn_input_layer_to_tree=False,
      tree_feature_columns=[core_feature_column.numeric_column("x")])

  # Train for a few steps.
  est.train(input_fn=_train_input_fn, steps=1000)
  # 10 steps for dnn, 3 for 1 tree of depth 3 + 1 after the tree finished.
  self._assert_checkpoint(est.model_dir, global_step=15)
  res = est.evaluate(input_fn=_eval_input_fn, steps=1)
  self.assertLess(0.5, res["auc"])
  est.predict(input_fn=_eval_input_fn)
def testRankingDontThrowExceptionForForEstimator(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

  est = estimator.CoreGradientBoostedDecisionTreeRanker(
      head=head_fn,
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[
          core_feature_column.numeric_column("f1"),
          core_feature_column.numeric_column("f2")
      ],
      ranking_model_pair_keys=("a", "b"))

  # Train for a few steps.
  est.train(input_fn=_ranking_train_input_fn, steps=1000)
  est.evaluate(input_fn=_ranking_train_input_fn, steps=1)
  est.predict(input_fn=_infer_ranking_train_input_fn)
def testWithFeatureColumns(self):
  head_fn = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
      n_classes=3,
      loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

  hparams = tensor_forest.ForestHParams(
      num_trees=3,
      max_nodes=1000,
      num_classes=3,
      num_features=4,
      split_after_samples=20,
      inference_tree_paths=True)

  est = random_forest.CoreTensorForestEstimator(
      hparams.fill(),
      head=head_fn,
      feature_columns=[core_feature_column.numeric_column('x')])

  iris = base.load_iris()
  data = {'x': iris.data.astype(np.float32)}
  labels = iris.target.astype(np.int32)

  input_fn = numpy_io.numpy_input_fn(
      x=data, y=labels, batch_size=150, num_epochs=None, shuffle=False)

  est.train(input_fn=input_fn, steps=100)
  res = est.evaluate(input_fn=input_fn, steps=1)

  self.assertEqual(1.0, res['accuracy'])
  self.assertAllClose(0.55144483, res['loss'])
def testFitAndEvaluateMultiClassFullDontThrowException(self):
  n_classes = 3
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = n_classes
  learner_config.constraints.max_tree_depth = 1
  learner_config.multi_class_strategy = (
      learner_pb2.LearnerConfig.FULL_HESSIAN)

  head_fn = estimator.core_multiclass_head(n_classes=n_classes)

  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  classifier = estimator.CoreGradientBoostedDecisionTreeEstimator(
      learner_config=learner_config,
      head=head_fn,
      num_trees=1,
      center_bias=False,
      examples_per_layer=7,
      model_dir=model_dir,
      config=config,
      feature_columns=[core_feature_column.numeric_column("x")])

  classifier.train(input_fn=_multiclass_train_input_fn, steps=100)
  classifier.evaluate(input_fn=_multiclass_train_input_fn, steps=1)
  classifier.predict(input_fn=_eval_input_fn)
def test_linear_model_numpy_input_fn(self):
  price = fc.numeric_column('price')
  price_buckets = fc.bucketized_column(
      price, boundaries=[0., 10., 100.,])
  body_style = fc.categorical_column_with_vocabulary_list(
      'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])

  input_fn = numpy_io.numpy_input_fn(
      x={
          'price': np.array([-1., 2., 13., 104.]),
          'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
      },
      batch_size=2,
      shuffle=False)
  features = input_fn()
  net = fc.linear_model(features, [price_buckets, body_style])
  with self._initialized_session() as sess:
    coord = coordinator.Coordinator()
    threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

    bias = self._get_linear_model_bias()
    price_buckets_var = self._get_linear_model_column_var(price_buckets)
    body_style_var = self._get_linear_model_column_var(body_style)

    sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
    sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
    sess.run(bias.assign([5.]))

    self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net))

    coord.request_stop()
    coord.join(threads)
def test_ar_lstm_regressor(self):
  dtype = dtypes.float32
  model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
  exogenous_feature_columns = (
      feature_column.numeric_column("exogenous"),
  )
  estimator = estimators.LSTMAutoRegressor(
      periodicities=10,
      input_window_size=10,
      output_window_size=6,
      model_dir=model_dir,
      num_features=1,
      extra_feature_columns=exogenous_feature_columns,
      num_units=10,
      config=_SeedRunConfig())
  times = numpy.arange(20, dtype=numpy.int64)
  values = numpy.arange(20, dtype=dtype.as_numpy_dtype)
  exogenous = numpy.arange(20, dtype=dtype.as_numpy_dtype)
  features = {
      feature_keys.TrainEvalFeatures.TIMES: times,
      feature_keys.TrainEvalFeatures.VALUES: values,
      "exogenous": exogenous
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features),
      shuffle_seed=2,
      num_threads=1,
      batch_size=16,
      window_size=16)
  eval_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features),
      shuffle_seed=3,
      num_threads=1,
      batch_size=16,
      window_size=16)
  estimator.train(input_fn=train_input_fn, steps=1)
  evaluation = estimator.evaluate(input_fn=eval_input_fn, steps=1)
  self.assertAllEqual(evaluation["loss"], evaluation["average_loss"])
  self.assertAllEqual([], evaluation["loss"].shape)
def testFitAndEvaluateDontThrowExceptionWithCore(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  # Use core head.
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

  classifier = estimator.DNNBoostedTreeCombinedEstimator(
      head=head_fn,
      dnn_hidden_units=[1],
      # Use core feature columns.
      dnn_feature_columns=[core_feature_column.numeric_column("x")],
      tree_learner_config=learner_config,
      num_trees=1,
      tree_examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      dnn_steps_to_train=10,
      dnn_input_layer_to_tree=True,
      tree_feature_columns=[],
      use_core_versions=True)

  classifier.fit(input_fn=_train_input_fn, steps=15)
  classifier.evaluate(input_fn=_eval_input_fn, steps=1)
def _serving_input_receiver_fn():
  """A receiver function to be passed to export_savedmodel."""
  times_column = feature_column.numeric_column(
      key=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64)
  values_column = feature_column.numeric_column(
      key=feature_keys.TrainEvalFeatures.VALUES,
      dtype=values_input_dtype,
      shape=(self._model.num_features,))
  parsed_features_no_sequence = (
      feature_column.make_parse_example_spec(
          list(self._model.exogenous_feature_columns)
          + [times_column, values_column]))
  parsed_features = {}
  for key, feature_spec in parsed_features_no_sequence.items():
    if isinstance(feature_spec, parsing_ops.FixedLenFeature):
      if key == feature_keys.TrainEvalFeatures.VALUES:
        parsed_features[key] = feature_spec._replace(
            shape=((values_proto_length,) + feature_spec.shape))
      else:
        parsed_features[key] = feature_spec._replace(
            shape=((filtering_length + prediction_length,)
                   + feature_spec.shape))
    elif feature_spec.dtype == dtypes.string:
      parsed_features[key] = parsing_ops.FixedLenFeature(
          shape=(filtering_length + prediction_length,),
          dtype=dtypes.string)
    else:  # VarLenFeature
      raise ValueError("VarLenFeatures not supported, got %s for key %s"
                       % (feature_spec, key))
  tfexamples = array_ops.placeholder(
      shape=[default_batch_size], dtype=dtypes.string, name="input")
  features = parsing_ops.parse_example(
      serialized=tfexamples, features=parsed_features)
  features[feature_keys.TrainEvalFeatures.TIMES] = array_ops.squeeze(
      features[feature_keys.TrainEvalFeatures.TIMES], axis=-1)
  features[feature_keys.TrainEvalFeatures.VALUES] = math_ops.cast(
      features[feature_keys.TrainEvalFeatures.VALUES],
      dtype=self._model.dtype)[:, :filtering_length]
  features.update(
      self._model_start_state_placeholders(
          batch_size_tensor=array_ops.shape(
              features[feature_keys.TrainEvalFeatures.TIMES])[0],
          static_batch_size=default_batch_size))
  return export_lib.ServingInputReceiver(
      features, {"examples": tfexamples})
def test_subclassed_model_with_feature_columns(self):
  col_a = fc.numeric_column('a')
  col_b = fc.numeric_column('b')

  dnn_model = TestDNNModel([col_a, col_b], 20)

  dnn_model.compile(
      optimizer='rmsprop',
      loss='categorical_crossentropy',
      metrics=['accuracy'],
      run_eagerly=testing_utils.should_run_eagerly())

  x = {'a': np.random.random((10, 1)), 'b': np.random.random((10, 1))}
  y = np.random.randint(20, size=(10, 1))
  y = keras.utils.to_categorical(y, num_classes=20)
  dnn_model.fit(x=x, y=y, epochs=1, batch_size=5)
  dnn_model.fit(x=x, y=y, epochs=1, batch_size=5)
  dnn_model.evaluate(x=x, y=y, batch_size=5)
  dnn_model.predict(x=x, batch_size=5)
def test_subclassed_model_with_feature_columns_with_ds_input(self):
  col_a = fc.numeric_column('a')
  col_b = fc.numeric_column('b')

  dnn_model = TestDNNModel([col_a, col_b], 20)

  dnn_model.compile(
      optimizer='rmsprop',
      loss='categorical_crossentropy',
      metrics=['accuracy'],
      run_eagerly=testing_utils.should_run_eagerly())

  y = np.random.randint(20, size=(100, 1))
  y = keras.utils.to_categorical(y, num_classes=20)
  x = {'a': np.random.random((100, 1)), 'b': np.random.random((100, 1))}
  ds1 = dataset_ops.Dataset.from_tensor_slices(x)
  ds2 = dataset_ops.Dataset.from_tensor_slices(y)
  ds = dataset_ops.Dataset.zip((ds1, ds2)).batch(5)
  dnn_model.fit(ds, steps_per_epoch=1)
  dnn_model.fit(ds, steps_per_epoch=1)
  dnn_model.evaluate(ds, steps=1)
  dnn_model.predict(ds, steps=1)
def DISABLED_test_function_model_feature_layer_input(self):
  col_a = fc.numeric_column('a')
  col_b = fc.numeric_column('b')

  feature_layer = fc.DenseFeatures([col_a, col_b], name='fc')
  dense = keras.layers.Dense(4)

  # This seems problematic.... We probably need something for DenseFeatures
  # the way Input is for InputLayer.
  output = dense(feature_layer)

  model = keras.models.Model([feature_layer], [output])

  optimizer = 'rmsprop'
  loss = 'mse'
  loss_weights = [1., 0.5]
  model.compile(
      optimizer,
      loss,
      metrics=[metrics_module.CategoricalAccuracy(), 'mae'],
      loss_weights=loss_weights)

  data = ({'a': np.arange(10), 'b': np.arange(10)}, np.arange(10, 20))
  print(model.fit(*data, epochs=1))
def testTrainEvaluateWithDnnForInputAndTreeForPredict(self):
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 3
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  est = estimator.CoreDNNBoostedTreeCombinedEstimator(
      head=head_fn,
      dnn_hidden_units=[1],
      dnn_feature_columns=[core_feature_column.numeric_column("x")],
      tree_learner_config=learner_config,
      num_trees=1,
      tree_examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      dnn_steps_to_train=10,
      dnn_input_layer_to_tree=True,
      predict_with_tree_only=True,
      dnn_to_tree_distillation_param=(0.5, None),
      tree_feature_columns=[])

  # Train for a few steps.
  est.train(input_fn=_train_input_fn, steps=1000)
  res = est.evaluate(input_fn=_eval_input_fn, steps=1)
  self.assertLess(0.5, res["auc"])
  est.predict(input_fn=_eval_input_fn)
  serving_input_fn = (
      export.build_parsing_serving_input_receiver_fn(
          feature_spec={"x": parsing_ops.FixedLenFeature(
              [1], dtype=dtypes.float32)}))
  base_exporter = exporter.FinalExporter(
      name="Servo",
      serving_input_receiver_fn=serving_input_fn,
      assets_extra=None)
  export_path = os.path.join(model_dir, "export")
  base_exporter.export(
      est,
      export_path=export_path,
      checkpoint_path=None,
      eval_result={},
      is_the_final_export=True)
def test_functional_input_layer_with_numpy_input_fn(self):
  embedding_values = (
      (1., 2., 3., 4., 5.),  # id 0
      (6., 7., 8., 9., 10.),  # id 1
      (11., 12., 13., 14., 15.)  # id 2
  )

  def _initializer(shape, dtype, partition_info):
    del shape, dtype, partition_info
    return embedding_values

  # price has 1 dimension in input_layer.
  price = fc.numeric_column('price')
  body_style = fc.categorical_column_with_vocabulary_list(
      'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
  # one_hot_body_style has 3 dims in input_layer.
  one_hot_body_style = fc.indicator_column(body_style)
  # embedded_body_style has 5 dims in input_layer.
  embedded_body_style = fc.embedding_column(body_style, dimension=5,
                                            initializer=_initializer)

  input_fn = numpy_io.numpy_input_fn(
      x={
          'price': np.array([11., 12., 13., 14.]),
          'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
      },
      batch_size=2,
      shuffle=False)
  features = input_fn()
  net = fc.input_layer(features,
                       [price, one_hot_body_style, embedded_body_style])
  self.assertEqual(1 + 3 + 5, net.shape[1])
  with self._initialized_session() as sess:
    coord = coordinator.Coordinator()
    threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

    # Each row is formed by concatenating `embedded_body_style`,
    # `one_hot_body_style`, and `price` in order.
    self.assertAllEqual(
        [[11., 12., 13., 14., 15., 0., 0., 1., 11.],
         [1., 2., 3., 4., 5., 1., 0., 0., 12.]],
        sess.run(net))

    coord.request_stop()
    coord.join(threads)
def testFitAndEvaluateDontThrowExceptionWithCoreForClassifier(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  classifier = estimator.GradientBoostedDecisionTreeClassifier(
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[core_feature_column.numeric_column("x")],
      use_core_libs=True)

  classifier.fit(input_fn=_train_input_fn, steps=15)
  classifier.evaluate(input_fn=_eval_input_fn, steps=1)
  classifier.export(self._export_dir_base)
def test_sequential_model(self):
  columns = [fc.numeric_column('a')]
  model = keras.models.Sequential([
      fc.DenseFeatures(columns),
      keras.layers.Dense(64, activation='relu'),
      keras.layers.Dense(20, activation='softmax')
  ])
  model.compile(
      optimizer='rmsprop',
      loss='categorical_crossentropy',
      metrics=['accuracy'],
      run_eagerly=testing_utils.should_run_eagerly())

  x = {'a': np.random.random((10, 1))}
  y = np.random.randint(20, size=(10, 1))
  y = keras.utils.to_categorical(y, num_classes=20)
  model.fit(x, y, epochs=1, batch_size=5)
  model.fit(x, y, epochs=1, batch_size=5)
  model.evaluate(x, y, batch_size=5)
  model.predict(x, batch_size=5)
def test_sequential_model_with_ds_input(self):
  columns = [fc.numeric_column('a')]
  model = keras.models.Sequential([
      fc.DenseFeatures(columns),
      keras.layers.Dense(64, activation='relu'),
      keras.layers.Dense(20, activation='softmax')
  ])
  model.compile(
      optimizer='rmsprop',
      loss='categorical_crossentropy',
      metrics=['accuracy'],
      run_eagerly=testing_utils.should_run_eagerly())

  y = np.random.randint(20, size=(100, 1))
  y = keras.utils.to_categorical(y, num_classes=20)
  x = {'a': np.random.random((100, 1))}
  ds1 = dataset_ops.Dataset.from_tensor_slices(x)
  ds2 = dataset_ops.Dataset.from_tensor_slices(y)
  ds = dataset_ops.Dataset.zip((ds1, ds2)).batch(5)
  model.fit(ds, steps_per_epoch=1)
  model.fit(ds, steps_per_epoch=1)
  model.evaluate(ds, steps=1)
  model.predict(ds, steps=1)
def testTrainEvaluateInferDoesNotThrowError(self):
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  est = estimator.CoreGradientBoostedDecisionTreeEstimator(
      head=head_fn,
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[core_feature_column.numeric_column("x")])

  # Train for a few steps.
  est.train(input_fn=_train_input_fn, steps=1000)
  est.evaluate(input_fn=_eval_input_fn, steps=1)
  est.predict(input_fn=_eval_input_fn)
def test_sequential_model_with_crossed_column(self):
  feature_columns = []
  age_buckets = fc.bucketized_column(
      fc.numeric_column('age'),
      boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
  feature_columns.append(age_buckets)

  # indicator cols
  thal = fc.categorical_column_with_vocabulary_list(
      'thal', ['fixed', 'normal', 'reversible'])

  crossed_feature = fc.crossed_column([age_buckets, thal],
                                      hash_bucket_size=1000)
  crossed_feature = fc.indicator_column(crossed_feature)
  feature_columns.append(crossed_feature)

  feature_layer = fc.DenseFeatures(feature_columns)

  model = keras.models.Sequential([
      feature_layer,
      keras.layers.Dense(128, activation='relu'),
      keras.layers.Dense(128, activation='relu'),
      keras.layers.Dense(1, activation='sigmoid')
  ])

  age_data = np.random.randint(10, 100, size=100)
  thal_data = np.random.choice(['fixed', 'normal', 'reversible'], size=100)
  inp_x = {'age': age_data, 'thal': thal_data}
  inp_y = np.random.randint(0, 1, size=100)
  ds = dataset_ops.Dataset.from_tensor_slices((inp_x, inp_y)).batch(5)

  model.compile(
      optimizer='adam',
      loss='binary_crossentropy',
      metrics=['accuracy'])
  model.fit(ds, epochs=1)
  model.fit(ds, epochs=1)
  model.evaluate(ds)
  model.predict(ds)
def test_saving_with_dense_features(self):
  cols = [
      feature_column_lib.numeric_column('a'),
      feature_column_lib.indicator_column(
          feature_column_lib.categorical_column_with_vocabulary_list(
              'b', ['one', 'two']))
  ]
  input_layers = {
      'a': keras.layers.Input(shape=(1,), name='a'),
      'b': keras.layers.Input(shape=(1,), name='b', dtype='string')
  }

  fc_layer = dense_features.DenseFeatures(cols)(input_layers)
  output = keras.layers.Dense(10)(fc_layer)

  model = keras.models.Model(input_layers, output)

  model.compile(
      loss=keras.losses.MSE,
      optimizer='rmsprop',
      metrics=[keras.metrics.categorical_accuracy])

  config = model.to_json()
  loaded_model = model_config.model_from_json(config)

  inputs_a = np.arange(10).reshape(10, 1)
  inputs_b = np.arange(10).reshape(10, 1).astype('str')

  with self.cached_session():
    # Initialize tables for V1 lookup.
    if not context.executing_eagerly():
      self.evaluate(lookup_ops.tables_initializer())

    self.assertLen(
        loaded_model.predict({'a': inputs_a, 'b': inputs_b}), 10)
def test_raise_error_with_multi_worker(self):
  tf_config = {
      'cluster': {
          run_config_lib.TaskType.CHIEF: ['host0:0'],
          run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
      },
      'task': {
          'type': run_config_lib.TaskType.CHIEF,
          'index': 0
      }
  }
  with test.mock.patch.dict('os.environ',
                            {'TF_CONFIG': json.dumps(tf_config)}):
    dnn = estimator_lib.DNNClassifier(
        feature_columns=[feature_column_lib.numeric_column('x')],
        hidden_units=[3, 1])

    def eval_input_fn():
      pass

    with self.assertRaisesRegexp(ValueError, 'supports only single machine'):
      hooks_lib.InMemoryEvaluatorHook(dnn, eval_input_fn)
def test_string_input(self):
  x = {
      'age': np.random.random((1024, 1)),
      'cabin': np.array(['a'] * 1024)
  }
  y = np.random.randint(2, size=(1024, 1))
  ds1 = dataset_ops.Dataset.from_tensor_slices(x)
  ds2 = dataset_ops.Dataset.from_tensor_slices(y)
  dataset = dataset_ops.Dataset.zip((ds1, ds2)).batch(4)

  categorical_cols = [fc.categorical_column_with_hash_bucket('cabin', 10)]
  feature_cols = ([fc.numeric_column('age')]
                  + [fc.indicator_column(cc) for cc in categorical_cols])
  layers = [
      fc.DenseFeatures(feature_cols),
      keras.layers.Dense(128),
      keras.layers.Dense(1)
  ]

  model = keras.models.Sequential(layers)
  model.compile(keras.optimizers.SGD(0.1),
                loss=keras.losses.BinaryCrossentropy())
  model.fit(dataset)
def test_forward_in_exported(self):

  def serving_input_fn():
    features_ph = {
        'x': array_ops.placeholder(dtypes.float32, [None]),
        'id': array_ops.placeholder(dtypes.int32, [None])
    }
    features = {
        key: array_ops.expand_dims(tensor, -1)
        for key, tensor in features_ph.items()
    }
    return estimator_lib.export.ServingInputReceiver(features, features_ph)

  def input_fn():
    return {'x': [[3.], [5.]], 'id': [[101], [102]]}, [[1.], [2.]]

  # create estimator
  feature_columns = [fc.numeric_column('x')]
  estimator = linear.LinearRegressor(feature_columns)
  estimator.train(input_fn=input_fn, steps=1)
  estimator = extenders.forward_features(estimator, 'id')

  # export saved model
  export_dir, tmpdir = self._export_estimator(estimator, serving_input_fn)

  # restore model
  predict_fn = from_saved_model(export_dir, signature_def_key='predict')
  predictions = predict_fn({'x': [3], 'id': [101]})

  # verify that 'id' exists in predictions
  self.assertIn('id', predictions)
  self.assertEqual(101, predictions['id'])

  # Clean up.
  gfile.DeleteRecursively(tmpdir)
def testFitAndEvaluateDontThrowExceptionWithCoreForEstimator(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  # Use core head.
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

  model = estimator.GradientBoostedDecisionTreeEstimator(
      head=head_fn,
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[core_feature_column.numeric_column("x")],
      use_core_libs=True)

  model.fit(input_fn=_train_input_fn, steps=15)
  model.evaluate(input_fn=_eval_input_fn, steps=1)
  model.export(self._export_dir_base)
def test_sequential_model_with_ds_input(self):
  columns = [fc.numeric_column('a')]
  model = keras.models.Sequential([
      fc.DenseFeatures(columns),
      keras.layers.Dense(64, activation='relu'),
      keras.layers.Dense(20, activation='softmax')
  ])
  model.compile(
      optimizer='rmsprop',
      loss='categorical_crossentropy',
      metrics=['accuracy'],
      run_eagerly=testing_utils.should_run_eagerly(),
      experimental_run_tf_function=testing_utils.should_run_tf_function())

  y = np.random.randint(20, size=(100, 1))
  y = keras.utils.to_categorical(y, num_classes=20)
  x = {'a': np.random.random((100, 1))}
  ds1 = dataset_ops.Dataset.from_tensor_slices(x)
  ds2 = dataset_ops.Dataset.from_tensor_slices(y)
  ds = dataset_ops.Dataset.zip((ds1, ds2)).batch(5)
  model.fit(ds, steps_per_epoch=1)
  model.fit(ds, steps_per_epoch=1)
  model.evaluate(ds, steps=1)
  model.predict(ds, steps=1)
def test_key_should_be_list_of_string(self):
  estimator = linear.LinearRegressor([fc.numeric_column('x')])
  with self.assertRaisesRegexp(TypeError, 'should be a string'):
    extenders.forward_features(estimator, ['x', estimator])
def test_one_shot_prediction_head_export(self, estimator_factory):

  def _new_temp_dir():
    return os.path.join(test.get_temp_dir(), str(ops.uid()))

  model_dir = _new_temp_dir()
  categorical_column = feature_column.categorical_column_with_hash_bucket(
      key="categorical_exogenous_feature", hash_bucket_size=16)
  exogenous_feature_columns = [
      feature_column.numeric_column(
          "2d_exogenous_feature", shape=(2,)),
      feature_column.embedding_column(
          categorical_column=categorical_column, dimension=10)]
  estimator = estimator_factory(
      model_dir=model_dir,
      exogenous_feature_columns=exogenous_feature_columns,
      head_type=ts_head_lib.OneShotPredictionHead)
  train_features = {
      feature_keys.TrainEvalFeatures.TIMES: numpy.arange(
          20, dtype=numpy.int64),
      feature_keys.TrainEvalFeatures.VALUES: numpy.tile(numpy.arange(
          20, dtype=numpy.float32)[:, None], [1, 5]),
      "2d_exogenous_feature": numpy.ones([20, 2]),
      "categorical_exogenous_feature": numpy.array(
          ["strkey"] * 20)[:, None]
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(train_features),
      shuffle_seed=2,
      num_threads=1,
      batch_size=16,
      window_size=16)
  estimator.train(input_fn=train_input_fn, steps=5)
  result = estimator.evaluate(input_fn=train_input_fn, steps=1)
  self.assertIn("average_loss", result)
  self.assertNotIn(feature_keys.State.STATE_TUPLE, result)
  input_receiver_fn = estimator.build_raw_serving_input_receiver_fn()
  export_location = estimator.export_saved_model(_new_temp_dir(),
                                                 input_receiver_fn)
  graph = ops.Graph()
  with graph.as_default():
    with session_lib.Session() as session:
      signatures = loader.load(
          session, [tag_constants.SERVING], export_location)
      self.assertEqual([feature_keys.SavedModelLabels.PREDICT],
                       list(signatures.signature_def.keys()))
      predict_signature = signatures.signature_def[
          feature_keys.SavedModelLabels.PREDICT]
      six.assertCountEqual(
          self,
          [feature_keys.FilteringFeatures.TIMES,
           feature_keys.FilteringFeatures.VALUES,
           "2d_exogenous_feature",
           "categorical_exogenous_feature"],
          predict_signature.inputs.keys())
      features = {
          feature_keys.TrainEvalFeatures.TIMES: numpy.tile(
              numpy.arange(35, dtype=numpy.int64)[None, :], [2, 1]),
          feature_keys.TrainEvalFeatures.VALUES: numpy.tile(numpy.arange(
              20, dtype=numpy.float32)[None, :, None], [2, 1, 5]),
          "2d_exogenous_feature": numpy.ones([2, 35, 2]),
          "categorical_exogenous_feature": numpy.tile(numpy.array(
              ["strkey"] * 35)[None, :, None], [2, 1, 1])
      }
      feeds = {
          graph.as_graph_element(input_value.name): features[input_key]
          for input_key, input_value in predict_signature.inputs.items()}
      fetches = {output_key: graph.as_graph_element(output_value.name)
                 for output_key, output_value
                 in predict_signature.outputs.items()}
      output = session.run(fetches, feed_dict=feeds)
      self.assertEqual((2, 15, 5), output["mean"].shape)

  # Build a parsing input function, then make a tf.Example for it to parse.
  export_location = estimator.export_saved_model(
      _new_temp_dir(),
      estimator.build_one_shot_parsing_serving_input_receiver_fn(
          filtering_length=20, prediction_length=15))
  graph = ops.Graph()
  with graph.as_default():
    with session_lib.Session() as session:
      example = example_pb2.Example()
      times = example.features.feature[feature_keys.TrainEvalFeatures.TIMES]
      values = example.features.feature[feature_keys.TrainEvalFeatures.VALUES]
      times.int64_list.value.extend(range(35))
      for i in range(20):
        values.float_list.value.extend(
            [float(i) * 2. + feature_number
             for feature_number in range(5)])
      real_feature = example.features.feature["2d_exogenous_feature"]
      categorical_feature = example.features.feature[
          "categorical_exogenous_feature"]
      for i in range(35):
        real_feature.float_list.value.extend([1, 1])
        categorical_feature.bytes_list.value.append(b"strkey")
      # Serialize the tf.Example for feeding to the Session.
      examples = [example.SerializeToString()] * 2
      signatures = loader.load(
          session, [tag_constants.SERVING], export_location)
      predict_signature = signatures.signature_def[
          feature_keys.SavedModelLabels.PREDICT]
      ((_, input_value),) = predict_signature.inputs.items()
      feeds = {graph.as_graph_element(input_value.name): examples}
      fetches = {output_key: graph.as_graph_element(output_value.name)
                 for output_key, output_value
                 in predict_signature.outputs.items()}
      output = session.run(fetches, feed_dict=feeds)
      self.assertEqual((2, 15, 5), output["mean"].shape)
def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate):
  label_dimension = 2
  input_dimension = label_dimension
  batch_size = 10
  data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
  data = data.reshape(batch_size, label_dimension)
  train_input_fn = self.dataset_input_fn(
      x={'x': data},
      y=data,
      batch_size=batch_size // len(distribution.worker_devices))
  eval_input_fn = self.dataset_input_fn(
      x={'x': data},
      y=data,
      batch_size=batch_size // len(distribution.worker_devices))
  predict_input_fn = numpy_io.numpy_input_fn(
      x={'x': data}, batch_size=batch_size, shuffle=False)

  linear_feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  dnn_feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  feature_columns = linear_feature_columns + dnn_feature_columns
  session_config = config_pb2.ConfigProto(
      log_device_placement=True, allow_soft_placement=True)
  estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
      linear_feature_columns=linear_feature_columns,
      dnn_hidden_units=(2, 2),
      dnn_feature_columns=dnn_feature_columns,
      label_dimension=label_dimension,
      model_dir=self._model_dir,
      dnn_optimizer=adam.Adam(0.001),
      linear_optimizer=adam.Adam(0.001),
      config=run_config.RunConfig(
          train_distribute=distribution,
          eval_distribute=distribution,
          session_config=session_config))

  num_steps = 2
  if use_train_and_evaluate:
    scores, _ = training.train_and_evaluate(
        estimator,
        training.TrainSpec(train_input_fn, max_steps=num_steps),
        training.EvalSpec(eval_input_fn))
  else:
    estimator.train(train_input_fn, steps=num_steps)
    scores = estimator.evaluate(eval_input_fn)

  self.assertIn('loss', six.iterkeys(scores))

  predictions = np.array([
      x[prediction_keys.PredictionKeys.PREDICTIONS]
      for x in estimator.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, label_dimension), predictions.shape)

  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                           serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def _fit_restore_fit_test_template(self, estimator_fn, dtype):
  """Tests restoring previously fit models."""
  model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
  exogenous_feature_columns = (
      feature_column.numeric_column("exogenous"),
  )
  first_estimator = estimator_fn(model_dir, exogenous_feature_columns)
  times = numpy.arange(20, dtype=numpy.int64)
  values = numpy.arange(20, dtype=dtype.as_numpy_dtype)
  exogenous = numpy.arange(20, dtype=dtype.as_numpy_dtype)
  features = {
      feature_keys.TrainEvalFeatures.TIMES: times,
      feature_keys.TrainEvalFeatures.VALUES: values,
      "exogenous": exogenous
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features),
      shuffle_seed=2,
      num_threads=1,
      batch_size=16,
      window_size=16)
  eval_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features),
      shuffle_seed=3,
      num_threads=1,
      batch_size=16,
      window_size=16)
  first_estimator.train(input_fn=train_input_fn, steps=1)
  first_evaluation = first_estimator.evaluate(
      input_fn=eval_input_fn, steps=1)
  first_loss_before_fit = first_evaluation["loss"]
  self.assertAllEqual(first_loss_before_fit, first_evaluation["average_loss"])
  self.assertAllEqual([], first_loss_before_fit.shape)
  first_estimator.train(input_fn=train_input_fn, steps=1)
  first_loss_after_fit = first_estimator.evaluate(
      input_fn=eval_input_fn, steps=1)["loss"]
  self.assertAllEqual([], first_loss_after_fit.shape)
  second_estimator = estimator_fn(model_dir, exogenous_feature_columns)
  second_estimator.train(input_fn=train_input_fn, steps=1)
  whole_dataset_input_fn = input_pipeline.WholeDatasetInputFn(
      input_pipeline.NumpyReader(features))
  whole_dataset_evaluation = second_estimator.evaluate(
      input_fn=whole_dataset_input_fn, steps=1)
  exogenous_values_ten_steps = {
      "exogenous": numpy.arange(
          10, dtype=dtype.as_numpy_dtype)[None, :, None]
  }
  predict_input_fn = input_pipeline.predict_continuation_input_fn(
      evaluation=whole_dataset_evaluation,
      exogenous_features=exogenous_values_ten_steps,
      steps=10)
  # Also tests that limit_epochs in predict_continuation_input_fn prevents
  # infinite iteration.
  (estimator_predictions,
  ) = list(second_estimator.predict(input_fn=predict_input_fn))
  self.assertAllEqual([10, 1], estimator_predictions["mean"].shape)
  input_receiver_fn = first_estimator.build_raw_serving_input_receiver_fn()
  export_location = first_estimator.export_saved_model(
      self.get_temp_dir(), input_receiver_fn)
  with ops.Graph().as_default():
    with session.Session() as sess:
      signatures = loader.load(sess, [tag_constants.SERVING], export_location)
      # Test that prediction and filtering can continue from evaluation
      # output.
      saved_prediction = saved_model_utils.predict_continuation(
          continue_from=whole_dataset_evaluation,
          steps=10,
          exogenous_features=exogenous_values_ten_steps,
          signatures=signatures,
          session=sess)
      # Saved model predictions should be the same as Estimator predictions
      # starting from the same evaluation.
      for prediction_key, prediction_value in estimator_predictions.items():
        self.assertAllClose(
            prediction_value,
            numpy.squeeze(saved_prediction[prediction_key], axis=0))
      first_filtering = saved_model_utils.filter_continuation(
          continue_from=whole_dataset_evaluation,
          features={
              feature_keys.FilteringFeatures.TIMES: times[None, -1] + 2,
              feature_keys.FilteringFeatures.VALUES: values[None, -1] + 2.,
              "exogenous": values[None, -1, None] + 12.
          },
          signatures=signatures,
          session=sess)
      # Test that prediction and filtering can continue from filtering output.
      second_saved_prediction = saved_model_utils.predict_continuation(
          continue_from=first_filtering,
          steps=1,
          exogenous_features={
              "exogenous": numpy.arange(
                  1, dtype=dtype.as_numpy_dtype)[None, :, None]
          },
          signatures=signatures,
          session=sess)
      self.assertEqual(
          times[-1] + 3,
          numpy.squeeze(
              second_saved_prediction[feature_keys.PredictionResults.TIMES]))
      saved_model_utils.filter_continuation(
          continue_from=first_filtering,
          features={
              feature_keys.FilteringFeatures.TIMES: times[-1] + 3,
              feature_keys.FilteringFeatures.VALUES: values[-1] + 3.,
              "exogenous": values[-1, None] + 13.
          },
          signatures=signatures,
          session=sess)
      # Test cold starting.
      six.assertCountEqual(
          self,
          [feature_keys.FilteringFeatures.TIMES,
           feature_keys.FilteringFeatures.VALUES,
           "exogenous"],
          signatures.signature_def[
              feature_keys.SavedModelLabels.COLD_START_FILTER].inputs.keys())
      batch_numpy_times = numpy.tile(
          numpy.arange(30, dtype=numpy.int64)[None, :], (10, 1))
      batch_numpy_values = numpy.ones([10, 30, 1])
      state = saved_model_utils.cold_start_filter(
          signatures=signatures,
          session=sess,
          features={
              feature_keys.FilteringFeatures.TIMES: batch_numpy_times,
              feature_keys.FilteringFeatures.VALUES: batch_numpy_values,
              "exogenous": 10. + batch_numpy_values
          })
      predict_times = numpy.tile(
          numpy.arange(30, 45, dtype=numpy.int64)[None, :], (10, 1))
      predictions = saved_model_utils.predict_continuation(
          continue_from=state,
          times=predict_times,
          exogenous_features={
              "exogenous": numpy.tile(numpy.arange(
                  15, dtype=dtype.as_numpy_dtype), (10,))[None, :, None]
          },
          signatures=signatures,
          session=sess)
      self.assertAllEqual([10, 15, 1], predictions["mean"].shape)
def DISABLED_test_function_model_multiple_feature_layer_inputs(self):
  col_a = fc.numeric_column('a')
  col_b = fc.numeric_column('b')
  col_c = fc.numeric_column('c')

  fc1 = fc.DenseFeatures([col_a, col_b], name='fc1')
  fc2 = fc.DenseFeatures([col_b, col_c], name='fc2')
  dense = keras.layers.Dense(4)

  # This seems problematic.... We probably need something for DenseFeatures
  # the way Input is for InputLayer.
  output = dense(fc1) + dense(fc2)

  model = keras.models.Model([fc1, fc2], [output])

  optimizer = 'rmsprop'
  loss = 'mse'
  loss_weights = [1., 0.5]
  model.compile(
      optimizer,
      loss,
      metrics=[metrics_module.CategoricalAccuracy(), 'mae'],
      loss_weights=loss_weights)

  data_list = ([{
      'a': np.arange(10),
      'b': np.arange(10)
  }, {
      'b': np.arange(10),
      'c': np.arange(10)
  }], np.arange(10, 100))
  print(model.fit(*data_list, epochs=1))

  data_bloated_list = ([{
      'a': np.arange(10),
      'b': np.arange(10),
      'c': np.arange(10)
  }, {
      'a': np.arange(10),
      'b': np.arange(10),
      'c': np.arange(10)
  }], np.arange(10, 100))
  print(model.fit(*data_bloated_list, epochs=1))

  data_dict = ({
      'fc1': {
          'a': np.arange(10),
          'b': np.arange(10)
      },
      'fc2': {
          'b': np.arange(10),
          'c': np.arange(10)
      }
  }, np.arange(10, 100))
  print(model.fit(*data_dict, epochs=1))

  data_bloated_dict = ({
      'fc1': {
          'a': np.arange(10),
          'b': np.arange(10),
          'c': np.arange(10)
      },
      'fc2': {
          'a': np.arange(10),
          'b': np.arange(10),
          'c': np.arange(10)
      }
  }, np.arange(10, 100))
  print(model.fit(*data_bloated_dict, epochs=1))
def get_weights_and_check_match_logits(features,
                                       weight_column,
                                       logits,
                                       allow_per_logit_weights=False):
  """Fetches weights from features and checks that the shape matches logits.

  Consider logits of shape [D0, D1, ... DN, logits_dimension]. Weights shape
  can be either:
  * [D0, D1, ... DN, logits_dimension] if `allow_per_logit_weights=True`.
  * [D0, D1, ... DN, 1]
  * [D0, D1, ... DN]: In this case, weights is reshaped into
    [D0, D1, ... DN, 1] to work with weight broadcasting rules.

  Args:
    features: The features dict that contains weights.
    weight_column: The weight column. If not given, this method returns 1.
    logits: logits Tensor.
    allow_per_logit_weights: Boolean. Whether we allow weights along the
      logits dimension, namely shape `[D0, D1, ... DN, logits_dimension]`.

  Returns:
    Validated and reshaped weights Tensor.

  Raises:
    ValueError: If the weights `Tensor` cannot be cast into float.
  """
  if allow_per_logit_weights:
    err_msg = (
        'weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or '
        '[D0, D1, ... DN, logits_dimension]')
  else:
    err_msg = (
        'weights shape must be [D0, D1, ... DN] or [D0, D1, ... DN, 1]')
  with ops.name_scope(
      'weights',
      values=tuple(six.itervalues(features)) + (logits,)) as scope:
    # Fetch the weights.
    if weight_column is None:
      return 1.
    # TODO(b/117839674): update feature_column
    if isinstance(weight_column, six.string_types):
      weight_column = feature_column_lib.numeric_column(
          key=weight_column, shape=(1,))
    if not isinstance(weight_column,
                      (feature_column_lib.NumericColumn, _NumericColumn)):
      raise TypeError('Weight column must be either a string or NumericColumn.'
                      ' Given type: {}.'.format(type(weight_column)))
    weights = weight_column._get_dense_tensor(  # pylint: disable=protected-access
        _LazyBuilder(features))
    if not (weights.dtype.is_floating or weights.dtype.is_integer):
      raise ValueError('Weight column should be castable to float. '
                       'Given dtype: {}'.format(weights.dtype))
    weights = math_ops.to_float(weights, name='weights')

    # Validate the weights shape.
    # Eager mode.
    if context.executing_eagerly():
      weights_shape = weights._shape_tuple()  # pylint: disable=protected-access
      logits_shape = logits._shape_tuple()  # pylint: disable=protected-access
      weights_rank = weights._rank()  # pylint: disable=protected-access
      logits_rank = logits._rank()  # pylint: disable=protected-access
      if (weights_rank is not None and logits_rank is not None and
          weights_rank == logits_rank - 1):
        if logits_shape[:-1] != weights_shape:
          raise ValueError('{}, logits_shape: {}. weights_shape: {}.'.format(
              err_msg, logits_shape, weights_shape))
        return array_ops.expand_dims(weights, -1, name=scope)
      supported_weights_shape = logits_shape[:-1] + (1,)
      if allow_per_logit_weights:
        if (logits_shape != weights_shape and
            supported_weights_shape != weights_shape):
          raise ValueError('{}, logits_shape: {}. weights_shape: {}.'.format(
              err_msg, logits_shape, weights_shape))
      else:
        if supported_weights_shape != weights_shape:
          raise ValueError('{}, logits_shape: {}. weights_shape: {}.'.format(
              err_msg, logits_shape, weights_shape))
      return weights

    # Graph mode.
    weights_shape = array_ops.shape(weights, name='weights_shape')
    logits_shape = array_ops.shape(logits, name='logits_shape')
    if (weights.shape.ndims is not None and logits.shape.ndims is not None and
        weights.shape.ndims == logits.shape.ndims - 1):
      assert_dimension = check_ops.assert_equal(
          logits_shape[:-1], weights_shape, message=err_msg,
          data=['logits_shape: ', logits_shape,
                'weights_shape: ', weights_shape])
      with ops.control_dependencies([assert_dimension]):
        return array_ops.expand_dims(weights, -1, name=scope)
    supported_weights_shape = array_ops.concat([logits_shape[:-1], [1]],
                                               axis=0)
    if allow_per_logit_weights:
      condition = math_ops.reduce_any(
          [math_ops.reduce_all(math_ops.equal(logits_shape, weights_shape)),
           math_ops.reduce_all(
               math_ops.equal(supported_weights_shape, weights_shape))])
      assert_dimension = control_flow_ops.Assert(
          condition=condition,
          data=[err_msg, 'logits_shape: ', logits_shape,
                'weights_shape: ', weights_shape])
    else:
      assert_dimension = check_ops.assert_equal(
          supported_weights_shape, weights_shape, message=err_msg,
          data=['logits_shape: ', logits_shape,
                'weights_shape: ', weights_shape])
    with ops.control_dependencies([assert_dimension]):
      return array_ops.identity(weights, name=scope)
def testBiasAndOtherColumns(self):
  """Tests LinearRegressor with LinearSDCA and validates bias weight."""

  def input_fn():
    """Testing the bias weight when there are other features present.

    1/2 of the instances in this input have feature 'a', the rest have
    feature 'b', and we expect the bias to be added to each instance as well.
    0.4 of all instances that have feature 'a' are positive, and 0.2 of all
    instances that have feature 'b' are positive. The labels in the dataset
    are ordered to appear shuffled since SDCA expects shuffled data, and
    converges faster with this pseudo-random ordering.

    If the bias was not regularized we would expect the weights to be:
    bias: 0.3
    a: 0.1
    b: -0.1
    But with bias regularization the optimal values are:
    bias: 0.2
    a: 0.2
    b: 0.0

    Returns:
      The test dataset.
    """
    num_examples = 200
    half = int(num_examples / 2)
    return {
        'example_id':
            constant_op.constant([str(x + 1) for x in range(num_examples)]),
        'a':
            constant_op.constant([[1]] * int(half) + [[0]] * int(half)),
        'b':
            constant_op.constant([[0]] * int(half) + [[1]] * int(half)),
    }, constant_op.constant(
        [[x]
         for x in [1, 0, 0, 1, 1, 0, 0, 0, 1, 0] * int(half / 10) +
         [0, 1, 0, 0, 0, 0, 0, 0, 1, 0] * int(half / 10)])

  optimizer = linear.LinearSDCA(
      example_id_column='example_id', symmetric_l2_regularization=0.1)
  regressor = linear.LinearRegressorV2(
      feature_columns=[
          feature_column_lib.numeric_column('a'),
          feature_column_lib.numeric_column('b')
      ],
      optimizer=optimizer)

  regressor.train(input_fn=input_fn, steps=200)

  variable_names = regressor.get_variable_names()
  self.assertIn('linear/linear_model/bias_weights', variable_names)
  self.assertIn('linear/linear_model/a/weights', variable_names)
  self.assertIn('linear/linear_model/b/weights', variable_names)
  # TODO(b/29339026): Change the expected results to expect a centered bias.
  self.assertNear(
      regressor.get_variable_value('linear/linear_model/bias_weights')[0],
      0.2, err=0.05)
  self.assertNear(
      regressor.get_variable_value('linear/linear_model/a/weights')[0],
      0.2, err=0.05)
  self.assertNear(
      regressor.get_variable_value('linear/linear_model/b/weights')[0],
      0.0, err=0.05)
import sys

import numpy
import tensorflow as tf

# Aliases so the identifiers used below resolve without extra package-internal
# imports; tf.feature_column and tf.estimator.LinearRegressor provide the same
# numeric_column and LinearRegressor symbols.
feature_column_lib = tf.feature_column
LinearRegressor = tf.estimator.LinearRegressor

sys.stderr.write("Using TensorFlow " + tf.__version__ + "\n")

mtcars_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={
        "disp": numpy.array([
            160, 160, 108, 258, 360, 225, 360, 146.7, 140.8, 167.6, 167.6,
            275.8, 275.8, 275.8, 472, 460, 440, 78.7, 75.7, 71.1, 120.1, 318,
            304, 350, 400, 79, 120.3, 95.1, 351, 145, 301, 121
        ]),
        "cyl": numpy.array([
            6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,
            8, 8, 8, 4, 4, 4, 8, 6, 8, 4
        ])
    },
    y=numpy.array([
        21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8, 16.4,
        17.3, 15.2, 10.4, 10.4, 14.7, 32.4, 30.4, 33.9, 21.5, 15.5, 15.2,
        13.3, 19.2, 27.3, 26, 30.4, 15.8, 19.7, 15, 21.4
    ]),
    num_epochs=None,
    shuffle=True)

estimator = LinearRegressor(
    feature_columns=[
        feature_column_lib.numeric_column(
            key="disp", shape=[1], dtype=tf.float32),
        feature_column_lib.numeric_column(
            key="cyl", shape=[1], dtype=tf.float32)
    ])

sys.stderr.write("Train Start\n")
estimator.train(input_fn=mtcars_input_fn, steps=2000)
sys.stderr.write("Train End\n")
def _fit_restore_fit_test_template(self, estimator_fn, test_saved_model):
  """Tests restoring previously fit models."""
  temp_dir = self.get_temp_dir()
  model_dir = tempfile.mkdtemp(dir=temp_dir)
  exogenous_feature_columns = (
      feature_column.numeric_column("exogenous"),
  )
  first_estimator = estimator_fn(model_dir, exogenous_feature_columns)
  train_input_fn = _build_input_fn_with_seed(2)
  eval_input_fn = _build_input_fn_with_seed(3)
  first_estimator.train(input_fn=train_input_fn, steps=1)
  first_evaluation = first_estimator.evaluate(input_fn=eval_input_fn, steps=1)
  first_loss_before_fit = first_evaluation["loss"]
  self.assertAllEqual(first_loss_before_fit, first_evaluation["average_loss"])
  self.assertAllEqual([], first_loss_before_fit.shape)
  first_estimator.train(input_fn=train_input_fn, steps=1)
  first_loss_after_fit = first_estimator.evaluate(
      input_fn=eval_input_fn, steps=1)["loss"]
  self.assertAllEqual([], first_loss_after_fit.shape)
  second_estimator = estimator_fn(model_dir, exogenous_feature_columns)
  second_estimator.train(input_fn=train_input_fn, steps=1)
  second_evaluation = second_estimator.evaluate(
      input_fn=eval_input_fn, steps=1)
  exogenous_values_ten_steps = {
      "exogenous": math_ops.range(10, dtype=dtypes.float32)[None, :, None]
  }
  input_receiver_fn = first_estimator.build_raw_serving_input_receiver_fn()
  export_location = first_estimator.export_saved_model(
      temp_dir, input_receiver_fn)
  if not test_saved_model:
    return
  with ops.Graph().as_default():
    with session.Session() as sess:
      signatures = loader.load(sess, [tag_constants.SERVING], export_location)
      # Test that prediction and filtering can continue from evaluation
      # output.
      _ = saved_model_utils.predict_continuation(
          continue_from=second_evaluation,
          steps=10,
          exogenous_features=exogenous_values_ten_steps,
          signatures=signatures,
          session=sess)
      times, values, _ = _generate_data()
      first_filtering = saved_model_utils.filter_continuation(
          continue_from=second_evaluation,
          features={
              feature_keys.FilteringFeatures.TIMES: times[None, -1] + 2,
              feature_keys.FilteringFeatures.VALUES: values[None, -1] + 2.,
              "exogenous": values[None, -1, None] + 12.
          },
          signatures=signatures,
          session=sess)
      # Test that prediction and filtering can continue from filtering output.
      second_saved_prediction = saved_model_utils.predict_continuation(
          continue_from=first_filtering,
          steps=1,
          exogenous_features={
              "exogenous":
                  math_ops.range(1, dtype=dtypes.float32)[None, :, None]
          },
          signatures=signatures,
          session=sess)
      self.assertEqual(
          times[-1] + 3,
          array_ops.squeeze(
              second_saved_prediction[feature_keys.PredictionResults.TIMES]))
      saved_model_utils.filter_continuation(
          continue_from=first_filtering,
          features={
              feature_keys.FilteringFeatures.TIMES: times[-1] + 3,
              feature_keys.FilteringFeatures.VALUES: values[-1] + 3.,
              "exogenous": values[-1, None] + 13.
          },
          signatures=signatures,
          session=sess)
      # Test cold starting.
      six.assertCountEqual(
          self,
          [
              feature_keys.FilteringFeatures.TIMES,
              feature_keys.FilteringFeatures.VALUES, "exogenous"
          ],
          signatures.signature_def[
              feature_keys.SavedModelLabels.COLD_START_FILTER].inputs.keys())
      batched_times = array_ops.tile(
          math_ops.range(30, dtype=dtypes.int64)[None, :], (10, 1))
      batched_values = array_ops.ones([10, 30, 1])
      state = saved_model_utils.cold_start_filter(
          signatures=signatures,
          session=sess,
          features={
              feature_keys.FilteringFeatures.TIMES: batched_times,
              feature_keys.FilteringFeatures.VALUES: batched_values,
              "exogenous": 10. + batched_values
          })
      # tile/range live in array_ops/math_ops respectively; the flattened
      # original used math_ops.tile, which does not exist.
      predict_times = array_ops.tile(
          math_ops.range(30, 45, dtype=dtypes.int64)[None, :], (10, 1))
      predictions = saved_model_utils.predict_continuation(
          continue_from=state,
          times=predict_times,
          exogenous_features={
              "exogenous":
                  array_ops.tile(
                      math_ops.range(15, dtype=dtypes.float32),
                      (10,))[None, :, None]
          },
          signatures=signatures,
          session=sess)
      self.assertAllEqual([10, 15, 1], predictions["mean"].shape)
def testCalibratedEtlMonotonicClassifierTraining(self):
  # Construct the following training pair.
  #
  # Training: (x, y)
  # ([0., 0.], 0.0)
  # ([0., 1.], 1.0)
  # ([1., 0.], 1.0)
  # ([1., 1.], 0.0)
  #
  # which is not a monotonic function. Then check whether forcing monotonicity
  # results in the following monotonicity:
  # f(0, 0) <= f(0, 1), f(0, 0) <= f(1, 0), f(0, 1) <= f(1, 1),
  # f(1, 0) <= f(1, 1).
  x0 = np.array([0.0, 0.0, 1.0, 1.0])
  x1 = np.array([0.0, 1.0, 0.0, 1.0])
  x_samples = {'x0': x0, 'x1': x1}
  training_y = np.array([[False], [True], [True], [False]])

  train_input_fn = numpy_io.numpy_input_fn(
      x=x_samples,
      y=training_y,
      batch_size=4,
      num_epochs=1000,
      shuffle=False)
  test_input_fn = numpy_io.numpy_input_fn(x=x_samples, y=None, shuffle=False)

  # Define monotonic lattice classifier.
  feature_columns = [
      feature_column_lib.numeric_column('x0'),
      feature_column_lib.numeric_column('x1'),
  ]

  def init_fn():
    return keypoints_initialization.uniform_keypoints_for_signal(
        2, 0., 1., 0., 1.)

  hparams = tfl_hparams.CalibratedEtlHParams(
      num_keypoints=2,
      monotonic_num_lattices=2,
      monotonic_lattice_rank=2,
      monotonic_lattice_size=2)
  hparams.set_param('calibration_monotonic', +1)
  hparams.set_param('lattice_monotonic', True)
  hparams.set_param('learning_rate', 0.1)

  estimator = calibrated_etl.calibrated_etl_classifier(
      feature_columns=feature_columns,
      hparams=hparams,
      keypoints_initializers_fn=init_fn)
  estimator.train(input_fn=train_input_fn)
  predictions = [
      results['logits'][0]
      for results in estimator.predict(input_fn=test_input_fn)
  ]

  self.assertEqual(len(predictions), 4)
  # Check monotonicity. Note that projection has its own precision, so we
  # add a small number.
  self.assertLess(predictions[0], predictions[1] + 1e-6)
  self.assertLess(predictions[0], predictions[2] + 1e-6)
  self.assertLess(predictions[1], predictions[3] + 1e-6)
  self.assertLess(predictions[2], predictions[3] + 1e-6)
def test_label_key_should_not_be_used_as_feature(self):
  with self.assertRaisesRegexp(ValueError,
                               'label should not be used as feature'):
    parsing_utils.classifier_parse_example_spec(
        feature_columns=[fc.numeric_column('a')], label_key='a')
def testSparseFeaturesWithL1Reg(self):
  """Tests LinearRegressor with LinearSDCA and sparse features."""

  def input_fn():
    return {
        'example_id':
            constant_op.constant(['1', '2', '3']),
        'price':
            constant_op.constant([[0.4], [0.6], [0.3]]),
        'country':
            sparse_tensor.SparseTensor(
                values=['IT', 'US', 'GB'],
                indices=[[0, 0], [1, 3], [2, 1]],
                dense_shape=[3, 5]),
        'weights':
            constant_op.constant([[10.0], [10.0], [10.0]])
    }, constant_op.constant([[1.4], [-0.8], [2.6]])

  price = feature_column_lib.numeric_column('price')
  country = feature_column_lib.categorical_column_with_hash_bucket(
      'country', hash_bucket_size=5)

  # Regressor with no L1 regularization.
  optimizer = linear.LinearSDCA(
      example_id_column='example_id', symmetric_l2_regularization=0.1)
  regressor = linear.LinearRegressorV2(
      feature_columns=[price, country],
      weight_column='weights',
      optimizer=optimizer)
  regressor.train(input_fn=input_fn, steps=20)
  no_l1_reg_loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss']
  variable_names = regressor.get_variable_names()
  self.assertIn('linear/linear_model/price/weights', variable_names)
  self.assertIn('linear/linear_model/country/weights', variable_names)
  no_l1_reg_weights = {
      'linear/linear_model/price/weights':
          regressor.get_variable_value('linear/linear_model/price/weights'),
      'linear/linear_model/country/weights':
          regressor.get_variable_value('linear/linear_model/country/weights'),
  }

  # Regressor with L1 regularization.
  optimizer = linear.LinearSDCA(
      example_id_column='example_id',
      symmetric_l1_regularization=1.0,
      symmetric_l2_regularization=0.1)
  regressor = linear.LinearRegressorV2(
      feature_columns=[price, country],
      weight_column='weights',
      optimizer=optimizer)
  regressor.train(input_fn=input_fn, steps=20)
  l1_reg_loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss']
  l1_reg_weights = {
      'linear/linear_model/price/weights':
          regressor.get_variable_value('linear/linear_model/price/weights'),
      'linear/linear_model/country/weights':
          regressor.get_variable_value('linear/linear_model/country/weights'),
  }

  # Unregularized loss is lower when there is no L1 regularization.
  self.assertLess(no_l1_reg_loss, l1_reg_loss)
  self.assertLess(no_l1_reg_loss, 0.05)

  # But weights returned by the regressor with L1 regularization have smaller
  # L1 norm.
  l1_reg_weights_norm, no_l1_reg_weights_norm = 0.0, 0.0
  for var_name in sorted(l1_reg_weights):
    l1_reg_weights_norm += sum(
        np.absolute(l1_reg_weights[var_name].flatten()))
    no_l1_reg_weights_norm += sum(
        np.absolute(no_l1_reg_weights[var_name].flatten()))
    print('Var name: %s, value: %s' %
          (var_name, no_l1_reg_weights[var_name].flatten()))
  self.assertLess(l1_reg_weights_norm, no_l1_reg_weights_norm)
def _complete_flow(self,
                   train_distribute,
                   eval_distribute,
                   remote_cluster=None,
                   use_train_and_evaluate=True):
  estimator = self._get_estimator(train_distribute, eval_distribute,
                                  remote_cluster)

  input_dimension = LABEL_DIMENSION
  train_input_fn = self.dataset_input_fn(
      x={"x": DATA},
      y=DATA,
      batch_size=BATCH_SIZE // train_distribute.num_replicas_in_sync,
      shuffle=True)
  if eval_distribute:
    eval_batch_size = BATCH_SIZE // eval_distribute.num_replicas_in_sync
  else:
    eval_batch_size = BATCH_SIZE
  eval_input_fn = self.dataset_input_fn(
      x={"x": DATA}, y=DATA, batch_size=eval_batch_size, shuffle=False)

  linear_feature_columns = [
      feature_column.numeric_column("x", shape=(input_dimension,))
  ]
  dnn_feature_columns = [
      feature_column.numeric_column("x", shape=(input_dimension,))
  ]
  feature_columns = linear_feature_columns + dnn_feature_columns

  eval_spec = estimator_training.EvalSpec(
      name=EVAL_NAME,
      input_fn=eval_input_fn,
      steps=None,
      exporters=self._get_exporter(EXPORTER_NAME, feature_columns),
      start_delay_secs=0,
      throttle_secs=1)

  if use_train_and_evaluate:
    estimator_training.train_and_evaluate(
        estimator,
        estimator_training.TrainSpec(train_input_fn, max_steps=MAX_STEPS),
        eval_spec)
  else:
    estimator.train(train_input_fn, max_steps=MAX_STEPS)

    latest_ckpt_path = estimator.latest_checkpoint()
    metrics = estimator.evaluate(
        eval_input_fn, checkpoint_path=latest_ckpt_path, name=EVAL_NAME)

    # Export the eval result to files.
    eval_result = estimator_training._EvalResult(
        status=estimator_training._EvalStatus.EVALUATED,
        metrics=metrics,
        checkpoint_path=latest_ckpt_path)
    evaluator = estimator_training._TrainingExecutor._Evaluator(
        estimator, eval_spec, None)
    evaluator._export_eval_result(eval_result, True)

  return estimator
def DISABLED_test_function_model_multiple_feature_layer_inputs(self): col_a = fc.numeric_column('a') col_b = fc.numeric_column('b') col_c = fc.numeric_column('c') fc1 = df.DenseFeatures([col_a, col_b], name='fc1') fc2 = df.DenseFeatures([col_b, col_c], name='fc2') dense = keras.layers.Dense(4) # This seems problematic.... We probably need something for DenseFeatures # the way Input is for InputLayer. output = dense(fc1) + dense(fc2) model = keras.models.Model([fc1, fc2], [output]) optimizer = 'rmsprop' loss = 'mse' loss_weights = [1., 0.5] model.compile(optimizer, loss, metrics=[metrics_module.CategoricalAccuracy(), 'mae'], loss_weights=loss_weights) data_list = ([{ 'a': np.arange(10), 'b': np.arange(10) }, { 'b': np.arange(10), 'c': np.arange(10) }], np.arange(10, 100)) model.fit(*data_list, epochs=1) data_bloated_list = ([{ 'a': np.arange(10), 'b': np.arange(10), 'c': np.arange(10) }, { 'a': np.arange(10), 'b': np.arange(10), 'c': np.arange(10) }], np.arange(10, 100)) model.fit(*data_bloated_list, epochs=1) data_dict = ({ 'fc1': { 'a': np.arange(10), 'b': np.arange(10) }, 'fc2': { 'b': np.arange(10), 'c': np.arange(10) } }, np.arange(10, 100)) model.fit(*data_dict, epochs=1) data_bloated_dict = ({ 'fc1': { 'a': np.arange(10), 'b': np.arange(10), 'c': np.arange(10) }, 'fc2': { 'a': np.arange(10), 'b': np.arange(10), 'c': np.arange(10) } }, np.arange(10, 100)) model.fit(*data_bloated_dict, epochs=1)
def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate): label_dimension = 2 input_dimension = label_dimension batch_size = 10 data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) train_input_fn = self.dataset_input_fn( x={'x': data}, y=data, batch_size=batch_size // distribution.num_replicas_in_sync, shuffle=True) eval_input_fn = self.dataset_input_fn( x={'x': data}, y=data, batch_size=batch_size // distribution.num_replicas_in_sync, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn(x={'x': data}, batch_size=batch_size, shuffle=False) linear_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] dnn_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] feature_columns = linear_feature_columns + dnn_feature_columns estimator = dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=linear_feature_columns, dnn_hidden_units=(2, 2), dnn_feature_columns=dnn_feature_columns, label_dimension=label_dimension, model_dir=self._model_dir, # TODO(isaprykin): Work around the colocate_with error. dnn_optimizer=adagrad.AdagradOptimizer(0.001), linear_optimizer=adagrad.AdagradOptimizer(0.001), config=run_config.RunConfig(train_distribute=distribution, eval_distribute=distribution)) num_steps = 10 if use_train_and_evaluate: scores, _ = training.train_and_evaluate( estimator, training.TrainSpec(train_input_fn, max_steps=num_steps), training.EvalSpec(eval_input_fn)) else: estimator.train(train_input_fn, steps=num_steps) scores = estimator.evaluate(eval_input_fn) self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) self.assertIn('loss', scores) predictions = np.array([ x[prediction_keys.PredictionKeys.PREDICTIONS] for x in estimator.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, label_dimension), predictions.shape) feature_spec = feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = estimator.export_saved_model(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir))
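# Small NumPy-only sketch of how the synthetic regression data above is laid out: a
# linspace of batch_size * label_dimension values reshaped to (batch_size,
# label_dimension) and fed as both x and y, so a perfect model learns the identity map.
import numpy as np

batch_size, label_dimension = 10, 2
data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
data = data.reshape(batch_size, label_dimension)
assert data.shape == (batch_size, label_dimension)
# Predictions are later stacked per example, giving the same (batch_size,
# label_dimension) shape that the assertion on predictions.shape checks.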
def test_encode_listwise_features_renaming(self): """Tests for using different names in feature columns vs features.""" with tf.Graph().as_default(): # Batch size = 2, list_size = 2. features = { "query_length": tf.convert_to_tensor(value=[[1], [2]]), "utility": tf.convert_to_tensor(value=[[[1.0], [0.0]], [[0.0], [1.0]]]), "unigrams": tf.SparseTensor(indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]], values=[ "ranking", "regression", "classification", "ordinal" ], dense_shape=[2, 2, 1]) } context_feature_columns = { "query_length": feature_column.numeric_column("query_length", shape=(1, ), default_value=0, dtype=tf.int64) } example_feature_columns = { "utility_renamed": feature_column.numeric_column("utility", shape=(1, ), default_value=0.0, dtype=tf.float32), "unigrams_renamed": feature_column.embedding_column( feature_column.categorical_column_with_vocabulary_list( "unigrams", vocabulary_list=[ "ranking", "regression", "classification", "ordinal" ]), dimension=10) } context_features, example_features = feature_lib.encode_listwise_features( features, input_size=2, context_feature_columns=context_feature_columns, example_feature_columns=example_feature_columns) self.assertAllEqual(["query_length"], sorted(context_features)) self.assertAllEqual(["unigrams_renamed", "utility_renamed"], sorted(example_features)) self.assertAllEqual( [2, 2, 10], example_features["unigrams_renamed"].get_shape().as_list()) with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) sess.run(tf.compat.v1.tables_initializer()) context_features, example_features = sess.run( [context_features, example_features]) self.assertAllEqual([[1], [2]], context_features["query_length"]) self.assertAllEqual([[[1.0], [0.0]], [[0.0], [1.0]]], example_features["utility_renamed"])
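# Plain-dict illustration (no TF) of the renaming convention exercised above: the key
# of example_feature_columns ("utility_renamed") is the name the encoded feature is
# returned under, while the column stored at that key still reads the raw feature key
# ("utility"). The dict-of-dicts below is a stand-in for real feature columns.
raw_features = {"utility": [[[1.0], [0.0]], [[0.0], [1.0]]]}
example_feature_columns = {"utility_renamed": {"source_key": "utility"}}

encoded = {
    output_name: raw_features[column["source_key"]]
    for output_name, column in example_feature_columns.items()
}
assert sorted(encoded) == ["utility_renamed"]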
def testCalibratedRtlMonotonicClassifierTraining(self): # Construct the following training/testing pair. # # Training: (x, y) # ([0., 0.], 0.0) # ([0., 1.], 1.0) # ([1., 0.], 1.0) # ([1., 1.], 0.0) # # Test: (x, y) # ([0., 0.], 0.0) # ([0., 1.], 1.0) # ([1., 0.], 1.0) # ([1., 1.], 1.0) # # Note that the training data contains a noisy sample, ([1., 1.], 0.0), while the test # labels are generated by the logical-OR function. Therefore, by enforcing # increasing monotonicity on all features, we should be able to generalize well # to the test examples. x0 = np.array([0.0, 0.0, 1.0, 1.0]) x1 = np.array([0.0, 1.0, 0.0, 1.0]) x_samples = {'x0': x0, 'x1': x1} training_y = np.array([[False], [True], [True], [False]]) test_y = np.array([[False], [True], [True], [True]]) train_input_fn = numpy_io.numpy_input_fn(x=x_samples, y=training_y, batch_size=4, num_epochs=1000, shuffle=False) test_input_fn = numpy_io.numpy_input_fn(x=x_samples, y=test_y, shuffle=False) # Define monotonic lattice classifier. feature_columns = [ feature_column_lib.numeric_column('x0'), feature_column_lib.numeric_column('x1'), ] def init_fn(): return keypoints_initialization.uniform_keypoints_for_signal( 2, 0., 1., 0., 1.) hparams = tfl_hparams.CalibratedRtlHParams(num_keypoints=2, num_lattices=3, lattice_rank=2) # Monotonic calibrated lattice. hparams.set_param('monotonicity', +1) hparams.set_param('learning_rate', 0.1) hparams.set_param('interpolation_type', 'hypercube') estimator = calibrated_rtl.calibrated_rtl_classifier( feature_columns=feature_columns, hparams=hparams, keypoints_initializers_fn=init_fn) estimator.train(input_fn=train_input_fn) results = estimator.evaluate(input_fn=test_input_fn) # We should expect 1.0 accuracy. self.assertGreater(results['accuracy'], 0.999)
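# Minimal NumPy sketch of the label construction described in the comment above: test
# labels follow the logical OR of the two inputs, while the training set flips the
# label of the [1., 1.] example to inject the noise that monotonicity is meant to
# overcome.
import numpy as np

x0 = np.array([0.0, 0.0, 1.0, 1.0])
x1 = np.array([0.0, 1.0, 0.0, 1.0])
test_y = np.logical_or(x0 > 0.5, x1 > 0.5)[:, None]
training_y = test_y.copy()
training_y[-1] = False  # the noisy ([1., 1.], 0.0) sample
assert test_y.tolist() == [[False], [True], [True], [True]]
assert training_y.tolist() == [[False], [True], [True], [False]]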
def test_encode_listwise_features(self): # Batch size = 2, list_size = 2. features = { "query_length": ops.convert_to_tensor([[1], [2]]), "utility": ops.convert_to_tensor([[[1.0], [0.0]], [[0.0], [1.0]]]), "unigrams": sparse_tensor_lib.SparseTensor( indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]], values=["ranking", "regression", "classification", "ordinal"], dense_shape=[2, 2, 1]) } context_feature_columns = { "query_length": feature_column.numeric_column("query_length", shape=(1, ), default_value=0, dtype=dtypes.int64) } example_feature_columns = { "utility": feature_column.numeric_column("utility", shape=(1, ), default_value=0.0, dtype=dtypes.float32), "unigrams": feature_column.embedding_column( feature_column.categorical_column_with_vocabulary_list( "unigrams", vocabulary_list=[ "ranking", "regression", "classification", "ordinal" ]), dimension=10) } with self.assertRaisesRegexp( ValueError, r"2nd dimesion of tensor must be equal to input size: 3, but found .*" ): feature_lib.encode_listwise_features( features, input_size=3, context_feature_columns=context_feature_columns, example_feature_columns=example_feature_columns) context_features, example_features = feature_lib.encode_listwise_features( features, input_size=2, context_feature_columns=context_feature_columns, example_feature_columns=example_feature_columns) self.assertAllEqual(["query_length"], sorted(context_features)) self.assertAllEqual(["unigrams", "utility"], sorted(example_features)) self.assertAllEqual([2, 2, 10], example_features["unigrams"].get_shape().as_list()) with session.Session() as sess: sess.run(variables.global_variables_initializer()) sess.run(lookup_ops.tables_initializer()) context_features, example_features = sess.run( [context_features, example_features]) self.assertAllEqual([[1], [2]], context_features["query_length"]) self.assertAllEqual([[[1.0], [0.0]], [[0.0], [1.0]]], example_features["utility"])
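# Hedged sketch of the shape contract behind the ValueError above: every per-example
# feature is laid out as [batch_size, list_size, ...], so asking the encoder for
# input_size=3 against tensors whose list_size is 2 must fail. The checker below is a
# stand-in for the library's validation, not its actual code.
import numpy as np

utility = np.array([[[1.0], [0.0]], [[0.0], [1.0]]])  # [batch=2, list_size=2, 1]

def check_input_size(tensor, input_size):
  list_size = tensor.shape[1]
  if list_size != input_size:
    raise ValueError("2nd dimension of tensor must be equal to input size: "
                     "%d, but found %d" % (input_size, list_size))

check_input_size(utility, input_size=2)    # matches, no error
try:
  check_input_size(utility, input_size=3)  # mirrors the assertRaisesRegexp branch
except ValueError:
  pass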
def _fit_restore_fit_test_template(self, estimator_fn, dtype): """Tests restoring previously fit models.""" model_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) exogenous_feature_columns = ( feature_column.numeric_column("exogenous"), ) first_estimator = estimator_fn(model_dir, exogenous_feature_columns) times = numpy.arange(20, dtype=numpy.int64) values = numpy.arange(20, dtype=dtype.as_numpy_dtype) exogenous = numpy.arange(20, dtype=dtype.as_numpy_dtype) features = { feature_keys.TrainEvalFeatures.TIMES: times, feature_keys.TrainEvalFeatures.VALUES: values, "exogenous": exogenous } train_input_fn = input_pipeline.RandomWindowInputFn( input_pipeline.NumpyReader(features), shuffle_seed=2, num_threads=1, batch_size=16, window_size=16) eval_input_fn = input_pipeline.RandomWindowInputFn( input_pipeline.NumpyReader(features), shuffle_seed=3, num_threads=1, batch_size=16, window_size=16) first_estimator.train(input_fn=train_input_fn, steps=1) first_evaluation = first_estimator.evaluate(input_fn=eval_input_fn, steps=1) first_loss_before_fit = first_evaluation["loss"] self.assertAllEqual(first_loss_before_fit, first_evaluation["average_loss"]) self.assertAllEqual([], first_loss_before_fit.shape) first_estimator.train(input_fn=train_input_fn, steps=1) first_loss_after_fit = first_estimator.evaluate(input_fn=eval_input_fn, steps=1)["loss"] self.assertAllEqual([], first_loss_after_fit.shape) second_estimator = estimator_fn(model_dir, exogenous_feature_columns) second_estimator.train(input_fn=train_input_fn, steps=1) whole_dataset_input_fn = input_pipeline.WholeDatasetInputFn( input_pipeline.NumpyReader(features)) whole_dataset_evaluation = second_estimator.evaluate( input_fn=whole_dataset_input_fn, steps=1) exogenous_values_ten_steps = { "exogenous": numpy.arange(10, dtype=dtype.as_numpy_dtype)[None, :, None] } predict_input_fn = input_pipeline.predict_continuation_input_fn( evaluation=whole_dataset_evaluation, exogenous_features=exogenous_values_ten_steps, steps=10) # Also tests that limit_epochs in predict_continuation_input_fn prevents # infinite iteration (estimator_predictions, ) = list( second_estimator.predict(input_fn=predict_input_fn)) self.assertAllEqual([10, 1], estimator_predictions["mean"].shape) input_receiver_fn = first_estimator.build_raw_serving_input_receiver_fn( ) export_location = first_estimator.export_saved_model( self.get_temp_dir(), input_receiver_fn) with ops.Graph().as_default(): with session.Session() as sess: signatures = loader.load(sess, [tag_constants.SERVING], export_location) # Test that prediction and filtering can continue from evaluation output saved_prediction = saved_model_utils.predict_continuation( continue_from=whole_dataset_evaluation, steps=10, exogenous_features=exogenous_values_ten_steps, signatures=signatures, session=sess) # Saved model predictions should be the same as Estimator predictions # starting from the same evaluation. for prediction_key, prediction_value in estimator_predictions.items( ): self.assertAllClose( prediction_value, numpy.squeeze(saved_prediction[prediction_key], axis=0)) first_filtering = saved_model_utils.filter_continuation( continue_from=whole_dataset_evaluation, features={ feature_keys.FilteringFeatures.TIMES: times[None, -1] + 2, feature_keys.FilteringFeatures.VALUES: values[None, -1] + 2., "exogenous": values[None, -1, None] + 12. 
}, signatures=signatures, session=sess) # Test that prediction and filtering can continue from filtering output second_saved_prediction = saved_model_utils.predict_continuation( continue_from=first_filtering, steps=1, exogenous_features={ "exogenous": numpy.arange(1, dtype=dtype.as_numpy_dtype)[None, :, None] }, signatures=signatures, session=sess) self.assertEqual( times[-1] + 3, numpy.squeeze(second_saved_prediction[ feature_keys.PredictionResults.TIMES])) saved_model_utils.filter_continuation( continue_from=first_filtering, features={ feature_keys.FilteringFeatures.TIMES: times[-1] + 3, feature_keys.FilteringFeatures.VALUES: values[-1] + 3., "exogenous": values[-1, None] + 13. }, signatures=signatures, session=sess) # Test cold starting six.assertCountEqual( self, [ feature_keys.FilteringFeatures.TIMES, feature_keys.FilteringFeatures.VALUES, "exogenous" ], signatures.signature_def[feature_keys.SavedModelLabels. COLD_START_FILTER].inputs.keys()) batch_numpy_times = numpy.tile( numpy.arange(30, dtype=numpy.int64)[None, :], (10, 1)) batch_numpy_values = numpy.ones([10, 30, 1]) state = saved_model_utils.cold_start_filter( signatures=signatures, session=sess, features={ feature_keys.FilteringFeatures.TIMES: batch_numpy_times, feature_keys.FilteringFeatures.VALUES: batch_numpy_values, "exogenous": 10. + batch_numpy_values }) predict_times = numpy.tile( numpy.arange(30, 45, dtype=numpy.int64)[None, :], (10, 1)) predictions = saved_model_utils.predict_continuation( continue_from=state, times=predict_times, exogenous_features={ "exogenous": numpy.tile( numpy.arange(15, dtype=dtype.as_numpy_dtype), (10, ))[None, :, None] }, signatures=signatures, session=sess) self.assertAllEqual([10, 15, 1], predictions["mean"].shape)
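# Small NumPy sketch of the exogenous-feature layout used throughout the test above:
# continuation inputs are rank-3, [batch, num_steps, feature_dim], which is why the
# raw 1-D arange is expanded with [None, :, None] before being handed to the
# prediction input_fn, and why cold-start filtering tiles the time axis per series.
import numpy as np

steps = 10
exogenous = np.arange(steps, dtype=np.float32)[None, :, None]
assert exogenous.shape == (1, steps, 1)

batch_numpy_times = np.tile(np.arange(30, dtype=np.int64)[None, :], (10, 1))
batch_numpy_values = np.ones([10, 30, 1])
assert batch_numpy_times.shape == (10, 30)
assert batch_numpy_values.shape == (10, 30, 1)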
def test_weight_column_should_not_be_used_as_feature(self): with self.assertRaisesRegexp( ValueError, 'weight_column should not be used as feature'): self._parse_example_fn(feature_columns=[fc.numeric_column('a')], label_key='b', weight_column=fc.numeric_column('a'))
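# Hedged sketch of the validation the test above exercises: a weight column whose key
# also appears among the feature columns is rejected. The helper below is a stand-in
# for that check, not the parsing utility the test calls.
def validate_weight_column(feature_keys, weight_key):
  if weight_key in feature_keys:
    raise ValueError('weight_column should not be used as feature')

validate_weight_column(feature_keys={'a'}, weight_key='b')     # distinct keys: fine
try:
  validate_weight_column(feature_keys={'a'}, weight_key='a')   # overlapping key: raises
except ValueError:
  pass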