def test_ar_lstm_regressor(self):
  """Smoke-tests LSTMAutoRegressor train/evaluate with an exogenous column."""
  dtype = dtypes.float32
  model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
  exogenous_feature_columns = (
      feature_column.numeric_column("exogenous"),
  )
  estimator = estimators.LSTMAutoRegressor(
      periodicities=10,
      input_window_size=10,
      output_window_size=6,
      model_dir=model_dir,
      num_features=1,
      extra_feature_columns=exogenous_feature_columns,
      num_units=10,
      config=_SeedRunConfig())
  # Toy series: times, values, and the exogenous feature are all 0..19.
  times = numpy.arange(20, dtype=numpy.int64)
  values = numpy.arange(20, dtype=dtype.as_numpy_dtype)
  exogenous = numpy.arange(20, dtype=dtype.as_numpy_dtype)
  features = {
      feature_keys.TrainEvalFeatures.TIMES: times,
      feature_keys.TrainEvalFeatures.VALUES: values,
      "exogenous": exogenous
  }
  # Different shuffle seeds so train and eval draw different window batches.
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features), shuffle_seed=2, num_threads=1,
      batch_size=16, window_size=16)
  eval_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features), shuffle_seed=3, num_threads=1,
      batch_size=16, window_size=16)
  estimator.train(input_fn=train_input_fn, steps=1)
  evaluation = estimator.evaluate(
      input_fn=eval_input_fn, steps=1)
  # With a single evaluation step, "loss" should equal "average_loss" and
  # be a scalar.
  self.assertAllEqual(evaluation["loss"], evaluation["average_loss"])
  self.assertAllEqual([], evaluation["loss"].shape)
def _test_initialization(self, warmup_iterations, batch_size):
  """Evaluates the chained state manager's loss and returns its final value.

  Args:
    warmup_iterations: Number of extra loss evaluations to run first, priming
      the saved state before the returned evaluation.
    batch_size: Batch size for the random-window input pipeline, or -1 to use
      AllWindowInputFn instead.

  Returns:
    The numpy value of the loss from the final evaluation.
  """
  stub_model = StubTimeSeriesModel()
  data = self._make_test_data(
      length=20, cut_start=None, cut_end=None, offset=0.)
  if batch_size == -1:
    # Deterministic pipeline covering every window.
    input_fn = test_utils.AllWindowInputFn(
        input_pipeline.NumpyReader(data), window_size=10)
  else:
    input_fn = input_pipeline.RandomWindowInputFn(
        input_pipeline.NumpyReader(data),
        window_size=10, batch_size=batch_size)
  chainer = state_management.ChainingStateManager(
      state_saving_interval=1)
  features, _ = input_fn()
  # Graph construction order matters here: model first, then the state
  # manager, then the loss.
  stub_model.initialize_graph()
  chainer.initialize_graph(model=stub_model)
  model_outputs = chainer.define_loss(
      model=stub_model, features=features, mode=estimator_lib.ModeKeys.TRAIN)
  with self.cached_session() as session:
    variables.global_variables_initializer().run()
    coordinator = coordinator_lib.Coordinator()
    queue_runner_impl.start_queue_runners(session, coord=coordinator)
    for _ in range(warmup_iterations):
      # Warm up saved state
      model_outputs.loss.eval()
    outputs = model_outputs.loss.eval()
    coordinator.request_stop()
    coordinator.join()
    return outputs
def test_multivariate(self):
  """Trains a multivariate level model and checks statistics propagation."""
  dtype = dtypes.float32
  num_features = 3
  covariance = numpy.eye(num_features)
  # A single off-diagonal has a non-zero value in the true transition
  # noise covariance.
  covariance[-1, 0] = 1.
  covariance[0, -1] = 1.
  dataset_size = 100
  # Random walk whose increments are drawn from the covariance above.
  values = numpy.cumsum(numpy.random.multivariate_normal(
      mean=numpy.zeros(num_features),
      cov=covariance,
      size=dataset_size), axis=0)
  times = numpy.arange(dataset_size)
  model = MultivariateLevelModel(
      configuration=state_space_model.StateSpaceModelConfiguration(
          num_features=num_features,
          dtype=dtype,
          use_observation_noise=False,
          transition_covariance_initial_log_scale_bias=5.))
  estimator = estimators.StateSpaceRegressor(
      model=model,
      optimizer=gradient_descent.GradientDescentOptimizer(0.1))
  data = {
      feature_keys.TrainEvalFeatures.TIMES: times,
      feature_keys.TrainEvalFeatures.VALUES: values
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(data), batch_size=16, window_size=16)
  estimator.train(input_fn=train_input_fn, steps=1)
  for component in model._ensemble_members:
    # Check that input statistics propagated to component models
    self.assertTrue(component._input_statistics)
def test_exogenous_input(self):
  """Test that no errors are raised when using exogenous features."""
  dtype = dtypes.float64
  times = [1, 2, 3, 4, 5, 6]
  values = [[0.01], [5.10], [5.21], [0.30], [5.41], [0.50]]
  # Categorical exogenous feature fed through a one-hot column.
  feature_a = [["off"], ["on"], ["on"], ["off"], ["on"], ["off"]]
  sparse_column_a = feature_column.sparse_column_with_keys(
      column_name="feature_a", keys=["on", "off"])
  one_hot_a = layers.one_hot_column(sparse_id_column=sparse_column_a)
  regressor = estimators.StructuralEnsembleRegressor(
      periodicities=[],
      num_features=1,
      moving_average_order=0,
      exogenous_feature_columns=[one_hot_a],
      dtype=dtype)
  features = {TrainEvalFeatures.TIMES: times,
              TrainEvalFeatures.VALUES: values,
              "feature_a": feature_a}
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features),
      window_size=6, batch_size=1)
  regressor.train(input_fn=train_input_fn, steps=1)
  eval_input_fn = input_pipeline.WholeDatasetInputFn(
      input_pipeline.NumpyReader(features))
  evaluation = regressor.evaluate(input_fn=eval_input_fn, steps=1)
  # Predict a three-step continuation, supplying future exogenous values.
  predict_input_fn = input_pipeline.predict_continuation_input_fn(
      evaluation, times=[[7, 8, 9]],
      exogenous_features={"feature_a": [[["on"], ["off"], ["on"]]]})
  regressor.predict(input_fn=predict_input_fn)
def dry_run_train_helper(
    self, sample_every, period, num_samples, model_type, model_args,
    num_features=1):
  """Briefly trains a state space model and checks that the loss decreases.

  Args:
    sample_every: Sampling interval passed through to simple_data.
    period: Period of the simulated series.
    num_samples: Number of samples to generate.
    model_type: State space model class to construct.
    model_args: Extra keyword arguments for the model constructor.
    num_features: Number of features in the simulated series.
  """
  numpy.random.seed(1)
  dtype = dtypes.float32
  features = self.simple_data(
      sample_every,
      dtype=dtype,
      period=period,
      num_samples=num_samples,
      num_features=num_features)
  model = model_type(
      configuration=(
          state_space_model.StateSpaceModelConfiguration(
              num_features=num_features,
              dtype=dtype,
              # Constant-zero covariance prior for this short dry run.
              covariance_prior_fn=lambda _: 0.)),
      **model_args)

  class _RunConfig(estimator_lib.RunConfig):
    # Fixes the graph-level random seed for determinism.

    @property
    def tf_random_seed(self):
      return 4

  estimator = estimators.StateSpaceRegressor(model, config=_RunConfig())
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features),
      num_threads=1,
      shuffle_seed=1,
      batch_size=16,
      window_size=16)
  eval_input_fn = input_pipeline.WholeDatasetInputFn(
      input_pipeline.NumpyReader(features))
  # Loss after three total steps should be lower than after one step.
  estimator.train(input_fn=train_input_fn, max_steps=1)
  first_evaluation = estimator.evaluate(input_fn=eval_input_fn, steps=1)
  estimator.train(input_fn=train_input_fn, max_steps=3)
  second_evaluation = estimator.evaluate(input_fn=eval_input_fn, steps=1)
  self.assertLess(second_evaluation["loss"], first_evaluation["loss"])
def _random_window_input_fn_test_template(
    self, time_series_reader, window_size, batch_size, num_features,
    discard_out_of_order=False):
  """Checks RandomWindowInputFn output shapes, dtypes, and time contiguity.

  Args:
    time_series_reader: A reader whose values are expected to satisfy
      values[:, :, f] == 2 * times + f for each feature f (asserted below).
    window_size: Window size to request from the input pipeline.
    batch_size: Batch size to request from the input pipeline.
    num_features: Expected number of features in VALUES.
    discard_out_of_order: Unused in this template; presumably kept for
      signature compatibility with callers — TODO confirm.

  Returns:
    The evaluated feature dictionary from one batch.
  """
  input_fn = input_pipeline.RandomWindowInputFn(
      time_series_reader=time_series_reader,
      window_size=window_size, batch_size=batch_size)
  result, _ = input_fn()
  init_op = variables.local_variables_initializer()
  with self.test_session() as session:
    coordinator = coordinator_lib.Coordinator()
    queue_runner_impl.start_queue_runners(session, coord=coordinator)
    session.run(init_op)
    features = session.run(result)
    coordinator.request_stop()
    coordinator.join()
    self.assertAllEqual([batch_size, window_size],
                        features[TrainEvalFeatures.TIMES].shape)
    for window_position in range(window_size - 1):
      for batch_position in range(batch_size):
        # Checks that all times are contiguous
        self.assertEqual(
            features[TrainEvalFeatures.TIMES][batch_position,
                                              window_position + 1],
            features[TrainEvalFeatures.TIMES][batch_position,
                                              window_position] + 1)
    self.assertAllEqual([batch_size, window_size, num_features],
                        features[TrainEvalFeatures.VALUES].shape)
    self.assertEqual("int64", features[TrainEvalFeatures.TIMES].dtype)
    for feature_number in range(num_features):
      # Each feature channel should be 2 * times + feature_number.
      self.assertAllEqual(
          features[TrainEvalFeatures.TIMES] * 2. + feature_number,
          features[TrainEvalFeatures.VALUES][:, :, feature_number])
    return features
def _input_statistics_test_template(
    self, stat_object, num_features, dtype, give_full_data,
    warmup_iterations=0, rtol=1e-6, data_length=500, chunk_size=4):
  """Checks computed input statistics against closed-form expectations.

  The synthetic series is values[0, t, f] = t + f with times 2 * t - 3, so
  every moment asserted below has a closed form.

  Args:
    stat_object: Statistics object; its initialize_graph builds the
      statistics from the input pipeline's features.
    num_features: Number of features in the synthetic series.
    dtype: TensorFlow dtype for the values.
    give_full_data: If True, hand the full (times, values) arrays to
      stat_object.set_data before building the graph.
    warmup_iterations: Statistic evaluations to run before asserting, so
      adaptive statistics can converge.
    rtol: Relative tolerance for the numeric comparisons.
    data_length: Length of the synthetic series.
    chunk_size: Window size for the random-window input pipeline.
  """
  graph = ops.Graph()
  with graph.as_default():
    numpy_dtype = dtype.as_numpy_dtype
    # values[0, t, f] = t + f; shape [1, data_length, num_features].
    values = (
        (numpy.arange(data_length, dtype=numpy_dtype)[..., None]
         + numpy.arange(num_features, dtype=numpy_dtype)[None, ...])[None])
    # times[0, t] = 2 * t - 3: spacing 2, start time -3 (asserted below).
    times = 2 * (numpy.arange(data_length)[None]) - 3
    if give_full_data:
      stat_object.set_data((times, values))
    features = {TrainEvalFeatures.TIMES: times,
                TrainEvalFeatures.VALUES: values}
    input_fn = input_pipeline.RandomWindowInputFn(
        batch_size=16, window_size=chunk_size,
        time_series_reader=input_pipeline.NumpyReader(features))
    statistics = stat_object.initialize_graph(
        features=input_fn()[0])
    with self.session(graph=graph) as session:
      variables.global_variables_initializer().run()
      coordinator = coordinator_lib.Coordinator()
      queue_runner_impl.start_queue_runners(session, coord=coordinator)
      for _ in range(warmup_iterations):
        # A control dependency should ensure that, for queue-based statistics,
        # a use of any statistic is preceded by an update of all adaptive
        # statistics.
        statistics.total_observation_count.eval()
      self.assertAllClose(
          range(num_features) + numpy.mean(numpy.arange(chunk_size))[None],
          statistics.series_start_moments.mean.eval(),
          rtol=rtol)
      self.assertAllClose(
          numpy.tile(numpy.var(numpy.arange(chunk_size))[None],
                     [num_features]),
          statistics.series_start_moments.variance.eval(),
          rtol=rtol)
      self.assertAllClose(
          numpy.mean(values[0], axis=0),
          statistics.overall_feature_moments.mean.eval(),
          rtol=rtol)
      self.assertAllClose(
          numpy.var(values[0], axis=0),
          statistics.overall_feature_moments.variance.eval(),
          rtol=rtol)
      self.assertAllClose(
          -3,
          statistics.start_time.eval(),
          rtol=rtol)
      self.assertAllClose(
          data_length,
          statistics.total_observation_count.eval(),
          rtol=rtol)
      coordinator.request_stop()
      coordinator.join()
def test_loop_unrolling(self):
  """Tests running/restoring from a checkpoint with static unrolling."""
  # Training windows (size 2) are small enough to be statically unrolled,
  # while whole-dataset evaluation is not.
  unrolled_model = TimeDependentStateSpaceModel(
      static_unrolling_window_size_threshold=2)
  regressor = estimators.StateSpaceRegressor(model=unrolled_model)
  series_times = numpy.arange(100)
  series_values = numpy.arange(100)
  data = {
      feature_keys.TrainEvalFeatures.TIMES: series_times,
      feature_keys.TrainEvalFeatures.VALUES: series_values,
  }
  training_input = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(data), batch_size=16, window_size=2)
  evaluation_input = input_pipeline.WholeDatasetInputFn(
      input_pipeline.NumpyReader(data))
  regressor.train(input_fn=training_input, max_steps=1)
  regressor.evaluate(input_fn=evaluation_input, steps=1)
def test_no_periodicity(self):
  """Train/evaluate/predict should raise no errors when periodicities is None."""
  observation_times = [1, 2, 3, 4, 5, 6]
  observations = [[0.01], [5.10], [5.21], [0.30], [5.41], [0.50]]
  model = estimators.StructuralEnsembleRegressor(
      periodicities=None,
      num_features=1,
      moving_average_order=0,
      dtype=dtypes.float64)
  data = {
      TrainEvalFeatures.TIMES: observation_times,
      TrainEvalFeatures.VALUES: observations,
  }
  model.train(
      input_fn=input_pipeline.RandomWindowInputFn(
          input_pipeline.NumpyReader(data), window_size=6, batch_size=1),
      steps=1)
  evaluation = model.evaluate(
      input_fn=input_pipeline.WholeDatasetInputFn(
          input_pipeline.NumpyReader(data)),
      steps=1)
  # Continue the series three steps past the training data.
  model.predict(
      input_fn=input_pipeline.predict_continuation_input_fn(
          evaluation, times=[[7, 8, 9]]))
def _fit_restore_fit_test_template(self, estimator_fn, dtype):
  """Tests restoring previously fit models."""
  model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
  exogenous_feature_columns = (
      feature_column.numeric_column("exogenous"),
  )
  first_estimator = estimator_fn(model_dir, exogenous_feature_columns)
  times = numpy.arange(20, dtype=numpy.int64)
  values = numpy.arange(20, dtype=dtype.as_numpy_dtype)
  exogenous = numpy.arange(20, dtype=dtype.as_numpy_dtype)
  features = {
      feature_keys.TrainEvalFeatures.TIMES: times,
      feature_keys.TrainEvalFeatures.VALUES: values,
      "exogenous": exogenous
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features), shuffle_seed=2, num_threads=1,
      batch_size=16, window_size=16)
  eval_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features), shuffle_seed=3, num_threads=1,
      batch_size=16, window_size=16)
  # Loss should decrease with additional training on the first estimator.
  first_estimator.train(input_fn=train_input_fn, steps=5)
  first_loss_before_fit = first_estimator.evaluate(
      input_fn=eval_input_fn, steps=1)["loss"]
  first_estimator.train(input_fn=train_input_fn, steps=50)
  first_loss_after_fit = first_estimator.evaluate(input_fn=eval_input_fn,
                                                  steps=1)["loss"]
  self.assertLess(first_loss_after_fit, first_loss_before_fit)
  # A second estimator over the same model_dir continues from the
  # checkpoint written by the first.
  second_estimator = estimator_fn(model_dir, exogenous_feature_columns)
  second_estimator.train(input_fn=train_input_fn, steps=2)
  whole_dataset_input_fn = input_pipeline.WholeDatasetInputFn(
      input_pipeline.NumpyReader(features))
  whole_dataset_evaluation = second_estimator.evaluate(
      input_fn=whole_dataset_input_fn, steps=1)
  # Exogenous values for a ten-step continuation, shaped [1, 10, 1].
  exogenous_values_ten_steps = {
      "exogenous": numpy.arange(
          10, dtype=dtype.as_numpy_dtype)[None, :, None]
  }
  predict_input_fn = input_pipeline.predict_continuation_input_fn(
      evaluation=whole_dataset_evaluation,
      exogenous_features=exogenous_values_ten_steps,
      steps=10)
  # Also tests that limit_epochs in predict_continuation_input_fn prevents
  # infinite iteration
  (estimator_predictions,
  ) = list(second_estimator.predict(input_fn=predict_input_fn))
  self.assertAllEqual([10, 1], estimator_predictions["mean"].shape)
  input_receiver_fn = first_estimator.build_raw_serving_input_receiver_fn()
  export_location = first_estimator.export_savedmodel(self.get_temp_dir(),
                                                      input_receiver_fn)
  with ops.Graph().as_default():
    with session.Session() as sess:
      signatures = loader.load(sess, [tag_constants.SERVING],
                               export_location)
      # Test that prediction and filtering can continue from evaluation output
      saved_prediction = saved_model_utils.predict_continuation(
          continue_from=whole_dataset_evaluation,
          steps=10,
          exogenous_features=exogenous_values_ten_steps,
          signatures=signatures,
          session=sess)
      # Saved model predictions should be the same as Estimator predictions
      # starting from the same evaluation.
      for prediction_key, prediction_value in estimator_predictions.items():
        self.assertAllClose(
            prediction_value,
            numpy.squeeze(saved_prediction[prediction_key], axis=0))
      first_filtering = saved_model_utils.filter_continuation(
          continue_from=whole_dataset_evaluation,
          features={
              feature_keys.FilteringFeatures.TIMES: times[None, -1] + 2,
              feature_keys.FilteringFeatures.VALUES: values[None, -1] + 2.,
              "exogenous": values[None, -1, None] + 12.
          },
          signatures=signatures,
          session=sess)
      # Test that prediction and filtering can continue from filtering output
      second_saved_prediction = saved_model_utils.predict_continuation(
          continue_from=first_filtering,
          steps=1,
          exogenous_features={
              "exogenous": numpy.arange(
                  1, dtype=dtype.as_numpy_dtype)[None, :, None]
          },
          signatures=signatures,
          session=sess)
      self.assertEqual(
          times[-1] + 3,
          numpy.squeeze(
              second_saved_prediction[feature_keys.PredictionResults.TIMES]))
      saved_model_utils.filter_continuation(
          continue_from=first_filtering,
          features={
              feature_keys.FilteringFeatures.TIMES: times[-1] + 3,
              feature_keys.FilteringFeatures.VALUES: values[-1] + 3.,
              "exogenous": values[-1, None] + 13.
          },
          signatures=signatures,
          session=sess)
      # Test cold starting
      six.assertCountEqual(
          self,
          [feature_keys.FilteringFeatures.TIMES,
           feature_keys.FilteringFeatures.VALUES,
           "exogenous"],
          signatures.signature_def[
              feature_keys.SavedModelLabels.COLD_START_FILTER].inputs.keys())
      batch_numpy_times = numpy.tile(
          numpy.arange(30, dtype=numpy.int64)[None, :], (10, 1))
      batch_numpy_values = numpy.ones([10, 30, 1])
      state = saved_model_utils.cold_start_filter(
          signatures=signatures,
          session=sess,
          features={
              feature_keys.FilteringFeatures.TIMES: batch_numpy_times,
              feature_keys.FilteringFeatures.VALUES: batch_numpy_values,
              "exogenous": 10. + batch_numpy_values
          })
      predict_times = numpy.tile(
          numpy.arange(30, 45, dtype=numpy.int64)[None, :], (10, 1))
      predictions = saved_model_utils.predict_continuation(
          continue_from=state,
          times=predict_times,
          exogenous_features={
              "exogenous": numpy.tile(
                  numpy.arange(15, dtype=dtype.as_numpy_dtype),
                  (10, ))[None, :, None]
          },
          signatures=signatures,
          session=sess)
      self.assertAllEqual([10, 15, 1], predictions["mean"].shape)
def train_helper(self, input_window_size, loss, max_loss=None,
                 train_steps=200, anomaly_prob=0.01,
                 anomaly_distribution=None, multiple_periods=False):
  """Trains an ARRegressor and checks evaluation loss and predictions.

  Args:
    input_window_size: Number of input timesteps per window.
    loss: ARModel loss name (NORMAL_LIKELIHOOD_LOSS or SQUARED_LOSS).
    max_loss: Maximum acceptable evaluation loss; when None a loss-specific
      default is chosen.
    train_steps: Number of training steps to run.
    anomaly_prob: Probability of anomalies in the generated data.
    anomaly_distribution: Anomaly distribution name, or None to disable
      anomaly modeling.
    multiple_periods: Whether the generated data mixes multiple periods.
  """
  np.random.seed(3)
  data_noise_stddev = 0.2
  if max_loss is None:
    if loss == ARModel.NORMAL_LIKELIHOOD_LOSS:
      max_loss = 1.0
    else:
      max_loss = 0.05 / (data_noise_stddev ** 2)
  train_data, test_data = self.create_data(
      noise_stddev=data_noise_stddev,
      anomaly_prob=anomaly_prob,
      multiple_periods=multiple_periods)
  output_window_size = 10
  window_size = input_window_size + output_window_size

  class _RunConfig(estimator_lib.RunConfig):
    # Fixes the graph-level random seed for determinism.

    @property
    def tf_random_seed(self):
      return 3

  estimator = ARRegressor(
      periodicities=self.period,
      anomaly_prior_probability=0.01 if anomaly_distribution else None,
      anomaly_distribution=anomaly_distribution,
      num_features=2,
      output_window_size=output_window_size,
      num_time_buckets=20,
      input_window_size=input_window_size,
      hidden_layer_sizes=[16],
      loss=loss,
      config=_RunConfig())
  train_input_fn = input_pipeline.RandomWindowInputFn(
      time_series_reader=input_pipeline.NumpyReader(train_data),
      window_size=window_size,
      batch_size=64,
      num_threads=1,
      shuffle_seed=2)
  test_input_fn = test_utils.AllWindowInputFn(
      time_series_reader=input_pipeline.NumpyReader(test_data),
      window_size=window_size)
  # Test training
  estimator.train(
      input_fn=train_input_fn,
      steps=train_steps)
  test_evaluation = estimator.evaluate(input_fn=test_input_fn, steps=1)
  test_loss = test_evaluation["loss"]
  logging.info("Final test loss: %f", test_loss)
  self.assertLess(test_loss, max_loss)
  if loss == ARModel.SQUARED_LOSS:
    # Test that the evaluation loss is reported without input scaling.
    self.assertAllClose(
        test_loss,
        np.mean((test_evaluation["mean"] -
                 test_evaluation["observed"]) ** 2))
  # Test predict
  train_data_times = train_data[TrainEvalFeatures.TIMES]
  train_data_values = train_data[TrainEvalFeatures.VALUES]
  test_data_times = test_data[TrainEvalFeatures.TIMES]
  test_data_values = test_data[TrainEvalFeatures.VALUES]
  # Predict over everything after the initial input window, using that
  # window as the starting state.
  predict_times = np.expand_dims(np.concatenate(
      [train_data_times[input_window_size:], test_data_times]), 0)
  predict_true_values = np.expand_dims(np.concatenate(
      [train_data_values[input_window_size:], test_data_values]), 0)
  state_times = np.expand_dims(train_data_times[:input_window_size], 0)
  state_values = np.expand_dims(
      train_data_values[:input_window_size, :], 0)
  # Zero-width last axis: an empty exogenous state with matching leading
  # dimensions.
  state_exogenous = state_times[:, :, None][:, :, :0]

  def prediction_input_fn():
    return ({
        PredictionFeatures.TIMES: training.limit_epochs(
            predict_times, num_epochs=1),
        PredictionFeatures.STATE_TUPLE: (state_times, state_values,
                                         state_exogenous)
    }, {})

  (predictions,) = tuple(estimator.predict(input_fn=prediction_input_fn))
  predicted_mean = predictions["mean"][:, 0]
  true_values = predict_true_values[0, :, 0]
  if loss == ARModel.NORMAL_LIKELIHOOD_LOSS:
    variances = predictions["covariance"][:, 0]
    standard_deviations = np.sqrt(variances)
    # Note that we may get tighter bounds with more training steps.
    errors = np.abs(predicted_mean - true_values) > 4 * standard_deviations
    fraction_errors = np.mean(errors)
    logging.info("Fraction errors: %f", fraction_errors)
def test_one_shot_prediction_head_export(self, estimator_factory):
  """Exports a OneShotPredictionHead model and exercises both serving paths.

  Covers the raw-tensor serving signature and the tf.Example parsing
  signature from build_one_shot_parsing_serving_input_receiver_fn.
  """

  def _new_temp_dir():
    # Unique directory per export so repeated exports don't collide.
    return os.path.join(test.get_temp_dir(), str(ops.uid()))

  model_dir = _new_temp_dir()
  categorical_column = feature_column.categorical_column_with_hash_bucket(
      key="categorical_exogenous_feature", hash_bucket_size=16)
  exogenous_feature_columns = [
      feature_column.numeric_column("2d_exogenous_feature", shape=(2,)),
      feature_column.embedding_column(
          categorical_column=categorical_column, dimension=10)
  ]
  estimator = estimator_factory(
      model_dir=model_dir,
      exogenous_feature_columns=exogenous_feature_columns,
      head_type=ts_head_lib.OneShotPredictionHead)
  train_features = {
      feature_keys.TrainEvalFeatures.TIMES: numpy.arange(
          20, dtype=numpy.int64),
      feature_keys.TrainEvalFeatures.VALUES: numpy.tile(
          numpy.arange(20, dtype=numpy.float32)[:, None], [1, 5]),
      "2d_exogenous_feature": numpy.ones([20, 2]),
      "categorical_exogenous_feature": numpy.array(
          ["strkey"] * 20)[:, None]
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(train_features),
      shuffle_seed=2,
      num_threads=1,
      batch_size=16,
      window_size=16)
  estimator.train(input_fn=train_input_fn, steps=5)
  result = estimator.evaluate(input_fn=train_input_fn, steps=1)
  self.assertIn("average_loss", result)
  # The one-shot head should not emit model state in its evaluation output.
  self.assertNotIn(feature_keys.State.STATE_TUPLE, result)
  input_receiver_fn = estimator.build_raw_serving_input_receiver_fn()
  export_location = estimator.export_savedmodel(_new_temp_dir(),
                                                input_receiver_fn)
  graph = ops.Graph()
  with graph.as_default():
    with session_lib.Session() as session:
      signatures = loader.load(session, [tag_constants.SERVING],
                               export_location)
      self.assertEqual([feature_keys.SavedModelLabels.PREDICT],
                       list(signatures.signature_def.keys()))
      predict_signature = signatures.signature_def[
          feature_keys.SavedModelLabels.PREDICT]
      six.assertCountEqual(
          self,
          [feature_keys.FilteringFeatures.TIMES,
           feature_keys.FilteringFeatures.VALUES,
           "2d_exogenous_feature",
           "categorical_exogenous_feature"],
          predict_signature.inputs.keys())
      # TIMES covers 35 steps while VALUES covers 20; the remaining 15 are
      # predicted (checked against output["mean"].shape below).
      features = {
          feature_keys.TrainEvalFeatures.TIMES: numpy.tile(
              numpy.arange(35, dtype=numpy.int64)[None, :], [2, 1]),
          feature_keys.TrainEvalFeatures.VALUES: numpy.tile(
              numpy.arange(20, dtype=numpy.float32)[None, :, None],
              [2, 1, 5]),
          "2d_exogenous_feature": numpy.ones([2, 35, 2]),
          "categorical_exogenous_feature": numpy.tile(
              numpy.array(["strkey"] * 35)[None, :, None], [2, 1, 1])
      }
      feeds = {
          graph.as_graph_element(input_value.name): features[input_key]
          for input_key, input_value in predict_signature.inputs.items()
      }
      fetches = {
          output_key: graph.as_graph_element(output_value.name)
          for output_key, output_value in predict_signature.outputs.items()
      }
      output = session.run(fetches, feed_dict=feeds)
      self.assertEqual((2, 15, 5), output["mean"].shape)
  # Build a parsing input function, then make a tf.Example for it to parse.
  export_location = estimator.export_savedmodel(
      _new_temp_dir(),
      estimator.build_one_shot_parsing_serving_input_receiver_fn(
          filtering_length=20, prediction_length=15))
  graph = ops.Graph()
  with graph.as_default():
    with session_lib.Session() as session:
      example = example_pb2.Example()
      times = example.features.feature[
          feature_keys.TrainEvalFeatures.TIMES]
      values = example.features.feature[
          feature_keys.TrainEvalFeatures.VALUES]
      times.int64_list.value.extend(range(35))
      # Five features per timestep for the first 20 steps.
      for i in range(20):
        values.float_list.value.extend([
            float(i) * 2. + feature_number
            for feature_number in range(5)
        ])
      real_feature = example.features.feature["2d_exogenous_feature"]
      categortical_feature = example.features.feature[
          "categorical_exogenous_feature"]
      for i in range(35):
        real_feature.float_list.value.extend([1, 1])
        categortical_feature.bytes_list.value.append(b"strkey")
      # Serialize the tf.Example for feeding to the Session
      examples = [example.SerializeToString()] * 2
      signatures = loader.load(session, [tag_constants.SERVING],
                               export_location)
      predict_signature = signatures.signature_def[
          feature_keys.SavedModelLabels.PREDICT]
      # The parsing receiver has a single serialized-Example input.
      ((_, input_value),) = predict_signature.inputs.items()
      feeds = {graph.as_graph_element(input_value.name): examples}
      fetches = {
          output_key: graph.as_graph_element(output_value.name)
          for output_key, output_value in predict_signature.outputs.items()
      }
      output = session.run(fetches, feed_dict=feeds)
      self.assertEqual((2, 15, 5), output["mean"].shape)
def test_one_shot_prediction_head_export(self):
  """Exports an LSTM model with a OneShotPredictionHead and serves it."""
  model_dir = self.get_temp_dir()
  categorical_column = feature_column.categorical_column_with_hash_bucket(
      key="categorical_exogenous_feature", hash_bucket_size=16)
  exogenous_feature_columns = [
      feature_column.numeric_column("2d_exogenous_feature", shape=(2,)),
      feature_column.embedding_column(
          categorical_column=categorical_column, dimension=10)
  ]
  estimator = ts_estimators.TimeSeriesRegressor(
      model=lstm_example._LSTMModel(
          num_features=5,
          num_units=128,
          exogenous_feature_columns=exogenous_feature_columns),
      optimizer=adam.AdamOptimizer(0.001),
      config=estimator_lib.RunConfig(tf_random_seed=4),
      state_manager=state_management.ChainingStateManager(),
      head_type=ts_head_lib.OneShotPredictionHead,
      model_dir=model_dir)
  train_features = {
      feature_keys.TrainEvalFeatures.TIMES: numpy.arange(
          20, dtype=numpy.int64),
      feature_keys.TrainEvalFeatures.VALUES: numpy.tile(
          numpy.arange(20, dtype=numpy.float32)[:, None], [1, 5]),
      "2d_exogenous_feature": numpy.ones([20, 2]),
      "categorical_exogenous_feature": numpy.array(
          ["strkey"] * 20)[:, None]
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(train_features),
      shuffle_seed=2,
      num_threads=1,
      batch_size=16,
      window_size=16)
  estimator.train(input_fn=train_input_fn, steps=5)
  input_receiver_fn = estimator.build_raw_serving_input_receiver_fn()
  export_location = estimator.export_savedmodel(self.get_temp_dir(),
                                                input_receiver_fn)
  graph = ops.Graph()
  with graph.as_default():
    with session_lib.Session() as session:
      signatures = loader.load(session, [tag_constants.SERVING],
                               export_location)
      # A one-shot export should expose only the PREDICT signature.
      self.assertEqual([feature_keys.SavedModelLabels.PREDICT],
                       list(signatures.signature_def.keys()))
      predict_signature = signatures.signature_def[
          feature_keys.SavedModelLabels.PREDICT]
      six.assertCountEqual(
          self,
          [feature_keys.FilteringFeatures.TIMES,
           feature_keys.FilteringFeatures.VALUES,
           "2d_exogenous_feature",
           "categorical_exogenous_feature"],
          predict_signature.inputs.keys())
      # TIMES covers 35 steps while VALUES covers 20; the remaining 15 are
      # predicted (checked against output["mean"].shape below).
      features = {
          feature_keys.TrainEvalFeatures.TIMES: numpy.tile(
              numpy.arange(35, dtype=numpy.int64)[None, :], [2, 1]),
          feature_keys.TrainEvalFeatures.VALUES: numpy.tile(
              numpy.arange(20, dtype=numpy.float32)[None, :, None],
              [2, 1, 5]),
          "2d_exogenous_feature": numpy.ones([2, 35, 2]),
          "categorical_exogenous_feature": numpy.tile(
              numpy.array(["strkey"] * 35)[None, :, None], [2, 1, 1])
      }
      feeds = {
          graph.as_graph_element(input_value.name): features[input_key]
          for input_key, input_value in predict_signature.inputs.items()
      }
      fetches = {
          output_key: graph.as_graph_element(output_value.name)
          for output_key, output_value in predict_signature.outputs.items()
      }
      output = session.run(fetches, feed_dict=feeds)
      self.assertAllEqual((2, 15, 5), output["mean"].shape)
def _fit_restore_fit_test_template(self, estimator_fn, dtype):
  """Tests restoring previously fit models."""
  model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
  first_estimator = estimator_fn(model_dir)
  times = numpy.arange(20, dtype=numpy.int64)
  values = numpy.arange(20, dtype=dtype.as_numpy_dtype)
  features = {
      feature_keys.TrainEvalFeatures.TIMES: times,
      feature_keys.TrainEvalFeatures.VALUES: values
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features), shuffle_seed=2, num_threads=1,
      batch_size=16, window_size=16)
  eval_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features), shuffle_seed=3, num_threads=1,
      batch_size=16, window_size=16)
  # Loss should decrease with additional training on the first estimator.
  first_estimator.train(input_fn=train_input_fn, steps=5)
  first_loss_before_fit = first_estimator.evaluate(
      input_fn=eval_input_fn, steps=1)["loss"]
  first_estimator.train(input_fn=train_input_fn, steps=50)
  first_loss_after_fit = first_estimator.evaluate(
      input_fn=eval_input_fn, steps=1)["loss"]
  self.assertLess(first_loss_after_fit, first_loss_before_fit)
  # A second estimator over the same model_dir continues from the
  # checkpoint written by the first.
  second_estimator = estimator_fn(model_dir)
  second_estimator.train(input_fn=train_input_fn, steps=2)
  whole_dataset_input_fn = input_pipeline.WholeDatasetInputFn(
      input_pipeline.NumpyReader(features))
  whole_dataset_evaluation = second_estimator.evaluate(
      input_fn=whole_dataset_input_fn, steps=1)
  predict_input_fn = input_pipeline.predict_continuation_input_fn(
      evaluation=whole_dataset_evaluation, steps=10)
  # Also tests that limit_epochs in predict_continuation_input_fn prevents
  # infinite iteration
  (estimator_predictions,
  ) = list(second_estimator.predict(input_fn=predict_input_fn))
  self.assertAllEqual([10, 1], estimator_predictions["mean"].shape)
  input_receiver_fn = first_estimator.build_raw_serving_input_receiver_fn()
  export_location = first_estimator.export_savedmodel(self.get_temp_dir(),
                                                      input_receiver_fn)
  with ops.Graph().as_default():
    with session.Session() as sess:
      signatures = loader.load(sess, [tag_constants.SERVING],
                               export_location)
      # Test that prediction and filtering can continue from evaluation output
      saved_prediction = saved_model_utils.predict_continuation(
          continue_from=whole_dataset_evaluation,
          steps=10,
          signatures=signatures,
          session=sess)
      # Saved model predictions should be the same as Estimator predictions
      # starting from the same evaluation.
      for prediction_key, prediction_value in estimator_predictions.items():
        self.assertAllClose(prediction_value,
                            numpy.squeeze(
                                saved_prediction[prediction_key], axis=0))
      first_filtering = saved_model_utils.filter_continuation(
          continue_from=whole_dataset_evaluation,
          features={
              feature_keys.FilteringFeatures.TIMES: times[None, -1] + 2,
              feature_keys.FilteringFeatures.VALUES: values[None, -1] + 2.
          },
          signatures=signatures,
          session=sess)
      # Test that prediction and filtering can continue from filtering output
      second_saved_prediction = saved_model_utils.predict_continuation(
          continue_from=first_filtering,
          steps=1,
          signatures=signatures,
          session=sess)
      self.assertEqual(
          times[-1] + 3,
          numpy.squeeze(
              second_saved_prediction[feature_keys.PredictionResults.TIMES]))
      saved_model_utils.filter_continuation(
          continue_from=first_filtering,
          features={
              feature_keys.FilteringFeatures.TIMES: times[-1] + 3,
              feature_keys.FilteringFeatures.VALUES: values[-1] + 3.
          },
          signatures=signatures,
          session=sess)