def test_exogenous_input(self):
  """Test that no errors are raised when using exogenous features."""
  dtype = dtypes.float64
  # One-hot encode a two-valued categorical exogenous feature.
  one_hot_a = layers.one_hot_column(
      sparse_id_column=feature_column.sparse_column_with_keys(
          column_name="feature_a", keys=["on", "off"]))
  regressor = estimators.StructuralEnsembleRegressor(
      periodicities=[],
      num_features=1,
      moving_average_order=0,
      exogenous_feature_columns=[one_hot_a],
      dtype=dtype)
  features = {
      TrainEvalFeatures.TIMES: [1, 2, 3, 4, 5, 6],
      TrainEvalFeatures.VALUES: [[0.01], [5.10], [5.21], [0.30], [5.41],
                                 [0.50]],
      "feature_a": [["off"], ["on"], ["on"], ["off"], ["on"], ["off"]]
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features), window_size=6, batch_size=1)
  regressor.train(input_fn=train_input_fn, steps=1)
  eval_input_fn = input_pipeline.WholeDatasetInputFn(
      input_pipeline.NumpyReader(features))
  evaluation = regressor.evaluate(input_fn=eval_input_fn, steps=1)
  predict_input_fn = input_pipeline.predict_continuation_input_fn(
      evaluation,
      times=[[7, 8, 9]],
      exogenous_features={"feature_a": [[["on"], ["off"], ["on"]]]})
  regressor.predict(input_fn=predict_input_fn)
def _test_initialization(self, warmup_iterations, batch_size):
  """Run a stub model through the chaining state manager and return its loss.

  Args:
    warmup_iterations: Number of loss evaluations run to warm up saved state
      before the returned loss is computed.
    batch_size: Batch size for the random-window input pipeline, or -1 to
      use an AllWindowInputFn instead.

  Returns:
    The evaluated loss after warmup.
  """
  stub_model = StubTimeSeriesModel()
  data = self._make_test_data(length=20, cut_start=None, cut_end=None,
                              offset=0.)
  if batch_size == -1:
    input_fn = test_utils.AllWindowInputFn(
        input_pipeline.NumpyReader(data), window_size=10)
  else:
    input_fn = input_pipeline.RandomWindowInputFn(
        input_pipeline.NumpyReader(data), window_size=10,
        batch_size=batch_size)
  chainer = state_management.ChainingStateManager(
      state_saving_interval=1)
  features, _ = input_fn()
  stub_model.initialize_graph()
  chainer.initialize_graph(model=stub_model)
  model_outputs = chainer.define_loss(model=stub_model, features=features,
                                      mode=estimator_lib.ModeKeys.TRAIN)
  with self.cached_session() as session:
    variables.global_variables_initializer().run()
    coordinator = coordinator_lib.Coordinator()
    queue_runner_impl.start_queue_runners(session, coord=coordinator)
    for _ in range(warmup_iterations):
      # Warm up saved state
      model_outputs.loss.eval()
    outputs = model_outputs.loss.eval()
    coordinator.request_stop()
    coordinator.join()
    return outputs
def _test_missing_values(self, cut_start, cut_end, offset):
  """Return the chained loss for data with a section cut out.

  Args:
    cut_start: Start index of the removed section (passed to
      _make_test_data).
    cut_end: End index of the removed section.
    offset: Offset applied to the generated data.

  Returns:
    The evaluated loss after ten warmup evaluations.
  """
  stub_model = StubTimeSeriesModel()
  data = self._make_test_data(length=100, cut_start=cut_start,
                              cut_end=cut_end, offset=offset)
  input_fn = test_utils.AllWindowInputFn(
      input_pipeline.NumpyReader(data), window_size=10)
  chainer = state_management.ChainingStateManager(
      state_saving_interval=1)
  features, _ = input_fn()
  stub_model.initialize_graph()
  chainer.initialize_graph(model=stub_model)
  model_outputs = chainer.define_loss(model=stub_model, features=features,
                                      mode=estimator_lib.ModeKeys.TRAIN)
  with self.cached_session() as session:
    variables.global_variables_initializer().run()
    coordinator = coordinator_lib.Coordinator()
    queue_runner_impl.start_queue_runners(session, coord=coordinator)
    for _ in range(10):
      # Warm up the chained state before reading the final loss.
      model_outputs.loss.eval()
    returned_loss = model_outputs.loss.eval()
    coordinator.request_stop()
    coordinator.join()
    return returned_loss
def test_numpy(self):
  """Whole-dataset input_fn round-trips a 4-feature numpy series."""
  reader = input_pipeline.NumpyReader(
      _make_numpy_time_series(num_features=4, num_samples=100))
  self._whole_dataset_input_fn_test_template(
      time_series_reader=reader, num_features=4, num_samples=100)
def test_multivariate(self):
  """Train a multivariate level model; statistics reach component models."""
  dtype = dtypes.float32
  num_features = 3
  # A single off-diagonal has a non-zero value in the true transition
  # noise covariance.
  covariance = numpy.eye(num_features)
  covariance[-1, 0] = 1.
  covariance[0, -1] = 1.
  dataset_size = 100
  observed = numpy.cumsum(
      numpy.random.multivariate_normal(
          mean=numpy.zeros(num_features), cov=covariance,
          size=dataset_size),
      axis=0)
  model = MultivariateLevelModel(
      configuration=state_space_model.StateSpaceModelConfiguration(
          num_features=num_features,
          dtype=dtype,
          use_observation_noise=False,
          transition_covariance_initial_log_scale_bias=5.))
  estimator = estimators.StateSpaceRegressor(
      model=model,
      optimizer=gradient_descent.GradientDescentOptimizer(0.1))
  data = {
      feature_keys.TrainEvalFeatures.TIMES: numpy.arange(dataset_size),
      feature_keys.TrainEvalFeatures.VALUES: observed
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(data), batch_size=16, window_size=16)
  estimator.train(input_fn=train_input_fn, steps=1)
  for component in model._ensemble_members:
    # Check that input statistics propagated to component models
    self.assertTrue(component._input_statistics)
def chained_model_outputs(original_model, data):
  """Build chunked (chained-state) model outputs and an evaluation closure.

  NOTE(review): `chunk_size` is a free variable captured from the enclosing
  scope, so this function is only meaningful where that name is defined.

  Args:
    original_model: The time series model to evaluate in chunks.
    data: Feature dictionary fed through an AllWindowInputFn.

  Returns:
    A function of a Session that evaluates and returns the chunked loss,
    posteriors, and predictions.
  """
  input_fn = test_utils.AllWindowInputFn(
      input_pipeline.NumpyReader(data), window_size=chunk_size)
  state_manager = state_management.ChainingStateManager(
      state_saving_interval=1)
  features, _ = input_fn()
  state_manager.initialize_graph(original_model)
  model_outputs = state_manager.define_loss(
      model=original_model, features=features,
      mode=estimator_lib.ModeKeys.TRAIN)
  def _eval_outputs(session):
    for _ in range(50):
      # Warm up saved state
      model_outputs.loss.eval()
    (posterior_mean, posterior_var,
     priors_from_time) = model_outputs.end_state
    posteriors = ((posterior_mean, ), (posterior_var, ), priors_from_time)
    outputs = (model_outputs.loss, posteriors,
               model_outputs.predictions)
    chunked_outputs_evaled = session.run(outputs)
    return chunked_outputs_evaled
  return _eval_outputs
def _gap_test_template(self, times, values):
  """Check that get_batch_loss runs on data with gaps in the time index.

  Args:
    times: Time values (possibly with gaps) for the test series.
    values: Corresponding observed values.
  """
  random_model = RandomStateSpaceModel(
      state_dimension=1, state_noise_dimension=1,
      configuration=state_space_model.StateSpaceModelConfiguration(
          num_features=1))
  random_model.initialize_graph()
  input_fn = input_pipeline.WholeDatasetInputFn(
      input_pipeline.NumpyReader({
          feature_keys.TrainEvalFeatures.TIMES: times,
          feature_keys.TrainEvalFeatures.VALUES: values
      }))
  features, _ = input_fn()
  # Re-bind to the pipeline's (batched) tensors rather than the raw inputs.
  times = features[feature_keys.TrainEvalFeatures.TIMES]
  values = features[feature_keys.TrainEvalFeatures.VALUES]
  model_outputs = random_model.get_batch_loss(
      features={
          feature_keys.TrainEvalFeatures.TIMES: times,
          feature_keys.TrainEvalFeatures.VALUES: values
      },
      mode=None,
      state=math_utils.replicate_state(
          start_state=random_model.get_start_state(),
          batch_size=array_ops.shape(times)[0]))
  with self.cached_session() as session:
    variables.global_variables_initializer().run()
    coordinator = coordinator_lib.Coordinator()
    queue_runner_impl.start_queue_runners(session, coord=coordinator)
    # Evaluate once; the test passes if no errors are raised.
    model_outputs.loss.eval()
    coordinator.request_stop()
    coordinator.join()
def test_numpy_nobatch_nofeatures(self):
  """Whole-dataset input_fn accepts 1-D values (no feature dimension)."""
  data = _make_numpy_time_series(num_features=1, num_samples=100)
  # Drop the trailing feature axis to exercise the no-features path.
  data[TrainEvalFeatures.VALUES] = data[TrainEvalFeatures.VALUES][:, 0]
  self._whole_dataset_input_fn_test_template(
      time_series_reader=input_pipeline.NumpyReader(data),
      num_features=1,
      num_samples=100)
def test_numpy(self):
  """All-window input_fn round-trips a 2-feature numpy series."""
  original = _make_numpy_time_series(num_features=2, num_samples=31)
  self._all_window_input_fn_test_template(
      time_series_reader=input_pipeline.NumpyReader(original),
      original_numpy_features=original,
      num_samples=31,
      window_size=5)
def _equivalent_to_single_model_test_template(self, model_generator):
  """Compare chunked-model posteriors and predictions to an un-chunked run.

  Args:
    model_generator: Callable taking (model, data) and returning a function
      of a Session which evaluates the comparison (chunked) outputs.
  """
  with self.cached_session() as session:
    random_model = RandomStateSpaceModel(
        state_dimension=5, state_noise_dimension=4,
        configuration=state_space_model.StateSpaceModelConfiguration(
            dtype=dtypes.float64, num_features=1))
    random_model.initialize_graph()
    series_length = 10
    model_data = random_model.generate(
        number_of_series=1, series_length=series_length,
        model_parameters=random_model.random_model_parameters())
    input_fn = input_pipeline.WholeDatasetInputFn(
        input_pipeline.NumpyReader(model_data))
    features, _ = input_fn()
    model_outputs = random_model.get_batch_loss(
        features=features,
        mode=None,
        state=math_utils.replicate_state(
            start_state=random_model.get_start_state(),
            batch_size=array_ops.shape(
                features[feature_keys.TrainEvalFeatures.TIMES])[0]))
    variables.global_variables_initializer().run()
    compare_outputs_evaled_fn = model_generator(random_model, model_data)
    coordinator = coordinator_lib.Coordinator()
    queue_runner_impl.start_queue_runners(session, coord=coordinator)
    compare_outputs_evaled = compare_outputs_evaled_fn(session)
    model_outputs_evaled = session.run(
        (model_outputs.end_state, model_outputs.predictions))
    coordinator.request_stop()
    coordinator.join()
    model_posteriors, model_predictions = model_outputs_evaled
    (_, compare_posteriors, compare_predictions) = compare_outputs_evaled
    (model_posterior_mean, model_posterior_var,
     model_from_time) = model_posteriors
    (compare_posterior_mean, compare_posterior_var,
     compare_from_time) = compare_posteriors
    self.assertAllClose(model_posterior_mean, compare_posterior_mean[0])
    self.assertAllClose(model_posterior_var, compare_posterior_var[0])
    self.assertAllClose(model_from_time, compare_from_time)
    self.assertEqual(sorted(model_predictions.keys()),
                     sorted(compare_predictions.keys()))
    for prediction_name in model_predictions:
      if prediction_name == "loss":
        # Chunking means that losses will be different; skip testing them.
        continue
      # Compare the last chunk to their corresponding un-chunked model
      # predictions
      last_prediction_chunk = compare_predictions[prediction_name][-1]
      comparison_values = last_prediction_chunk.shape[0]
      model_prediction = (
          model_predictions[prediction_name][0, -comparison_values:])
      self.assertAllClose(model_prediction, last_prediction_chunk)
def test_numpy_discard_out_of_order_window_equal(self):
  """Discarding out-of-order windows works when window_size == num_samples."""
  reader = input_pipeline.NumpyReader(
      _make_numpy_time_series(num_features=1, num_samples=3))
  self._random_window_input_fn_test_template(
      time_series_reader=reader,
      num_features=1,
      window_size=3,
      batch_size=5,
      discard_out_of_order=True)
def test_long_eval(self):
  """Chunked evaluation of an AR model matches evaluation on raw data."""
  g = ops.Graph()
  with g.as_default():
    model = ar_model.ARModel(periodicities=2,
                             num_features=1,
                             num_time_buckets=10,
                             input_window_size=2,
                             output_window_size=1)
    raw_features = {
        TrainEvalFeatures.TIMES: [[1, 3, 5, 7, 11]],
        TrainEvalFeatures.VALUES: [[[1.], [2.], [3.], [4.], [5.]]]
    }
    chunked_features, _ = test_utils.AllWindowInputFn(
        time_series_reader=input_pipeline.NumpyReader(raw_features),
        window_size=3)()
    model.initialize_graph()
    with variable_scope.variable_scope("armodel") as scope:
      raw_evaluation = model.define_loss(
          raw_features, mode=estimator_lib.ModeKeys.EVAL)
    # Reuse the same variables so raw and chunked losses are comparable.
    with variable_scope.variable_scope(scope, reuse=True):
      chunked_evaluation = model.define_loss(
          chunked_features, mode=estimator_lib.ModeKeys.EVAL)
    with session.Session() as sess:
      coordinator = coordinator_lib.Coordinator()
      queue_runner_impl.start_queue_runners(sess, coord=coordinator)
      variables.global_variables_initializer().run()
      raw_evaluation_evaled, chunked_evaluation_evaled = sess.run(
          [raw_evaluation, chunked_evaluation])
      self.assertAllClose(chunked_evaluation_evaled.loss,
                          raw_evaluation_evaled.loss)
      last_chunk_evaluation_state = [
          state[-1, None] for state in chunked_evaluation_evaled.end_state
      ]
      for last_chunk_state_member, raw_state_member in zip(
          last_chunk_evaluation_state, raw_evaluation_evaled.end_state):
        self.assertAllClose(last_chunk_state_member, raw_state_member)
      self.assertAllEqual([[5, 7, 11]],
                          raw_evaluation_evaled.prediction_times)
      for feature_name in raw_evaluation.predictions:
        self.assertAllEqual(
            [1, 3, 1],  # batch, window, num_features. The window size has 2
                        # cut off for the first input_window.
            raw_evaluation_evaled.predictions[feature_name].shape)
        self.assertAllClose(
            np.reshape(chunked_evaluation_evaled.predictions[feature_name],
                       [-1]),
            np.reshape(raw_evaluation_evaled.predictions[feature_name],
                       [-1]))
      coordinator.request_stop()
      coordinator.join()
def test_loop_unrolling(self):
  """Tests running/restoring from a checkpoint with static unrolling."""
  model = TimeDependentStateSpaceModel(
      # Unroll during training, but not evaluation
      static_unrolling_window_size_threshold=2)
  estimator = estimators.StateSpaceRegressor(model=model)
  dataset = {
      feature_keys.TrainEvalFeatures.TIMES: numpy.arange(100),
      feature_keys.TrainEvalFeatures.VALUES: numpy.arange(100)
  }
  estimator.train(
      input_fn=input_pipeline.RandomWindowInputFn(
          input_pipeline.NumpyReader(dataset), batch_size=16,
          window_size=2),
      max_steps=1)
  estimator.evaluate(
      input_fn=input_pipeline.WholeDatasetInputFn(
          input_pipeline.NumpyReader(dataset)),
      steps=1)
def test_numpy_withbatch(self):
  """Whole-dataset input_fn accepts pre-batched (leading-axis) numpy data."""
  data_nobatch = _make_numpy_time_series(num_features=4, num_samples=100)
  # Add a leading batch dimension of size one to every feature.
  batched = {name: value[None] for name, value in data_nobatch.items()}
  self._whole_dataset_input_fn_test_template(
      time_series_reader=input_pipeline.NumpyReader(batched),
      num_features=4,
      num_samples=100)
def test_numpy_discard_out_of_order_window_too_large(self):
  """A window larger than the dataset raises a helpful ValueError."""
  reader = input_pipeline.NumpyReader(
      _make_numpy_time_series(num_features=1, num_samples=2))
  with self.assertRaisesRegexp(ValueError, "only 2 records were available"):
    self._random_window_input_fn_test_template(
        time_series_reader=reader,
        num_features=1,
        window_size=3,
        batch_size=5,
        discard_out_of_order=True)
def test_structural_ensemble_numpy_input(self):
  """StructuralEnsembleRegressor trains directly from numpy arrays."""
  numpy_data = {
      "times": numpy.arange(50),
      "values": numpy.random.normal(size=[50])
  }
  regressor = estimators.StructuralEnsembleRegressor(
      num_features=1,
      periodicities=[],
      model_dir=self.get_temp_dir(),
      config=_SeedRunConfig())
  regressor.train(
      input_pipeline.WholeDatasetInputFn(
          input_pipeline.NumpyReader(numpy_data)),
      steps=1)
def dry_run_train_helper(self, sample_every, period, num_samples, model_type,
                         model_args, num_features=1):
  """Train a state space model briefly and check that its loss decreases.

  Args:
    sample_every: Sampling interval for the simulated data.
    period: Period of the simulated series.
    num_samples: Number of samples to generate.
    model_type: State space model class to construct.
    model_args: Extra keyword arguments for the model constructor.
    num_features: Number of features in the simulated series.
  """
  numpy.random.seed(1)
  dtype = dtypes.float32
  features = self.simple_data(sample_every, dtype=dtype, period=period,
                              num_samples=num_samples,
                              num_features=num_features)
  model = model_type(
      configuration=(state_space_model.StateSpaceModelConfiguration(
          num_features=num_features, dtype=dtype,
          covariance_prior_fn=lambda _: 0.)),
      **model_args)

  class _RunConfig(estimator_lib.RunConfig):
    # Fixed graph-level seed keeps the dry run deterministic.

    @property
    def tf_random_seed(self):
      return 4

  estimator = estimators.StateSpaceRegressor(model, config=_RunConfig())
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features), num_threads=1, shuffle_seed=1,
      batch_size=16, window_size=16)
  eval_input_fn = input_pipeline.WholeDatasetInputFn(
      input_pipeline.NumpyReader(features))
  estimator.train(input_fn=train_input_fn, max_steps=1)
  first_evaluation = estimator.evaluate(input_fn=eval_input_fn, steps=1)
  estimator.train(input_fn=train_input_fn, max_steps=3)
  second_evaluation = estimator.evaluate(input_fn=eval_input_fn, steps=1)
  # Two extra training steps should have reduced the loss.
  self.assertLess(second_evaluation["loss"], first_evaluation["loss"])
def test_ar_lstm_regressor(self):
  """Smoke-test LSTMAutoRegressor train/evaluate with an exogenous column."""
  dtype = dtypes.float32
  estimator = estimators.LSTMAutoRegressor(
      periodicities=10,
      input_window_size=10,
      output_window_size=6,
      model_dir=tempfile.mkdtemp(dir=self.get_temp_dir()),
      num_features=1,
      extra_feature_columns=(feature_column.numeric_column("exogenous"),),
      num_units=10,
      config=_SeedRunConfig())
  features = {
      feature_keys.TrainEvalFeatures.TIMES:
          numpy.arange(20, dtype=numpy.int64),
      feature_keys.TrainEvalFeatures.VALUES:
          numpy.arange(20, dtype=dtype.as_numpy_dtype),
      "exogenous":
          numpy.arange(20, dtype=dtype.as_numpy_dtype)
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features), shuffle_seed=2, num_threads=1,
      batch_size=16, window_size=16)
  eval_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features), shuffle_seed=3, num_threads=1,
      batch_size=16, window_size=16)
  estimator.train(input_fn=train_input_fn, steps=1)
  evaluation = estimator.evaluate(input_fn=eval_input_fn, steps=1)
  # Loss is reported as a scalar equal to average_loss.
  self.assertAllEqual(evaluation["loss"], evaluation["average_loss"])
  self.assertAllEqual([], evaluation["loss"].shape)
def test_no_periodicity(self):
  """Test that no errors are raised when periodicities is None."""
  regressor = estimators.StructuralEnsembleRegressor(
      periodicities=None,
      num_features=1,
      moving_average_order=0,
      dtype=dtypes.float64)
  features = {
      TrainEvalFeatures.TIMES: [1, 2, 3, 4, 5, 6],
      TrainEvalFeatures.VALUES: [[0.01], [5.10], [5.21], [0.30], [5.41],
                                 [0.50]]
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(features), window_size=6, batch_size=1)
  regressor.train(input_fn=train_input_fn, steps=1)
  evaluation = regressor.evaluate(
      input_fn=input_pipeline.WholeDatasetInputFn(
          input_pipeline.NumpyReader(features)),
      steps=1)
  regressor.predict(
      input_fn=input_pipeline.predict_continuation_input_fn(
          evaluation, times=[[7, 8, 9]]))
def _test_pass_to_next(self, read_offset, step, correct_offset):
  """Check that chained state from one pipeline is passed to the next.

  Args:
    read_offset: Number of records trimmed from the end of the first
      dataset and from the start of the second.
    step: Time step between records in the generated data.
    correct_offset: Expected offset the stub model should observe.

  Returns:
    The loss evaluated on the second (result) input pipeline.
  """
  stub_model = StubTimeSeriesModel(correct_offset=correct_offset)
  data = self._make_test_data(
      length=100 + read_offset, cut_start=None, cut_end=None, offset=100.,
      step=step)
  init_input_fn = input_pipeline.WholeDatasetInputFn(
      input_pipeline.NumpyReader(
          {k: v[:-read_offset] for k, v in data.items()}))
  result_input_fn = input_pipeline.WholeDatasetInputFn(
      input_pipeline.NumpyReader(
          {k: v[read_offset:] for k, v in data.items()}))
  chainer = state_management.ChainingStateManager(
      state_saving_interval=1)
  stub_model.initialize_graph()
  chainer.initialize_graph(model=stub_model)
  init_model_outputs = chainer.define_loss(
      model=stub_model, features=init_input_fn()[0],
      mode=estimator_lib.ModeKeys.TRAIN)
  result_model_outputs = chainer.define_loss(
      model=stub_model, features=result_input_fn()[0],
      mode=estimator_lib.ModeKeys.TRAIN)
  with self.cached_session() as session:
    variables.global_variables_initializer().run()
    coordinator = coordinator_lib.Coordinator()
    queue_runner_impl.start_queue_runners(session, coord=coordinator)
    # Evaluate the first pipeline so its end state is saved, then read the
    # loss the second pipeline computes from that saved state.
    init_model_outputs.loss.eval()
    returned_loss = result_model_outputs.loss.eval()
    coordinator.request_stop()
    coordinator.join()
    return returned_loss
def _time_dependency_test_template(self, model_type):
  """Test that a time-dependent observation model influences predictions."""
  estimator = estimators.StateSpaceRegressor(
      model=model_type(),
      optimizer=gradient_descent.GradientDescentOptimizer(0.1))
  values = numpy.reshape([1., 2., 3., 4.], newshape=[1, 4, 1])
  input_fn = input_pipeline.WholeDatasetInputFn(
      input_pipeline.NumpyReader({
          feature_keys.TrainEvalFeatures.TIMES: [[0, 1, 2, 3]],
          feature_keys.TrainEvalFeatures.VALUES: values
      }))
  estimator.train(input_fn=input_fn, max_steps=1)
  predicted_values = estimator.evaluate(input_fn=input_fn, steps=1)["mean"]
  # Throw out the first value so we don't test the prior
  self.assertAllEqual(values[1:], predicted_values[1:])
def test_exact_posterior_recovery_no_transition_noise(self):
  """With no transition noise, filtering should recover the exact state."""
  with self.cached_session() as session:
    stub_model, data, true_params = self._get_single_model()
    input_fn = input_pipeline.WholeDatasetInputFn(
        input_pipeline.NumpyReader(data))
    features, _ = input_fn()
    model_outputs = stub_model.get_batch_loss(
        features=features,
        mode=None,
        state=math_utils.replicate_state(
            start_state=stub_model.get_start_state(),
            batch_size=array_ops.shape(
                features[feature_keys.TrainEvalFeatures.TIMES])[0]))
    variables.global_variables_initializer().run()
    coordinator = coordinator_lib.Coordinator()
    queue_runner_impl.start_queue_runners(session, coord=coordinator)
    posterior_mean, posterior_var, posterior_times = session.run(
        # Feed the true model parameters so that this test doesn't depend on
        # the generated parameters being close to the variable initializations
        # (an alternative would be training steps to fit the noise values,
        # which would be slow).
        model_outputs.end_state, feed_dict=true_params)
    coordinator.request_stop()
    coordinator.join()
    # Posterior variance should collapse to (near) zero.
    self.assertAllClose(numpy.zeros([1, 4, 4]), posterior_var, atol=1e-2)
    # Posterior mean should match the prior mean propagated through the
    # transition matrix for the full series length.
    self.assertAllClose(numpy.dot(
        numpy.linalg.matrix_power(
            stub_model.transition,
            data[feature_keys.TrainEvalFeatures.TIMES].shape[1]),
        true_params[stub_model.prior_state_mean]),
                        posterior_mean[0],
                        rtol=1e-1)
    self.assertAllClose(
        math_utils.batch_end_time(
            features[feature_keys.TrainEvalFeatures.TIMES]).eval(),
        posterior_times)
def test_chained_exact_posterior_recovery_no_transition_noise(self):
  """Chunked (chained-state) filtering also recovers the exact posterior."""
  with self.cached_session() as session:
    stub_model, data, true_params = self._get_single_model()
    chunk_size = 10
    input_fn = test_utils.AllWindowInputFn(
        input_pipeline.NumpyReader(data), window_size=chunk_size)
    features, _ = input_fn()
    state_manager = state_management.ChainingStateManager(
        state_saving_interval=1)
    state_manager.initialize_graph(stub_model)
    model_outputs = state_manager.define_loss(
        model=stub_model, features=features,
        mode=estimator_lib.ModeKeys.TRAIN)
    variables.global_variables_initializer().run()
    coordinator = coordinator_lib.Coordinator()
    queue_runner_impl.start_queue_runners(session, coord=coordinator)
    # Run through the whole series one chunk at a time so the saved state
    # covers all of the data before the end state is read.
    for _ in range(
        data[feature_keys.TrainEvalFeatures.TIMES].shape[1] // chunk_size):
      model_outputs.loss.eval()
    posterior_mean, posterior_var, posterior_times = session.run(
        model_outputs.end_state, feed_dict=true_params)
    coordinator.request_stop()
    coordinator.join()
    # Posterior variance should collapse to (near) zero.
    self.assertAllClose(numpy.zeros([1, 4, 4]), posterior_var, atol=1e-2)
    # Posterior mean should match the prior mean propagated through the
    # transition matrix for the full series length.
    self.assertAllClose(numpy.dot(
        numpy.linalg.matrix_power(
            stub_model.transition,
            data[feature_keys.TrainEvalFeatures.TIMES].shape[1]),
        true_params[stub_model.prior_state_mean]),
                        posterior_mean[0],
                        rtol=1e-1)
    self.assertAllClose(
        data[feature_keys.TrainEvalFeatures.TIMES][:, -1], posterior_times)
def test_state_override(self):
  """Passing STATE_TUPLE in features overrides the managed model state."""
  test_start_state = (numpy.array([[2, 3, 4]]), (numpy.array([2]),
                                                 numpy.array([[3., 5.]])))
  data = {
      feature_keys.FilteringFeatures.TIMES: numpy.arange(5),
      feature_keys.FilteringFeatures.VALUES: numpy.zeros(shape=[5, 3])
  }
  features, _ = input_pipeline.WholeDatasetInputFn(
      input_pipeline.NumpyReader(data))()
  # Inject the override state directly into the feature dictionary.
  features[feature_keys.FilteringFeatures.STATE_TUPLE] = test_start_state
  stub_model = _StateOverrideModel()
  chainer = state_management.ChainingStateManager()
  stub_model.initialize_graph()
  chainer.initialize_graph(model=stub_model)
  model_outputs = chainer.define_loss(
      model=stub_model, features=features, mode=estimator_lib.ModeKeys.EVAL)
  with train.MonitoredSession() as session:
    end_state = session.run(model_outputs.end_state)
    # The end state should equal the injected start state (presumably
    # _StateOverrideModel passes state through unchanged — defined elsewhere
    # in this file).
    nest.assert_same_structure(test_start_state, end_state)
    for expected, received in zip(
        nest.flatten(test_start_state), nest.flatten(end_state)):
      self.assertAllEqual(expected, received)
def _input_statistics_test_template(self,
                                    stat_object,
                                    num_features,
                                    dtype,
                                    give_full_data,
                                    warmup_iterations=0,
                                    rtol=1e-6,
                                    data_length=500,
                                    chunk_size=4):
  """Check adaptive input statistics against their analytic values.

  Args:
    stat_object: The input statistics object under test.
    num_features: Number of features in the generated series.
    dtype: TensorFlow dtype of the generated values.
    give_full_data: If True, hand the full dataset to the statistics object
      up front via set_data.
    warmup_iterations: Number of statistic evaluations to run before the
      assertions so adaptive statistics can converge.
    rtol: Relative tolerance for the comparisons.
    data_length: Number of samples in the generated series.
    chunk_size: Window size for the random-window input pipeline.
  """
  graph = ops.Graph()
  with graph.as_default():
    numpy_dtype = dtype.as_numpy_dtype
    # values[0, t, f] == t + f; times are spaced by 2 starting at -3.
    values = (
        (numpy.arange(data_length, dtype=numpy_dtype)[..., None]
         + numpy.arange(num_features, dtype=numpy_dtype)[None, ...])[None])
    times = 2 * (numpy.arange(data_length)[None]) - 3
    if give_full_data:
      stat_object.set_data((times, values))
    features = {
        TrainEvalFeatures.TIMES: times,
        TrainEvalFeatures.VALUES: values
    }
    input_fn = input_pipeline.RandomWindowInputFn(
        batch_size=16, window_size=chunk_size,
        time_series_reader=input_pipeline.NumpyReader(features))
    statistics = stat_object.initialize_graph(features=input_fn()[0])
    with self.session(graph=graph) as session:
      variables.global_variables_initializer().run()
      coordinator = coordinator_lib.Coordinator()
      queue_runner_impl.start_queue_runners(session, coord=coordinator)
      for _ in range(warmup_iterations):
        # A control dependency should ensure that, for queue-based statistics,
        # a use of any statistic is preceded by an update of all adaptive
        # statistics.
        statistics.total_observation_count.eval()
      self.assertAllClose(
          range(num_features)
          + numpy.mean(numpy.arange(chunk_size))[None],
          statistics.series_start_moments.mean.eval(),
          rtol=rtol)
      self.assertAllClose(
          numpy.tile(numpy.var(numpy.arange(chunk_size))[None],
                     [num_features]),
          statistics.series_start_moments.variance.eval(),
          rtol=rtol)
      self.assertAllClose(
          numpy.mean(values[0], axis=0),
          statistics.overall_feature_moments.mean.eval(),
          rtol=rtol)
      self.assertAllClose(
          numpy.var(values[0], axis=0),
          statistics.overall_feature_moments.variance.eval(),
          rtol=rtol)
      self.assertAllClose(-3, statistics.start_time.eval(), rtol=rtol)
      self.assertAllClose(data_length,
                          statistics.total_observation_count.eval(),
                          rtol=rtol)
      coordinator.request_stop()
      coordinator.join()
def test_one_shot_prediction_head_export(self, estimator_factory):
  """Export and exercise a OneShotPredictionHead SavedModel.

  Trains briefly, then checks both the raw-tensor serving signature and the
  tf.Example-parsing serving signature produce predictions of the expected
  shape.

  Args:
    estimator_factory: Callable constructing the estimator under test.
  """
  def _new_temp_dir():
    return os.path.join(test.get_temp_dir(), str(ops.uid()))
  model_dir = _new_temp_dir()
  categorical_column = feature_column.categorical_column_with_hash_bucket(
      key="categorical_exogenous_feature", hash_bucket_size=16)
  exogenous_feature_columns = [
      feature_column.numeric_column(
          "2d_exogenous_feature", shape=(2,)),
      feature_column.embedding_column(
          categorical_column=categorical_column, dimension=10)]
  estimator = estimator_factory(
      model_dir=model_dir,
      exogenous_feature_columns=exogenous_feature_columns,
      head_type=ts_head_lib.OneShotPredictionHead)
  train_features = {
      feature_keys.TrainEvalFeatures.TIMES: numpy.arange(
          20, dtype=numpy.int64),
      feature_keys.TrainEvalFeatures.VALUES: numpy.tile(numpy.arange(
          20, dtype=numpy.float32)[:, None], [1, 5]),
      "2d_exogenous_feature": numpy.ones([20, 2]),
      "categorical_exogenous_feature": numpy.array(
          ["strkey"] * 20)[:, None]
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(train_features), shuffle_seed=2,
      num_threads=1, batch_size=16, window_size=16)
  estimator.train(input_fn=train_input_fn, steps=5)
  result = estimator.evaluate(input_fn=train_input_fn, steps=1)
  self.assertIn("average_loss", result)
  # One-shot heads should not report model state.
  self.assertNotIn(feature_keys.State.STATE_TUPLE, result)
  input_receiver_fn = estimator.build_raw_serving_input_receiver_fn()
  export_location = estimator.export_saved_model(_new_temp_dir(),
                                                 input_receiver_fn)
  graph = ops.Graph()
  with graph.as_default():
    with session_lib.Session() as session:
      signatures = loader.load(
          session, [tag_constants.SERVING], export_location)
      self.assertEqual([feature_keys.SavedModelLabels.PREDICT],
                       list(signatures.signature_def.keys()))
      predict_signature = signatures.signature_def[
          feature_keys.SavedModelLabels.PREDICT]
      six.assertCountEqual(
          self,
          [feature_keys.FilteringFeatures.TIMES,
           feature_keys.FilteringFeatures.VALUES,
           "2d_exogenous_feature",
           "categorical_exogenous_feature"],
          predict_signature.inputs.keys())
      # Filter on the first 20 points, predict the remaining 15, for a batch
      # of two identical series.
      features = {
          feature_keys.TrainEvalFeatures.TIMES: numpy.tile(
              numpy.arange(35, dtype=numpy.int64)[None, :], [2, 1]),
          feature_keys.TrainEvalFeatures.VALUES: numpy.tile(numpy.arange(
              20, dtype=numpy.float32)[None, :, None], [2, 1, 5]),
          "2d_exogenous_feature": numpy.ones([2, 35, 2]),
          "categorical_exogenous_feature": numpy.tile(numpy.array(
              ["strkey"] * 35)[None, :, None], [2, 1, 1])
      }
      feeds = {
          graph.as_graph_element(input_value.name): features[input_key]
          for input_key, input_value in predict_signature.inputs.items()}
      fetches = {output_key: graph.as_graph_element(output_value.name)
                 for output_key, output_value
                 in predict_signature.outputs.items()}
      output = session.run(fetches, feed_dict=feeds)
      self.assertEqual((2, 15, 5), output["mean"].shape)
  # Build a parsing input function, then make a tf.Example for it to parse.
  export_location = estimator.export_saved_model(
      _new_temp_dir(),
      estimator.build_one_shot_parsing_serving_input_receiver_fn(
          filtering_length=20, prediction_length=15))
  graph = ops.Graph()
  with graph.as_default():
    with session_lib.Session() as session:
      example = example_pb2.Example()
      times = example.features.feature[feature_keys.TrainEvalFeatures.TIMES]
      values = example.features.feature[feature_keys.TrainEvalFeatures.VALUES]
      times.int64_list.value.extend(range(35))
      for i in range(20):
        values.float_list.value.extend(
            [float(i) * 2. + feature_number
             for feature_number in range(5)])
      real_feature = example.features.feature["2d_exogenous_feature"]
      categortical_feature = example.features.feature[
          "categorical_exogenous_feature"]
      for i in range(35):
        real_feature.float_list.value.extend([1, 1])
        categortical_feature.bytes_list.value.append(b"strkey")
      # Serialize the tf.Example for feeding to the Session
      examples = [example.SerializeToString()] * 2
      signatures = loader.load(
          session, [tag_constants.SERVING], export_location)
      predict_signature = signatures.signature_def[
          feature_keys.SavedModelLabels.PREDICT]
      ((_, input_value),) = predict_signature.inputs.items()
      feeds = {graph.as_graph_element(input_value.name): examples}
      fetches = {output_key: graph.as_graph_element(output_value.name)
                 for output_key, output_value
                 in predict_signature.outputs.items()}
      output = session.run(fetches, feed_dict=feeds)
      self.assertEqual((2, 15, 5), output["mean"].shape)
def train_helper(self, input_window_size, loss, max_loss=None,
                 train_steps=200, anomaly_prob=0.01,
                 anomaly_distribution=None, multiple_periods=False):
  """Train an ARRegressor, then evaluate and predict with it.

  Args:
    input_window_size: Number of input timesteps per window.
    loss: Which ARModel loss to use.
    max_loss: Maximum acceptable evaluation loss; a default is derived from
      the loss type and data noise when None.
    train_steps: Number of training steps.
    anomaly_prob: Probability of anomalies in the generated data.
    anomaly_distribution: Anomaly distribution name, or None for no anomaly
      modeling.
    multiple_periods: Whether the generated data has multiple periods.
  """
  np.random.seed(3)
  data_noise_stddev = 0.2
  if max_loss is None:
    if loss == ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS:
      max_loss = 1.0
    else:
      max_loss = 0.05 / (data_noise_stddev**2)
  train_data, test_data = self.create_data(
      noise_stddev=data_noise_stddev,
      anomaly_prob=anomaly_prob,
      multiple_periods=multiple_periods)
  output_window_size = 10
  window_size = input_window_size + output_window_size

  class _RunConfig(estimator_lib.RunConfig):
    # Fixed graph-level seed keeps training deterministic.

    @property
    def tf_random_seed(self):
      return 3

  estimator = ARRegressor(
      periodicities=self.period,
      anomaly_prior_probability=0.01 if anomaly_distribution else None,
      anomaly_distribution=anomaly_distribution,
      num_features=2,
      output_window_size=output_window_size,
      num_time_buckets=20,
      input_window_size=input_window_size,
      hidden_layer_sizes=[16],
      loss=loss,
      config=_RunConfig())
  train_input_fn = input_pipeline.RandomWindowInputFn(
      time_series_reader=input_pipeline.NumpyReader(train_data),
      window_size=window_size,
      batch_size=64,
      num_threads=1,
      shuffle_seed=2)
  test_input_fn = test_utils.AllWindowInputFn(
      time_series_reader=input_pipeline.NumpyReader(test_data),
      window_size=window_size)
  # Test training
  estimator.train(input_fn=train_input_fn, steps=train_steps)
  test_evaluation = estimator.evaluate(input_fn=test_input_fn, steps=1)
  test_loss = test_evaluation["loss"]
  logging.info("Final test loss: %f", test_loss)
  self.assertLess(test_loss, max_loss)
  if loss == ar_model.ARModel.SQUARED_LOSS:
    # Test that the evaluation loss is reported without input scaling.
    self.assertAllClose(
        test_loss,
        np.mean((test_evaluation["mean"] -
                 test_evaluation["observed"])**2))
  # Test predict
  train_data_times = train_data[TrainEvalFeatures.TIMES]
  train_data_values = train_data[TrainEvalFeatures.VALUES]
  test_data_times = test_data[TrainEvalFeatures.TIMES]
  test_data_values = test_data[TrainEvalFeatures.VALUES]
  predict_times = np.expand_dims(
      np.concatenate(
          [train_data_times[input_window_size:], test_data_times]), 0)
  predict_true_values = np.expand_dims(
      np.concatenate(
          [train_data_values[input_window_size:], test_data_values]), 0)
  # Prime prediction state with the first input_window of training data;
  # there are no exogenous features (zero-width trailing axis).
  state_times = np.expand_dims(train_data_times[:input_window_size], 0)
  state_values = np.expand_dims(
      train_data_values[:input_window_size, :], 0)
  state_exogenous = state_times[:, :, None][:, :, :0]
  def prediction_input_fn():
    return ({
        PredictionFeatures.TIMES: training.limit_epochs(
            predict_times, num_epochs=1),
        PredictionFeatures.STATE_TUPLE: (state_times, state_values,
                                         state_exogenous)
    }, {})
  (predictions,) = tuple(
      estimator.predict(input_fn=prediction_input_fn))
  predicted_mean = predictions["mean"][:, 0]
  true_values = predict_true_values[0, :, 0]
  if loss == ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS:
    variances = predictions["covariance"][:, 0]
    standard_deviations = np.sqrt(variances)
    # Note that we may get tighter bounds with more training steps.
    errors = np.abs(predicted_mean - true_values) > 4 * standard_deviations
    fraction_errors = np.mean(errors)
    logging.info("Fraction errors: %f", fraction_errors)
def test_savedmodel_state_override(self):
  """Filtering in two chunks from a SavedModel matches a single pass."""
  random_model = RandomStateSpaceModel(
      state_dimension=5,
      state_noise_dimension=4,
      configuration=state_space_model.StateSpaceModelConfiguration(
          exogenous_feature_columns=[
              layers.real_valued_column("exogenous")
          ],
          dtype=dtypes.float64,
          num_features=1))
  estimator = estimators.StateSpaceRegressor(
      model=random_model,
      optimizer=gradient_descent.GradientDescentOptimizer(0.1))
  combined_input_fn = input_pipeline.WholeDatasetInputFn(
      input_pipeline.NumpyReader({
          feature_keys.FilteringFeatures.TIMES: [1, 2, 3, 4],
          feature_keys.FilteringFeatures.VALUES: [1., 2., 3., 4.],
          "exogenous": [-1., -2., -3., -4.]
      }))
  estimator.train(combined_input_fn, steps=1)
  export_location = estimator.export_saved_model(
      self.get_temp_dir(),
      estimator.build_raw_serving_input_receiver_fn())
  with ops.Graph().as_default() as graph:
    random_model.initialize_graph()
    with self.session(graph=graph) as session:
      variables.global_variables_initializer().run()
      evaled_start_state = session.run(
          random_model.get_start_state())
      # Add a batch dimension so the state can be fed to the SavedModel.
      evaled_start_state = [
          state_element[None, ...]
          for state_element in evaled_start_state
      ]
  with ops.Graph().as_default() as graph:
    with self.session(graph=graph) as session:
      signatures = loader.load(session, [tag_constants.SERVING],
                               export_location)
      # Filter the series in two halves...
      first_split_filtering = saved_model_utils.filter_continuation(
          continue_from={
              feature_keys.FilteringResults.STATE_TUPLE:
                  evaled_start_state
          },
          signatures=signatures,
          session=session,
          features={
              feature_keys.FilteringFeatures.TIMES: [1, 2],
              feature_keys.FilteringFeatures.VALUES: [1., 2.],
              "exogenous": [[-1.], [-2.]]
          })
      second_split_filtering = saved_model_utils.filter_continuation(
          continue_from=first_split_filtering,
          signatures=signatures,
          session=session,
          features={
              feature_keys.FilteringFeatures.TIMES: [3, 4],
              feature_keys.FilteringFeatures.VALUES: [3., 4.],
              "exogenous": [[-3.], [-4.]]
          })
      # ...and in one combined pass over the same data.
      combined_filtering = saved_model_utils.filter_continuation(
          continue_from={
              feature_keys.FilteringResults.STATE_TUPLE:
                  evaled_start_state
          },
          signatures=signatures,
          session=session,
          features={
              feature_keys.FilteringFeatures.TIMES: [1, 2, 3, 4],
              feature_keys.FilteringFeatures.VALUES: [1., 2., 3., 4.],
              "exogenous": [[-1.], [-2.], [-3.], [-4.]]
          })
      split_predict = saved_model_utils.predict_continuation(
          continue_from=second_split_filtering,
          signatures=signatures,
          session=session,
          steps=1,
          exogenous_features={"exogenous": [[[-5.]]]})
      combined_predict = saved_model_utils.predict_continuation(
          continue_from=combined_filtering,
          signatures=signatures,
          session=session,
          steps=1,
          exogenous_features={"exogenous": [[[-5.]]]})
      # Split and combined filtering must agree on state and predictions.
      for state_key, combined_state_value in combined_filtering.items():
        if state_key == feature_keys.FilteringResults.TIMES:
          continue
        self.assertAllClose(combined_state_value,
                            second_split_filtering[state_key])
      for prediction_key, combined_value in combined_predict.items():
        self.assertAllClose(combined_value,
                            split_predict[prediction_key])
def test_numpy_multivariate(self):
  """Multivariate (3-feature) numpy data works with the shared template."""
  reader = input_pipeline.NumpyReader(
      _make_numpy_time_series(num_features=3, num_samples=50))
  self._test_multivariate(reader, num_features=3)
def test_numpy_discard_out_of_order(self):
  """Out-of-order windows from numpy data are discarded when requested."""
  reader = input_pipeline.NumpyReader(
      _make_numpy_time_series(num_features=1, num_samples=50))
  self._test_out_of_order(reader, discard_out_of_order=True)