예제 #1
0
 def test_long_eval_discard_indivisible(self):
     """Evaluate a series whose length does not divide into output windows.

     Five time steps are evaluated with an input window of two and an
     output window of two; the assertions expect predictions only for
     times 7 and 11.
     """
     graph = ops.Graph()
     with graph.as_default():
         features = {
             TrainEvalFeatures.TIMES: [[1, 3, 5, 7, 11]],
             TrainEvalFeatures.VALUES: [[[1.], [2.], [3.], [4.], [5.]]]
         }
         model = ar_model.ARModel(
             periodicities=2,
             num_features=1,
             num_time_buckets=10,
             input_window_size=2,
             output_window_size=2)
         model.initialize_graph()
         evaluation = model.define_loss(
             features, mode=estimator_lib.ModeKeys.EVAL)
         # batch, window, num_features. The window has two cut off for the
         # first input window and one discarded so that the remainder is
         # divisible into output windows.
         expected_shape = [1, 2, 1]
         with session.Session() as sess:
             coord = coordinator_lib.Coordinator()
             queue_runner_impl.start_queue_runners(sess, coord=coord)
             variables.global_variables_initializer().run()
             evaluated = sess.run(evaluation)
             self.assertAllEqual([[7, 11]], evaluated.prediction_times)
             for name in evaluation.predictions:
                 self.assertAllEqual(
                     expected_shape, evaluated.predictions[name].shape)
             coord.request_stop()
             coord.join()
예제 #2
0
 def test_long_eval(self):
     """Compare EVAL on a whole series against EVAL on windowed chunks.

     The same five-step series is evaluated once as raw features and once
     as size-3 windows produced by `AllWindowInputFn`, sharing variables
     through the "armodel" scope. Loss, end state (last chunk only) and
     flattened predictions are expected to agree between the two runs.
     """
     graph = ops.Graph()
     with graph.as_default():
         model = ar_model.ARModel(
             periodicities=2,
             num_features=1,
             num_time_buckets=10,
             input_window_size=2,
             output_window_size=1)
         raw_features = {
             TrainEvalFeatures.TIMES: [[1, 3, 5, 7, 11]],
             TrainEvalFeatures.VALUES: [[[1.], [2.], [3.], [4.], [5.]]]
         }
         window_input_fn = test_utils.AllWindowInputFn(
             time_series_reader=input_pipeline.NumpyReader(raw_features),
             window_size=3)
         chunked_features, _ = window_input_fn()
         model.initialize_graph()
         eval_mode = estimator_lib.ModeKeys.EVAL
         with variable_scope.variable_scope("armodel") as scope:
             raw_eval = model.define_loss(raw_features, mode=eval_mode)
         # Re-enter the same scope with reuse so both evaluations share
         # variables.
         with variable_scope.variable_scope(scope, reuse=True):
             chunked_eval = model.define_loss(
                 chunked_features, mode=eval_mode)
         with session.Session() as sess:
             coord = coordinator_lib.Coordinator()
             queue_runner_impl.start_queue_runners(sess, coord=coord)
             variables.global_variables_initializer().run()
             raw_evaled, chunked_evaled = sess.run(
                 [raw_eval, chunked_eval])
             self.assertAllClose(chunked_evaled.loss, raw_evaled.loss)
             # Keep only the final chunk's state, retaining the leading
             # dimension, before comparing with the raw end state.
             final_chunk_state = [
                 member[-1, None] for member in chunked_evaled.end_state
             ]
             state_pairs = zip(final_chunk_state, raw_evaled.end_state)
             for chunk_member, raw_member in state_pairs:
                 self.assertAllClose(chunk_member, raw_member)
             self.assertAllEqual([[5, 7, 11]], raw_evaled.prediction_times)
             # batch, window, num_features. The window size has 2 cut off
             # for the first input_window.
             expected_shape = [1, 3, 1]
             for name in raw_eval.predictions:
                 raw_prediction = raw_evaled.predictions[name]
                 self.assertAllEqual(expected_shape, raw_prediction.shape)
                 self.assertAllClose(
                     np.reshape(chunked_evaled.predictions[name], [-1]),
                     np.reshape(raw_prediction, [-1]))
             coord.request_stop()
             coord.join()
예제 #3
0
def _ar_lstm_regressor(model_dir, head_type, exogenous_feature_columns):
    """Return a `TimeSeriesRegressor` wrapping an LSTM-backed `ARModel`.

    Args:
      model_dir: Forwarded to `TimeSeriesRegressor` as `model_dir`.
      head_type: Forwarded to `TimeSeriesRegressor` as `head_type`.
      exogenous_feature_columns: Forwarded to `ARModel` as
        `exogenous_feature_columns`.

    Returns:
      The constructed `ts_estimators.TimeSeriesRegressor`.
    """
    lstm_factory = functools.partial(
        ar_model.LSTMPredictionModel, num_units=10)
    lstm_ar_model = ar_model.ARModel(
        periodicities=10,
        input_window_size=10,
        output_window_size=6,
        num_features=5,
        exogenous_feature_columns=exogenous_feature_columns,
        prediction_model_factory=lstm_factory)
    return ts_estimators.TimeSeriesRegressor(
        model=lstm_ar_model,
        head_type=head_type,
        model_dir=model_dir)
예제 #4
0
 def _estimator_fn(model_dir, exogenous_feature_columns):
     """Return a seeded `TimeSeriesRegressor` wrapping an LSTM `ARModel`.

     Args:
       model_dir: Forwarded to `TimeSeriesRegressor` as `model_dir`.
       exogenous_feature_columns: Forwarded to `ARModel` as
         `exogenous_feature_columns`.

     Returns:
       The constructed `estimators.TimeSeriesRegressor`, configured with a
       fresh `_SeedRunConfig()`.
     """
     lstm_factory = functools.partial(
         ar_model.LSTMPredictionModel, num_units=10)
     lstm_ar_model = ar_model.ARModel(
         periodicities=10,
         input_window_size=10,
         output_window_size=6,
         num_features=1,
         exogenous_feature_columns=exogenous_feature_columns,
         prediction_model_factory=lstm_factory)
     return estimators.TimeSeriesRegressor(
         model=lstm_ar_model,
         config=_SeedRunConfig(),
         model_dir=model_dir)
예제 #5
0
 def test_predictions_direct_lstm(self):
     """Check the shape of "mean" predictions from an LSTM-backed ARModel.

     Calls `predict` directly with three prediction times and an explicit
     state tuple, then asserts the evaluated "mean" has shape [1, 3, 1].
     """
     graph = ops.Graph()
     with graph.as_default():
         lstm_factory = functools.partial(
             ar_model.LSTMPredictionModel, num_units=16)
         model = ar_model.ARModel(
             periodicities=2,
             num_features=1,
             num_time_buckets=10,
             input_window_size=2,
             output_window_size=2,
             prediction_model_factory=lstm_factory)
         prediction_features = {
             PredictionFeatures.TIMES: [[4, 6, 10]],
             PredictionFeatures.STATE_TUPLE:
             ([[1, 2]], [[[1.], [2.]]], [[[], []]])
         }
         with session.Session():
             predicted_values = model.predict(prediction_features)
             variables.global_variables_initializer().run()
             mean_shape = predicted_values["mean"].eval().shape
             self.assertAllEqual(mean_shape, [1, 3, 1])
예제 #6
0
  def __init__(self,
               periodicities,
               input_window_size,
               output_window_size,
               model_dir=None,
               num_features=1,
               extra_feature_columns=None,
               num_timesteps=10,
               loss=ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS,
               num_units=128,
               optimizer="Adam",
               config=None):
    """Construct the LSTM auto-regressive Estimator.

    Builds an `ARModel` whose prediction model is an `LSTMPredictionModel`
    and hands it to the parent initializer together with a
    `FilteringOnlyStateManager` and a `OneShotPredictionHead` head type.

    Args:
      periodicities: Periodicities of the input data, expressed in the units
        of the time feature (e.g. 24 for hourly data with a daily cycle, or
        60 * 24 for minute-level data with a daily cycle). Either a single
        value or a list of values when there are several periodicities.
      input_window_size: How many past time steps the regression looks at.
      output_window_size: How many future time steps are predicted. Values
        greater than 1 empirically seem to give a better fit.
      model_dir: Directory in which model parameters, the graph, etc. are
        saved. Can also be used to load checkpoints into an estimator to
        continue training a previously saved model.
      num_features: Dimensionality of the time series (the default of one is
        univariate; more than one is multivariate).
      extra_feature_columns: List of `tf.feature_column`s (for example
        `tf.feature_column.embedding_column`) for features that give the
        model extra information but are not part of the series being
        predicted.
      num_timesteps: Number of buckets into which (time % periodicity) is
        divided. Multiplied by the number of periodicities, this gives the
        number of time features added to the model.
      loss: Training loss. Supported values are currently SQUARED_LOSS and
        NORMAL_LIKELIHOOD_LOSS. With NORMAL_LIKELIHOOD_LOSS the covariance
        term is trained too. With SQUARED_LOSS the evaluation loss is
        reported on un-scaled observations and predictions, whereas the
        training loss is computed on normalized data.
      num_units: Size of the hidden state of the encoder and decoder LSTM
        cells.
      optimizer: A string, a `tf.train.Optimizer` object, or a callable
        defining the optimization algorithm used for training. Defaults to
        the Adam optimizer with a learning rate of 0.01.
      config: Optional `estimator.RunConfig` object configuring the runtime
        settings.
    """
    # Resolve the optimizer first, as a string name or callable needs the
    # learning rate supplied here.
    optimizer_instance = optimizers.get_optimizer_instance(
        optimizer, learning_rate=0.01)
    lstm_factory = functools.partial(
        ar_model.LSTMPredictionModel, num_units=num_units)
    lstm_ar_model = ar_model.ARModel(
        periodicities=periodicities,
        input_window_size=input_window_size,
        output_window_size=output_window_size,
        num_features=num_features,
        exogenous_feature_columns=extra_feature_columns,
        num_time_buckets=num_timesteps,
        loss=loss,
        prediction_model_factory=lstm_factory)
    super(LSTMAutoRegressor, self).__init__(
        model=lstm_ar_model,
        state_manager=state_management.FilteringOnlyStateManager(),
        optimizer=optimizer_instance,
        model_dir=model_dir,
        config=config,
        head_type=ts_head_lib.OneShotPredictionHead)
예제 #7
0
  def __init__(
      self, periodicities, input_window_size, output_window_size,
      num_features, exogenous_feature_columns=None, num_time_buckets=10,
      loss=ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS, hidden_layer_sizes=None,
      anomaly_prior_probability=None, anomaly_distribution=None,
      optimizer=None, model_dir=None, config=None):
    """Initialize the Estimator.

    Builds either a plain `ARModel` (when `anomaly_prior_probability` is
    None) or an `AnomalyMixtureARModel` (otherwise), both using a
    `FlatPredictionModel`, and passes it to the parent initializer together
    with a `FilteringOnlyStateManager`.

    Args:
      periodicities: periodicities of the input data, in the same units as the
        time feature. Note this can be a single value or a list of values for
        multiple periodicities.
      input_window_size: Number of past time steps of data to look at when doing
        the regression.
      output_window_size: Number of future time steps to predict. Note that
        setting it to > 1 empirically seems to give a better fit.
      num_features: The dimensionality of the time series (one for univariate,
        more than one for multivariate).
      exogenous_feature_columns: A list of `tf.feature_column`s (for example
        `tf.feature_column.embedding_column`) corresponding to exogenous
        features which provide extra information to the model but are not part
        of the series to be predicted. Passed to
        `tf.feature_column.input_layer`.
      num_time_buckets: Number of buckets into which to divide (time %
        periodicity) for generating time based features.
      loss: Loss function to use for training. Currently supported values are
        SQUARED_LOSS and NORMAL_LIKELIHOOD_LOSS. Note that for
        NORMAL_LIKELIHOOD_LOSS, we train the covariance term as well. For
        SQUARED_LOSS, the evaluation loss is reported based on un-scaled
        observations and predictions, while the training loss is computed on
        normalized data.
      hidden_layer_sizes: list of sizes of hidden layers.
      anomaly_prior_probability: If specified, constructs a mixture model under
        which anomalies (modeled with `anomaly_distribution`) have this prior
        probability. See `AnomalyMixtureARModel`.
      anomaly_distribution: May not be specified unless
        anomaly_prior_probability is specified and is not None. Controls the
        distribution of anomalies under the mixture model. Currently either
        `ar_model.AnomalyMixtureARModel.GAUSSIAN_ANOMALY` or
        `ar_model.AnomalyMixtureARModel.CAUCHY_ANOMALY`. See
        `AnomalyMixtureARModel`. Defaults to `GAUSSIAN_ANOMALY`.
      optimizer: The optimization algorithm to use when training, inheriting
          from tf.train.Optimizer. Defaults to Adagrad with step size 0.1.
      model_dir: See `Estimator`.
      config: See `Estimator`.
    Raises:
      ValueError: For invalid combinations of arguments.
    """
    if optimizer is None:
      optimizer = train.AdagradOptimizer(0.1)
    if anomaly_prior_probability is None and anomaly_distribution is not None:
      raise ValueError("anomaly_prior_probability is required if "
                       "anomaly_distribution is specified.")
    # Both model variants use the same flat prediction network.
    prediction_model_factory = functools.partial(
        ar_model.FlatPredictionModel,
        hidden_layer_sizes=hidden_layer_sizes)
    if anomaly_prior_probability is None:
      # No anomaly modeling requested: anomaly_distribution is necessarily
      # None here (checked above) and is not used by the plain ARModel.
      model = ar_model.ARModel(
          periodicities=periodicities, num_features=num_features,
          prediction_model_factory=prediction_model_factory,
          exogenous_feature_columns=exogenous_feature_columns,
          num_time_buckets=num_time_buckets,
          input_window_size=input_window_size,
          output_window_size=output_window_size, loss=loss)
    else:
      if loss != ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS:
        raise ValueError(
            "AnomalyMixtureARModel only supports "
            "ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS for its loss argument.")
      if anomaly_distribution is None:
        # Apply the documented default in the branch that actually consumes
        # it, rather than forwarding an explicit None to the mixture model.
        anomaly_distribution = ar_model.AnomalyMixtureARModel.GAUSSIAN_ANOMALY
      model = ar_model.AnomalyMixtureARModel(
          periodicities=periodicities,
          input_window_size=input_window_size,
          output_window_size=output_window_size,
          num_features=num_features,
          prediction_model_factory=prediction_model_factory,
          exogenous_feature_columns=exogenous_feature_columns,
          num_time_buckets=num_time_buckets,
          anomaly_prior_probability=anomaly_prior_probability,
          anomaly_distribution=anomaly_distribution)
    state_manager = state_management.FilteringOnlyStateManager()
    super(ARRegressor, self).__init__(
        model=model,
        state_manager=state_manager,
        optimizer=optimizer,
        model_dir=model_dir,
        config=config)