def save_predictor(self, predictor: Predictor, path: Path) -> None:
    """
    Saves the predictor associated with the model configuration to the
    specified path.

    By default, this makes sure the target directory exists and then
    serializes the predictor into it.

    Args:
        predictor: The predictor to save.
        path: The directory where to save the predictor. It is created
            (including missing parent directories) when it does not exist.
    """
    # The predictor is handed a directory to write into; create it up front so
    # callers do not each have to remember to do so. Already-existing
    # directories are left untouched.
    Path(path).mkdir(parents=True, exist_ok=True)
    predictor.serialize(path)
def test_serialize(Estimator, hyperparameters):
    """Check that a trained predictor equals its serialized-then-deserialized copy."""
    estimator = Estimator.from_hyperparameters(freq=freq, **hyperparameters)

    with tempfile.TemporaryDirectory() as tmp:
        model_dir = Path(tmp)

        # Train, write to disk, and read back from the same directory.
        trained = estimator.train(train_ds)
        trained.serialize(model_dir)
        restored = Predictor.deserialize(model_dir)

        assert trained == restored
def run_example():
    """Train a tabular estimator on a few series, round-trip it via disk, print forecasts."""
    dataset = get_dataset("electricity")
    serialize_path = Path("GluonTSTabularPredictor")

    estimator = TabularEstimator(
        freq="H",
        prediction_length=24,
        time_limit=600,  # ten minutes for training
        # makes prediction faster, but potentially less accurate
        disable_auto_regression=True,
        # split the last 24 targets from each time series to be the
        # validation data
        last_k_for_val=24,
        quantiles_to_predict=[0.1, 0.5, 0.9],
    )

    # Only the first five training series are used.
    first_series = list(islice(dataset.train, 5))
    predictor = estimator.train(training_data=first_series)

    serialize_path.mkdir(parents=True, exist_ok=True)
    predictor.serialize(serialize_path)

    # Drop the trained object so the forecasts below come from the copy
    # loaded from disk.
    predictor = None
    # NOTE(review): the original carried the remark "the quantiles_to_predict
    # parameters should be List[str] type" here, although floats are passed
    # above -- confirm which is intended.
    predictor = Predictor.deserialize(serialize_path)

    forecasts = list(predictor.predict(first_series))
    print(forecasts)
def test_pytorch_predictor_serde():
    """Check that a PyTorchPredictor equals its serialized-then-deserialized copy."""
    context_length = 20
    prediction_length = 5

    splitter = InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=TestSplitSampler(),
        past_length=context_length,
        future_length=prediction_length,
    )

    network = RandomNetwork(
        prediction_length=prediction_length,
        context_length=context_length,
    )

    predictor = PyTorchPredictor(
        prediction_length=prediction_length,
        freq="1H",
        input_names=["past_target"],
        prediction_net=network,
        batch_size=16,
        input_transform=splitter,
        device=torch.device("cpu"),
    )

    with tempfile.TemporaryDirectory() as tmp:
        model_dir = Path(tmp)
        predictor.serialize(model_dir)
        restored = Predictor.deserialize(model_dir)

    assert predictor == restored
def run_example():
    """Train a tabular estimator briefly, reload it from disk, and plot its forecasts."""
    dataset = get_dataset("electricity")
    serialize_path = Path("GluonTSTabularPredictor")

    estimator = TabularEstimator(
        freq="H",
        prediction_length=24,
        # Short training budget to keep the example quick (presumably
        # seconds -- TODO confirm the unit against TabularEstimator).
        time_limit=10,
        # makes prediction faster, but potentially less accurate
        disable_auto_regression=True,
        # split the last 24 targets from each time series to be the
        # validation data
        last_k_for_val=24,
        quantiles_to_predict=None,
    )

    # Only the first five training series are used.
    first_series = list(islice(dataset.train, 5))
    predictor = estimator.train(training_data=first_series)

    serialize_path.mkdir(parents=True, exist_ok=True)
    predictor.serialize(serialize_path)

    # Drop the trained object so the forecasts below come from the copy
    # loaded from disk.
    predictor = None
    predictor = Predictor.deserialize(serialize_path)

    forecasts = list(predictor.predict(first_series))

    # One figure per series: the tail of the observed target (seven
    # prediction lengths) overlaid with the forecast.
    for entry, forecast in zip(first_series, forecasts):
        observed = to_pandas(entry)
        tail_start = -7 * predictor.prediction_length
        plt.figure()
        plt.plot(observed[tail_start:], label="target")
        forecast.plot()
        plt.show()
def run_inference_server(
    env: SageMakerEnv,
    forecaster_type: Optional[Type[Union[Estimator, Predictor]]],
) -> None:
    """Build the web application around a predictor factory and run it.

    When ``forecaster_type`` is given, a predictor is constructed for every
    request from the request's ``'configuration'`` entry. Otherwise a single
    predictor is deserialized from ``env.path.model`` and reused for all
    requests.
    """
    if forecaster_type is None:
        # Static mode: load once, hand out the same instance every time.
        predictor = Predictor.deserialize(env.path.model)

        def predictor_factory(request) -> Predictor:
            return predictor

    else:
        # Dynamic mode: each request carries its own hyperparameters.
        ctor = forecaster_type.from_hyperparameters

        def predictor_factory(request) -> Predictor:
            return ctor(**request['configuration'])

    # NOTE(review): `execution_params` and `settings` are not defined in this
    # function; presumably module-level names -- confirm.
    wsgi_app = make_app(predictor_factory, execution_params)
    server_config = {
        "bind": "0.0.0.0:8080",
        "workers": settings.number_of_workers,
        "timeout": 100,
    }

    Application(app=wsgi_app, config=server_config).run()
def test_estimator_with_features(estimator_constructor):
    """Train with static/dynamic features, reload the predictor, and forecast.

    The prediction dataset uses the same series as the training dataset, but
    its dynamic real features are ``prediction_length`` steps longer than the
    target.
    """
    freq = "1h"
    prediction_length = 12

    def build_entries(extra_feature_steps):
        # (start, target length, static categorical, static real) per series
        series_specs = [
            ("2021-01-01 00:00:00", 200, [0, 1], [42.0]),
            ("2021-02-01 00:00:00", 100, [1, 0], [1.0]),
        ]
        return [
            {
                "start": start,
                "target": [1.0] * length,
                "feat_static_cat": static_cat,
                "feat_static_real": static_real,
                "feat_dynamic_real": [[1.0] * (length + extra_feature_steps)]
                * 3,
            }
            for start, length, static_cat, static_real in series_specs
        ]

    training_dataset = ListDataset(build_entries(0), freq=freq)
    prediction_dataset = ListDataset(
        build_entries(prediction_length), freq=freq
    )

    estimator = estimator_constructor(freq, prediction_length)
    predictor = estimator.train(
        training_data=training_dataset,
        validation_data=training_dataset,
        shuffle_buffer_length=5,
    )

    with tempfile.TemporaryDirectory() as tmp:
        model_dir = Path(tmp)
        predictor.serialize(model_dir)
        predictor_copy = Predictor.deserialize(model_dir)

    forecasts = predictor_copy.predict(prediction_dataset)

    # Touch `mean` on the first few forecasts to make sure it can be computed.
    for forecast in islice(forecasts, 5):
        forecast.mean
def model_fn(model_dir: Union[str, Path]) -> Predictor:
    """Load a gluonts model from a directory.

    Besides deserializing the predictor, this reads ``y_transform.json`` from
    the same directory and attaches two custom attributes to the predictor:
    ``output_transform`` and ``pre_input_transform``.

    Args:
        model_dir (Union[str, Path]): a directory where model is saved.

    Returns:
        Predictor: A gluonts predictor.
    """
    model_path = Path(model_dir)
    predictor = Predictor.deserialize(model_path)

    # If model was trained on log-space, then forecast must be inverted before
    # metrics etc.
    with (model_path / "y_transform.json").open("r") as f:
        y_transform = json.load(f)
    logger.info("model_fn: custom transformations = %s", y_transform)

    invert_log = y_transform["inverse_transform"] == "expm1"
    predictor.output_transform = (
        expm1_and_clip_to_zero if invert_log else clip_to_zero
    )

    # Custom field
    if y_transform["transform"] == "log1p":
        predictor.pre_input_transform = log1p
    else:
        predictor.pre_input_transform = None

    logger.info(
        "predictor.pre_input_transform: %s", predictor.pre_input_transform
    )
    logger.info("predictor.output_transform: %s", predictor.output_transform)
    logger.info("model_fn() done; loaded predictor %s", predictor)
    return predictor
def test_lstnet(
    skip_size,
    ar_window,
    lead_time,
    prediction_length,
    hybridize,
    scaling,
    dtype,
):
    """Train LSTNet for one epoch, check serde equality, forecast shape and accuracy."""
    trainer = Trainer(
        epochs=1,
        batch_size=2,
        learning_rate=0.01,
        hybridize=hybridize,
    )
    estimator = LSTNetEstimator(
        skip_size=skip_size,
        ar_window=ar_window,
        num_series=NUM_SERIES,
        channels=6,
        kernel_size=2,
        context_length=4,
        freq=freq,
        lead_time=lead_time,
        prediction_length=prediction_length,
        trainer=trainer,
        scaling=scaling,
        dtype=dtype,
    )

    predictor = estimator.train(dataset.train)

    # Round trip through disk must give back an equal predictor.
    with tempfile.TemporaryDirectory() as directory:
        model_dir = Path(directory)
        predictor.serialize(model_dir)
        predictor_copy = Predictor.deserialize(model_dir)
        assert predictor == predictor_copy

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset.test,
        predictor=predictor,
        num_samples=NUM_SAMPLES,
    )
    forecasts = list(forecast_it)
    tss = list(ts_it)
    assert len(forecasts) == len(tss) == len(dataset.test)

    test_ds = dataset.test.list_data[0]
    expected_shape = (NUM_SAMPLES, prediction_length, NUM_SERIES)
    for fct in forecasts:
        assert fct.freq == freq
        assert fct.samples.shape == expected_shape
        # The forecast must start `prediction_length` periods before the end
        # of the test range.
        test_periods = pd.period_range(
            start=test_ds["start"],
            periods=test_ds["target"].shape[1],  # number of test periods
            freq=freq,
        )
        assert fct.start_date == test_periods[-prediction_length]

    evaluator = MultivariateEvaluator(
        quantiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    )
    agg_metrics, item_metrics = evaluator(
        iter(tss), iter(forecasts), num_series=len(dataset.test)
    )
    assert agg_metrics["ND"] < 1.0
def test_torch_deepar():
    """Train DeepAR briefly on the constant dataset, reload it, and forecast."""
    constant = get_dataset("constant")
    metadata = constant.metadata

    estimator = DeepAREstimator(
        freq=metadata.freq,
        prediction_length=metadata.prediction_length,
        batch_size=4,
        num_batches_per_epoch=3,
        trainer_kwargs=dict(max_epochs=2),
    )
    predictor = estimator.train(
        training_data=constant.train,
        validation_data=constant.train,
        shuffle_buffer_length=5,
    )

    with tempfile.TemporaryDirectory() as tmp:
        model_dir = Path(tmp)
        predictor.serialize(model_dir)
        predictor_copy = Predictor.deserialize(model_dir)

    forecasts = predictor_copy.predict(constant.test)

    # Touch `mean` on the first few forecasts to make sure it can be computed.
    for forecast in islice(forecasts, 5):
        forecast.mean
def _retrieve_model(self, locations):
    """Download the gzipped model archive, unpack it, and deserialize the predictor.

    The archive at ``locations.model_archive`` is opened through
    ``self._s3fs``, extracted into a temporary directory, and the predictor is
    loaded from that directory before it is removed.
    """
    with self._s3fs.open(locations.model_archive, "rb") as stream, \
            tarfile.open(mode="r:gz", fileobj=stream) as archive, \
            TemporaryDirectory() as temp_dir:
        # NOTE(review): extractall() trusts member paths inside the archive;
        # fine as long as the archive source is trusted -- confirm.
        archive.extractall(temp_dir)
        return Predictor.deserialize(Path(temp_dir))
def load_model(self, model_path):
    """Deserialize a predictor from ``model_path`` or terminate the process.

    Args:
        model_path: Directory containing the serialized predictor.

    Returns:
        The deserialized predictor.

    Raises:
        SystemExit: With exit code 1 when the predictor cannot be loaded.
    """
    try:
        predictor = Predictor.deserialize(Path(model_path))
    except Exception as err:
        # `Exception` instead of a bare `except:` so that Ctrl-C and
        # SystemExit propagate untouched, and the cause is reported instead
        # of being silently dropped.
        print('Unable to load the model %s' % model_path)
        print('Reason: %r' % (err,))
        sys.exit(1)

    print('Model loaded from %s' % model_path)
    return predictor
def load(cls, path: str, reset_paths: bool = True, verbose: bool = True) -> "AbstractGluonTSModel":
    """Load the model via the parent class and attach its GluonTS predictor.

    The predictor is deserialized from the ``cls.gluonts_model_path``
    sub-path of ``path`` and stored on the returned model as
    ``gts_predictor``.
    """
    loaded = super().load(path, reset_paths, verbose)

    predictor_dir = Path(path) / cls.gluonts_model_path
    loaded.gts_predictor = GluonTSPredictor.deserialize(predictor_dir)

    return loaded
def load_model(self):
    """Print the model directory and its contents, then load the predictor on CPU.

    Returns:
        The predictor deserialized from ``self.configs.model_save_path`` with
        ``ctx=mx.cpu()``.
    """
    import os

    model_dir = Path(self.configs.model_save_path)

    # Debug output: where we load from and what is in there.
    print("Model path ", model_dir)
    print(os.listdir(model_dir))

    return Predictor.deserialize(model_dir, ctx=mx.cpu())
def test_serialize(Estimator, hyperparameters):
    """Check that a trained predictor equals its serialized-then-deserialized copy."""
    estimator = from_hyperparameters(Estimator, hyperparameters, dsinfo)

    with tempfile.TemporaryDirectory() as tmp:
        model_dir = Path(tmp)

        trained = estimator.train(dsinfo.train_ds)
        trained.serialize(model_dir)
        restored = Predictor.deserialize(model_dir)

        # TODO: DeepFactorEstimator does not pass this assert
        assert trained == restored
def make_gunicorn_app(
    env: ServeEnv,
    forecaster_type: Optional[Type[Union[Estimator, Predictor]]],
    settings: Settings,
) -> Application:
    """Assemble the gunicorn application that serves predictions.

    With a ``forecaster_type`` the predictor is built per request from the
    request's ``"configuration"`` entry (dynamic factory). Without one, a
    single predictor is deserialized from ``env.path.model`` and reused
    (static factory).

    Returns:
        The configured (not yet running) gunicorn ``Application``.
    """
    check_gpu_support()

    if forecaster_type is None:
        logger.info("Using static predictor factory")

        assert env is not None
        predictor = Predictor.deserialize(env.path.model)

        forecaster_fq_name = fqname_for(type(predictor))
        forecaster_version = predictor.__version__

        def predictor_factory(request) -> Predictor:
            return predictor

    else:
        logger.info("Using dynamic predictor factory")

        ctor = forecaster_type.from_hyperparameters

        forecaster_fq_name = fqname_for(forecaster_type)
        forecaster_version = forecaster_type.__version__

        def predictor_factory(request) -> Predictor:
            return ctor(**request["configuration"])

    logger.info(f"Using gluonts v{gluonts.__version__}")
    logger.info(f"Using forecaster {forecaster_fq_name} v{forecaster_version}")

    # Passed to make_app together with the predictor factory.
    execution_params = {
        "MaxConcurrentTransforms": settings.number_of_workers,
        "BatchStrategy": settings.sagemaker_batch_strategy,
        "MaxPayloadInMB": settings.sagemaker_max_payload_in_mb,
    }

    flask_app = make_app(
        predictor_factory,
        execution_params,
        batch_transform_config=env.batch_config,
        settings=settings,
    )

    server_config = {
        "bind": settings.sagemaker_server_bind,
        "workers": settings.number_of_workers,
        "timeout": settings.sagemaker_server_timeout,
    }
    return Application(app=flask_app, config=server_config)
def model_fn(model_dir):
    """Deserialize a predictor from ``model_dir`` or a known sub-directory of it.

    If ``model_dir`` contains an entry named after one of the known
    algorithms, the first such entry (in ``os.listdir`` order) is used as the
    model directory; otherwise ``model_dir`` itself is used.
    """
    # TODO add all algo_names
    known_algo_names = {
        'CanonicalRNN', 'DeepFactor', 'DeepAR', 'DeepState', 'DeepVAR',
        'GaussianProcess', 'GPVAR', 'LSTNet', 'NBEATS', 'DeepRenewalProcess',
        'Tree', 'SelfAttention', 'MQCNN', 'MQRNN', 'Seq2Seq',
        'SimpleFeedForward', 'TemporalFusionTransformer', 'DeepTPP',
        'Transformer', 'WaveNet', 'Naive2', 'NPTS', 'Prophet', 'ARIMA', 'ETS',
        'TBATS', 'CROSTON', 'MLP', 'SeasonalNaive',
    }

    sub_dirs = os.listdir(model_dir)
    print('[DEBUG] sub_dirs:', sub_dirs)

    algo_name = next(
        (entry for entry in sub_dirs if entry in known_algo_names), None
    )
    if algo_name is not None:
        model_dir = os.path.join(model_dir, algo_name)
        print('[DEBUG] algo_name:', algo_name)

    predictor = Predictor.deserialize(Path(model_dir))
    print('[DEBUG] model init done.')
    return predictor
def load_predictor(self, path: Path) -> Predictor:
    """
    Deserializes and returns the predictor stored in the given directory.

    Args:
        path: The directory from which to load the predictor.

    Returns:
        The predictor which was loaded.
    """
    loaded = Predictor.deserialize(path)
    return loaded
def initialize(self, context):
    """
    Initialize model. This will be called during model loading time

    The predictor is deserialized from the ``model_dir`` entry of the
    context's system properties and stored on ``self.model``.
    ``self.initialized`` becomes True only after the predictor has been
    loaded successfully.

    :param context: Initial context contains model server system properties.
    :return: None
    """
    # Not ready until loading has succeeded; a failure below must not leave
    # the handler flagged as initialized.
    self.initialized = False

    properties = context.system_properties
    model_dir = properties.get("model_dir")
    logger.info(f"Loading model from {model_dir}")

    self.model = Predictor.deserialize(Path(model_dir))
    self.initialized = True
def test_tabular_estimator(
    dataset,
    freq,
    prediction_length: int,
    lag_indices: List[int],
    disable_auto_regression: bool,
    last_k_for_val: int,
    validation_data: ListDataset,
):
    """Train a tabular estimator, reload it, and compare its prediction paths.

    Serial, batch, and (when auto-regression is off) batch-autoregressive
    prediction must produce matching forecasts for every entry.
    """
    estimator = TabularEstimator(
        freq=freq,
        prediction_length=prediction_length,
        lag_indices=lag_indices,
        time_limit=10,
        disable_auto_regression=disable_auto_regression,
        last_k_for_val=last_k_for_val,
    )

    def check_consistency(entry, f1, f2):
        # Both forecasts must start right after the last observed timestamp,
        # hold a single sample path, and agree numerically.
        ts = to_pandas(entry)
        start_timestamp = ts.index[-1] + pd.tseries.frequencies.to_offset(freq)
        expected_shape = (1, prediction_length)
        assert f1.samples.shape == expected_shape
        assert f1.start_date == start_timestamp
        assert f2.samples.shape == expected_shape
        assert f2.start_date == start_timestamp
        assert np.allclose(f1.samples, f2.samples)

    with tempfile.TemporaryDirectory() as path:
        model_dir = Path(path)

        predictor = estimator.train(dataset, validation_data=validation_data)
        predictor.serialize(model_dir)
        # Drop the trained object; everything below uses the reloaded copy.
        predictor = None
        predictor = Predictor.deserialize(model_dir)

        assert not predictor.auto_regression or any(
            lag < prediction_length for lag in predictor.lag_indices
        )
        assert predictor.batch_size > 1

        forecasts_serial = list(predictor._predict_serial(dataset))
        forecasts_batch = list(predictor.predict(dataset))

        for entry, serial, batch in zip(
            dataset, forecasts_serial, forecasts_batch
        ):
            check_consistency(entry, serial, batch)

        if not predictor.auto_regression:
            forecasts_batch_autoreg = list(
                predictor._predict_batch_autoreg(dataset)
            )
            for entry, serial, autoreg in zip(
                dataset, forecasts_serial, forecasts_batch_autoreg
            ):
                check_consistency(entry, serial, autoreg)
def _predict_fn(input_object: List[DataEntry], model: Predictor, num_samples=1000) -> List[Forecast]:
    """Run inference on deserialized JSON-lines entries with the loaded model.

    Args:
        input_object (List[DataEntry]): List of gluonts timeseries.
        model (Predictor): A gluonts predictor.
        num_samples (int, optional): Number of forecast paths for each
            timeseries. Defaults to 1000.

    Returns:
        List[Forecast]: List of forecast results.
    """
    # The dataset is built here so that its freq matches the model's freq.
    X = ListDataset(input_object, freq=model.freq)

    # Optional forward transformation of the inputs before prediction.
    # NOTE(review): the return value is discarded, so the transform presumably
    # modifies X in place -- confirm.
    pre_transform = model.pre_input_transform
    if pre_transform is not None:
        logger.debug("Before model.pre_input_transform: %s", X.list_data)
        pre_transform(X)
        logger.debug("After model.pre_input_transform: %s", X.list_data)

    return list(model.predict(X, num_samples=num_samples))
def temporary_serve_env(predictor: Predictor) -> ContextManager[ServeEnv]:
    """
    Serialize a `Predictor` into a temporary serve environment and yield it.

    The environment lives in a temporary directory that is removed again once
    the generator is resumed or closed after the ``yield``.

    Parameters
    ----------
    predictor
        A predictor to serialize in `ServeEnv` `model` folder.

    Returns
    -------
    ContextManager[gluonts.shell.env.ServeEnv]
        A context manager that yields the `ServeEnv` instance.
    """
    # NOTE(review): this is a generator; presumably it is wrapped by a
    # context-manager decorator at the definition site -- confirm.
    with tempfile.TemporaryDirectory(prefix="gluonts-serve-env") as base:
        serve_paths = ServePaths(base=Path(base))

        # serialize model
        predictor.serialize(serve_paths.model)

        yield ServeEnv(path=serve_paths.base)
def predict(dataset: ListDataset, snapshot_dir: str, samples: int = 100):
    """
    Make predictions using model snapshot.

    :param dataset: dataset to forecast
    :param snapshot_dir: directory the predictor is deserialized from
    :param samples: number of sample paths requested per forecast
    :return: array holding, for each forecast, the median over its samples
        (taken along axis 0)
    """
    predictor = Predictor.deserialize(Path(snapshot_dir))

    # Only the forecast iterator is needed; the ground-truth one is ignored.
    forecast_it, _ = make_evaluation_predictions(
        dataset, predictor=predictor, num_samples=samples
    )

    medians = [np.median(forecast.samples, axis=0) for forecast in forecast_it]
    return np.array(medians)
def test_pytorch_predictor_serde():
    """Check that a reloaded PyTorchPredictor produces the same forecasts as the original."""
    context_length = 20
    prediction_length = 5

    splitter = InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        train_sampler=ExpectedNumInstanceSampler(num_instances=1),
        past_length=context_length,
        future_length=prediction_length,
    )

    network = RandomNetwork(
        prediction_length=prediction_length, context_length=context_length
    )

    predictor = PyTorchPredictor(
        prediction_length=prediction_length,
        freq="1H",
        input_names=["past_target"],
        prediction_net=network,
        batch_size=16,
        input_transform=splitter,
        device=None,
    )

    with tempfile.TemporaryDirectory() as tmp:
        model_dir = Path(tmp)
        predictor.serialize(model_dir)
        predictor_exp = Predictor.deserialize(model_dir)

    # Twenty random series of length 100, all starting at the same timestamp.
    test_data = []
    for _ in range(20):
        test_data.append(
            {
                FieldName.START: pd.Timestamp(
                    "2020-01-01 00:00:00", freq="1H"
                ),
                FieldName.TARGET: np.random.uniform(size=(100,)).astype("f"),
            }
        )

    forecast = list(predictor.predict(test_data))
    forecast_exp = list(predictor_exp.predict(test_data))

    for actual, expected in zip(forecast, forecast_exp):
        assert np.allclose(actual.samples, expected.samples)
def train(dataset: ListDataset, frequency: str, horizon: int, model_name: str,
          num_layers: int, num_cells: int, epochs: int, patience: int,
          weight_decay: float, dropout_rate: float, batch_size: int,
          snapshot_dir: str, overwrite: bool):
    """
    Train a model, or reuse an existing snapshot.

    If ``overwrite`` is false and ``snapshot_dir`` already exists as a
    directory, the predictor stored there is returned without training.

    :param dataset: training data passed to the estimator
    :param model_name: estimator to use; only 'deepar' is supported
    :param horizon: prediction length of the estimator
    :param frequency: frequency passed to the estimator
    :param snapshot_dir: directory the predictor is loaded from / saved to
    :param epochs: passed to the trainer
    :param patience: passed to the trainer
    :param weight_decay: passed to the trainer
    :param batch_size: passed to the trainer
    :param dropout_rate: passed to the estimator
    :param num_layers: passed to the estimator
    :param num_cells: passed to the estimator
    :param overwrite: retrain and allow writing into an existing snapshot
        directory
    :return: the trained (or reloaded) predictor
    """
    model_dir = Path(snapshot_dir)

    snapshot_reusable = (not overwrite) and model_dir.is_dir()
    if snapshot_reusable:
        return Predictor.deserialize(model_dir)

    trainer = Trainer(
        epochs=epochs,
        patience=patience,
        weight_decay=weight_decay,
        batch_size=batch_size,
    )

    if model_name != 'deepar':
        raise Exception(f'Unknown model {model_name}')

    estimator = DeepAREstimator(
        freq=frequency,
        scaling=False,
        dropout_rate=dropout_rate,
        num_layers=num_layers,
        num_cells=num_cells,
        prediction_length=horizon,
        trainer=trainer,
    )

    predictor = estimator.train(training_data=dataset)

    # An already existing directory is only tolerated when overwriting.
    model_dir.mkdir(parents=True, exist_ok=overwrite)
    predictor.serialize(model_dir)
    return predictor
def test_estimator_constant_dataset(estimator_constructor):
    """Train on the constant dataset, reload the predictor, and forecast."""
    constant = get_dataset("constant")

    estimator = estimator_constructor(constant)
    predictor = estimator.train(
        training_data=constant.train,
        validation_data=constant.train,
        shuffle_buffer_length=5,
    )

    with tempfile.TemporaryDirectory() as tmp:
        model_dir = Path(tmp)
        predictor.serialize(model_dir)
        predictor_copy = Predictor.deserialize(model_dir)

    forecasts = predictor_copy.predict(constant.test)

    for forecast in islice(forecasts, 5):
        # Distribution forecasts are converted to sample forecasts first.
        if isinstance(forecast, DistributionForecast):
            forecast = forecast.to_sample_forecast()
        # Touch `mean` to make sure it can be computed.
        forecast.mean
def predict(
    self,
    x_y_data: ListDataset,
    training_pred_score_path: str,
):
    """Load every saved model for this time-series type and forecast with it.

    For each model name configured under ``self.type_of_time_series`` in
    ``self.models``, the predictor is deserialized from
    ``<training_pred_score_path>/saved_models/<name>_<type>_score_trained``
    and evaluated on ``x_y_data``.

    Returns:
        Three dicts keyed by model name: the ground-truth series, the
        forecasts, and the path each model was loaded from.
    """
    series_type = self.type_of_time_series
    saved_models_dir = os.path.join(training_pred_score_path, "saved_models")

    tss_dict = {}
    forecasts_dict = {}
    load_path_dict = {}

    for model_group in self.models.get(series_type):
        # Only the keys (model names) are used; the mapped values are ignored.
        for model_name, _ in model_group.items():
            model_save_name = "{}_{}_score_trained".format(
                model_name, series_type
            )
            load_path = os.path.join(saved_models_dir, model_save_name)
            loaded_predictor = Predictor.deserialize(Path(load_path))

            forecast_it, ts_it = make_evaluation_predictions(
                dataset=x_y_data,  # test dataset
                predictor=loaded_predictor,  # predictor
                # number of sample paths we want for evaluation
                num_eval_samples=1,
            )

            forecasts_dict[model_name] = list(forecast_it)
            tss_dict[model_name] = list(ts_it)
            load_path_dict[model_name] = load_path

    return tss_dict, forecasts_dict, load_path_dict
agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len( df_test[0:number_of_products])) # formateo del nombre columns = pd.DataFrame(list(df.columns)[0:number_of_products], columns=["name"]) item_metrics["item_id"] = columns["name"] item_metrics.drop(columns=["OWA"], inplace=True) print(item_metrics) # guardar parĂ¡metros path = "results/" try_create_folder(path) path = path + "energy-model" try_create_folder(path) # guardar los resultados item_metrics.to_csv("results/resultados_energia.csv", index=False) # guardar el modelo predictor.serialize(Path(path)) # hacer inferencia con el modelo guardado predictor_deserialized = Predictor.deserialize(Path(path)) # repetir los pasos para hacer la inferencia print("...")
def test_seriali_predictors(predictor_cls, parameters):
    """Check that a directly constructed predictor equals its serialized-then-deserialized copy."""
    predictor = predictor_cls(freq=CONSTANT_DATASET_FREQ, **parameters)

    with tempfile.TemporaryDirectory() as tmp:
        model_dir = Path(tmp)
        predictor.serialize(model_dir)
        restored = Predictor.deserialize(model_dir)

    assert predictor == restored
if __name__ == "__main__":
    # Train a small feed-forward model on the exchange-rate dataset.
    dataset = get_dataset("exchange_rate")
    metadata = dataset.metadata

    estimator = SimpleFeedForwardEstimator(
        prediction_length=metadata.prediction_length,
        freq=metadata.freq,
        trainer=Trainer(epochs=5, num_batches_per_epoch=10),
    )
    predictor = estimator.train(dataset.train)

    # save the trained model in a path ~/.mxnet/gluon-ts/feedforward/
    # or $MXNET_HOME/feedforward if MXNET_HOME is defined
    model_path = get_download_path() / "feedforward"
    model_path.mkdir(parents=True, exist_ok=True)
    predictor.serialize(model_path)

    # loads it back and evaluate predictions accuracy with the deserialized
    # model
    predictor_deserialized = Predictor.deserialize(model_path)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset.test,
        predictor=predictor_deserialized,
        num_samples=100,
    )

    evaluator = Evaluator()
    agg_metrics, item_metrics = evaluator(
        ts_it, forecast_it, num_series=len(dataset.test)
    )

    pprint.pprint(agg_metrics)