def test_forecasts(method_name):
    """Check the forecasts an ``RForecastPredictor`` makes on the "constant" dataset.

    A predictor is built for ``method_name`` and run on the training split.
    Every forecast must have the expected type, frequency, horizon and start
    date.  The predictor is then backtested on the test split and selected
    aggregate metrics must stay below ``TOLERANCE``.
    """
    if method_name == "mlp":
        # https://stackoverflow.com/questions/56254321/error-in-ifncol-matrix-rep-argument-is-of-length-zero
        # https://cran.r-project.org/web/packages/neuralnet/index.html
        #   published before the bug fix: https://github.com/bips-hb/neuralnet/pull/21
        # The issue is still open on nnfor package: https://github.com/trnnick/nnfor/issues/8
        # TODO: look for a workaround.
        pytest.xfail(
            "MLP currently does not work because "
            "the `neuralnet` package is not yet updated with a known bug fix in ` bips-hb/neuralnet`"
        )

    dataset = datasets.get_dataset("constant")
    train_dataset = dataset.train
    test_dataset = dataset.test
    metadata = dataset.metadata

    freq = metadata.freq
    prediction_length = metadata.prediction_length

    predictor = RForecastPredictor(
        freq=freq,
        prediction_length=prediction_length,
        method_name=method_name,
    )
    predictions = list(predictor.predict(train_dataset))

    # Quantile-based methods return quantile forecasts, the rest return samples.
    if method_name in QUANTILE_FORECAST_METHODS:
        forecast_type = QuantileForecast
    else:
        forecast_type = SampleForecast

    for forecast in predictions:
        assert isinstance(forecast, forecast_type)

    for forecast in predictions:
        assert forecast.freq == freq

    for forecast in predictions:
        assert forecast.prediction_length == prediction_length

    for data, forecast in zip(train_dataset, predictions):
        assert forecast.start_date == forecast_start(data)

    evaluator = Evaluator()
    agg_metrics, _ = backtest_metrics(
        test_dataset=test_dataset,
        predictor=predictor,
        evaluator=evaluator,
    )

    for metric_name in ("mean_wQuantileLoss", "NRMSE", "RMSE"):
        assert agg_metrics[metric_name] < TOLERANCE
def gluonts_r(dataset, freq, pred_length, period=None, trunc_length=None, method_name="ets"):
    """Forecast ``dataset`` with an ``RForecastPredictor`` and return the forecasts.

    Args:
        dataset: Data handed to ``predictor.predict``.
        freq: Frequency forwarded to the predictor as ``freq``.
        pred_length: Forecast horizon, forwarded as ``prediction_length``.
        period: Optional value forwarded as the predictor's ``period``
            argument. Omitted from the call when ``None``.
        trunc_length: Optional value forwarded as the predictor's
            ``trunc_length`` argument. Omitted from the call when ``None``.
        method_name: Name of the R forecasting method (default ``"ets"``).

    Returns:
        A list with every forecast yielded by ``predictor.predict(dataset)``.
    """
    params = dict(freq=freq, prediction_length=pred_length, method_name=method_name)

    # These two arguments used to be accepted and then silently dropped.
    # Forward them only when given so the default call is unchanged.
    if period is not None:
        params["period"] = period
    if trunc_length is not None:
        params["trunc_length"] = trunc_length

    predictor = RForecastPredictor(**params)
    return list(predictor.predict(dataset))
def r_forecast_package():
    """Run the ETS and ARIMA R forecasters on the "exchange_rate" dataset.

    The forecasts for the training split are materialised into local lists;
    the function does not return them.
    """
    import ast

    from gluonts.model.r_forecast import RForecastPredictor

    dataset = get_dataset("exchange_rate", regenerate=False)

    prediction_length = dataset.metadata.prediction_length
    freq = dataset.metadata.freq
    # Parsed from the metadata string; not used further in this function.
    cardinality = ast.literal_eval(dataset.metadata.feat_static_cat[0].cardinality)

    train_ds, test_ds = dataset.train, dataset.test

    # Arguments shared by both predictors.
    shared_kwargs = dict(freq=freq, prediction_length=prediction_length)

    # --------------------
    # ETS.
    ets_predictor = RForecastPredictor(method_name="ets", **shared_kwargs)
    ets_forecast = list(ets_predictor.predict(train_ds))

    # ARIMA.
    arima_predictor = RForecastPredictor(method_name="arima", **shared_kwargs)
    arima_forecast = list(arima_predictor.predict(train_ds))
def train(args):
    """Build, train, evaluate and serialize the forecaster named by ``args.algo_name``.

    Args:
        args: Namespace-like object. Attributes read here: ``freq``,
            ``prediction_length``, ``context_length``,
            ``use_feat_dynamic_real``, ``use_past_feat_dynamic_real``,
            ``use_feat_static_cat``, ``use_log1p``, ``batch_size``, ``train``
            and ``test`` (directories holding ``train_<freq>.json`` /
            ``test_<freq>.json``), the ``Trainer`` settings (``epochs``,
            ``num_batches_per_epoch``, ``learning_rate``,
            ``learning_rate_decay_factor``, ``patience``,
            ``minimum_learning_rate``, ``clip_gradient``, ``weight_decay``,
            ``init``, ``hybridize``), ``cardinality`` (string such as
            ``"[3, 5]"`` or ``''``), ``algo_name`` and ``model_dir``.

    Returns:
        None. Aggregate metrics are printed as JSON and the predictor is
        serialized to ``<model_dir>/<algo_name>``. The function returns early,
        after printing an error, when ``algo_name`` is unknown or not yet
        implemented.

    Raises:
        Exception: Re-raises the training error when both the plain and the
            multivariate-grouped training attempts fail.
    """
    freq = args.freq.replace('"', '')
    prediction_length = args.prediction_length
    context_length = args.context_length
    use_feat_dynamic_real = args.use_feat_dynamic_real
    use_past_feat_dynamic_real = args.use_past_feat_dynamic_real
    use_feat_static_cat = args.use_feat_static_cat
    use_log1p = args.use_log1p

    print('freq:', freq)
    print('prediction_length:', prediction_length)
    print('context_length:', context_length)
    print('use_feat_dynamic_real:', use_feat_dynamic_real)
    print('use_past_feat_dynamic_real:', use_past_feat_dynamic_real)
    print('use_feat_static_cat:', use_feat_static_cat)
    print('use_log1p:', use_log1p)

    batch_size = args.batch_size
    print('batch_size:', batch_size)

    # Named *_data so the locals do not shadow this function's own name.
    train_data = load_json(os.path.join(args.train, 'train_' + freq + '.json'))
    test_data = load_json(os.path.join(args.test, 'test_' + freq + '.json'))

    num_timeseries = len(train_data)
    print('num_timeseries:', num_timeseries)

    train_ds = ListDataset(parse_data(train_data, use_log1p=use_log1p), freq=freq)
    test_ds = ListDataset(parse_data(test_data, use_log1p=use_log1p), freq=freq)

    # Exactly one of these is set by the dispatch below: an estimator still
    # has to be trained, a predictor is ready to use as-is.
    estimator = None
    predictor = None

    trainer = Trainer(
        ctx="cpu",
        epochs=args.epochs,
        num_batches_per_epoch=args.num_batches_per_epoch,
        learning_rate=args.learning_rate,
        learning_rate_decay_factor=args.learning_rate_decay_factor,
        patience=args.patience,
        minimum_learning_rate=args.minimum_learning_rate,
        clip_gradient=args.clip_gradient,
        weight_decay=args.weight_decay,
        init=args.init.replace('"', ''),
        hybridize=args.hybridize,
    )
    print('trainer:', trainer)

    # The cardinality arrives as a string such as '"[3, 5]"'.
    cardinality = None
    if args.cardinality != '':
        raw_cardinality = (
            args.cardinality
            .replace('"', '')
            .replace(' ', '')
            .replace('[', '')
            .replace(']', '')
        )
        cardinality = [int(item) for item in raw_cardinality.split(',')]
    print('cardinality:', cardinality)

    embedding_dimension = (
        [min(50, (cat + 1) // 2) for cat in cardinality]
        if cardinality is not None
        else None
    )
    print('embedding_dimension:', embedding_dimension)

    algo_name = args.algo_name.replace('"', '')
    print('algo_name:', algo_name)

    # algo_name -> R method name for the RForecastPredictor-backed algorithms.
    r_methods = {
        'ARIMA': 'arima',
        'ETS': 'ets',
        'TBATS': 'tbats',
        'CROSTON': 'croston',
        'MLP': 'mlp',
    }

    if algo_name == 'CanonicalRNN':
        estimator = CanonicalRNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            num_layers=5,
            num_cells=50,
            cell_type='lstm',
            num_parallel_samples=100,
            cardinality=cardinality,
            embedding_dimension=10,
        )
    elif algo_name == 'DeepFactor':
        estimator = DeepFactorEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
            embedding_dimension=10,
        )
    elif algo_name == 'DeepAR':
        # Optional arguments deliberately not passed here: distr_output,
        # scaling, lags_seq, time_features, num_parallel_samples,
        # imputation_method, train_sampler, validation_sampler, alpha, beta,
        # minimum_scale, default_scale, impute_missing_values,
        # num_imputation_samples.
        estimator = DeepAREstimator(
            freq=freq,  # frequency of the data to train on and predict
            prediction_length=prediction_length,  # length of the prediction horizon
            trainer=trainer,
            context_length=context_length,  # steps to unroll the RNN before predicting
            num_layers=2,  # number of RNN layers
            num_cells=40,  # number of RNN cells per layer
            cell_type='lstm',  # 'lstm' or 'gru'
            dropoutcell_type='ZoneoutCell',  # 'ZoneoutCell', 'RNNZoneoutCell', 'VariationalDropoutCell' or 'VariationalZoneoutCell'
            dropout_rate=0.1,  # dropout regularization parameter
            use_feat_dynamic_real=use_feat_dynamic_real,
            use_feat_static_cat=use_feat_static_cat,
            use_feat_static_real=False,
            cardinality=cardinality,  # must be set if use_feat_static_cat == True
            embedding_dimension=embedding_dimension,
            batch_size=batch_size,
        )
    elif algo_name == 'DeepState':
        estimator = DeepStateEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            use_feat_dynamic_real=use_feat_dynamic_real,
            use_feat_static_cat=use_feat_static_cat,
            cardinality=cardinality,
        )
    elif algo_name == 'DeepVAR':
        estimator = DeepVAREstimator(  # use multi
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            target_dim=96,
        )
    elif algo_name in ('GaussianProcess', 'SelfAttention', 'Seq2Seq', 'DeepTPP'):
        # TODO: these estimators are not wired up yet. Sketches kept from the
        # previous revision:
        #   GaussianProcessEstimator(freq, prediction_length, context_length,
        #       trainer, batch_size, cardinality=num_timeseries)
        #   SelfAttentionEstimator(freq, prediction_length, context_length,
        #       trainer, batch_size)
        #   Seq2SeqEstimator(freq, prediction_length, context_length, trainer,
        #       cardinality, embedding_dimension=4, encoder=Seq2SeqEncoder(),
        #       decoder_mlp_layer=[4], decoder_mlp_static_dim=4)
        #   DeepTPPEstimator(prediction_interval_length=prediction_length,
        #       context_interval_length=context_length, freq, trainer,
        #       batch_size,
        #       num_marks=len(cardinality) if cardinality is not None else 0)
        # Report them like any other unsupported name instead of falling
        # through with neither an estimator nor a predictor.
        print('[ERROR]:', algo_name, 'not supported')
        return
    elif algo_name == 'GPVAR':
        estimator = GPVAREstimator(  # use multi
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            target_dim=96,
        )
    elif algo_name == 'LSTNet':
        estimator = LSTNetEstimator(  # use multi
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            num_series=96,
            skip_size=4,
            ar_window=4,
            channels=72,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'NBEATS':
        estimator = NBEATSEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'DeepRenewalProcess':
        estimator = DeepRenewalProcessEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            num_cells=40,
            num_layers=2,
        )
    elif algo_name == 'Tree':
        estimator = TreePredictor(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            n_ignore_last=0,
            lead_time=0,
            max_n_datapts=1000000,
            min_bin_size=100,  # Used only for "QRX" method.
            use_feat_static_real=False,
            use_feat_dynamic_cat=False,
            use_feat_dynamic_real=use_feat_dynamic_real,
            cardinality=cardinality,
            one_hot_encode=False,
            model_params={
                'eta': 0.1,
                'max_depth': 6,
                'silent': 0,
                'nthread': -1,
                'n_jobs': -1,
                'gamma': 1,
                'subsample': 0.9,
                'min_child_weight': 1,
                'colsample_bytree': 0.9,
                'lambda': 1,
                'booster': 'gbtree',
            },
            max_workers=4,  # default: None
            method="QRX",  # "QRX", "QuantileRegression", "QRF"
            quantiles=None,  # Used only for "QuantileRegression" method.
            model=None,
            seed=None,
        )
    elif algo_name == 'MQCNN':
        estimator = MQCNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
            use_past_feat_dynamic_real=use_past_feat_dynamic_real,
            use_feat_dynamic_real=use_feat_dynamic_real,
            use_feat_static_cat=use_feat_static_cat,
            cardinality=cardinality,
            embedding_dimension=embedding_dimension,
            add_time_feature=True,
            add_age_feature=False,
            enable_encoder_dynamic_feature=True,
            enable_decoder_dynamic_feature=True,
            seed=None,
            decoder_mlp_dim_seq=None,
            channels_seq=None,
            dilation_seq=None,
            kernel_size_seq=None,
            use_residual=True,
            quantiles=None,
            distr_output=None,
            scaling=None,
            scaling_decoder_dynamic_feature=False,
            num_forking=None,
            max_ts_len=None,
        )
    elif algo_name == 'MQRNN':
        estimator = MQRNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'SimpleFeedForward':
        estimator = SimpleFeedForwardEstimator(
            num_hidden_dimensions=[40, 40],
            prediction_length=prediction_length,
            context_length=context_length,
            freq=freq,
            trainer=trainer,
            batch_size=batch_size,
        )
    elif algo_name == 'TemporalFusionTransformer':
        # Optional arguments deliberately not passed here: time_features,
        # static_cardinalities, dynamic_cardinalities, static_feature_dims,
        # dynamic_feature_dims, past_dynamic_features.
        estimator = TemporalFusionTransformerEstimator(
            prediction_length=prediction_length,
            context_length=context_length,
            freq=freq,
            trainer=trainer,
            batch_size=batch_size,
            hidden_dim=32,
            variable_dim=None,
            num_heads=4,
            num_outputs=3,
            num_instance_per_series=100,
            dropout_rate=0.1,
        )
    elif algo_name == 'Transformer':
        estimator = TransformerEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
        )
    elif algo_name == 'WaveNet':
        estimator = WaveNetEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=trainer,
            batch_size=batch_size,
            cardinality=cardinality,
        )
    elif algo_name == 'Naive2':
        # TODO Multiplicative seasonality is not appropriate for zero and negative values
        predictor = Naive2Predictor(
            freq=freq,
            prediction_length=prediction_length,
            season_length=context_length,
        )
    elif algo_name == 'NPTS':
        predictor = NPTSPredictor(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
        )
    elif algo_name == 'Prophet':
        def configure_model(model):
            model.add_seasonality(
                name='weekly',
                period=7,
                fourier_order=3,
                prior_scale=0.1,
            )
            return model

        predictor = ProphetPredictor(
            freq=freq,
            prediction_length=prediction_length,
            init_model=configure_model,
        )
    elif algo_name in r_methods:
        predictor = RForecastPredictor(
            freq=freq,
            prediction_length=prediction_length,
            method_name=r_methods[algo_name],
            period=context_length,
            trunc_length=len(train_data[0]['target']),
        )
    elif algo_name == 'SeasonalNaive':
        predictor = SeasonalNaivePredictor(freq=freq, prediction_length=prediction_length)
    else:
        print('[ERROR]:', algo_name, 'not supported')
        return

    if predictor is None:
        try:
            predictor = estimator.train(train_ds, test_ds)
        except Exception as univariate_error:
            print(univariate_error)
            # Second attempt: group all series into one multivariate series,
            # which the multivariate estimators need.
            try:
                grouper_train = MultivariateGrouper(max_target_dim=num_timeseries)
                train_ds_multi = grouper_train(train_ds)
                test_ds_multi = grouper_train(test_ds)
                predictor = estimator.train(train_ds_multi, test_ds_multi)
            except Exception as multivariate_error:
                print(multivariate_error)
                # Without a predictor nothing below can work, so surface the
                # real training error instead of failing later on None.
                raise

    # NOTE(review): evaluation uses the ungrouped test_ds even when the
    # multivariate fallback produced the predictor; confirm this is intended.
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test_ds,  # test dataset
        predictor=predictor,  # predictor
        num_samples=100,  # number of sample paths we want for evaluation
    )

    forecasts = list(forecast_it)
    tss = list(ts_it)

    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(test_ds))

    print(json.dumps(agg_metrics, indent=4))

    model_dir = os.path.join(args.model_dir, algo_name)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(model_dir, exist_ok=True)
    predictor.serialize(Path(model_dir))
def test_r_predictor_serialization():
    """An ``RForecastPredictor`` must survive a ``serde`` encode/decode round trip."""
    predictor = RForecastPredictor(freq="1D", prediction_length=3)
    encoded = serde.encode(predictor)
    restored = serde.decode(encoded)
    assert predictor == restored
# Load the dataset.
dataset = get_dataset(args.datasource, regenerate=False)

# Dataset metadata.
prediction_length = dataset.metadata.prediction_length
freq = dataset.metadata.freq
cardinality = ast.literal_eval(
    dataset.metadata.feat_static_cat[0].cardinality
)

# Training and test splits.
train_ds = dataset.train
test_ds = dataset.test

# ETS predictor.
ets_predictor = RForecastPredictor(
    freq=freq,
    prediction_length=prediction_length,
    method_name='ets',
)
ets_forecast = list(ets_predictor.predict(train_ds))

# ARIMA predictor.
arima_predictor = RForecastPredictor(
    freq=freq,
    prediction_length=prediction_length,
    method_name='arima',
)
arima_forecast = list(arima_predictor.predict(train_ds))

# Croston predictor.
croston_predictor = CrostonForecastPredictor(
    freq=freq,
    prediction_length=prediction_length,
    variant='original',
    no_of_params=2,
)
paths=target_path, start=start, past_length=past , pred_length=pred, slice=slice , timestep=timestep, freq=freq ) if not os.path.exists(forecast_result_saved_path): sample_forecasts = [] for target_name in target_path: target = target_path[target_name] with open(target, 'rb') as fp: target_ds = pickle.load(fp) print('导入原始数据成功~~~') assert target_ds.metadata['dim'] == 1, 'target 序列的维度都应该为1' target_data = target_ds prophet_predictor = RForecastPredictor(freq=freq, prediction_length=pred ,method_name='arima' ,trunc_length=past) generators = prophet_predictor.predict(target_data.train) forecast_samples = list(generators) sorted_samples = np.concatenate([np.expand_dims(sample._sorted_samples, 0) for sample in forecast_samples], axis=0) sorted_samples = np.expand_dims(sorted_samples, axis=0) sample_forecasts.append(sorted_samples) sample_forecasts = np.concatenate(sample_forecasts, axis=0) print('把预测结果保存在-->', forecast_result_saved_path) with open(forecast_result_saved_path , 'wb') as fp: pickle.dump(sample_forecasts , fp)