def GlounTS():
    # from pts.dataset import ListDataset
    # from pts.model.deepar import DeepAREstimator
    # from pts import Trainer
    # from pts.dataset import to_pandas
    # gluonts crashes on my system.
    from gluonts.dataset.common import ListDataset
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.trainer import Trainer

    # NB: relies on the module-level globals `df` and `forecast_size`.
    training_data = ListDataset(
        [{"start": df.index[0], "target": df.value[:"2015-03-08 23:22:53"]}],
        freq="5min",
    )
    # estimator = DeepAREstimator(freq="5min", input_size=43,
    #                             prediction_length=forecast_size,
    #                             trainer=Trainer(epochs=20))
    estimator = DeepAREstimator(freq="5min",
                                prediction_length=forecast_size,
                                trainer=Trainer(epochs=20))
    predictor = estimator.train(training_data=training_data)

    test_data = ListDataset(
        [{"start": df.index[0], "target": df.value[:"2015-03-08 23:22:53"]}],
        freq="5min",
    )
    GluonTS_prediction = next(predictor.predict(test_data))
    GluonTS_mean_yhat = GluonTS_prediction.mean
    GluonTS_median_yhat = GluonTS_prediction.median
    return (GluonTS_mean_yhat.tolist(), GluonTS_median_yhat.tolist(),
            GluonTS_prediction)
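# --- Usage sketch (not from the original source) --------------------------
# GlounTS() above reads the module-level globals `df` and `forecast_size`.
# The values below are assumptions for illustration; the NAB Twitter-volume
# series (also used by deepar_test() later in this collection) matches the
# 5-minute frequency and the timestamp slice inside the function.
import pandas as pd

df = pd.read_csv(
    "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_AMZN.csv",
    header=0, index_col=0)
forecast_size = 12  # hypothetical forecast horizon

mean_yhat, median_yhat, prediction = GlounTS()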
def run_model(data_train, data_meta, save_path, num_epochs=50, lr=1e-3,
              batch_size=64, scaling=False, context_length=3, num_layers=3,
              embedding_dimension=16, context='gpu'):
    estimator = DeepAREstimator(
        freq=data_meta['freq'],
        prediction_length=82,
        scaling=scaling,
        context_length=context_length,
        num_layers=num_layers,
        embedding_dimension=embedding_dimension,
        trainer=Trainer(batch_size=batch_size,
                        epochs=num_epochs,
                        learning_rate=lr,
                        ctx=context,
                        hybridize=False))
    predictor = estimator.train(data_train)
    predictor.serialize(Path(save_path))
    return predictor
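# --- Usage sketch (not from the original source) --------------------------
# run_model() serializes the trained predictor to `save_path`; a serialized
# GluonTS predictor can be reloaded with Predictor.deserialize. The path
# below is hypothetical.
from pathlib import Path
from gluonts.model.predictor import Predictor

predictor = Predictor.deserialize(Path("models/deepar"))  # assumed save_path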
def train_predictor(df_dict, end_train_date, regions_list, target_col,
                    feat_dynamic_cols=None):
    # NB: relies on `data_freq`, `nb_hours_pred`, `max_epochs`,
    # `learning_rate` and `patience` from the enclosing scope.
    estimator = DeepAREstimator(
        freq=data_freq,
        prediction_length=nb_hours_pred,
        trainer=Trainer(epochs=max_epochs,
                        learning_rate=learning_rate,
                        learning_rate_decay_factor=0.01,
                        patience=patience),
        use_feat_dynamic_real=feat_dynamic_cols is not None)
    if feat_dynamic_cols is not None:
        training_data = ListDataset(
            [{"item_id": region,
              "start": df_dict[region].index[0],
              "target": df_dict[region][target_col][:end_train_date],
              "feat_dynamic_real": [
                  df_dict[region][feat_dynamic_col][:end_train_date]
                  for feat_dynamic_col in feat_dynamic_cols]}
             for region in regions_list],
            freq=data_freq)
    else:
        training_data = ListDataset(
            [{"item_id": region,
              "start": df_dict[region].index[0],
              "target": df_dict[region][target_col][:end_train_date]}
             for region in regions_list],
            freq=data_freq)
    predictor = estimator.train(training_data=training_data)
    return predictor
def test_dynamic_integration(
    train_length: int,
    test_length: int,
    prediction_length: int,
    target_start: str,
    rolling_start: str,
    num_dynamic_feat: int,
):
    """
    Trains an estimator on a rolled dataset with dynamic features.
    Tests https://github.com/awslabs/gluon-ts/issues/1390
    """
    train_ds = create_dynamic_dataset(target_start, train_length,
                                      num_dynamic_feat)
    rolled_ds = generate_rolling_dataset(
        dataset=create_dynamic_dataset(target_start, test_length,
                                       num_dynamic_feat),
        strategy=StepStrategy(prediction_length=prediction_length),
        start_time=pd.Timestamp(rolling_start),
    )
    estimator = DeepAREstimator(
        freq="D",
        prediction_length=prediction_length,
        context_length=2 * prediction_length,
        use_feat_dynamic_real=True,
        trainer=Trainer(epochs=1),
    )
    predictor = estimator.train(training_data=train_ds)
    forecast_it, ts_it = make_evaluation_predictions(rolled_ds,
                                                     predictor=predictor,
                                                     num_samples=100)
    training_agg_metrics, _ = Evaluator(num_workers=0)(ts_it, forecast_it)
    # it should have failed by this point if the dynamic features were wrong
    assert training_agg_metrics
def test_listing_1():
    """
    Test GluonTS paper examples from arxiv paper:
    https://arxiv.org/abs/1906.05264

    Listing 1
    """
    from gluonts.dataset.repository.datasets import get_dataset
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.trainer import Trainer
    from gluonts.evaluation import Evaluator
    from gluonts.evaluation.backtest import backtest_metrics

    # We use electricity in the paper but that would take too long to run in
    # the unit test
    dataset_info, train_ds, test_ds = constant_dataset()

    meta = dataset_info.metadata
    estimator = DeepAREstimator(
        freq=meta.time_granularity,
        prediction_length=1,
        trainer=Trainer(epochs=1, batch_size=32),
    )
    predictor = estimator.train(train_ds)
    evaluator = Evaluator(quantiles=(0.1, 0.5, 0.9))
    agg_metrics, item_metrics = backtest_metrics(
        train_dataset=train_ds,
        test_dataset=test_ds,
        forecaster=predictor,
        evaluator=evaluator,
    )
def train(epochs, prediction_length, num_layers, dropout_rate):
    # create train dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TRAIN'] +
                     "/train.csv",
                     header=0,
                     index_col=0)
    training_data = ListDataset(
        [{"start": df.index[0], "target": df.value[:]}], freq="5min")

    # define DeepAR estimator
    deepar_estimator = DeepAREstimator(freq="5min",
                                       prediction_length=prediction_length,
                                       dropout_rate=dropout_rate,
                                       num_layers=num_layers,
                                       trainer=Trainer(epochs=epochs))

    # train the model
    deepar_predictor = deepar_estimator.train(training_data=training_data)

    # create test dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TEST'] +
                     "/test.csv",
                     header=0,
                     index_col=0)
    test_data = ListDataset(
        [{"start": df.index[0], "target": df.value[:]}], freq="5min")

    # evaluate trained model on test data
    forecast_it, ts_it = make_evaluation_predictions(test_data,
                                                     deepar_predictor,
                                                     num_samples=100)
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts),
                                          num_series=len(test_data))
    print("MSE:", agg_metrics["MSE"])

    # save the model
    deepar_predictor.serialize(pathlib.Path(os.environ['SM_MODEL_DIR']))
    return deepar_predictor
def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
    """
    Fits DeepAR model using training data from set_training_data and hyperparameters

    Keyword Arguments:
        timeout {float} -- timeout, considered (default: {None})
        iterations {int} -- iterations, considered (default: {None})

    Returns:
        CallResult[None]
    """
    if iterations is None:
        iterations = self.hyperparams["epochs"]
        has_finished = True
    else:
        has_finished = False

    estimator = DeepAREstimator(
        freq=self._freq,
        prediction_length=self.hyperparams['prediction_length'],
        context_length=self.hyperparams['context_length'],
        use_feat_static_cat=self._deepar_dataset.has_cat_cols()
        or self._deepar_dataset.has_group_cols(),
        use_feat_dynamic_real=self._deepar_dataset.has_real_cols(),
        cardinality=self._deepar_dataset.get_cardinality(),
        distr_output=self._deepar_dataset.get_distribution_type(),
        dropout_rate=self.hyperparams['dropout_rate'],
        trainer=Trainer(
            epochs=iterations,
            learning_rate=self.hyperparams['learning_rate'],
            batch_size=self.hyperparams['training_batch_size'],
            num_batches_per_epoch=self.hyperparams['steps_per_epoch']))

    logger.info(f"Fitting for {iterations} iterations")
    start_time = time.time()
    predictor = estimator.train(self._train_data)
    predictor.batch_size = self.hyperparams['inference_batch_size']
    self._is_fit = True
    logger.info(
        f"Fit for {iterations} epochs, took {time.time() - start_time}s")

    if not os.path.isdir(self.hyperparams['weights_dir']):
        os.mkdir(self.hyperparams['weights_dir'])
    predictor.serialize(Path(self.hyperparams['weights_dir']))

    return CallResult(None, has_finished=has_finished)
def model_eval(estimator=None, TD=None, cardinalities=None, istrain=True,
               ismetric=True, isplot=True, pars=None):
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.trainer import Trainer

    p = pars
    if estimator is None:
        estimator = DeepAREstimator(
            prediction_length=p.get("single_pred_length", 28),
            freq="D",
            distr_output=p.get("distr_output", None),
            use_feat_static_cat=True,
            use_feat_dynamic_real=True,
            cardinality=p.get("cardinality", None),
            trainer=Trainer(
                learning_rate=p.get("lr", 1e-4),  # 1e-4 or 1e-3
                epochs=p.get("epoch", None),
                num_batches_per_epoch=p.get("num_batches_per_epoch", 10),
                batch_size=p.get("batch_size", 8),
            ))

    if istrain:
        estimator = estimator.train(TD.train)

    #### Evaluate #########################################################
    from gluonts.evaluation.backtest import make_evaluation_predictions
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=TD.test,
        predictor=estimator,
        num_samples=p.get("num_samples", 5))
    forecasts, tss = list(forecast_it), list(ts_it)

    if isplot:
        forecast_graph(forecasts, tss, p.get("ii_series", 0))

    ####### Metrics #######################################################
    agg_metrics, item_metrics = None, None
    if ismetric:
        agg_metrics, item_metrics = forecast_metrics(
            tss, forecasts, TD,
            quantiles=[0.1, 0.5, 0.9],
            show=True,
            dir_save=None)

    return estimator, forecasts, tss, agg_metrics, item_metrics
def easy_train():
    import pandas as pd
    df = pd.read_csv("optiver_hacktheburgh/sp.csv",
                     header=0,
                     index_col=0,
                     usecols=[0, 2],
                     skiprows=lambda x: x % 5 != 0)

    # df[:100].plot(linewidth=2)
    print("Showing")
    # plt.show()

    from gluonts.dataset.common import ListDataset
    # NB: the frequencies below ("1s", "1min", "10s") are inconsistent in
    # the original code; they should presumably all agree.
    training_data = ListDataset(
        [{"start": df.index[0], "target": df.values.flatten()}], freq="1s")

    # from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.trainer import Trainer

    estimator = DeepAREstimator(freq="1min",
                                prediction_length=100,
                                trainer=Trainer(epochs=20))
    predictor = estimator.train(training_data=training_data)

    test_data = ListDataset(
        [{"start": df.index[0], "target": df.values.flatten()[:1000]}],
        freq="10s")
    full_test_data = ListDataset(
        [{"start": df.index[0], "target": df.values.flatten()}], freq="10s")

    means = []
    for i, (test_entry, forecast) in enumerate(
            zip(full_test_data, predictor.predict(test_data))):
        # if i > 0:
        #     break
        print(forecast.dim())
        plt.plot(test_entry["target"])
        # forecast.plot(color='g', prediction_intervals=[], output_file="test.png")
        means.extend(list(forecast.mean))
        print(forecast.mean)
        l = len(test_entry["target"])
        plt.axhline(y=means[0], xmin=0, xmax=l, linewidth=2, color='r')
        plt.axvline(x=5000, color='b')
        plt.grid(which='both')
        plt.show()
def run_model(data_train, data_meta, save_path, num_epochs=50, lr=1e-3,
              batch_size=64):
    estimator = DeepAREstimator(
        freq=data_meta['freq'],
        prediction_length=data_meta['prediction_length'],
        trainer=Trainer(batch_size=batch_size,
                        epochs=num_epochs,
                        learning_rate=lr,
                        ctx='cpu',
                        hybridize=False))
    predictor = estimator.train(data_train)
    predictor.serialize(Path(save_path))
    return predictor
def deepar(data="m4_quarterly", seed=42, epochs=100, batches=50):
    dataset = get_dataset(data, regenerate=False)

    mx.random.seed(seed)
    np.random.seed(seed)

    trainer = Trainer(
        ctx=mx.cpu(0),
        epochs=epochs,
        num_batches_per_epoch=batches,
        learning_rate=1e-3,
    )
    cardinality = int(dataset.metadata.feat_static_cat[0].cardinality)
    estimator = DeepAREstimator(
        trainer=trainer,
        cardinality=[cardinality],
        context_length=dataset.metadata.prediction_length,
        prediction_length=dataset.metadata.prediction_length,
        freq=dataset.metadata.freq,
        use_feat_static_cat=True,
    )
    # predictor = estimator.train(training_data=dataset.train,
    #                             validation_data=dataset.test)
    predictor = estimator.train(training_data=dataset.train)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset.test, predictor=predictor, num_samples=100
    )
    agg_metrics, item_metrics = Evaluator()(
        ts_it, forecast_it, num_series=len(dataset.test)
    )

    metrics = ["MASE", "sMAPE", "MSIS", "wQuantileLoss[0.5]",
               "wQuantileLoss[0.9]"]
    output = {key: round(value, 8) for key, value in agg_metrics.items()
              if key in metrics}
    output["seed"] = seed
    output["epochs"] = epochs
    output["num_batches"] = batches
    df = pd.DataFrame([output])
    return df
def deepar_test():
    url = "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_AMZN.csv"
    df = pd.read_csv(url, header=0, index_col=0)
    data = ListDataset(
        [{"start": df.index[0], "target": df.value[:"2015-04-05 00:00:00"]}],
        freq="5min",
    )
    trainer = Trainer(epochs=10)
    estimator = DeepAREstimator(freq="5min", prediction_length=12,
                                trainer=trainer)
    predictor = estimator.train(training_data=data)
    prediction = next(predictor.predict(data))
    print(prediction.mean)
    prediction.plot(output_file="./graph.png")
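# --- Usage sketch (not from the original source) --------------------------
# deepar_test() is self-contained (it downloads the NAB Twitter-volume
# series itself), so it can be run directly as a script.
if __name__ == "__main__":
    deepar_test()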
def train(self, **kwargs):
    epochs = kwargs.get("epochs", 10)

    # Adjust class freq.
    self.freq = pd.infer_freq(self.train_df.index)
    if self.freq == "MS":
        self.freq = "M"

    estimator = DeepAREstimator(
        freq=self.freq,
        prediction_length=self.forecast_len,
        trainer=Trainer(epochs=epochs,
                        batch_size=64,
                        ctx="gpu" if self.GPU else "cpu"),
    )
    self.model = estimator.train(
        training_data=self.format_input(self.train_df, self.freq))
def train(dataset: ListDataset, frequency: str, horizon: int, model_name: str,
          num_layers: int, num_cells: int, epochs: int, patience: int,
          weight_decay: float, dropout_rate: float, batch_size: int,
          snapshot_dir: str, overwrite: bool):
    """
    Train a model.

    :param dataset:
    :param model_name:
    :param horizon:
    :param frequency:
    :param snapshot_dir:
    :param epochs:
    :param patience:
    :param weight_decay:
    :param batch_size:
    :param dropout_rate:
    :param num_layers:
    :param num_cells:
    :param overwrite:
    :return:
    """
    model_dir = Path(snapshot_dir)
    if not overwrite and os.path.isdir(snapshot_dir):
        return Predictor.deserialize(model_dir)

    trainer = Trainer(epochs=epochs,
                      patience=patience,
                      weight_decay=weight_decay,
                      batch_size=batch_size)
    if model_name == 'deepar':
        estimator = DeepAREstimator(freq=frequency,
                                    scaling=False,
                                    dropout_rate=dropout_rate,
                                    num_layers=num_layers,
                                    num_cells=num_cells,
                                    prediction_length=horizon,
                                    trainer=trainer)
    else:
        raise Exception(f'Unknown model {model_name}')

    predictor = estimator.train(training_data=dataset)
    model_dir.mkdir(parents=True, exist_ok=overwrite)
    predictor.serialize(model_dir)
    return predictor
def train_deepar(train_ds, context_length=10, prediction_length=20,
                 period=4320, epochs=2):
    freq = "{}H".format(period / 3600)
    estimator = DeepAREstimator(prediction_length=prediction_length,
                                context_length=context_length,
                                freq=freq,
                                num_cells=50,
                                trainer=Trainer(ctx="gpu",
                                                epochs=epochs,
                                                learning_rate=1e-3,
                                                hybridize=False,
                                                num_batches_per_epoch=100,
                                                batch_size=64),
                                num_parallel_samples=500)
    predictor = estimator.train(train_ds)
    return predictor
def train_predictor(region_df_dict, end_train_date, regions_list, max_epochs,
                    learning_rate, target_col, feat_dynamic_cols=None):
    estimator = DeepAREstimator(
        freq=md.FREQ,
        prediction_length=md.NB_HOURS_PRED,
        trainer=Trainer(epochs=max_epochs,
                        learning_rate=learning_rate,
                        learning_rate_decay_factor=md.LR_DECAY_FACTOR),
        use_feat_dynamic_real=feat_dynamic_cols is not None)
    if feat_dynamic_cols is not None:
        training_data = ListDataset(
            [{"item_id": region,
              "start": region_df_dict[region].index[0],
              "target": region_df_dict[region][target_col][:end_train_date],
              "feat_dynamic_real": [
                  region_df_dict[region][feat_dynamic_col][:end_train_date]
                  for feat_dynamic_col in feat_dynamic_cols]}
             for region in regions_list],
            freq=md.FREQ)
    else:
        training_data = ListDataset(
            [{"item_id": region,
              "start": region_df_dict[region].index[0],
              "target": region_df_dict[region][target_col][:end_train_date]}
             for region in regions_list],
            freq=md.FREQ)

    model_path = predictor_path(region_df_dict, regions_list, max_epochs,
                                learning_rate, feat_dynamic_cols)
    model_dir, model_name = os.path.split(model_path)
    logging.info("Training deepar model {}".format(model_name))
    logging.getLogger().setLevel(logging.WARNING)
    predictor = estimator.train(training_data=training_data)
    logging.getLogger().setLevel(logging.INFO)
    logging.info("Saving model with {} epochs and learning rate of {}".format(
        max_epochs, learning_rate))
    with open(model_path, "wb") as file:
        pickle.dump(predictor, file)
    return predictor
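# --- Usage sketch (not from the original source) --------------------------
# train_predictor() above pickles the predictor to the path returned by
# predictor_path(...); reloading it is plain pickle. `model_path` is assumed
# to be the same path used during training.
import pickle

with open(model_path, "rb") as file:
    predictor = pickle.load(file)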
def test_general_functionality() -> None:
    ds_info, train_ds, test_ds = constant_dataset()
    freq = ds_info.metadata.freq
    prediction_length = ds_info.prediction_length

    trainer = Trainer(epochs=3, num_batches_per_epoch=5)
    estimator = DeepAREstimator(prediction_length=prediction_length,
                                freq=freq,
                                trainer=trainer)
    predictor = estimator.train(training_data=train_ds)
    agg_metrics, item_metrics = backtest_metrics(
        test_dataset=test_ds,
        predictor=predictor,
        evaluator=Evaluator(calculate_owa=False),
    )

    # just some sanity check
    assert (agg_metrics is not None and item_metrics is not None
            ), "Metrics should not be None if everything went smoothly."
def init_model():
    # NB: `epochs` is computed but currently unused; see the TODO below.
    epochs = None
    context = 'cpu'
    if args.epochs is not None:
        epochs = args.epochs
    if args.gpu:
        context = 'gpu'

    predictor = None
    if args.train:
        # TODO: Find a way to make it such that we do not set epoch when
        # there is no need to
        my_trainer = Trainer(ctx=context)
        estimator = DeepAREstimator(freq="5min",
                                    prediction_length=args.prediction,
                                    trainer=my_trainer)
        predictor = estimator.train(training_data=training_data)
        predictor.serialize(Path("models/"))
    else:
        # predictor = Predictor.deserialize(Path("models/"))
        predictor = RepresentableBlockPredictor.deserialize(Path("models/"))
        predictor.ctx = mx.Context('cpu')
    return predictor
def train(args):
    # Parse arguments
    epochs = args.epochs
    pred_length = args.pred_length
    num_layers = args.num_layers
    num_cells = args.num_cells
    dropout_rate = args.dropout_rate
    batch_size = args.batch_size
    lr = args.lr
    model_dir = args.model_dir
    data_dir = args.data_dir
    num_gpus = args.num_gpus
    output_dir = args.output_dir

    device = "gpu" if num_gpus > 0 else "cpu"
    FREQ = 'D'

    # Get training data
    target_df = pd.read_csv(os.path.join(data_dir, 'target_train.csv'))
    target_df.set_index(target_df.columns[0], inplace=True)

    target = target_df.values
    num_steps, num_series = target_df.shape
    start_dt = target_df.index[0]

    custom_ds_metadata = {
        'num_series': num_series,
        'num_steps': num_steps,
        'prediction_length': pred_length,
        'freq': FREQ,
        'start': [start_dt for _ in range(num_series)]
    }

    # Prepare GluonTS dataset
    train_lst = []
    for i in range(0, num_series):
        target_vec = target[:-pred_length, i]
        dic = {FieldName.TARGET: target_vec, FieldName.START: start_dt}
        train_lst.append(dic)

    test_lst = []
    for i in range(0, num_series):
        target_vec = target[:, i]
        dic = {FieldName.TARGET: target_vec, FieldName.START: start_dt}
        test_lst.append(dic)

    train_ds = ListDataset(train_lst, freq=FREQ)
    test_ds = ListDataset(test_lst, freq=FREQ)

    train_entry = next(iter(train_ds))
    train_entry.keys()

    # Define estimator
    trainer = Trainer(ctx=device,
                      epochs=epochs,
                      learning_rate=lr,
                      batch_size=batch_size)
    deepar_estimator = DeepAREstimator(freq=FREQ,
                                       prediction_length=pred_length,
                                       num_cells=num_cells,
                                       dropout_rate=dropout_rate,
                                       num_layers=num_layers,
                                       distr_output=StudentTOutput(),
                                       trainer=trainer)

    # Train the model
    deepar_predictor = deepar_estimator.train(train_ds)

    # Evaluate trained model on test data
    forecast_it, ts_it = make_evaluation_predictions(test_ds,
                                                     deepar_predictor,
                                                     num_samples=100)
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts),
                                          num_series=len(test_ds))

    metrics = [
        'RMSE', 'MAPE', 'wQuantileLoss[0.1]', 'wQuantileLoss[0.5]',
        'wQuantileLoss[0.9]', 'mean_wQuantileLoss'
    ]
    metrics_dic = dict(
        (key, value) for key, value in agg_metrics.items() if key in metrics)
    print(json.dumps(metrics_dic, indent=2))

    # Save the model
    deepar_predictor.serialize(pathlib.Path(model_dir))
    return deepar_predictor
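# --- Hypothetical argument parser (not from the original source) ----------
# train(args) above reads these attributes from `args`; this sketch shows a
# parser that would supply them. All defaults are illustrative only; the
# SM_* environment variables follow the SageMaker convention already used
# elsewhere in this collection.
import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument('--epochs', type=int, default=10)
parser.add_argument('--pred_length', type=int, default=28)
parser.add_argument('--num_layers', type=int, default=2)
parser.add_argument('--num_cells', type=int, default=40)
parser.add_argument('--dropout_rate', type=float, default=0.1)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--model_dir', type=str,
                    default=os.environ.get('SM_MODEL_DIR', 'model'))
parser.add_argument('--data_dir', type=str,
                    default=os.environ.get('SM_CHANNEL_TRAIN', 'data'))
parser.add_argument('--num_gpus', type=int, default=0)
parser.add_argument('--output_dir', type=str, default='output')
args = parser.parse_args()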
def fit(self, df, future_regressor=[]):
    """Train algorithm given data supplied.

    Args:
        df (pandas.DataFrame): Datetime Indexed
    """
    df = self.basic_profile(df)
    try:
        from mxnet.random import seed as mxnet_seed
        mxnet_seed(self.random_seed)
    except Exception:
        pass

    gluon_train = df.transpose()
    self.train_index = gluon_train.index

    gluon_freq = str(self.frequency).split('-')[0]
    if gluon_freq in ["MS", "1MS"]:
        gluon_freq = "M"
    if int(self.verbose) > 1:
        print(f"Gluon Frequency is {gluon_freq}")

    if str(self.context_length).replace('.', '').isdigit():
        self.gluon_context_length = int(float(self.context_length))
    elif 'forecastlength' in str(self.context_length).lower():
        len_int = int([x for x in str(self.context_length) if x.isdigit()][0])
        self.gluon_context_length = int(len_int * self.forecast_length)
    else:
        self.gluon_context_length = 2 * self.forecast_length
        self.context_length = '2ForecastLength'

    ts_metadata = {
        'num_series': len(gluon_train.index),
        'freq': gluon_freq,
        'gluon_start': [gluon_train.columns[0]
                        for _ in range(len(gluon_train.index))],
        'context_length': self.gluon_context_length,
        'forecast_length': self.forecast_length,
    }
    self.test_ds = ListDataset(
        [{FieldName.TARGET: target, FieldName.START: start}
         for (target, start) in zip(gluon_train.values,
                                    ts_metadata['gluon_start'])],
        freq=ts_metadata['freq'],
    )

    if self.gluon_model == 'DeepAR':
        from gluonts.model.deepar import DeepAREstimator
        estimator = DeepAREstimator(
            freq=ts_metadata['freq'],
            context_length=ts_metadata['context_length'],
            prediction_length=ts_metadata['forecast_length'],
            trainer=Trainer(epochs=self.epochs,
                            learning_rate=self.learning_rate),
        )
    elif self.gluon_model == 'NPTS':
        from gluonts.model.npts import NPTSEstimator
        estimator = NPTSEstimator(
            freq=ts_metadata['freq'],
            context_length=ts_metadata['context_length'],
            prediction_length=ts_metadata['forecast_length'],
        )
    elif self.gluon_model == 'MQCNN':
        from gluonts.model.seq2seq import MQCNNEstimator
        estimator = MQCNNEstimator(
            freq=ts_metadata['freq'],
            context_length=ts_metadata['context_length'],
            prediction_length=ts_metadata['forecast_length'],
            trainer=Trainer(epochs=self.epochs,
                            learning_rate=self.learning_rate),
        )
    elif self.gluon_model == 'SFF':
        from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
        estimator = SimpleFeedForwardEstimator(
            prediction_length=ts_metadata['forecast_length'],
            context_length=ts_metadata['context_length'],
            freq=ts_metadata['freq'],
            trainer=Trainer(
                epochs=self.epochs,
                learning_rate=self.learning_rate,
                hybridize=False,
                num_batches_per_epoch=100,
            ),
        )
    elif self.gluon_model == 'Transformer':
        from gluonts.model.transformer import TransformerEstimator
        estimator = TransformerEstimator(
            prediction_length=ts_metadata['forecast_length'],
            context_length=ts_metadata['context_length'],
            freq=ts_metadata['freq'],
            trainer=Trainer(epochs=self.epochs,
                            learning_rate=self.learning_rate),
        )
    elif self.gluon_model == 'DeepState':
        from gluonts.model.deepstate import DeepStateEstimator
        estimator = DeepStateEstimator(
            prediction_length=ts_metadata['forecast_length'],
            past_length=ts_metadata['context_length'],
            freq=ts_metadata['freq'],
            use_feat_static_cat=False,
            cardinality=[1],
            trainer=Trainer(ctx='cpu',
                            epochs=self.epochs,
                            learning_rate=self.learning_rate),
        )
    elif self.gluon_model == 'DeepFactor':
        from gluonts.model.deep_factor import DeepFactorEstimator
        estimator = DeepFactorEstimator(
            freq=ts_metadata['freq'],
            context_length=ts_metadata['context_length'],
            prediction_length=ts_metadata['forecast_length'],
            trainer=Trainer(epochs=self.epochs,
                            learning_rate=self.learning_rate),
        )
    elif self.gluon_model == 'WaveNet':
        # Usually needs more epochs/training iterations than other models do
        from gluonts.model.wavenet import WaveNetEstimator
        estimator = WaveNetEstimator(
            freq=ts_metadata['freq'],
            prediction_length=ts_metadata['forecast_length'],
            trainer=Trainer(epochs=self.epochs,
                            learning_rate=self.learning_rate),
        )
    else:
        raise ValueError("'gluon_model' not recognized.")

    self.GluonPredictor = estimator.train(self.test_ds)
    self.ts_metadata = ts_metadata
    self.fit_runtime = datetime.datetime.now() - self.startTime
    return self
# (fragment: the opening of the training ListDataset([...]) call precedes
# this excerpt)
        FieldName.FEAT_STATIC_CAT: fsc
    } for (target, fsc) in zip(df_train[0:number_of_products],
                               ts_code[0:number_of_products].reshape(-1, 1))],
    freq=freq)

# reshape the test data to use only number_of_products
test_ds = ListDataset(
    [{FieldName.TARGET: target,
      FieldName.START: start_test,
      FieldName.FEAT_STATIC_CAT: fsc}
     for (target, fsc) in zip(df_test[0:number_of_products],
                              ts_code[0:number_of_products].reshape(-1, 1))],
    freq=freq)

# train the predictor
predictor = estimator.train(training_data=train_ds)

# evaluate the predictions on the test set
forecast_it, ts_it = make_evaluation_predictions(dataset=test_ds,
                                                 predictor=predictor,
                                                 num_samples=100)

print("Obtaining time series conditioning values ...")
tss = list(tqdm(ts_it, total=len(df_test)))
print("Obtaining time series conditioning values ...")
forecasts = list(tqdm(forecast_it, total=len(df_test)))

# plot the predictions with a confidence interval
for i in tqdm(range(number_of_products - 1)):
    ts_entry = tss[i]
    ts_entry.columns = [list(df.columns)[i]]
# (fragment: `custom_datasetx`, `freq` and `prediction_length` are defined
# earlier in the source script)
plt.plot(custom_datasetx[0])
plt.show()

start = pd.Timestamp("01-01-2019", freq=freq)
train_ds = [{'target': x, 'start': start}
            for x in custom_datasetx[:, :, :-prediction_length]]
test_ds = [{'target': x, 'start': start} for x in custom_datasetx[:, :, :]]

# Trainer parameters
epochs = 1
learning_rate = 1E-3
batch_size = 1
num_batches_per_epoch = 2

# create estimator
estimator = DeepAREstimator(
    prediction_length=prediction_length,
    context_length=prediction_length,
    freq=freq,
    # trainer=Trainer(ctx="gpu", epochs=epochs, learning_rate=learning_rate,
    #                 hybridize=True, batch_size=batch_size,
    #                 num_batches_per_epoch=num_batches_per_epoch),
    distr_output=MultivariateGaussianOutput(dim=2),
)
predictor = estimator.train(train_ds)
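# --- Hypothetical inputs for the fragment above (not from the source) ------
# The fragment implies a 3-D array `custom_datasetx` of shape
# (num_series, dim, length), with dim matching MultivariateGaussianOutput(dim=2),
# plus `freq` and `prediction_length`. A toy setup could look like:
import numpy as np

prediction_length = 24  # assumed horizon
freq = "1H"             # assumed frequency
custom_datasetx = np.random.normal(size=(10, 2, 200))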
def train(args):
    # Parse arguments
    epochs = args.epochs
    pred_length = args.pred_length
    num_layers = args.num_layers
    num_cells = args.num_cells
    dropout_rate = args.dropout_rate
    batch_size = args.batch_size
    lr = args.lr
    model_dir = args.model_dir
    data_dir = args.data_dir
    num_gpus = args.num_gpus
    output_dir = args.output_dir

    device = "gpu" if num_gpus > 0 else "cpu"
    FREQ = 'D'
    target_col = 'Weekly_Sales_sum'
    related_cols = ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment']

    # Get training data
    target_train_df = pd.read_csv(os.path.join(data_dir, 'target_train.csv'),
                                  index_col=0, header=[0, 1])
    related_train_df = pd.read_csv(os.path.join(data_dir, 'related_train.csv'),
                                   index_col=0, header=[0, 1])
    store_df = pd.read_csv(os.path.join(data_dir, 'item.csv'), index_col=0)

    num_steps, num_series = target_train_df.shape
    target = target_train_df.values
    start_train_dt = target_train_df.index[0]

    custom_ds_metadata = {
        'num_series': num_series,
        'num_steps': num_steps,
        'prediction_length': pred_length,
        'freq': FREQ,
        'start': [start_train_dt for _ in range(num_series)]
    }

    # Prepare GluonTS dataset
    related_list = [related_train_df[c].values for c in related_cols]

    train_lst = []
    for i in range(0, num_series):
        target_vec = target[:-pred_length, i]
        related_vecs = [related[:-pred_length, i] for related in related_list]
        item = store_df.loc[i + 1]
        dic = {FieldName.TARGET: target_vec,
               FieldName.START: start_train_dt,
               FieldName.FEAT_DYNAMIC_REAL: related_vecs,
               FieldName.FEAT_STATIC_CAT: [item[0]],
               FieldName.FEAT_STATIC_REAL: [item[1]]}
        train_lst.append(dic)

    test_lst = []
    for i in range(0, num_series):
        target_vec = target[:, i]
        related_vecs = [related[:, i] for related in related_list]
        item = store_df.loc[i + 1]
        dic = {FieldName.TARGET: target_vec,
               FieldName.START: start_train_dt,
               FieldName.FEAT_DYNAMIC_REAL: related_vecs,
               FieldName.FEAT_STATIC_CAT: [item[0]],
               FieldName.FEAT_STATIC_REAL: [item[1]]}
        test_lst.append(dic)

    train_ds = ListDataset(train_lst, freq=FREQ)
    test_ds = ListDataset(test_lst, freq=FREQ)

    # Define estimator
    trainer = Trainer(ctx=device,
                      epochs=epochs,
                      learning_rate=lr,
                      batch_size=batch_size)
    deepar_estimator = DeepAREstimator(freq=FREQ,
                                       prediction_length=pred_length,
                                       use_feat_dynamic_real=True,
                                       use_feat_static_cat=True,
                                       use_feat_static_real=True,
                                       cardinality=[3],
                                       num_cells=30,
                                       distr_output=StudentTOutput(),
                                       trainer=trainer)

    # Train the model
    deepar_predictor = deepar_estimator.train(train_ds)

    # Evaluate trained model on test data
    forecast_it, ts_it = make_evaluation_predictions(test_ds,
                                                     deepar_predictor,
                                                     num_samples=100)
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts),
                                          num_series=len(test_ds))

    metrics = ['RMSE', 'MAPE', 'wQuantileLoss[0.1]', 'wQuantileLoss[0.5]',
               'wQuantileLoss[0.9]', 'mean_wQuantileLoss']
    metrics_dic = dict(
        (key, value) for key, value in agg_metrics.items() if key in metrics)
    print(json.dumps(metrics_dic, indent=2))

    # Save the model
    deepar_predictor.serialize(pathlib.Path(model_dir))
    return deepar_predictor
def train(bucket, seq, algo, freq, prediction_length, epochs, learning_rate,
          hybridize, num_batches_per_epoch):
    # create train dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TRAIN'] +
                     "/train.csv",
                     header=0,
                     index_col=0)
    training_data = ListDataset(
        [{"start": df.index[0],
          "target": df.usage[:],
          "item_id": df.client[:]}],
        freq=freq)

    # create test dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TEST'] +
                     "/test.csv",
                     header=0,
                     index_col=0)
    test_data = ListDataset(
        [{"start": df.index[0],
          "target": df.usage[:],
          "item_id": 'client_12'}],
        freq=freq)

    hook = Hook.create_from_json_file()

    # determine estimators ##################################################
    if algo == "DeepAR":
        estimator = DeepAREstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=1,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))
        # train the model
        predictor = estimator.train(training_data=training_data)
        print("DeepAR training is complete SUCCESS")
    elif algo == "SFeedFwd":
        estimator = SimpleFeedForwardEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))
        # train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    elif algo == "lstnet":
        # Needed for LSTNet ONLY
        grouper = MultivariateGrouper(max_target_dim=6)
        training_data = grouper(training_data)
        test_data = grouper(test_data)
        context_length = prediction_length
        num_series = 1
        skip_size = 1
        ar_window = 1
        channels = 4
        estimator = LSTNetEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            num_series=num_series,
            skip_size=skip_size,
            ar_window=ar_window,
            channels=channels,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))
        # train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    elif algo == "seq2seq":
        estimator = MQCNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))
        # train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    else:
        estimator = TransformerEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))
        # train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    #########################################################################

    # evaluate trained model on test data
    forecast_it, ts_it = make_evaluation_predictions(test_data, predictor,
                                                     num_samples=100)
    print("EVALUATION is complete SUCCESS")
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts),
                                          num_series=len(test_data))
    print("METRICS retrieved SUCCESS")

    # bucket = "bwp-sandbox"
    mainpref = "gluonts/blog-models/"
    prefix = mainpref + str(seq) + "/"
    agg_df = pd.DataFrame(agg_metrics, index=[0])
    file = "metrics" + str(seq) + ".csv"
    os.system('mkdir metrics')
    cspath = os.path.join('metrics', file)
    agg_df.to_csv(cspath)
    s3.upload_file(cspath, bucket, mainpref + "metrics/" + file)

    hook.save_scalar("MAPE", agg_metrics["MAPE"], sm_metric=True)
    hook.save_scalar("RMSE", agg_metrics["RMSE"], sm_metric=True)
    hook.save_scalar("MASE", agg_metrics["MASE"], sm_metric=True)
    hook.save_scalar("MSE", agg_metrics["MSE"], sm_metric=True)
    print("MAPE:", agg_metrics["MAPE"])

    # save the model
    predictor.serialize(pathlib.Path(os.environ['SM_MODEL_DIR']))
    uploadDirectory(os.environ['SM_MODEL_DIR'], prefix, bucket)
    return predictor
# (fragment: the opening of the DeepAREstimator(...) call precedes this
# excerpt)
    context_length=1,
    use_feat_dynamic_real=False,
    use_feat_static_cat=False,
    cardinality=None,
    scaling=False,
    trainer=Trainer(
        learning_rate=5e-3,
        epochs=10,
        num_batches_per_epoch=10,
        batch_size=30,
    )
)
predictor = estimator.train(gluonts_ds)

from gluonts.evaluation.backtest import make_evaluation_predictions
import json


def plot_prob_forecasts(ts_entry, forecast_entry):
    plot_length = 150
    prediction_intervals = (50.0, 90.0)
    legend = ["observations", "median prediction"] + [
        f"{k}% prediction interval" for k in prediction_intervals][::-1]

    fig, ax = plt.subplots(1, 1, figsize=(10, 7))
    ts_entry[-plot_length:].plot(ax=ax)  # plot the time series
    forecast_entry.plot(prediction_intervals=prediction_intervals, color='g')
    plt.grid(which="both")
    plt.legend(legend, loc="upper left")
estimator = DeepAREstimator(
    prediction_length=12,
    freq="D",
    distr_output=NegativeBinomialOutput(),
    use_feat_static_cat=True,
    use_feat_dynamic_real=True,
    cardinality=[3049, 7, 3, 10, 3],
    trainer=Trainer(
        learning_rate=1e-3,
        epochs=1,
        num_batches_per_epoch=10,
        batch_size=10,
    )
)
predictor = estimator.train(TD.train)

from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.evaluation import Evaluator

forecast_it, ts_it = make_evaluation_predictions(
    dataset=TD.test,
    predictor=predictor,
    num_samples=100,
)

agg_metrics, item_metrics = Evaluator()(ts_it, forecast_it,
                                        num_series=len(TD.test))
print(agg_metrics)
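# --- Usage sketch (not from the original source) --------------------------
# agg_metrics is a plain dict, so individual scores can be read directly;
# the key names below appear in the GluonTS Evaluator output used elsewhere
# in this collection.
print(agg_metrics["mean_wQuantileLoss"], agg_metrics["RMSE"])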
def train_models(
    train,
    models,
    forecast_len,
    full_df=None,
    seasonality="infer_from_data",
    in_sample=None,
    freq=None,
    GPU=None,
):
    seasons = select_seasonality(train, seasonality)
    periods = select_seasonality(train, "periodocity")

    models_dict = {}
    for m in models:
        if in_sample:
            print("Model {} is being trained for in sample prediction".format(m))
        else:
            print("Model {} is being trained for out of sample prediction".format(m))
        if m == "ARIMA":
            models_dict[m] = pm.auto_arima(train, seasonal=True, m=seasons)
        if m == "Prophet":
            if freq == "D":
                model = Prophet(daily_seasonality=True)
            else:
                model = Prophet()
            models_dict[m] = model.fit(prophet_dataframe(train))
        if m == "HWAAS":
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend="add",
                    seasonal="add",
                    damped=True,
                ).fit(use_boxcox=True)
            except:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend="add",
                    seasonal="add",
                    damped=True,
                ).fit(use_boxcox=False)
        if m == "HWAMS":
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend="add",
                    seasonal="mul",
                    damped=True,
                ).fit(use_boxcox=True)
            except:
                try:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend="add",
                        seasonal="mul",
                        damped=True,
                    ).fit(use_boxcox=False)
                except:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend=None,
                        seasonal="add").fit(use_boxcox=False)
        # if m == "HOLT":
        #     models_dict["HOLT"] = Holt(train, exponential=True).fit()
        if m == "PYAF":
            model = autof()
            model.train(
                iInputDS=train.reset_index(),
                iTime="Date",
                iSignal="Target",
                iHorizon=len(train),
            )  # bad coding to have horizon here
            models_dict[m] = model.forecast(iInputDS=train.reset_index(),
                                            iHorizon=forecast_len)
        if m == "Gluonts":
            freqed = pd.infer_freq(train.index)
            if freqed == "MS":
                freq = "M"
            else:
                freq = freqed
            estimator = DeepAREstimator(
                freq=freq,
                prediction_length=forecast_len,
                trainer=Trainer(epochs=6, ctx="gpu"),
            )  # use_feat_dynamic_real=True
            # NB: both branches below are identical in the original code;
            # only the Trainer ctx above selects the device.
            if GPU:
                models_dict[m] = estimator.train(
                    training_data=gluonts_dataframe(train))
            else:
                models_dict[m] = estimator.train(
                    training_data=gluonts_dataframe(train))
        if m == "NBEATS":
            if GPU:
                device = torch.device("cuda")
            else:
                device = torch.device("cpu")
            if os.path.isfile(CHECKPOINT_NAME):
                os.remove(CHECKPOINT_NAME)
            stepped = 35
            batch_size = 10
            if in_sample:
                x_train, y_train, x_test, y_test, net, norm_constant = \
                    nbeats_dataframe(full_df, forecast_len, in_sample=True,
                                     device=device)
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                # test_losses = []
                for r in range(stepped):
                    train_100_grad_steps(data, device, net, optimiser)  # test_losses
                models_dict[m] = {}
                models_dict[m]["model"] = net
                models_dict[m]["x_test"] = x_test
                models_dict[m]["y_test"] = y_test
                models_dict[m]["constant"] = norm_constant
            else:  # if out_sample train is df
                x_train, y_train, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=False, device=device)
                batch_size = 10  # greater than 4 for viz
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                stepped = 5
                # test_losses = []
                for r in range(stepped):
                    # _, forecast = net(torch.tensor(x_train, dtype=torch.float))  # not used
                    # p = forecast.detach().numpy()  # not used
                    train_100_grad_steps(data, device, net, optimiser)  # test_losses
                models_dict[m] = {}
                models_dict[m]["model"] = net
                models_dict[m]["tuple"] = (x_train, y_train, net,
                                           norm_constant)
        # if m == "TBA":
        #     bat = TBATS(use_arma_errors=False, use_box_cox=True)
        #     models_dict[m] = bat.fit(train)
        if m == "TATS":
            bat = TBATS(
                seasonal_periods=list(get_unique_N(season_list(train), 1)),
                use_arma_errors=False,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)
        if m == "TBAT":
            bat = TBATS(use_arma_errors=False, use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBATS1":
            bat = TBATS(
                seasonal_periods=[seasons],
                use_arma_errors=False,
                use_box_cox=True,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)
        if m == "TBATP1":
            bat = TBATS(
                seasonal_periods=[periods],
                use_arma_errors=False,
                use_box_cox=True,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)
        if m == "TBATS2":
            bat = TBATS(
                seasonal_periods=list(get_unique_N(season_list(train), 2)),
                use_arma_errors=False,
                use_box_cox=True,
                use_trend=True,
            )
            models_dict[m] = bat.fit(train)
        # if m == "ProphetGluonts":
        #     freqed = pd.infer_freq(train.index)
        #     freq = "M" if freqed == "MS" else freqed
        #     models_dict["ProphetGluonts"] = ProphetPredictor(
        #         freq=freq, prediction_length=forecast_len)  # use_feat_dynamic_real=True
        #     models_dict["ProphetGluonts"] = list(models_dict["ProphetGluonts"])

    return models_dict, seasons
# (fragment: the opening `deep_ar_estimator = DeepAREstimator(` and the
# `deep_ar_trainer = Trainer(...)` definition precede this excerpt)
    prediction_length=prediction_length,
    context_length=prediction_length * 2,
    num_layers=2,
    num_cells=128,
    cell_type='gru',
    dropout_rate=0.1,
    scaling=True,
    lags_seq=np.arange(1, 1 + 1).tolist(),
    freq=freq,
    use_feat_dynamic_real=False,
    use_feat_static_cat=False,
    use_feat_static_real=False,
    distr_output=distr,
    cardinality=None,
    trainer=deep_ar_trainer)

deep_ar_predictor = deep_ar_estimator.train(train_ds, test_ds)

print("Generating Deep AR forecasts.......")
deep_ar_forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds, predictor=deep_ar_predictor, num_samples=100)
tss = list(tqdm(ts_it, total=len(test_ds)))
deep_ar_forecasts = list(tqdm(deep_ar_forecast_it, total=len(test_ds)))

# deep renewal estimator
trainer = Trainer(
    ctx=mx.context.gpu() if is_gpu & args.use_cuda else mx.context.cpu(),
    batch_size=args.batch_size,
    learning_rate=args.learning_rate,
    epochs=20,
    # (the Trainer(...) call continues beyond this excerpt)
# Create the model object
estimator = DeepAREstimator(
    freq="D",
    context_length=14,  # How many past events do I look at to make a prediction
    prediction_length=prediction_length,
    num_layers=num_layers,
    num_cells=num_cells,
    # num_parallel_samples=8,  # Added 12/22/2020 -- Doesn't seem to be working in parallel
    dropout_rate=0.1,  # Added 12/22/2020
    cell_type='lstm',
    trainer=Trainer(epochs=21))  # modify as needed

# Train the model on the json version (created in the step above)
# of the training portion of the data set
predictor = estimator.train(training_data=training_data)

# Set up the data results of the model
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_data,
    predictor=predictor,
    num_samples=num_samples,  # This is running x times through the probabilistic model
)

forecasts = list(forecast_it)
tss = list(ts_it)


# This function is taken from a tutorial. Still, with some tweaking and
# citations, it should be added to Steinbeck.py
def plot_prob_forecasts(ts_entry, forecast_entry):
    # (body missing in the source excerpt; filled in from the identical
    # helper earlier in this collection)
    plot_length = 150
    prediction_intervals = (50.0, 90.0)
    legend = ["observations", "median prediction"] + [
        f"{k}% prediction interval" for k in prediction_intervals][::-1]
    fig, ax = plt.subplots(1, 1, figsize=(10, 7))
    ts_entry[-plot_length:].plot(ax=ax)
    forecast_entry.plot(prediction_intervals=prediction_intervals, color='g')
    plt.grid(which="both")
    plt.legend(legend, loc="upper left")
def train_models(train, models, forecast_len, full_df=None,
                 seasonality="infer_from_data", in_sample=None):
    seasons = select_seasonality(train, seasonality)

    models_dict = {}
    for m in models:
        if m == "ARIMA":
            models_dict["ARIMA"] = pm.auto_arima(train, seasonal=True,
                                                 m=seasons)
        if m == "Prophet":
            model = Prophet()
            models_dict["Prophet"] = model.fit(prophet_dataframe(train))
        if m == "HWAAS":
            models_dict["HWAAS"] = ExponentialSmoothing(
                train,
                seasonal_periods=seasons,
                trend='add',
                seasonal='add',
                damped=True).fit(use_boxcox=True)
        if m == "HWAMS":
            models_dict["HWAMS"] = ExponentialSmoothing(
                train,
                seasonal_periods=seasons,
                trend='add',
                seasonal='mul',
                damped=True).fit(use_boxcox=True)
        # if m == "HOLT":
        #     models_dict["HOLT"] = Holt(train, exponential=True).fit()
        if m == "PYAF":
            model = autof.cForecastEngine()
            model.train(iInputDS=train.reset_index(),
                        iTime='Date',
                        iSignal='Target',
                        iHorizon=len(train))  # bad coding to have horizon here
            models_dict["PYAF"] = model.forecast(iInputDS=train.reset_index(),
                                                 iHorizon=forecast_len)
        if m == "Gluonts":
            freqed = pd.infer_freq(train.index)
            if freqed == "MS":
                freq = "M"
            else:
                freq = freqed
            estimator = DeepAREstimator(
                freq=freq,
                prediction_length=forecast_len,
                trainer=Trainer(epochs=2))  # use_feat_dynamic_real=True
            print(train)
            print(type(train))
            print(gluonts_dataframe(train))
            models_dict["Gluonts"] = estimator.train(
                training_data=gluonts_dataframe(train))
        if m == "NBEATS":
            device = torch.device('cpu')
            seasons = select_seasonality(train, seasonality)
            if os.path.isfile(CHECKPOINT_NAME):
                os.remove(CHECKPOINT_NAME)
            stepped = 5
            batch_size = 10
            if in_sample:
                x_train, y_train, x_test, y_test, net, norm_constant = \
                    nbeats_dataframe(full_df, forecast_len, in_sample=True)
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                # test_losses = []
                for r in range(stepped):
                    train_100_grad_steps(data, device, net, optimiser)  # test_losses
                models_dict["NBEATS"] = {}
                models_dict["NBEATS"]["model"] = net
                models_dict["NBEATS"]["x_test"] = x_test
                models_dict["NBEATS"]["y_test"] = y_test
                models_dict["NBEATS"]["constant"] = norm_constant
            else:  # if out_sample train is df
                x_train, y_train, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=False)
                batch_size = 10  # greater than 4 for viz
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                stepped = 5
                # test_losses = []
                for r in range(stepped):
                    _, forecast = net(torch.tensor(x_train,
                                                   dtype=torch.float))  # not used
                    p = forecast.detach().numpy()  # not used
                    train_100_grad_steps(data, device, net, optimiser)  # test_losses
                models_dict["NBEATS"] = {}
                models_dict["NBEATS"]["model"] = net
                models_dict["NBEATS"]["tuple"] = (x_train, y_train, net,
                                                  norm_constant)
        # if m == "ProphetGluonts":
        #     freqed = pd.infer_freq(train.index)
        #     freq = "M" if freqed == "MS" else freqed
        #     models_dict["ProphetGluonts"] = ProphetPredictor(
        #         freq=freq, prediction_length=forecast_len)  # use_feat_dynamic_real=True
        #     models_dict["ProphetGluonts"] = list(models_dict["ProphetGluonts"])

    # Create a forecast engine; this is the main object handling all the
    # operations. We use the test dataset as the last step of our training to
    # generate the evaluation metrics, and do not use the test dataset during
    # prediction. Get the best time series model for predicting one week.
    return models_dict