def _evaluate(self, loader):
    """Evaluates model performance.

    Args:
        loader (torch DataLoader): instantiated validation DataLoader (with TimeDataset)
    Returns:
        df with evaluation metrics
    """
    test_metrics = metrics.MetricsCollection([m.new() for m in self.metrics.batch_metrics])
    if self.highlight_forecast_step_n is not None:
        test_metrics.add_specific_target(target_pos=self.highlight_forecast_step_n - 1)
    ## Run
    self.test_metrics = test_metrics
    self.trainer.test(self.model, test_dataloaders=loader, ckpt_path=None, verbose=False)
    test_metrics_dict = self.test_metrics.compute(save=True)
    log.info("Validation metrics: {}".format(utils.print_epoch_metrics(test_metrics_dict)))
    val_metrics_df = self.test_metrics.get_stored_as_df()
    return val_metrics_df
def __init__(
    self,
    n_lags=60,
    n_forecasts=20,
    batch_size=None,
    epochs=100,
    patience_early_stopping=10,
    early_stop=True,
    learning_rate=3e-2,
    auto_lr_find=True,
    num_workers=3,
    loss_func="QuantileLoss",
    hidden_size=32,
    attention_head_size=1,
    hidden_continuous_size=8,
    dropout=0.1,
):
    """
    Args:
        n_lags: int - Number of time units that condition the predictions. Also known as 'lookback period'.
            Should be between 1-10 times the prediction length. Can be seen as equivalent to n_lags in NP.
        n_forecasts: int - Number of time units that the model predicts
        batch_size: int - batch size. If set to None, an automatic batch size will be used.
        epochs: int - number of epochs for training. Will be overwritten if EarlyStopping is applied.
        patience_early_stopping: int - patience parameter of the EarlyStopping callback
        early_stop: bool - whether to use the EarlyStopping callback
        learning_rate: float - learning rate for the model. Will be overwritten if auto_lr_find is used.
        auto_lr_find: bool - whether to use the automatic learning rate finder
        num_workers: int - number of workers for the DataLoaders
        loss_func: str - loss function taking prediction and targets; should be from the MultiHorizonMetric class.
            Defaults to QuantileLoss.
        hidden_size: int - hidden size of the network; its main hyperparameter, can range from 8 to 512.
        attention_head_size: int - number of attention heads; larger values (up to 8) for large amounts of data.
        hidden_continuous_size: int - hidden size used for processing continuous variables.
        dropout: float - dropout in RNN layers, should be between 0 and 1.
    """
    self.batch_size = batch_size
    self.epochs = epochs
    self.patience_early_stopping = patience_early_stopping
    self.early_stop = early_stop
    self.learning_rate = learning_rate
    self.auto_lr_find = auto_lr_find
    if self.learning_rate is not None:
        self.auto_lr_find = False
    self.num_workers = num_workers
    self.context_length = n_lags
    self.prediction_length = n_forecasts
    self.hidden_size = hidden_size
    self.attention_head_size = attention_head_size
    self.hidden_continuous_size = hidden_continuous_size
    self.dropout = dropout
    self.loss_func = loss_func
    self.fitted = False
    self.freq = None

    if isinstance(self.loss_func, str):
        if self.loss_func.lower() in ["huber", "smoothl1", "smoothl1loss"]:
            self.loss_func = torch.nn.SmoothL1Loss()
        elif self.loss_func.lower() in ["mae", "l1", "l1loss"]:
            self.loss_func = torch.nn.L1Loss()
        elif self.loss_func.lower() in ["mse", "mseloss", "l2", "l2loss"]:
            self.loss_func = torch.nn.MSELoss()
        elif self.loss_func.lower() in ["quantileloss"]:
            self.loss_func = QuantileLoss()
        else:
            raise NotImplementedError("Loss function {} name not defined".format(self.loss_func))
    elif callable(self.loss_func):
        pass
    elif hasattr(torch.nn.modules.loss, self.loss_func.__class__.__name__):
        pass
    else:
        raise NotImplementedError("Loss function {} not found".format(self.loss_func))

    self.metrics = metrics.MetricsCollection(
        metrics=[
            metrics.LossMetric(torch.nn.SmoothL1Loss()),
            metrics.MAE(),
            metrics.MSE(),
        ],
        value_metrics=[
            # metrics.ValueMetric("Loss"),
        ],
    )
    self.val_metrics = metrics.MetricsCollection([m.new() for m in self.metrics.batch_metrics])
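# Illustrative usage (a minimal sketch; the wrapper class name `TFT` is an assumption,
# since the class statement is not shown in this excerpt):
#
#   m = TFT(n_lags=60, n_forecasts=20, batch_size=64, epochs=50,
#           loss_func="QuantileLoss", hidden_size=32, attention_head_size=1)
#
# Note that with the default learning_rate=3e-2 (i.e. not None), the constructor above
# switches auto_lr_find off; pass learning_rate=None to keep the automatic finder enabled.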
def __init__(
    self,
    n_lags=60,
    n_forecasts=20,
    batch_size=None,
    epochs=100,
    weight_decay=1e-2,
    patience_early_stopping=10,
    early_stop=True,
    learning_rate=3e-2,
    auto_lr_find=False,
    num_workers=3,
    loss_func="huber",
):
    """
    Args:
        n_lags: int - Number of time units that condition the predictions. Also known as 'lookback period'.
            Should be between 1-10 times the prediction length. Can be seen as equivalent to n_lags in NP.
        n_forecasts: int - Number of time units that the model predicts
        batch_size: int - batch size. If set to None, an automatic batch size will be used.
        epochs: int - number of epochs for training. Will be overwritten if EarlyStopping is applied.
        patience_early_stopping: int - patience parameter of the EarlyStopping callback
        early_stop: bool - whether to use the EarlyStopping callback
        weight_decay: float - weight_decay parameter for the NBeats model
        learning_rate: float - learning rate for the model. Will be overwritten if auto_lr_find is used.
        auto_lr_find: bool - whether to use the automatic learning rate finder
        num_workers: int - number of workers for the DataLoaders
        loss_func: str - loss function to use, one of ['huber', 'MSE']
    """
    self.batch_size = batch_size
    self.weight_decay = weight_decay
    self.epochs = epochs
    self.patience_early_stopping = patience_early_stopping
    self.early_stop = early_stop
    self.learning_rate = learning_rate
    self.auto_lr_find = auto_lr_find
    self.num_workers = num_workers
    self.context_length = n_lags
    self.prediction_length = n_forecasts
    self.loss_func = loss_func
    self.fitted = False
    self.freq = None

    if isinstance(self.loss_func, str):
        if self.loss_func.lower() in ["huber", "smoothl1", "smoothl1loss"]:
            self.loss_func = torch.nn.SmoothL1Loss()
        elif self.loss_func.lower() in ["mae", "l1", "l1loss"]:
            self.loss_func = torch.nn.L1Loss()
        elif self.loss_func.lower() in ["mse", "mseloss", "l2", "l2loss"]:
            self.loss_func = torch.nn.MSELoss()
        else:
            raise NotImplementedError("Loss function {} name not defined".format(self.loss_func))
    elif callable(self.loss_func):
        pass
    elif hasattr(torch.nn.modules.loss, self.loss_func.__class__.__name__):
        pass
    else:
        raise NotImplementedError("Loss function {} not found".format(self.loss_func))

    self.metrics = metrics.MetricsCollection(
        metrics=[
            metrics.LossMetric(torch.nn.SmoothL1Loss()),
            metrics.MAE(),
            metrics.MSE(),
        ],
        value_metrics=[
            # metrics.ValueMetric("Loss"),
        ],
    )
    self.val_metrics = metrics.MetricsCollection([m.new() for m in self.metrics.batch_metrics])
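# Illustrative note on loss_func resolution (the wrapper class name `NBeats` is assumed
# for the example only):
#
#   m = NBeats(n_lags=60, n_forecasts=20, loss_func="huber")             # mapped to torch.nn.SmoothL1Loss()
#   m = NBeats(n_lags=60, n_forecasts=20, loss_func=torch.nn.MSELoss())  # any callable loss is passed through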
def __init__(
    self,
    n_lags=10,
    n_forecasts=1,
    num_hidden_layers=1,
    d_hidden=10,
    learning_rate=None,
    epochs=None,
    batch_size=None,
    loss_func="Huber",
    optimizer="AdamW",
    train_speed=None,
    normalize="auto",
    impute_missing=True,
    lstm_bias=True,
    lstm_bidirectional=False,
):
    """
    Args:
        ## Model Config
        n_lags (int): Number of previous time steps used as model inputs ('lookback period').
        num_hidden_layers (int): number of hidden layers to include (default 1).
        n_forecasts (int): Number of steps ahead of prediction time step to forecast.
        d_hidden (int): dimension of the hidden layers. Ignored if num_hidden_layers == 0.

        ## Train Config
        learning_rate (float): Maximum learning rate setting for 1cycle policy scheduler.
            default: None: Automatically sets the learning_rate based on a learning rate range test.
            For manual values, try values ~0.001-10.
        epochs (int): Number of epochs (complete iterations over dataset) to train model.
            default: None: Automatically sets the number of epochs based on dataset size.
                For best results also leave batch_size to None.
            For manual values, try ~5-500.
        batch_size (int): Number of samples per mini-batch.
            default: None: Automatically sets the batch_size based on dataset size.
                For best results also leave epochs to None.
            For manual values, try ~1-512.
        loss_func (str, torch.nn.modules.loss._Loss, 'typing.Callable'): Type of loss to use:
            str ['Huber', 'MSE'],
            or torch loss or callable for custom loss, eg. asymmetric Huber loss

        ## Data config
        normalize (str): Type of normalization to apply to the time series.
            options: ['auto', 'soft', 'off', 'minmax', 'standardize']
            default: 'auto' uses 'minmax' if variable is binary, else 'soft'
            'soft' scales minimum to 0.1 and the 90th quantile to 0.9
        impute_missing (bool): whether to automatically impute missing dates/values.
            Imputation follows a linear method up to `impute_limit_linear` consecutive missing values;
            longer gaps are filled with the trend.

        ## LSTM specific
        lstm_bias (bool): If False, then the layer does not use bias weights b_ih and b_hh. Default: True
        lstm_bidirectional (bool): If True, becomes a bidirectional LSTM. Default: False
    """
    kwargs = locals()

    # General
    self.name = "LSTM"
    self.n_forecasts = n_forecasts
    self.n_lags = n_lags

    # Data Preprocessing
    self.normalize = normalize
    self.impute_missing = impute_missing
    self.impute_limit_linear = 5
    self.impute_rolling = 20

    # Training
    self.config_train = configure.from_kwargs(configure.Train, kwargs)
    self.metrics = metrics.MetricsCollection(
        metrics=[
            metrics.LossMetric(self.config_train.loss_func),
            metrics.MAE(),
            metrics.MSE(),
        ],
        value_metrics=[
            # metrics.ValueMetric("Loss"),
        ],
    )

    # Model
    self.config_model = configure.from_kwargs(configure.Model, kwargs)

    # LSTM specific
    self.lstm_bias = lstm_bias
    self.lstm_bidirectional = lstm_bidirectional

    # set during fit()
    self.data_freq = None

    # Set during _train()
    self.fitted = False
    self.data_params = None
    self.optimizer = None
    self.scheduler = None
    self.model = None

    # set during prediction
    self.future_periods = None

    # later set by user (optional)
    self.highlight_forecast_step_n = None
    self.true_ar_weights = None
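# Sketch of a custom callable loss, as referenced in the docstring above ("eg. asymmetric
# Huber loss"). This helper is not part of the library; it only illustrates the expected
# interface: any callable taking (prediction, target) tensors and returning a scalar
# tensor can be passed as loss_func.
#
#   def asymmetric_huber(pred, target, underestimate_weight=2.0):
#       # penalize under-predictions (positive residuals) more strongly
#       residual = target - pred
#       loss = torch.nn.functional.smooth_l1_loss(pred, target, reduction="none")
#       weight = torch.where(residual > 0, torch.full_like(residual, underestimate_weight), torch.ones_like(residual))
#       return (weight * loss).mean()
#
#   m = LSTM(n_lags=10, n_forecasts=1, loss_func=asymmetric_huber)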
def _train(self, df, df_val=None, progress_bar=True, plot_live_loss=False, hyperparameter_optim=False):
    """Execute model training procedure for a configured number of epochs.

    Args:
        df (pd.DataFrame): containing columns 'ds', 'y' with training data
        df_val (pd.DataFrame): containing columns 'ds', 'y' with validation data
        progress_bar (bool): display updating progress bar
        plot_live_loss (bool): plot live training loss,
            requires [live] install or livelossplot package installed.
    Returns:
        df with metrics
    """
    if plot_live_loss:
        try:
            from livelossplot import PlotLosses
        except ImportError:
            plot_live_loss = False
            log.warning(
                "To plot live loss, please install neuralprophet[live]."
                " Using pip: 'pip install neuralprophet[live]'"
                " Or install the missing package manually: 'pip install livelossplot'",
                exc_info=True,
            )

    loader = self._init_train_loader(df)
    val = df_val is not None

    ## Metrics
    if self.highlight_forecast_step_n is not None:
        self.metrics.add_specific_target(target_pos=self.highlight_forecast_step_n - 1)
    if not self.normalize == "off":
        self.metrics.set_shift_scale((self.data_params["y"].shift, self.data_params["y"].scale))
    val_loader = None
    if val:
        val_loader = self._init_val_loader(df_val)
        val_metrics = metrics.MetricsCollection([m.new() for m in self.metrics.batch_metrics])
        self.val_metrics = val_metrics

    ## Run
    start = time.time()
    if progress_bar:
        training_loop = tqdm(
            range(self.config_train.epochs),
            total=self.config_train.epochs,
            leave=log.getEffectiveLevel() <= 20,
        )
    else:
        training_loop = range(self.config_train.epochs)
    if plot_live_loss:
        live_out = ["MatplotlibPlot"]
        if not progress_bar:
            live_out.append("ExtremaPrinter")
        live_loss = PlotLosses(outputs=live_out)

    self.metrics.reset()
    if val:
        self.val_metrics.reset()

    self.trainer = Trainer(
        max_epochs=self.config_train.epochs,
        checkpoint_callback=False,
        logger=False,  # logger=log
    )

    if hyperparameter_optim:
        return loader, val_loader, self.model
    else:
        if val:
            self.trainer.fit(self.model, train_dataloader=loader, val_dataloaders=val_loader)
        else:
            self.trainer.fit(self.model, train_dataloader=loader)

    ## Metrics
    log.debug("Train Time: {:8.3f}".format(time.time() - start))
    log.debug("Total Batches: {}".format(self.metrics.total_updates))
    metrics_df = self.metrics.get_stored_as_df()
    if val:
        metrics_df_val = self.val_metrics.get_stored_as_df()
        for col in metrics_df_val.columns:
            metrics_df["{}_val".format(col)] = metrics_df_val[col]
    return metrics_df
def __init__( self, n_lags=60, n_forecasts=20, batch_size=None, epochs=100, patience_early_stopping=10, early_stop=True, learning_rate=3e-2, auto_lr_find=False, num_workers=3, loss_func="normaldistributionloss", hidden_size=32, rnn_layers=2, dropout=0.1, ): """ Args: n_lags: int, — Number of time units that condition the predictions. Also known as 'lookback period'. Should be between 1-10 times the prediction length. Can be seen as equivalent for n_lags in NP n_forecasts: int - Number of time units that the model predicts batch_size: int, — batch_size. If set to None, automatic batch size will be set epochs: int, — number of epochs for training. Will be overwritten, if EarlyStopping is applied patience_early_stopping: int, — patience parameter of EarlyStopping callback early_stop: bool, — whether to use EarlyStopping callback learning_rate: float, — learning rate for the model. Will be overwritten, if auto_lr_find is used auto_lr_find: bool, — whether to use automatic laerning rate finder num_workers: int, — number of workers for DataLoaders loss_func: str, Distribution loss function. Keep in mind that each distribution loss function might have specific requirements for target normalization. Defaults to NormalDistributionLoss. hidden_size: int, hidden recurrent size - the most important hyperparameter along with rnn_layers. rnn_layers: int, number of RNN layers - important hyperparameter. dropout: float, dropout in RNN layers, should be between 0 and 1. """ self.batch_size = batch_size self.epochs = epochs self.patience_early_stopping = patience_early_stopping self.early_stop = early_stop self.learning_rate = learning_rate self.auto_lr_find = auto_lr_find if self.learning_rate != None: self.auto_lr_find = False self.num_workers = num_workers self.context_length = n_lags self.prediction_length = n_forecasts self.hidden_size = hidden_size self.rnn_layers = rnn_layers self.dropout = dropout self.loss_func = loss_func self.fitted = False self.freq = None if type(self.loss_func) == str: if self.loss_func.lower() in ["huber", "smoothl1", "smoothl1loss"]: self.loss_func = torch.nn.SmoothL1Loss() elif self.loss_func.lower() in ["mae", "l1", "l1loss"]: self.loss_func = torch.nn.L1Loss() elif self.loss_func.lower() in ["mse", "mseloss", "l2", "l2loss"]: self.loss_func = torch.nn.MSELoss() elif self.loss_func.lower() in [ "normaldistloss", "ndl", "normaldistributionloss" ]: self.loss_func = NormalDistributionLoss() else: raise NotImplementedError( "Loss function {} name not defined".format(self.loss_func)) elif callable(self.loss_func): pass elif hasattr(torch.nn.modules.loss, self.loss_func.__class__.__name__): pass else: raise NotImplementedError("Loss function {} not found".format( self.loss_func)) self.metrics = metrics.MetricsCollection( metrics=[ metrics.LossMetric(torch.nn.SmoothL1Loss()), metrics.MAE(), metrics.MSE(), ], value_metrics=[ # metrics.ValueMetric("Loss"), ], ) self.val_metrics = metrics.MetricsCollection( [m.new() for m in self.metrics.batch_metrics])