def random_method(decorated: Callable[..., T]) -> Callable[..., T]:
    """Run a method inside an isolated torch random context.

    The wrapped method keeps a `_random_instance` attribute on its object so
    that successive calls keep drawing from the same random generator.

    Parameters
    ----------
    decorated
        A method to be run in an isolated torch random context.
    """
    # Only methods are supported: the wrapper needs `self` to persist the RNG.
    is_method = _is_method(decorated)
    raise_if_not(is_method, "@random_method can only be used on methods.", logger)

    @wraps(decorated)
    def _seeded_call(self, *args, **kwargs) -> T:
        if "random_state" in kwargs:
            # An explicit `random_state` always (re)initialises the generator.
            seed_source = kwargs["random_state"]
            self._random_instance = check_random_state(seed_source)
        elif not hasattr(self, "_random_instance"):
            # First call without an explicit state: draw a fresh seed.
            seed_source = randint(0, high=MAX_NUMPY_SEED_VALUE)
            self._random_instance = check_random_state(seed_source)

        with fork_rng():
            torch_seed = self._random_instance.randint(0, high=MAX_TORCH_SEED_VALUE)
            manual_seed(torch_seed)
            return decorated(self, *args, **kwargs)

    return _seeded_call
def inverse_transform(self,
                      data: Union[TimeSeries, Sequence[TimeSeries]],
                      partial: bool = False) -> Union[TimeSeries, Sequence[TimeSeries]]:
    """
    Inverse-transform data through the pipeline, last transformer first.

    The output of each inverse transformation is fed to the preceding
    transformer. Unless ``partial`` is set, every transformer must be
    invertible, otherwise an error is raised.

    Parameters
    ----------
    data
        (Sequence of) TimeSeries to be inverse transformed.
    partial
        If set to `True`, the inverse transformation is applied even if the pipeline is not fully
        invertible, calling `inverse_transform()` only on the `InvertibleDataTransformer`s

    Returns
    -------
    Union[TimeSeries, Sequence[TimeSeries]]
        Inverse transformed data.
    """
    backwards = reversed(self._transformers)
    if partial:
        # Lazily skip the transformers that cannot be inverted.
        stages = (t for t in backwards if isinstance(t, InvertibleDataTransformer))
    else:
        raise_if_not(self._invertible,
                     "Not all transformers in the pipeline can perform inverse_transform",
                     logger)
        stages = backwards

    for stage in stages:
        data = stage.inverse_transform(data)
    return data
def __getitem__(self, key: Union[int, slice]) -> "Pipeline":
    """
    Return a sub-pipeline selected by an index or a slice.

    The transformers of the returned pipeline are deep copies of the
    selected ones.

    Parameters
    ----------
    key
        Either int or slice indicating the subset of data transformers to keep.

    Returns
    -------
    Pipeline
        Subset of pipeline determined by key.
    """
    raise_if_not(
        isinstance(key, (int, slice)),
        "key must be either an int or a slice",
        logger,
    )

    selected = self._transformers[key]
    if isinstance(key, int):
        # A single index yields one transformer; wrap it to keep a sequence.
        selected = [selected]
    return Pipeline(selected, copy=True)
def __init__(self,
             transformers: Sequence[BaseDataTransformer[TimeSeries]],
             copy: bool = False):
    """
    Pipeline combines multiple data transformers chaining them together.

    Parameters
    ----------
    transformers
        Sequence of data transformers. ``None`` or an empty sequence creates an empty pipeline
        (a warning is logged).
    copy
        If set makes a (deep) copy of each data transformer before adding them to the pipeline

    Raises
    ------
    ValueError
        If any element of `transformers` is not a ``BaseDataTransformer`` (raised through
        ``raise_if_not``).
    """
    # Handle the "nothing given" case first: iterating over `None` to type-check its
    # elements would raise a TypeError before the empty-pipeline branch could run.
    if transformers is None or len(transformers) == 0:
        logger.warning("Empty pipeline created")
        self._transformers: Sequence[BaseDataTransformer[TimeSeries]] = []
    else:
        raise_if_not(
            all(isinstance(t, BaseDataTransformer) for t in transformers),
            "transformers should be objects deriving from BaseDataTransformer",
            logger,
        )
        self._transformers = deepcopy(transformers) if copy else transformers

    # The pipeline can be fully inverted only if every stage can.
    self._invertible = all(
        isinstance(t, InvertibleDataTransformer) for t in self._transformers
    )
def __init__(self, index_generator: CovariateIndexGenerator, attribute: Callable):
    """
    Parameters
    ----------
    index_generator
        An instance of `CovariateIndexGenerator` with methods `generate_train_series()` and
        `generate_inference_series()`. Used to generate the index for encoders.
    attribute
        A callable that takes an index `index` of type `(pd.DatetimeIndex, pd.RangeIndex)` as input
        and returns a np.ndarray of shape `(len(index),)`.
        An example for a correct `attribute` for `index` of type pd.DatetimeIndex:
        ``attribute = lambda index: (index.year - 1950) / 50``. And for pd.RangeIndex:
        ``attribute = lambda index: (index - 1950) / 50``
    """
    not_callable_msg = (
        f"Encountered invalid encoder argument `{attribute}` for encoder `callable`. "
        f"Attribute must be a callable that returns a `np.ndarray`."
    )
    # Validate before delegating to the parent initialiser.
    raise_if_not(callable(attribute), not_callable_msg, logger)

    super().__init__(index_generator)
    self.attribute = attribute
def __init__(self, index_generator: CovariateIndexGenerator, attribute: str):
    """
    Parameters
    ----------
    index_generator
        An instance of `CovariateIndexGenerator` with methods `generate_train_series()` and
        `generate_inference_series()`. Used to generate the index for encoders.
    attribute
        Either 'absolute' or 'relative'. If 'absolute', the generated encoded values will range from
        (0, inf) and the train target series will be used as a reference to set the 0-index. If
        'relative', the generated encoded values will range from (-inf, inf) and the train target
        series end time will be used as a reference to evaluate the relative index positions.
    """
    is_known_attribute = (
        isinstance(attribute, str) and attribute in INTEGER_INDEX_ATTRIBUTES
    )
    invalid_attribute_msg = (
        f"Encountered invalid encoder argument `{attribute}` for encoder `position`. "
        f'Attribute must be one of `("absolute", "relative")`.'
    )
    raise_if_not(is_known_attribute, invalid_attribute_msg, logger)

    super().__init__(index_generator)

    self.attribute = attribute
    # No reference point is known and nothing has been encoded yet.
    self.was_called = False
    self.reference_index: Optional[Tuple[int, Optional[Union[pd.Timestamp, int]]]] = None
def __init__(self, models: Union[List[ForecastingModel], List[GlobalForecastingModel]]):
    """Validate and store the models to be ensembled.

    Parameters
    ----------
    models
        Non-empty list of unfitted forecasting models. The models must either all be
        ``GlobalForecastingModel`` instances, or none of them may be.
    """
    is_nonempty_list = isinstance(models, list) and models
    raise_if_not(
        is_nonempty_list,
        "Cannot instantiate EnsembleModel with an empty list of models",
        logger,
    )

    all_global = all(isinstance(model, GlobalForecastingModel) for model in models)
    all_local = all(
        isinstance(model, ForecastingModel)
        and not isinstance(model, GlobalForecastingModel)
        for model in models
    )
    self.is_global_ensemble = all_global

    # Mixing local and global models is not supported.
    raise_if_not(
        all_local or all_global,
        "All models must either be GlobalForecastingModel instances, or none of them should be.",
        logger,
    )

    fitted_flags = [m._fit_called for m in models]
    raise_if(
        any(fitted_flags),
        "Cannot instantiate EnsembleModel with trained/fitted models. "
        "Consider resetting all models with `my_model.untrained_model()`",
        logger,
    )

    super().__init__()
    self.models = models
    self.is_single_series = None
def fill_missing_values(series: TimeSeries,
                        fill: Union[str, float] = 'auto',
                        **interpolate_kwargs) -> TimeSeries:
    """
    Fills missing values in the provided time series

    Parameters
    ----------
    series
        The time series for which to fill missing values
    fill
        The value used to replace the missing values.
        If set to 'auto', will auto-fill missing values using the `pandas.Dataframe.interpolate()` method.
        Integer constants (e.g. ``0``) are accepted and converted to ``float``.
    interpolate_kwargs
        Keyword arguments for `pandas.Dataframe.interpolate()`, only used when fill is set to 'auto'.
        See `the documentation
        <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.interpolate.html>`_
        for the list of supported parameters.

    Returns
    -------
    TimeSeries
        A new TimeSeries with all missing values filled according to the rules above.

    Raises
    ------
    ValueError
        If `fill` is neither a number nor the string 'auto' (raised through
        ``raise_if_not`` / ``raise_if``).
    """
    # An `int` is a valid value for a parameter annotated `float`; normalise it so the
    # constant-fill helper receives exactly what it received before. `bool` is a subclass
    # of `int` but is not a meaningful fill value, so it stays rejected.
    if isinstance(fill, int) and not isinstance(fill, bool):
        fill = float(fill)

    raise_if_not(isinstance(fill, (str, float)),
                 "`fill` should either be a string or a float",
                 logger)
    raise_if(isinstance(fill, str) and fill != 'auto',
             "invalid string for `fill`: can only be set to 'auto'",
             logger)

    if fill == 'auto':
        return _auto_fill(series, **interpolate_kwargs)
    return _const_fill(series, fill)
def wrapper_multivariate_support(*args, **kwargs):
    """Apply `func` to each pair of univariate components and reduce the results.

    The first two positional arguments are the actual and the predicted series; they
    must have the same width. The per-component values are combined with the
    ``reduction`` keyword argument if given, otherwise with the default value of
    the ``reduction`` parameter of `func`.
    """
    # we can avoid checks about args and kwargs since the input is adjusted by the previous decorator
    actual_series = args[0]
    pred_series = args[1]
    # everything after the two series is forwarded untouched
    extra_args = args[2:]

    raise_if_not(
        actual_series.width == pred_series.width,
        "The two TimeSeries instances must have the same width.",
        logger,
    )

    value_list = [
        func(
            actual_series.univariate_component(i),
            pred_series.univariate_component(i),
            *extra_args,
            **kwargs
        )
        for i in range(actual_series.width)
    ]

    if "reduction" in kwargs:
        reduction = kwargs["reduction"]
    else:
        reduction = signature(func).parameters["reduction"].default
    return reduction(value_list)
def __init__(self,
             fill: Union[str, float] = 'auto',
             name: str = "MissingValuesFiller",
             n_jobs: int = 1,
             verbose: bool = False):
    """
    Data transformer to fill missing values from a (sequence of) TimeSeries

    Parameters
    ----------
    fill
        The value used to replace the missing values.
        If set to 'auto', will auto-fill missing values using the `pandas.Dataframe.interpolate()` method.
    name
        A specific name for the transformer
    n_jobs
        The number of jobs to run in parallel. Parallel jobs are created only when a `Sequence[TimeSeries]` is
        passed as input to a method, parallelising operations regarding different `TimeSeries`. Defaults to `1`
        (sequential). Setting the parameter to `-1` means using all the available processors.
        Note: for a small amount of data, the parallelisation overhead could end up increasing the total
        required amount of time.
    verbose
        Optionally, whether to print operations progress
    """
    fill_is_str = isinstance(fill, str)

    raise_if_not(fill_is_str or isinstance(fill, float),
                 "`fill` should either be a string or a float",
                 logger)
    # The only accepted string is the 'auto' keyword.
    raise_if(fill_is_str and fill != 'auto',
             "invalid string for `fill`: can only be set to 'auto'",
             logger)

    super().__init__(name=name, n_jobs=n_jobs, verbose=verbose)
    self._fill = fill
def __init__(self,
             version: str = "classic",
             alpha_d: float = None,
             alpha_p: float = None):
    """An implementation of the `Croston method
    <https://otexts.com/fpp3/counts.html>`_ for intermittent
    count series.

    Relying on the implementation of `Statsforecasts package
    <https://github.com/Nixtla/statsforecast>`_.

    Parameters
    ----------
    version
        Case-insensitive name of the variant to use:

        - "classic" corresponds to classic Croston.
        - "optimized" corresponds to optimized classic Croston, which searches
          for the optimal ``alpha`` smoothing parameter and can take longer
          to run. Otherwise, a fixed value of ``alpha=0.1`` is used.
        - "sba" corresponds to the adjustment of the Croston method known as
          the Syntetos-Boylan Approximation [1]_.
        - "tsb" corresponds to the adjustment of the Croston method proposed by
          Teunter, Syntetos and Babai [2]_. In this case, `alpha_d` and `alpha_p` must
          be set.
    alpha_d
        For the "tsb" version, the alpha smoothing parameter to apply on demand.
    alpha_p
        For the "tsb" version, the alpha smoothing parameter to apply on probability.

    Raises
    ------
    ValueError
        If `version` is not one of the supported names, or if "tsb" is requested without
        both `alpha_d` and `alpha_p` (raised through ``raise_if_not`` / ``raise_if``).

    References
    ----------
    .. [1] Aris A. Syntetos and John E. Boylan. The accuracy of intermittent demand estimates.
           International Journal of Forecasting, 21(2):303 – 314, 2005.
    .. [2] Ruud H. Teunter, Aris A. Syntetos, and M. Zied Babai.
           Intermittent demand: Linking forecasting to inventory obsolescence.
           European Journal of Operational Research, 214(3):606 – 615, 2011.
    """
    super().__init__()

    # Normalise once so that validation and dispatch agree: previously "Classic" passed the
    # (lower-cased) validation but then fell through to the "tsb" branch.
    version = version.lower()
    raise_if_not(
        version in ["classic", "optimized", "sba", "tsb"],
        'The provided "version" parameter must be set to "classic", "optimized", "sba" or "tsb".',
    )

    if version == "classic":
        self.method = croston_classic
    elif version == "optimized":
        self.method = croston_optimized
    elif version == "sba":
        self.method = croston_sba
    else:  # "tsb"
        raise_if(
            alpha_d is None or alpha_p is None,
            'alpha_d and alpha_p must be specified when using "tsb".',
        )
        self.method = croston_tsb
        self.alpha_d = alpha_d
        self.alpha_p = alpha_p

    self.version = version
def fit(self, series: TimeSeries):
    """Fit on `series` and remember its last ``K`` values.

    Parameters
    ----------
    series
        The training series; it must contain at least ``self.K`` points.

    Returns
    -------
    self
        The fitted model.
    """
    super().fit(series)

    k = self.K
    has_enough_points = len(series) >= k
    raise_if_not(
        has_enough_points,
        f"The time series requires at least K={k} points",
        logger,
    )

    values = series.univariate_values()
    self.last_k_vals = values[-k:]
    return self
def _check_sizes(tup, name):
    """Check that `tup` holds `num_stacks` entries of `num_blocks` items each.

    `name` is only used to build the error messages.
    """
    stack_count_matches = len(tup) == num_stacks
    raise_if_not(
        stack_count_matches,
        f"the length of {name} must match the number of stacks.",
    )

    entry_lengths = [len(entry) for entry in tup]
    raise_if_not(
        all(length == num_blocks for length in entry_lengths),
        f"the length of each tuple in {name} must be `num_blocks={num_blocks}`",
    )
def inverse_transform(self,
                      series: Union[TimeSeries, Sequence[TimeSeries]],
                      *args,
                      **kwargs) -> Union[TimeSeries, List[TimeSeries]]:
    """Inverse-transform a (sequence of) series.

    When a sequence is given, the series it contains are inverse-transformed in parallel.

    Parameters
    ----------
    series
        the (sequence of) series be inverse-transformed.
    args
        Additional positional arguments for the :func:`ts_inverse_transform()` method
    kwargs
        Additional keyword arguments for the :func:`ts_inverse_transform()` method

        component_mask : Optional[np.ndarray] = None
            Optionally, a 1-D boolean np.ndarray of length ``series.n_components`` that specifies
            which components of the underlying `series` the Scaler should consider.

    Returns
    -------
    Union[TimeSeries, List[TimeSeries]]
        Inverse transformed data.
    """
    # Only transformers that track fitting expose `_fit_called`.
    if hasattr(self, "_fit_called"):
        raise_if_not(
            self._fit_called,
            "fit() must have been called before inverse_transform()",
            logger,
        )

    single_input = isinstance(series, TimeSeries)
    data = [series] if single_input else series

    input_iterator = _build_tqdm_iterator(
        self._inverse_transform_iterator(data),
        verbose=self._verbose,
        desc=f"Inverse ({self._name})",
        total=len(data),
    )
    transformed_data = _parallel_apply(
        input_iterator,
        self.__class__.ts_inverse_transform,
        self._n_jobs,
        args,
        kwargs,
    )

    # Give back a bare series when a bare series came in.
    if single_input:
        return transformed_data[0]
    return transformed_data
def __init__(
    self,
    theta: int = 2,
    seasonality_period: Optional[int] = None,
    season_mode: SeasonalityMode = SeasonalityMode.MULTIPLICATIVE,
):
    """
    An implementation of the Theta method with configurable `theta` parameter. See [1]_.

    The training time series is de-seasonalized according to `seasonality_period`,
    or an inferred seasonality period.

    `season_mode` must be a ``SeasonalityMode`` Enum member.

    You can access the Enum with ``from darts import SeasonalityMode``.

    Parameters
    ----------
    theta
        Value of the theta parameter. Defaults to 2. Cannot be set to 0.
        If `theta = 1`, then the theta method restricts to a simple exponential smoothing (SES)
    seasonality_period
        User-defined seasonality period. If not set, will be tentatively inferred from the training series upon
        calling :func:`fit()`.
    season_mode
        Type of seasonality.
        Either ``SeasonalityMode.MULTIPLICATIVE``, ``SeasonalityMode.ADDITIVE`` or ``SeasonalityMode.NONE``.
        Defaults to ``SeasonalityMode.MULTIPLICATIVE``.

    Raises
    ------
    ValueError
        If `season_mode` is not a ``SeasonalityMode`` member or if `theta` is 0.

    References
    ----------
    .. [1] `Unmasking the Theta method <https://robjhyndman.com/papers/Theta.pdf>`_
    """
    super().__init__()

    self.model = None
    self.coef = 1
    self.alpha = 1
    self.length = 0
    self.theta = theta
    self.is_seasonal = False
    self.seasonality = None
    self.seasonality_period = seasonality_period
    self.season_period = None
    self.season_mode = season_mode

    # `x in SomeEnum` raises TypeError for non-members before Python 3.12, which would
    # bypass the intended error message; an isinstance check behaves the same everywhere.
    raise_if_not(
        isinstance(season_mode, SeasonalityMode),
        f"Unknown value for season_mode: {season_mode}.",
        logger,
    )

    if self.theta == 0:
        raise_log(ValueError("The parameter theta cannot be equal to 0."), logger)
def set_n_jobs(self, value: int):
    """Set the number of processors the transformer uses when processing multiple ``TimeSeries``.

    Parameters
    ----------
    value
        New n_jobs value. Set to `-1` for using all the available cores.
    """
    is_integer = isinstance(value, int)
    raise_if_not(is_integer, "n_jobs must be an integer")

    self._n_jobs = value
def init_size(self, n: int, m: int):
    """Compute the per-column active ranges of the parallelogram window for an n-by-m grid.

    For each of the `n` columns a ``[lower, upper]`` pair is derived from
    ``self.max_slope`` and a second, shallower slope obtained by mirroring
    ``max_slope`` around the grid diagonal. The resulting integer array of shape
    ``(n, 2)`` is passed to the parent initialiser.

    Parameters
    ----------
    n
        Number of columns of the grid.
    m
        Number of rows of the grid.

    Raises
    ------
    ValueError
        If ``self.max_slope`` is not strictly greater than ``m / n`` (raised through
        ``raise_if_not``).
    """
    self.n = n
    self.m = m

    max_slope = self.max_slope
    diagonal_slope = m / n  # rise over run

    raise_if_not(
        max_slope > diagonal_slope,
        f"Itakura slope {max_slope} must be greater than {diagonal_slope} to form valid parallelogram.",
    )

    # Mirror the steep slope around the diagonal (in angle space) to get the shallow slope.
    max_slope_angle = atan(max_slope)
    diagonal_slope_angle = atan(diagonal_slope)
    diff_slope_angle = max_slope_angle - diagonal_slope_angle
    min_slope = tan(diagonal_slope_angle - diff_slope_angle)

    # Derivation for determining how wide the steep top sides (A) and shallow bottom (D) are
    # max_slope*x + (n-x)*min_slope = m
    # max_slope*x + n*min_slope - min_slope*x = m
    # (max_slope - min_slope)*x = m - n*min_slope
    # x = (m - n*min_slope) / (max_slope - min_slope)

    # Column 0 holds the lower bound and column 1 the upper bound of each grid column.
    ranges = np.zeros((self.n, 2), dtype=float)

    # Lower bound: grows with min_slope for the first `shallow_bottom` columns,
    # then with max_slope, continuing from the last shallow value.
    shallow_bottom = int(
        np.round((m - n * max_slope) / (min_slope - max_slope)) + 1)
    ranges[:shallow_bottom, 0] = np.arange(shallow_bottom)
    ranges[shallow_bottom:, 0] = np.arange(n - shallow_bottom) + 1
    ranges[:shallow_bottom, 0] *= min_slope
    ranges[shallow_bottom:, 0] *= max_slope
    ranges[shallow_bottom:, 0] += ranges[shallow_bottom - 1, 0]

    # Upper bound: grows with max_slope for the first `steep_top` columns,
    # then with min_slope, continuing from the last steep value.
    steep_top = int(np.round(
        (m - n * min_slope) / (max_slope - min_slope)))
    ranges[:steep_top, 1] = np.arange(steep_top) + 1
    ranges[steep_top:, 1] = np.arange(n - steep_top) + 1
    ranges[:steep_top:, 1] *= max_slope
    ranges[steep_top:, 1] *= min_slope
    ranges[steep_top:, 1] += ranges[steep_top - 1, 1]

    # Round outwards (lower bound down, upper bound up), both in place.
    np.floor(ranges[:, 0], out=ranges[:, 0])
    np.ceil(ranges[:, 1], out=ranges[:, 1])

    # Clip the lower bound to [0, m - 1] and the upper bound to [1, m].
    ranges = np.maximum([0, 1], ranges)
    ranges = np.minimum([self.m - 1, self.m], ranges)
    ranges = ranges.astype(int)
    # The first column always starts at row 0.
    ranges[0][0] = 0

    super().__init__(n, m, ranges)
def remove_seasonality(
    ts: TimeSeries,
    freq: int = None,
    model: SeasonalityMode = SeasonalityMode.MULTIPLICATIVE,
    method: str = "naive",
    **kwargs,
) -> TimeSeries:
    """
    Adjusts the TimeSeries `ts` for a seasonality of order `freq` using the `model` decomposition.

    Parameters
    ----------
    ts
        The TimeSeries to adjust.
    freq
        The seasonality period to use.
    model
        The type of decomposition to use.
        Must be a `from darts import SeasonalityMode` Enum member.
        Either SeasonalityMode.MULTIPLICATIVE or SeasonalityMode.ADDITIVE.
        Defaults SeasonalityMode.MULTIPLICATIVE.
    method
        The method to be used to decompose the series.
        - "naive" : Seasonal decomposition using moving averages [1]_.
        - "STL" : Season-Trend decomposition using LOESS [2]_. Only compatible with ``ADDITIVE`` model type.
        Defaults to "naive"
    kwargs
        Other keyword arguments are passed down to the decomposition method.

    Returns
    -------
    TimeSeries
        A new TimeSeries instance that corresponds to the seasonality-adjusted 'ts'.

    References
    ----------
    .. [1] https://www.statsmodels.org/devel/generated/statsmodels.tsa.seasonal.seasonal_decompose.html
    .. [2] https://www.statsmodels.org/devel/generated/statsmodels.tsa.seasonal.STL.html
    """
    ts._assert_univariate()

    raise_if_not(
        model is not SeasonalityMode.NONE,
        "The model must be either MULTIPLICATIVE or ADDITIVE.",
    )

    # STL can only produce an additive decomposition.
    is_additive = model in [SeasonalityMode.ADDITIVE, ModelMode.ADDITIVE]
    raise_if(
        not is_additive and method == "STL",
        f"Only ADDITIVE seasonality is compatible with the STL method. Current model is {model}.",
        logger,
    )

    _trend, seasonal_component = extract_trend_and_seasonality(
        ts, freq, model, method, **kwargs
    )
    return remove_from_series(ts, seasonal_component, model)
def __init__(
    self,
    target_series: Union[TimeSeries, Sequence[TimeSeries]],
    covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
    n: int = 1,
    input_chunk_length: int = 12,
    output_chunk_length: int = 1,
    covariate_type: CovariateType = CovariateType.PAST,
):
    """
    Contains (past_target, past_covariates | historic_future_covariates, future_past_covariates | future_covariate,
    static_covariates).

    "future_past_covariates" are past covariates that happen to be also known in the future - those
    are needed for forecasting with n > output_chunk_length by any model relying on past covariates.
    For this reason, when n > output_chunk_length, this dataset will also emit the "future past_covariates".

    "historic_future_covariates" are historic future covariates that are given for the input_chunk in the past.

    Parameters
    ----------
    target_series
        The target series that are to be predicted into the future.
    covariates
        Optionally, one or a sequence of `TimeSeries` containing either past or future covariates. If covariates
        were used during training, the same type of covariates must be supplied at prediction.
    n
        Forecast horizon: The number of time steps to predict after the end of the target series.
    input_chunk_length
        The length of the target series the model takes as input.
    output_chunk_length
        The length of the target series the model emits in output.
    covariate_type
        The kind of covariates held by `covariates`; stored as given.
    """
    super().__init__()

    # Normalise single series into one-element lists.
    if isinstance(target_series, TimeSeries):
        self.target_series = [target_series]
    else:
        self.target_series = target_series

    if isinstance(covariates, TimeSeries):
        self.covariates = [covariates]
    else:
        self.covariates = covariates

    self.covariate_type = covariate_type
    self.n = n
    self.input_chunk_length = input_chunk_length
    self.output_chunk_length = output_chunk_length

    counts_match = (
        covariates is None or len(self.target_series) == len(self.covariates)
    )
    raise_if_not(
        counts_match,
        "The number of target series must be equal to the number of covariates.",
    )
def fit(self, series: TimeSeries) -> None:
    """Trains the model on the provided series

    Parameters
    ----------
    series
        A target time series. The model will be trained to forecast this time series.
        It must hold at least ``self.min_train_series_length`` entries.
    """
    n_points = len(series)
    min_length = self.min_train_series_length

    raise_if_not(
        n_points >= min_length,
        f"Train series only contains {n_points} elements but {str(self)} model "
        f"requires at least {min_length} entries",
    )

    self.training_series = series
    self._fit_called = True
def _reshape_out(
    series: TimeSeries,
    vals: np.ndarray,
    component_mask: Optional[np.ndarray] = None,
) -> np.ndarray:
    """Reshapes the 2-D matrix coming out of a transformer into a 3-D matrix suitable to build a TimeSeries.

    Each column of `vals` (one flattened component) is reshaped to ``(-1, series.n_samples)``
    and the resulting blocks are stacked along the 2nd axis.

    Parameters
    ----------
    series
        input TimeSeries that was fed into transformer.
    vals:
        transformer output
    component_mask
        Optionally, a boolean np.ndarray with one entry per component of `series`, specifying
        which components were extracted from `series`. If given, `vals` is inserted back into
        the matching columns of a copy of the original values.
    """
    mask_is_acceptable = component_mask is None or (
        isinstance(component_mask, np.ndarray) and component_mask.dtype == bool
    )
    raise_if_not(
        mask_is_acceptable,
        "If `component_mask` is given, must be a boolean np.ndarray`",
        logger,
    )

    # With a mask, only the selected components are present in `vals`.
    if component_mask is None:
        n_columns = series.width
    else:
        n_columns = component_mask.sum()

    per_component = [
        vals[:, idx].reshape(-1, series.n_samples) for idx in range(n_columns)
    ]
    reshaped = np.stack(per_component, axis=1)

    if component_mask is not None:
        raise_if_not(
            series.width == len(component_mask),
            "mismatch between number of components in `series` and length of `component_mask`",
            logger,
        )
        series_vals = series.all_values(copy=True)
        series_vals[:, component_mask, :] = reshaped
        return series_vals

    return reshaped
def __init__(self, sample_freq: str = "hourly", multivariate: bool = True):
    """
    Parameters
    ----------
    sample_freq: str
        The sampling frequency of the data. Can be "hourly" or "daily". Default is "hourly".
    multivariate: bool
        Whether to return a single multivariate timeseries - if False returns a list of univariate TimeSeries.
        Default is True.

    Raises
    ------
    ValueError
        If `sample_freq` is not "daily" or "hourly" (raised through ``raise_if_not``).
    """
    valid_sample_freq = ["daily", "hourly"]
    raise_if_not(
        sample_freq in valid_sample_freq,
        f"sample_freq must be one of {valid_sample_freq}",
        logger,
    )

    def pre_proces_fn(extracted_dir, dataset_path):
        """Turn the raw pickup records into one count series per location and save them as CSV."""
        raw_df = pd.read_csv(
            Path(extracted_dir, "uber-raw-data-janjune-15.csv"),
            header=0,
            usecols=["Pickup_date", "locationID"],
            index_col=0,
        )

        # The target file name encodes the requested frequency.
        freq_setting = "1H" if "hourly" in str(dataset_path) else "1D"

        output_dict = {}
        for location_id, location_df in raw_df.groupby(by="locationID"):
            # NOTE: the previous `df.sort_index()` call discarded its result, so the rows were
            # never actually sorted; it is dropped here, which keeps the produced file unchanged.
            location_df.index = pd.to_datetime(location_df.index)
            output_dict[location_id] = location_df.resample(rule=freq_setting).size()

        output_df = pd.DataFrame(output_dict)
        try:
            # Keyword name since pandas 1.5.
            output_df.to_csv(dataset_path, lineterminator="\n")
        except TypeError:
            # Older pandas releases only know the previous spelling.
            output_df.to_csv(dataset_path, line_terminator="\n")

    super().__init__(metadata=DatasetLoaderMetadata(
        f"uber_tlc_{sample_freq}.csv",
        uri="https://github.com/fivethirtyeight/uber-tlc-foil-response/raw/"
        "63bb878b76f47f69b4527d50af57aac26dead983/"
        "uber-trip-data/uber-raw-data-janjune-15.csv.zip",
        hash="9ed84ebe0df4bc664748724b633b3fe6"
        if sample_freq == "hourly" else "24f9fd67e4b9e53f0214a90268cd9bee",
        header_time="Pickup_date",
        format_time="%Y-%m-%d %H:%M",
        pre_process_zipped_csv_fn=pre_proces_fn,
        multivariate=multivariate,
    ))
def set_verbose(self, value: bool):
    """Set the verbosity status.

    `True` enables the detailed report about the scaler's operation progress,
    `False` disables any additional information.

    Parameters
    ----------
    value
        New verbosity status
    """
    is_boolean = isinstance(value, bool)
    raise_if_not(is_boolean, "Verbosity status must be a boolean.")

    self._verbose = value
def test_raise_if_not(self):
    """`raise_if_not` stays silent on a true condition and logs + raises on a false one."""
    with LogCapture() as lc:
        logger = get_logger(__name__)
        logger.handlers = []

        # A satisfied condition must not raise. It is called outside of any exception
        # handler so that an unexpected raise fails the test instead of being swallowed.
        raise_if_not(True, "test", logger)

        # checking whether exception was properly raised
        with self.assertRaises(ValueError):
            raise_if_not(False, "test", logger)

    # testing correct log message (exactly one record, produced by the failing call)
    lc.check((__name__, "ERROR", "ValueError: test"))
def __init__(self,
             transformers: Sequence[BaseDataTransformer],
             copy: bool = False,
             verbose: bool = None,
             n_jobs: int = None):
    """
    Pipeline combines multiple data transformers chaining them together.

    Parameters
    ----------
    transformers
        Sequence of data transformers. ``None`` or an empty sequence creates an empty pipeline
        (a warning is logged).
    copy
        If set makes a (deep) copy of each data transformer before adding them to the pipeline
    n_jobs
        The number of jobs to run in parallel. Parallel jobs are created only when a `Sequence[TimeSeries]` is
        passed as input to a method, parallelising operations regarding different `TimeSeries`.
        Setting the parameter to `-1` means using all the available processors.
        Note: for a small amount of data, the parallelisation overhead could end up increasing the total
        required amount of time.
        Note: this parameter will overwrite the value set in each single transformer. Leave this parameter set to
        `None` (the default) for keeping the original transformers' configurations.
    verbose
        Whether to print progress of the operations.
        Note: this parameter will overwrite the value set in each single transformer. Leave this parameter set
        to `None` (the default) for keeping the transformers configurations.

    Raises
    ------
    ValueError
        If any element of `transformers` is not a ``BaseDataTransformer`` (raised through
        ``raise_if_not``).
    """
    # Handle the "nothing given" case first: iterating over `None` to type-check its
    # elements would raise a TypeError before the empty-pipeline branch could run.
    if transformers is None or len(transformers) == 0:
        logger.warning("Empty pipeline created")
        self._transformers: Sequence[BaseDataTransformer[TimeSeries]] = []
    else:
        raise_if_not(
            all(isinstance(t, BaseDataTransformer) for t in transformers),
            "transformers should be objects deriving from BaseDataTransformer",
            logger,
        )
        self._transformers = deepcopy(transformers) if copy else transformers

    # The pipeline can be fully inverted only if every stage can.
    self._invertible = all(
        isinstance(t, InvertibleDataTransformer) for t in self._transformers
    )

    # Pipeline-level settings, when given, override each transformer's own.
    if verbose is not None:
        for transformer in self._transformers:
            transformer.set_verbose(verbose)

    if n_jobs is not None:
        for transformer in self._transformers:
            transformer.set_n_jobs(n_jobs)
def _prepare_pooling_downsampling(pooling_kernel_sizes, n_freq_downsample,
                                  in_len, out_len, num_blocks, num_stacks):
    """Validate, or automatically derive, the pooling kernel sizes and downsampling coefficients.

    Both settings are expected as `num_stacks` entries of `num_blocks` values each. A
    setting given as ``None`` is generated: one value per stack, repeated for every
    block, going geometrically from half the relevant length (`in_len` for pooling,
    `out_len` for downsampling) down to 1.

    Returns
    -------
    tuple
        ``(pooling_kernel_sizes, n_freq_downsample)``
    """

    def _check_sizes(tup, name):
        raise_if_not(
            len(tup) == num_stacks,
            f"the length of {name} must match the number of stacks.",
        )
        raise_if_not(
            all([len(i) == num_blocks for i in tup]),
            f"the length of each tuple in {name} must be `num_blocks={num_blocks}`",
        )

    def _geometric_sizes(length):
        # go from length/2 to 1 in num_stacks steps
        max_v = max(length // 2, 1)
        per_stack = max_v // np.geomspace(1, max_v, num_stacks)
        return tuple((int(v), ) * num_blocks for v in per_stack)

    if pooling_kernel_sizes is not None:
        # check provided pooling format
        _check_sizes(pooling_kernel_sizes, "`pooling_kernel_sizes`")
    else:
        # make stacks handle different frequencies
        pooling_kernel_sizes = _geometric_sizes(in_len)
        logger.info(
            f"(N-HiTS): Using automatic kernel pooling size: {pooling_kernel_sizes}."
        )

    if n_freq_downsample is not None:
        # check provided downsample format
        _check_sizes(n_freq_downsample, "`n_freq_downsample`")

        # check that last value is 1
        raise_if_not(
            n_freq_downsample[-1][-1] == 1,
            "the downsampling coefficient of the last block of the last stack must be 1 "
            + "(i.e., `n_freq_downsample[-1][-1]`).",
        )
    else:
        n_freq_downsample = _geometric_sizes(out_len)
        logger.info(
            f"(N-HiTS): Using automatic downsampling coefficients: {n_freq_downsample}."
        )

    return pooling_kernel_sizes, n_freq_downsample
def __init__(
    self,
    forecasting_models: Union[List[ForecastingModel], List[GlobalForecastingModel]],
    regression_train_n_points: int,
    regression_model=None,
):
    """
    Use a regression model for ensembling individual models' predictions.

    The provided regression model must implement ``fit()`` and ``predict()`` methods
    (e.g. scikit-learn regression models). Note that here the regression model is used to learn how to
    best ensemble the individual forecasting models' forecasts. It is not the same usage of regression
    as in :class:`RegressionModel`, where the regression model is used to produce forecasts based on the
    lagged series.

    Parameters
    ----------
    forecasting_models
        List of forecasting models whose predictions to ensemble
    regression_train_n_points
        The number of points to use to train the regression model
    regression_model
        Any regression model with ``predict()`` and ``fit()`` methods (e.g. from scikit-learn)
        Default: ``LinearRegressionModel(lags=None, lags_future_covariates=[0], fit_intercept=False)``

    Raises
    ------
    ValueError
        If the lags of the (wrapped) regression model are not exactly ``{"future": [0]}``
        (raised through ``raise_if_not``).
    """
    super().__init__(forecasting_models)

    if regression_model is None:
        regression_model = LinearRegressionModel(
            lags=None, lags_future_covariates=[0], fit_intercept=False)
    elif not isinstance(regression_model, RegressionModel):
        # scikit-learn like model: wrap it; a `RegressionModel` instance is used as given
        regression_model = RegressionModel(lags_future_covariates=[0],
                                           model=regression_model)

    # check lags of the regression model
    raise_if_not(
        regression_model.lags == {"future": [0]},
        f"`lags` and `lags_past_covariates` of regression model must be `None` "
        f"and `lags_future_covariates` must be [0]. Given:\n"
        f"{regression_model.lags}",
    )

    self.regression_model = regression_model
    self.train_n_points = regression_train_n_points
def filter(self, series: TimeSeries) -> TimeSeries:
    """Filters a given series

    This implementation only validates that the input is deterministic.

    Parameters
    ----------
    series
        The series to filter.

    Returns
    -------
    TimeSeries
        A time series containing the filtered values.
    """
    is_deterministic = series.is_deterministic
    raise_if_not(
        is_deterministic,
        "The input series must be deterministic (observations).",
    )
def __init__(self, n: int, m: int, ranges: np.ndarray = None):
    """
    Parameters
    ----------
    n
        The width of the window, must be equal to the length of series1
    m
        The height of the window, must be equal to the length of series2
    ranges
        Ranges of active cells within a column [[start_column0, end_column0], ...]
        with shape (n, 2) and where start >= 0 and end <= m.

    Raises
    ------
    ValueError
        If `ranges` does not have shape ``(n, 2)``, or contains a start below 0 or an end
        above `m` (raised through ``raise_if_not`` / ``raise_if``).
    """
    self.n = n
    self.m = m

    if ranges is not None:
        raise_if_not(
            ranges.shape == (n, 2),
            f"Expects a 2d array with [start, end] for each column and shape = ({n}, 2)",
        )

        # Prepend an extra leading column with the single active range [0, 1).
        ranges = np.insert(ranges, 0, [0, 1], axis=0)
        start = ranges[:, 0]
        end = ranges[:, 1]

        raise_if(np.any(start < 0), "Start must be >=0")
        # `end == m` is allowed, so the message must say "<=" (it used to say "<").
        raise_if(np.any(end > m), "End must be <=m")

        # Columns whose end precedes their start contribute no cells.
        diff = np.maximum(end - start, 0)
        self.length = np.sum(diff)

        # Shift the bounds of every provided column by one; the prepended column keeps [0, 1].
        ranges[1:] += 1
        ranges = ranges.flatten()
    else:
        # No ranges given: every column starts at `m` and ends at 0, except the extra
        # leading column, which is [0, 1] and is the only one counted in `length`.
        ranges = np.zeros((n + 1) * 2, dtype=int)
        ranges[0::2] = self.m  # start
        ranges[1::2] = 0  # end
        ranges[0] = 0
        ranges[1] = 1
        self.length = 1

    # Flat layout: [start_0, end_0, start_1, end_1, ...]
    self.column_ranges = array.array("i", ranges)
def init_size(self, n: int, m: int):
    """Build the band of active cells of half-width ``self.window_size`` for an n-by-m grid.

    Column ``i`` is active from ``max(0, i - window_size)`` up to
    ``min(m, i + window_size + 1)``.

    Parameters
    ----------
    n
        Number of columns of the grid.
    m
        Number of rows of the grid.
    """
    self.n = n
    self.m = m

    diff = abs(n - m)
    raise_if_not(
        diff < self.window_size,
        f"Window size must at least cover size difference ({diff})",
    )

    # One start and one end per column, centred on the column index.
    starts = np.arange(n)
    ends = np.arange(n)
    starts -= self.window_size
    ends += self.window_size

    # Clip to the grid; the end gets one extra cell before clipping.
    starts[:] = np.maximum(0, starts)
    ends[:] = np.minimum(self.m, ends + 1)

    ranges = np.stack((starts, ends), axis=1)
    super().__init__(n, m, ranges)