Beispiel #1
0
    def transform(self, x: xr.DataArray) -> xr.DataArray:
        """
        Stack lagged copies of x along a "horizon" dimension.

        The result starts with x itself and is extended by x shifted by
        1, 2, ..., ``sample_size - 1`` steps along the configured indexes.
        Positions vacated by a shift are filled with 0.

        :param x: the input time series
        :type x: xr.DataArray
        :return: the stacked series, ordered as (time index, "horizon", ...)
        :rtype: xr.DataArray
        :raises WrongParameterException: if shifting or concatenating raises a ValueError
        """
        # Fall back to the time indexes of x when none were configured.
        shift_dims = self.indexes or _get_time_indeces(x)
        try:
            stacked = x
            for lag in range(1, self.sample_size):
                lagged = x.shift(dict.fromkeys(shift_dims, lag), fill_value=0)
                stacked = xr.concat([stacked, lagged], dim="horizon")
        except ValueError:
            raise WrongParameterException(
                f"Not all indexes ({shift_dims}) are in the indexes of x ({list(x.indexes.keys())}).",
                "Perhaps you set the wrong indexes with set_params or during the initialization of the Sampler.",
                module=self.name)

        time_dim = _get_time_indeces(x)[0]
        return stacked.transpose(time_dim, "horizon", ...)
Beispiel #2
0
    def transform(self, **x: xr.DataArray) -> xr.DataArray:
        """
        Run the pipeline on the given inputs and collect the results of its last steps.

        Each start step receives a copy of the input registered under its key
        and is marked as finished. The pipeline counter is set to the first
        entry of the time index of the first input.

        Note, this method is necessary for enabling subpipelining.

        :param x: The input data, one array per start step key
        :type x: xr.DataArray
        :return: The collected results of all steps flagged as last
        :rtype: xr.DataArray
        """
        for name, (entry_step, _) in self.start_steps.items():
            entry_step.buffer = {name: x[name].copy()}
            entry_step.finished = True

        time_dims = _get_time_indeces(x)
        first_input = list(x.values())[0]
        # The start date of the input time series.
        self.counter = first_input.indexes[time_dims[0]][0]

        final_steps = [step for step in self.id_to_step.values() if step.last]

        if self.batch:
            return self._collect_batches(final_steps, time_dims)
        return self._collect_results(final_steps)
Beispiel #3
0
 def _pack_data(self, start, end, buffer_element=None, return_all=False):
     """
     Provide the requested data from the buffer.

     If ``start`` and ``end`` are both truthy and ``end > start``, the buffer
     is restricted to the half-open window ``[start, end)`` on the first time
     index, with the lower bound clamped to the first buffered timestamp.
     Otherwise the step is marked as finished and the buffer content is
     returned without selection.

     :param start: lower bound of the window (must provide ``to_numpy()``)
     :param end: exclusive upper bound of the window (must provide ``to_numpy()``)
     :param buffer_element: key of a single buffer entry to return
     :param return_all: if True (and no ``buffer_element`` is given), return a
         dict with all buffer entries
     :return: a single buffer entry, or a dict of all entries; the first
         buffer entry if neither ``buffer_element`` nor ``return_all`` is set
     """
     time_index = _get_time_indeces(self.buffer)

     if not (end and start and end > start):
         # No usable window: hand out everything and flag the step as done.
         self.finished = True
         if buffer_element is not None:
             return self.buffer[buffer_element].copy()
         if return_all:
             return self.buffer.copy()
         return list(self.buffer.copy().values())[0]

     stamps = list(self.buffer.values())[0].indexes[time_index[0]]
     lower = max(stamps[0], start.to_numpy())

     def _window(array):
         # Keep only the timestamps inside [lower, end).
         in_range = (stamps >= lower) & (stamps < end.to_numpy())
         return array.sel(**{time_index[0]: stamps[in_range]})

     if buffer_element is not None:
         return _window(self.buffer.copy()[buffer_element])
     if return_all:
         return {key: _window(b.copy()) for key, b in self.buffer.items()}
     return _window(list(self.buffer.copy().values())[0])
    def transform(self, **kwargs: xr.DataArray) -> xr.DataArray:
        """
        Forecast with the wrapped model for as many steps as the input has time entries.

        If the model's ``forecast`` accepts an ``exog`` or ``kwargs`` parameter,
        the values of all inputs are concatenated along the last axis and passed
        as ``exog``. The first element of the value returned by ``forecast`` is
        used as the prediction.

        :param kwargs: A dict of input arrays
        :type kwargs: xr.DataArray
        :return: the prediction, converted with the first input as reference
        :rtype: xr.DataArray
        :raises Exception: if the model has no ``forecast`` method
        """
        first_input = list(kwargs.values())[0]
        time_data = first_input[_get_time_indeces(kwargs)[0]]

        exog_parts = [value.values for value in kwargs.values()]

        if not hasattr(self.model, "forecast"):
            raise Exception(
                f"{self.module.__class__.__name__} has not forecast method...")

        forecast_params = inspect.signature(self.model.forecast).parameters
        steps = len(time_data)
        if "exog" in forecast_params or "kwargs" in forecast_params:
            prediction = self.model.forecast(
                steps,
                exog=np.concatenate(exog_parts, axis=-1),
                **self.predict_kwargs)[0]
        else:
            prediction = self.model.forecast(steps, **self.predict_kwargs)[0]

        return numpy_to_xarray(prediction, first_input, self.name)
Beispiel #5
0
    def transform(self, y: xr.DataArray,
                  **kwargs: xr.DataArray) -> xr.DataArray:
        """
        Calculate the RMSE of every prediction with respect to the target y.

        The first ``offset`` entries are skipped. In rolling mode the RMSE is
        computed over a rolling window of size ``window`` and one value per
        remaining time step is returned; otherwise a single value is returned
        and labelled with the last timestamp of y.

        :param y: the target values
        :type y: xr.DataArray
        :param kwargs: the predictions, one keyword argument per prediction
        :type kwargs: xr.DataArray

        :return: The calculated RMSE with dimensions ("time", "predictions")
        :rtype: xr.DataArray
        :raises InputNotAvailable: if no predictions are passed
        """
        targets = y.values
        if not kwargs:
            message = (
                "No predictions are provided as input for the RMSE Calculator. "
                "You should add the predictions by a seperate key word arguments if you add the RMSECalculator "
                "to the pipeline.")
            logger.error(message)
            raise InputNotAvailable(message)

        names = []
        scores = []
        for name, y_hat in kwargs.items():
            forecast = y_hat.values
            names.append(name)
            if self.rolling:
                time = y[_get_time_indeces(y)[0]][self.offset:]
                # Flatten everything except the leading axis before differencing.
                flat_forecast = forecast.reshape((len(forecast), -1))
                flat_targets = targets.reshape((len(targets), -1))
                squared_error = (flat_forecast[self.offset:] -
                                 flat_targets[self.offset:])**2
                score = pd.DataFrame(squared_error).rolling(
                    self.window).apply(
                        lambda window: np.sqrt(np.mean(window))).values
            else:
                time = [y.indexes[_get_time_indeces(y)[0]][-1]]
                score = [
                    np.sqrt(
                        np.mean((forecast[self.offset:] -
                                 targets[self.offset:])**2))
                ]
            scores.append(score)

        # Bring the scores into (time, predictions) layout.
        table = np.stack(scores).swapaxes(0, 1).reshape((-1, len(names)))
        return xr.DataArray(table,
                            coords={
                                "time": time,
                                "predictions": names
                            },
                            dims=["time", "predictions"])
Beispiel #6
0
    def further_elements(self, counter):
        """
        Check whether every buffered array holds data after the given counter.

        :param counter: The timestamp for which it should be tested if there exist further data after it.
        :type counter: pd.Timestamp
        :return: True if the buffer has a time index and the last timestamp of
            every buffered array lies after counter, False otherwise
        :rtype: bool
        """
        time_dims = _get_time_indeces(self.buffer)
        if len(time_dims) == 0:
            return False

        later_data = [
            counter < entry.indexes[_get_time_indeces(self.buffer)[0]][-1]
            for entry in self.buffer.values()
        ]
        return all(later_data)
Beispiel #7
0
    def transform(self, x: xr.DataArray) -> xr.DataArray:
        """ Add n-th order differences to the given xarray object.

        For every target index and every configured order n, the n-th discrete
        difference along ``axis`` is stored in x under the key ``<index>_d<n>``.
        Note that x is modified in place and returned.

        :param x: Xarray object to apply differentiation on.
        :type x: xr.DataArray
        :return: x, extended by the n-th order differences.
        :rtype: xr.DataArray
        :raises WrongParameterException: If a target index is not contained in x.
        """
        # Accept a single order as well as a collection of orders.
        orders = [self.n] if isinstance(self.n, int) else self.n

        if self.target_index is None:
            targets = _get_time_indeces(x)
        elif isinstance(self.target_index, str):
            targets = [self.target_index]
        else:
            targets = self.target_index

        # Validate all targets before anything is written to x.
        for name in targets:
            if name not in x:
                raise WrongParameterException(
                    f"Index {name} not in dataset!",
                    "Assert that the previous modules provide the correct index.",
                    module=self.name)

        for name in targets:
            for order in orders:
                source = x[name]
                delta = np.diff(source, n=order, axis=self.axis)

                # Explicit dims are needed for multidimensional arrays,
                # otherwise the default name dim_0 may conflict.
                dim_names = list(source.dims)

                if self.pad:
                    # Pad at the front of the differentiated axis so that the
                    # result has the same length as the source.
                    lost = source.shape[self.axis] - delta.shape[self.axis]
                    widths = [(0, 0) for _ in range(len(delta.shape))]
                    widths[self.axis] = (lost, 0)
                    delta = np.pad(delta, pad_width=widths, **self.pad_args)
                else:
                    # Without padding the axis is shorter than before, so it
                    # gets its own dimension name.
                    dim_names[self.axis] = f"{dim_names[self.axis]}_d{order}"

                x[f"{name}_d{order}"] = xr.DataArray(delta, dims=dim_names)

        return x
Beispiel #8
0
    def _post_transform(self, result):
        """
        Store a transformation result in the buffer of this step.

        Results that are not dicts, and dicts with at most one entry, are
        stored under the name of this step. If the buffer already holds data,
        the new result is appended to each buffered entry along the first
        time index of the result.

        :param result: the result of the transformation, either a single
            array or a dict of arrays
        """
        if not isinstance(result, dict):
            result = {self.name: result}
        elif len(result) <= 1:
            result = {self.name: list(result.values())[0]}

        if not self.buffer:
            self.buffer = result
            return

        # Time dimension is mandatory, consequently the dim has to exist
        time_dim = _get_time_indeces(result)[0]
        for key in self.buffer:
            extended = xr.concat([self.buffer[key], result[key]],
                                 dim=time_dim)
            self.buffer[key] = extended
Beispiel #9
0
    def transform(self, x: xr.DataArray) -> xr.DataArray:
        """
        Extract trend values by stacking periodically shifted copies of x.

        The result contains x shifted by ``period``, ``2 * period``, ...,
        ``length * period`` steps along the configured indexes, stacked along
        the dimension "length". Positions vacated by a shift are filled with 0.

        :param x: input xarray DataArray
        :type x: xr.DataArray
        :return: the trend information, ordered as (time index, "length", ...)
        :rtype: xr.DataArray
        """
        # Fall back to the time indexes of x when none were configured.
        shift_dims = self.indexes or _get_time_indeces(x)

        def _shifted(offset):
            return x.shift(dict.fromkeys(shift_dims, offset), fill_value=0)

        stacked = _shifted(self.period)
        for multiple in range(2, self.length + 1):
            stacked = xr.concat(
                [stacked, _shifted(self.period * multiple)], dim="length")

        time_dim = _get_time_indeces(x)[0]
        return stacked.transpose(time_dim, "length", ...)
    def transform(self, x: xr.DataArray) -> xr.DataArray:
        """ Encode the configured date features for the time index of x.

        :param x: Xarray object containing a time series; its first time index is used
        :return: A DataArray with dimensions (time index, "features") holding
            one encoded column per configured feature.
        """
        time_dim = _get_time_indeces(x)[0]

        encoded = [
            self._encode(feature, x[time_dim].to_series())
            for feature in self.features
        ]
        # The encodings are stacked feature-first; move time to the front.
        values = np.array(encoded).swapaxes(0, 1)

        time_coord = getattr(x, time_dim)
        feature_coord = self.features.copy()
        return xr.DataArray(values,
                            coords=[time_coord, feature_coord],
                            dims=[time_dim, "features"])
Beispiel #11
0
    def transform(self, x: xr.DataArray) -> xr.DataArray:
        """
        Transform the time series into the sign of its step-to-step change.

        Each value is compared with its predecessor along the time indices of
        x (x minus x shifted by one step). The first position has no
        predecessor; the shift pads it with xarray's default fill value.

        :param x: The time series that should be transformed
        :type x: xr.DataArray
        :return: A time series, where 1 indicates that the value is higher than the previous one,
            -1 that it is lower, and 0 that it is the same
        :rtype: xr.DataArray
        :raises WrongParameterException: If not all indices are part of x
        """
        indices = _get_time_indeces(x)
        try:
            return xr.ufuncs.sign(x - x.shift({index: 1 for index in indices}))
        except ValueError as err:
            # DataArray exposes its indexes as `indexes`; the former `x.indices`
            # raised an AttributeError here and hid the intended exception.
            raise WrongParameterException(
                f"Not all indices ({indices}) are in the indices of x ({list(x.indexes.keys())}).",
                "Either correct the indices which you passed to that module or assert that this index occurs in the "
                "data which are passed by the previous modules to the current one.",
                module=self.name) from err
Beispiel #12
0
    def further_elements(self, counter: pd.Timestamp) -> bool:
        """
        Check whether there is further data for the time after counter.

        Returns True right away if the buffer is empty or if the last timestamp
        of every buffered array lies after counter. Otherwise all input steps
        and then all target steps are asked; the first one reporting no
        further elements makes the result False.

        :param counter: The timestamp for which it should be tested if there exist further data after it.
        :type counter: pd.Timestamp
        :return: True if there exist further data
        :rtype: bool
        """
        if not self.buffer:
            return True

        buffered_ahead = [
            counter < entry.indexes[_get_time_indeces(self.buffer)[0]][-1]
            for entry in self.buffer.values()
        ]
        if all(buffered_ahead):
            return True

        # The buffer is exhausted, so the answer depends on the upstream steps.
        if not all(
                step.further_elements(counter)
                for step in self.input_steps.values()):
            return False
        return all(
            step.further_elements(counter) for step in self.targets.values())
Beispiel #13
0
    def transform(self, x: xr.DataArray) -> xr.DataArray:
        """
        Shift the given time series x by the configured lag.

        The shift is applied along the configured indices, or along the time
        indices of x if none are configured. Vacated positions are filled
        with 0.

        :param x: the time series to be shifted
        :type x: xr.DataArray
        :return: The shifted time series
        :rtype: xr.DataArray
        :raises WrongParameterException: If not all indices are part of x
        """
        shift_dims = self.indices or _get_time_indeces(x)
        try:
            shifted = x.shift(dict.fromkeys(shift_dims, self.lag),
                              fill_value=0)
        except ValueError:
            raise WrongParameterException(
                f"Not all indices ({shift_dims}) are in the indices of x ({list(x.indexes.keys())}).",
                "Perhaps you set the wrong indices with set_params or during the initialization of the ClockShift.",
                module=self.name)
        return shifted
Beispiel #14
0
    def transform(self, y: xr.DataArray,
                  **kwargs: xr.DataArray) -> xr.DataArray:
        """
        Calculate the RMSE of every prediction with respect to the target y.

        The first ``offset`` entries are skipped. The result holds one value
        per prediction and is labelled with the last timestamp of y.

        :param y: the target values
        :type y: xr.DataArray
        :param kwargs: the predictions, one keyword argument per prediction
        :type kwargs: xr.DataArray

        :return: The calculated RMSE with dimensions ("time", "predictions")
        :rtype: xr.DataArray
        :raises InputNotAvailable: if no predictions are passed
        """
        targets = y.values
        if not kwargs:
            message = (
                "No predictions are provided as input for the RMSE Calculator. "
                "You should add the predictions by a seperate key word arguments if you add the RMSECalculator "
                "to the pipeline.")
            logger.error(message)
            raise InputNotAvailable(message)

        names = list(kwargs)
        scores = [
            np.sqrt(
                np.mean(
                    (y_hat.values[self.offset:] - targets[self.offset:])**2))
            for y_hat in kwargs.values()
        ]

        last_stamp = y.indexes[_get_time_indeces(y)[0]][-1]
        return xr.DataArray(np.array([scores]),
                            coords={
                                "time": [last_stamp],
                                "predictions": names
                            },
                            dims=["time", "predictions"])