    def transform(self, y: xr.DataArray, **kwargs: xr.DataArray) -> xr.DataArray:
        """
        Calculates the MAE based on the predefined target and prediction variables.

        :param y: the target time series
        :type y: xr.DataArray
        :param kwargs: the predictions
        :type kwargs: xr.DataArray

        :return: The calculated MAE
        :rtype: xr.DataArray
        """

        if kwargs == {}:
            error_message = f"No predictions are provided as input for the {self.__class__.__name__}. You should add the predictions" \
                            f" by a seperate key word arguments if you add the {self.__class__.__name__} to the pipeline."
            self.logger.error(error_message)
            raise InputNotAvailable(error_message)

        t = y.values
        results = {}
        for key, y_hat in kwargs.items():
            p = y_hat.values
            p_, t_ = p.reshape((len(p), -1)), t.reshape((len(t), -1))
            index = y.indexes[_get_time_indexes(y)[0]]
            results[key] = self._apply_rolling_metric(p_, t_, index)
        time = y.indexes[_get_time_indexes(y)[0]]

        return xr.DataArray(np.concatenate(list(results.values()), axis=1),
                            coords={
                                _get_time_indexes(y)[0]: time,
                                "predictions": list(results.keys())
                            },
                            dims=[_get_time_indexes(y)[0], "predictions"])
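Below is a minimal, self-contained sketch of how the concatenated per-prediction columns end up in the returned DataArray. The hourly index, the prediction names, and the per-key values stand in for the output of _apply_rolling_metric and are assumptions, not part of the module above.

import numpy as np
import pandas as pd
import xarray as xr

# Assumed setup: an hourly time index and two prediction keys.
time = pd.date_range("2024-01-01", periods=6, freq="h")
results = {
    # Each value plays the role of one _apply_rolling_metric output: shape (time, 1).
    "forecast_a": np.arange(6, dtype=float).reshape(-1, 1),
    "forecast_b": (np.arange(6, dtype=float) * 2).reshape(-1, 1),
}

# Concatenating along axis=1 yields one column per prediction, labelled via the
# "predictions" coordinate, exactly like the return statement above.
mae = xr.DataArray(np.concatenate(list(results.values()), axis=1),
                   coords={"time": time, "predictions": list(results.keys())},
                   dims=["time", "predictions"])
print(mae.sel(predictions="forecast_a").values)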
Example #2
    def transform(self, **x: xr.DataArray) -> xr.DataArray:
        """
        Transform the input into output by performing all the steps in this pipeline.
        Moreover, this method collects the results of the last steps in this pipeline.

        Note, this method is necessary for enabling subpipelining.

        :param x: The input data
        :type x: xr.DataArray
        :return: The transformed data
        :rtype: xr.DataArray
        """
        for key, (start_step, _) in self.start_steps.items():
            start_step.buffer = {key: x[key].copy()}
            start_step.finished = True

        time_index = _get_time_indexes(x)
        self.counter = list(x.values())[0].indexes[time_index[0]][
            0]  # The start date of the input time series.

        last_steps = list(filter(lambda x: x.last, self.id_to_step.values()))

        if not self.batch:
            return self._collect_results(last_steps)
        return self._collect_batches(last_steps, time_index)
Example #3
 def _sklearn_output_to_dataset(kwargs: xr.DataArray, prediction,
                                targets: List[Tuple[str, int]]):
     reference = kwargs[list(kwargs)[0]]
     time_index = reference.indexes[_get_time_indexes(reference)[0]]
     if len(targets) == 0:
         coords = (
             # first dimension is number of batches. We assume that this is the time.
             ("time", time_index.values),
             *[(f"dim_{j}", list(range(size)))
               for j, size in enumerate(prediction.shape[1:])])
         result = xr.DataArray(prediction, coords=coords)
     else:
         result = {}
         position = 0
         prediction = prediction.reshape(
             len(list(reference.coords.values())[0]), -1)
         for i, target in enumerate(targets):
             result[target[0]] = xr.DataArray(
                 prediction[:, position:position + target[1]],
                 coords={
                     "time": time_index.values,
                     "dim_0": list(range(target[1]))
                 },
                 dims=["time", "dim_0"])
             position += target[1]
     return result
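A small sketch of the else branch above with synthetic data: a flat sklearn prediction matrix is split column-wise into one DataArray per target. The target names "load" and "price" and their widths are illustrative assumptions.

import numpy as np
import pandas as pd
import xarray as xr

# Assumed example: an estimator returned 5 columns for two targets, "load"
# (3 columns, e.g. a 3-step horizon) and "price" (2 columns).
time_index = pd.date_range("2024-01-01", periods=4, freq="D")
prediction = np.arange(4 * 5, dtype=float).reshape(4, 5)
targets = [("load", 3), ("price", 2)]

result, position = {}, 0
for name, size in targets:
    # Slice the columns that belong to this target and wrap them as a DataArray.
    result[name] = xr.DataArray(prediction[:, position:position + size],
                                coords={"time": time_index.values,
                                        "dim_0": list(range(size))},
                                dims=["time", "dim_0"])
    position += size
print(result["load"].shape)  # (4, 3)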
Example #4
    def transform(self, x: xr.DataArray) -> xr.DataArray:
        """
        Extract trend values

        :param x: input xarray DataArray
        :type x: xr.DataArray
        :return: a dataset containing the trend information
        :rtype: xr.DataArray
        """
        indexes = self.indexes
        if not indexes:
            indexes = _get_time_indexes(x)
        trends = [x.shift({index: self.period * i for index in indexes}, fill_value=0) for i in
                  range(1, self.length + 1)]
        trend = xr.DataArray(np.stack(trends, axis=-1), dims=(*x.dims, "length"), coords=x.coords)
        return trend.transpose(_get_time_indexes(x)[0], "length", ...)
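The following sketch shows the shift-and-stack idea with assumed values for period, length, and an hourly "time" index; it is not the module's configuration, only an illustration of how the "length" dimension is built.

import numpy as np
import pandas as pd
import xarray as xr

# Assumed setup: a univariate hourly series, period=2 and length=3 lagged copies.
time = pd.date_range("2024-01-01", periods=8, freq="h")
x = xr.DataArray(np.arange(8, dtype=float), coords={"time": time}, dims=["time"])
period, length = 2, 3

# Each shifted copy moves the series period*i steps forward and pads the gap with 0,
# mirroring x.shift({index: self.period * i}, fill_value=0) in the module above.
trends = [x.shift(time=period * i, fill_value=0) for i in range(1, length + 1)]
trend = xr.DataArray(np.stack([t.values for t in trends], axis=-1),
                     coords={"time": time}, dims=("time", "length"))
print(trend.sel(time=time[4]).values)  # [2. 0. 0.]: lags 4 and 6 fall into the zero padding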
Example #5
    def further_elements(self, counter):
        """
        Checks whether there exists at least one data point for the time after counter.

        :param counter: The timestamp for which it should be tested whether further data exist after it.
        :type counter: pd.Timestamp
        :return: True if further data exist
        :rtype: bool
        """
        indexes = _get_time_indexes(self.buffer)
        if len(indexes) == 0 or not all([
                counter < b.indexes[_get_time_indexes(self.buffer)[0]][-1]
                for b in self.buffer.values()
        ]):
            return False
        else:
            return True
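A stripped-down sketch of the check above: the buffer contents and the "time" index name are assumptions; the function only mirrors the "counter is before the last index entry of every buffered series" logic.

import numpy as np
import pandas as pd
import xarray as xr

# Assumed buffer: two named series sharing a daily "time" index.
time = pd.date_range("2024-01-01", periods=5, freq="D")
buffer = {name: xr.DataArray(np.zeros(5), coords={"time": time}, dims=["time"])
          for name in ("y", "y_hat")}

def further_elements(buffer, counter):
    # Data is still available only if every buffered series ends after the counter.
    return all(counter < b.indexes["time"][-1] for b in buffer.values())

print(further_elements(buffer, pd.Timestamp("2024-01-03")))  # True
print(further_elements(buffer, pd.Timestamp("2024-01-05")))  # False, counter is the last entry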
Example #6
    def transform(self, x: xr.DataArray) -> xr.DataArray:
        """ Add n-th order differentiate to xarray dataset.

        :param x: Xarray dataset to apply differentiation on.
        :type x: xr.DataArray
        :return: Xarray dataset containing the n-th order differentiations.
        :rtype: xr.DataArray
        """
        # check parameters for non list types and make it a list
        if isinstance(self.n, int):
            ns = [self.n]
        else:
            ns = self.n

        if self.target_index is None:
            idxs = _get_time_indexes(x)
        elif isinstance(self.target_index, str):
            idxs = [self.target_index]
        else:
            idxs = self.target_index

        # check if idxs are valid idxs of the dataset
        for idx in idxs:
            if idx not in x:
                raise WrongParameterException(
                    f"Index {idx} not in dataset!",
                    "Assert that the previous modules provide the correct index.",
                    module=self.name)

        # iterate over the xarray indices and n-th orders
        # and apply the differentiation on xarray dataset
        for idx in idxs:
            for n in ns:
                diff = np.diff(x[idx], n=n, axis=self.axis)

                # dims needed for multidim DataArray initialization
                # otherwise will lead to conflicts when dim_0 already set
                dims = list(x[idx].dims)

                if self.pad:
                    # pad if padding is enabled by using np.pad
                    # and correct padding widths for dimensions
                    original_size = x[idx].shape[self.axis]
                    pad_width = [(0, 0) for _ in range(len(diff.shape))]
                    pad_width[self.axis] = (original_size -
                                            diff.shape[self.axis], 0)
                    diff = np.pad(diff, pad_width=pad_width, **self.pad_args)
                else:
                    # if the differentiation is not padded, the dims of the differences
                    # aren't the same as before, so we need to rename dim[axis].
                    dims[self.axis] = f"{dims[self.axis]}_d{n}"

                # finally, add difference to xarray dataset
                x[f"{idx}_d{n}"] = xr.DataArray(diff, dims=dims)

        return x
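The padding branch above can be illustrated in isolation; the values, order n, axis, and the default zero padding below are assumptions chosen for brevity.

import numpy as np

# Assumed values: a 1-D series, first-order difference (n=1) along axis 0,
# padded at the front so the result keeps the original length.
values = np.array([1.0, 3.0, 6.0, 10.0])
n, axis = 1, 0

diff = np.diff(values, n=n, axis=axis)          # [2. 3. 4.], one element shorter
pad_width = [(0, 0)] * diff.ndim
pad_width[axis] = (values.shape[axis] - diff.shape[axis], 0)
padded = np.pad(diff, pad_width=pad_width)      # [0. 2. 3. 4.], same length as the input
print(padded)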
Example #7
    def transform(self, y: xr.DataArray, **kwargs: xr.DataArray) -> xr.DataArray:
        """
        Calculates the RMSE based on the predefined target and predictions variables.

        :param y: the target time series
        :type y: xr.DataArray
        :param kwargs: the predictions
        :type kwargs: xr.DataArray

        :return: The calculated RMSE
        :rtype: xr.DataArray
        """
        t = y.values
        rmse = []
        predictions = []
        if kwargs == {}:
            error_message = ("No predictions are provided as input for the RMSE Calculator. "
                             "You should add the predictions by a separate keyword argument if you add the "
                             "RMSECalculator to the pipeline.")
            logger.error(error_message)
            raise InputNotAvailable(error_message)

        for key, y_hat in kwargs.items():
            p = y_hat.values
            predictions.append(key)
            if self.rolling:
                if self.filter:
                    p_, t_ = self.filter(p, t)
                    time = y[_get_time_indexes(y)[0]][-len(p_) + self.offset:]
                else:
                    time = y[_get_time_indexes(y)[0]][self.offset:]
                    p_, t_ = p.reshape((len(p), -1)), t.reshape((len(t), -1))
                _rmse = pd.DataFrame(np.mean((p_[self.offset:] - t_[self.offset:]) ** 2, axis=-1)).rolling(
                    self.window).apply(lambda x: np.sqrt(np.mean(x))).values
            else:
                time = [y.indexes[_get_time_indexes(y)[0]][-1]]
                _rmse = [np.sqrt(np.mean((p[self.offset:] - t[self.offset:]) ** 2))]
            rmse.append(_rmse)
        return xr.DataArray(np.stack(rmse).swapaxes(0, 1).reshape((-1, len(predictions))),
                            coords={"time": time, "predictions": predictions},
                            dims=["time", "predictions"])
Example #8
    def _post_transform(self, result):
        if isinstance(result, dict) and len(result) <= 1:
            result = {self.name: list(result.values())[0]}
        elif not isinstance(result, dict):
            result = {self.name: result}

        if not self.buffer:
            self.buffer = result
        else:
            # The time dimension is mandatory, so this dimension has to exist.
            dim = _get_time_indexes(result)[0]
            for key in self.buffer.keys():
                self.buffer[key] = xr.concat([self.buffer[key], result[key]],
                                             dim=dim)
        return result
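The buffer update can be sketched with two small DataArrays; the "time" coordinate values and the key name are assumptions.

import numpy as np
import pandas as pd
import xarray as xr

# Assumed scenario: a buffer already holds two hourly values and a new one-step
# result arrives; both share the "time" dimension that xr.concat extends.
old = xr.DataArray(np.array([1.0, 2.0]),
                   coords={"time": pd.date_range("2024-01-01", periods=2, freq="h")},
                   dims=["time"])
new = xr.DataArray(np.array([3.0]),
                   coords={"time": pd.date_range("2024-01-01 02:00", periods=1, freq="h")},
                   dims=["time"])

buffer = {"result": old}
result = {"result": new}
for key in buffer:
    # Append along the time dimension, as in _post_transform above.
    buffer[key] = xr.concat([buffer[key], result[key]], dim="time")
print(buffer["result"].sizes["time"])  # 3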
Example #9
    def transform(self, y: xr.DataArray, **kwargs: xr.DataArray) -> xr.DataArray:
        """
        Calculates the MAE based on the predefined target and predictions variables.

        :param y: the target time series
        :type y: xr.DataArray

        :return: The calculated MAE
        :rtype: xr.DataArray
        """
        t = y.values
        mae = []
        predictions = []
        if kwargs == {}:
            error_msg = ("No predictions are provided as input for the MAE Calculator. " +
                         "You should add the predictions by a separate key word arguments if you add the " +
                         "MaeCalculator to the pipeline.")
            logger.error(error_msg)
            raise InputNotAvailable(error_msg)

        for key, y_hat in kwargs.items():
            p = y_hat.values
            predictions.append(key)

            if self.rolling:
                time = y[_get_time_indexes(y)[0]][self.offset:]
                p_, t_ = p.reshape((len(p), -1)), t.reshape((len(t), -1))
                _mae = pd.DataFrame(p_[self.offset:] - t_[self.offset:]).rolling(
                    self.window).apply(lambda x: np.mean(np.abs(x))).values
            else:
                time = [y.indexes[_get_time_indexes(y)[0]][-1]]
                _mae = [np.mean(np.abs(p[self.offset:] - t[self.offset:]))]
            mae.append(_mae)
        return xr.DataArray(np.stack(mae).swapaxes(0, 1).reshape((-1, len(predictions))),
                            coords={"time": time, "predictions": predictions},
                            dims=["time", "predictions"])
Example #10
    def transform(self, x: xr.DataArray) -> xr.DataArray:
        """ Add date features to xarray dataset as configured.

        :param x: Xarray dataset containing a time series specified by the object's 'time_index'
        :return: The xarray dataset with date features added.
        """

        time_index = _get_time_indexes(x)[0]
        data = [
            self._encode(feature, x[time_index].to_series())
            for feature in self.features
        ]
        return xr.DataArray(
            np.array(data).swapaxes(0, 1),
            coords=[getattr(x, time_index),
                    self.features.copy()],
            dims=[time_index, "features"])
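A sketch of the (time, features) layout produced above, using plain calendar attributes; the real _encode may use different encodings (e.g. cyclical ones), so the feature values here are assumptions.

import numpy as np
import pandas as pd
import xarray as xr

# Assumed features: raw hour of day and weekday, extracted from the time index.
time = pd.date_range("2024-01-01", periods=4, freq="6h")
series = pd.Series(time, index=time)
features = ["hour", "weekday"]

data = [getattr(series.dt, feature).values for feature in features]
encoded = xr.DataArray(np.array(data).swapaxes(0, 1),
                       coords=[time, features],
                       dims=["time", "features"])
print(encoded.sel(features="hour").values)  # [ 0  6 12 18]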
Example #11
    def transform(self, **kwargs: xr.DataArray) -> xr.DataArray:
        """
        Predicts the result with the wrapped statsmodels module

        :param kwargs: A dict of input arrays
        :type kwargs: xr.DataArray
        :return: the transformed dataarray
        :rtype: xr.DataArray
        """
        time_data = list(kwargs.values())[0][_get_time_indexes(kwargs)[0]]

        x = []
        for key, value in kwargs.items():
            x.append(value.values)

        if hasattr(self.model, "forecast"):
            if "exog" in inspect.signature(
                    self.model.forecast
            ).parameters or "kwargs" in inspect.signature(
                    self.model.forecast).parameters and self.use_exog:
                prediction = \
                    self.model.forecast(len(time_data), exog=np.concatenate(x, axis=-1), **self.predict_kwargs)[0]

            else:
                prediction = self.model.forecast(len(time_data),
                                                 **self.predict_kwargs)[0]
        elif hasattr(self.model, "predict"):
            if "exog" in inspect.signature(
                    self.model.predict
            ).parameters or "kwargs" in inspect.signature(
                    self.model.predict).parameters and self.use_exog:
                prediction = \
                    self.model.predict(len(time_data), exog=np.concatenate(x, axis=-1), **self.predict_kwargs)[0]

            else:
                prediction = self.model.predict(len(time_data),
                                                **self.predict_kwargs)[0]
        else:
            raise Exception(
                f"{self.module.__class__.__name__} has no forecast or predict method..."
            )

        return numpy_to_xarray(prediction, list(kwargs.values())[0], self.name)
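The signature inspection can be shown in isolation with two hypothetical forecast callables standing in for the wrapped statsmodels model; only the exog/kwargs check itself is taken from the module above.

import inspect
import numpy as np

# Hypothetical stand-ins for the wrapped model's forecast method.
def forecast_with_exog(steps, exog=None):
    return np.zeros(steps)

def forecast_plain(steps):
    return np.zeros(steps)

use_exog = True
for fn in (forecast_with_exog, forecast_plain):
    params = inspect.signature(fn).parameters
    # Pass exogenous data only when the callable accepts it (or **kwargs) and it is wanted.
    if ("exog" in params or "kwargs" in params) and use_exog:
        prediction = fn(3, exog=np.ones((3, 1)))
    else:
        prediction = fn(3)
    print(fn.__name__, prediction.shape)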
Example #12
    def transform(self, x: xr.DataArray) -> xr.DataArray:
        """
        Transforms the time series into a time series that indicates whether the next value is higher, lower, or the same.

        :param x: The time series that should be transformed
        :type x: xr.DataArray
        :return: A time series, where 1 indicates that the next value is higher, -1 that the next value
        is lower, and 0 that the next value is the same
        :rtype: xr.DataArray
        :raises WrongParameterException: If not all indexes are part of x
        """
        indexes = _get_time_indexes(x)
        try:
            return xr.ufuncs.sign(x - x.shift({index: 1 for index in indexes}))
        except ValueError as exc:
            raise WrongParameterException(
                f"Not all indexes ({indexes}) are in the indexes of x ({list(x.indexes.keys())}).",
                "Either correct the indexes which you passed to that module or assert that this index occurs in the "
                "data which are passed by the previous modules to the current one.",
                module=self.name) from exc
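A minimal sketch of the sign-of-differences idea using np.sign directly (xr.ufuncs is deprecated in recent xarray releases); the daily index and values are assumptions.

import numpy as np
import pandas as pd
import xarray as xr

# Assumed input: a short daily series; the sign of the difference to the previous
# value reproduces the +1/-1/0 coding described in the docstring above.
time = pd.date_range("2024-01-01", periods=5, freq="D")
x = xr.DataArray(np.array([1.0, 2.0, 2.0, 1.0, 3.0]), coords={"time": time}, dims=["time"])

direction = np.sign(x - x.shift(time=1))
print(direction.values)  # [nan  1.  0. -1.  1.]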
Example #13
    def further_elements(self, counter: pd.Timestamp) -> bool:
        """
        Checks whether there exists at least one data point for the time after counter.

        :param counter: The timestamp for which it should be tested whether further data exist after it.
        :type counter: pd.Timestamp
        :return: True if further data exist
        :rtype: bool
        """
        if not self.buffer or all([
                counter < b.indexes[_get_time_indexes(self.buffer)[0]][-1]
                for b in self.buffer.values()
        ]):
            return True
        for input_step in self.input_steps.values():
            if not input_step.further_elements(counter):
                return False
        for target_step in self.targets.values():
            if not target_step.further_elements(counter):
                return False
        return True
Example #14
    def transform(self, x: xr.DataArray) -> xr.DataArray:
        """
        Shifts the given time series x by the defined lag

        :param x: the time series to be shifted
        :type x: xr.DataArray
        :return: The shifted time series
        :rtype: xr.DataArray
        :raises WrongParameterException: If not all indexes are part of x
        """
        indexes = self.indexes
        if not indexes:
            indexes = _get_time_indexes(x)
        try:
            return x.shift({index: self.lag
                            for index in indexes},
                           fill_value=0)
        except ValueError as exc:
            raise WrongParameterException(
                f"Not all indexes ({indexes}) are in the indexes of x ({list(x.indexes.keys())}).",
                "Perhaps you set the wrong indexes with set_params or during the initialization of the ClockShift.",
                module=self.name) from exc
Example #15
 def _pack_data(self, start, end, buffer_element=None, return_all=False):
     # Provide requested data
     time_index = _get_time_indexes(self.buffer)
     if end and start and end > start:
         index = list(self.buffer.values())[0].indexes[time_index[0]]
         start = max(index[0], start.to_numpy())
         # After sel copy is not needed, since it returns a new array.
         if buffer_element is not None:
             return self.buffer[buffer_element].sel(
                 **{
                     time_index[0]:
                     index[(index >= start) & (index < end.to_numpy())]
                 })
         elif return_all:
             return {
                 key: b.sel(
                     **{
                         time_index[0]:
                         index[(index >= start) & (index < end.to_numpy())]
                     })
                 for key, b in self.buffer.items()
             }
         else:
             return list(self.buffer.values())[0].sel(
                 **{
                     time_index[0]:
                     index[(index >= start) & (index < end.to_numpy())]
                 })
     else:
         self.finished = True
         if buffer_element is not None:
             return self.buffer[buffer_element].copy()
         elif return_all:
             return copy.deepcopy(self.buffer)
         else:
             return list(self.buffer.values())[0].copy()
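The half-open time-window selection used in _pack_data can be sketched with one assumed hourly series and assumed start/end bounds.

import numpy as np
import pandas as pd
import xarray as xr

# Assumed buffer entry: one hourly series; start/end are the requested window bounds.
time = pd.date_range("2024-01-01", periods=6, freq="h")
data = xr.DataArray(np.arange(6, dtype=float), coords={"time": time}, dims=["time"])
start, end = pd.Timestamp("2024-01-01 01:00"), pd.Timestamp("2024-01-01 04:00")

index = data.indexes["time"]
# Half-open selection [start, end), as in _pack_data above.
window = data.sel(time=index[(index >= start.to_numpy()) & (index < end.to_numpy())])
print(window.values)  # [1. 2. 3.]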