Example #1
0
    def from_geo_events(
        cls,
        df: pandas.DataFrame,
        lat_col: str,
        lon_col: str,
        nodes: Tuple,
        levels: Tuple[int, int] = (6, 7),
        resample_freq: str = "1H",
        min_count: Union[float, int] = 0.2,
        root_name: str = "total",
        fillna: bool = False,
    ):
        """
        Build a hierarchy tree from a dataframe of geo-located events.

        The dataframe is first passed through ``hexify`` together with the coordinate columns,
        then ``resample_count`` produces the root series, and finally ``groupify`` attaches the
        child nodes to a root created with ``cls(key=root_name, item=total)``.

        Parameters
        ----------
        df : pandas.DataFrame
            The events to build the hierarchy from
        lat_col : str
            Column where the latitude coordinates can be found
        lon_col : str
            Column where the longitude coordinates can be found
        nodes : Tuple
            Node specification, forwarded unchanged to ``groupify``
        levels : Tuple[int, int]
            Forwarded to ``hexify`` (presumably the hexagon resolution levels -- confirm against
            ``hexify``)
        resample_freq : str
            Resampling frequency, used both for the root series and by ``groupify``
        min_count : float or int
            Forwarded to ``groupify`` (presumably a threshold for keeping a node -- confirm
            against ``groupify``)
        root_name : str
            Key of the root node
        fillna : bool
            If True, the tree is converted to a dataframe, forward-filled, rows that still
            contain missing values are dropped, and every node's ``item`` is replaced by its own
            single-column slice of that cleaned dataframe

        Returns
        -------
        HierarchyTree
            The value returned by ``groupify``
        """

        hexified = hexify(df, lat_col, lon_col, levels=levels)
        total = resample_count(hexified, resample_freq, root_name)
        hierarchy = cls(key=root_name, item=total)
        grouped = groupify(
            hierarchy,
            df=hexified,
            nodes=nodes,
            freq=resample_freq,
            min_count=min_count,
            total=total,
        )
        # TODO: more flexible strategy
        if fillna:
            # ``ffill()`` is the non-deprecated spelling of ``fillna(method="ffill")``.
            filled = grouped.to_pandas().ffill().dropna()
            for node in make_iterable(grouped, prop=None):
                # Double brackets keep each node's item a single-column DataFrame.
                node.item = filled[[node.key]]
        return grouped
Example #2
0
    def _revise(self, steps_ahead=1):
        """
        Run the configured revision method and wrap its output in a DataFrame.

        The stored forecasts, errors and node tree are handed to
        ``self.revision_method.revise``; the result is labelled with one column per
        element yielded by ``make_iterable(self.nodes)`` and indexed by
        ``self._get_predict_index(steps_ahead=steps_ahead)``.

        Parameters
        ----------
        steps_ahead : int
            Forwarded to ``_get_predict_index`` to build the row index

        Returns
        -------
        pandas.DataFrame
        """
        logger.info(f'Reconciling forecasts using {self.revision_method}')

        result = self.hts_result
        reconciled = self.revision_method.revise(
            forecasts=result.forecasts,
            mse=result.errors,
            nodes=self.nodes,
        )

        column_labels = list(make_iterable(self.nodes))
        row_index = self._get_predict_index(steps_ahead=steps_ahead)
        frame = pandas.DataFrame(reconciled, index=row_index, columns=column_labels)
        return frame
Example #3
0
def _model_mapping_to_iterable(
        model_mapping: Dict[str, ModelFitResultT],
        nodes: NAryTreeT) -> List[Tuple[str, ModelFitResultT, NAryTreeT]]:
    """
    Pair every node of the tree with its fitted model.

    Returns a list of ``(node key, model, node)`` triplets, one per node yielded by
    ``make_iterable(nodes, prop=None)``. When the mapping stores a tuple for a key,
    the second element of that tuple is taken as the model.
    """

    def _resolve(key):
        # Mapping values are either the model itself or a tuple whose index 1 is the model.
        entry = model_mapping[key]
        return entry[1] if isinstance(entry, tuple) else entry

    return [
        (node.key, _resolve(node.key), node)
        for node in make_iterable(nodes, prop=None)
    ]
Example #4
0
    def revise(self, forecasts=None, mse=None, nodes=None):
        """
        Reconcile forecasts with the revision method named by ``self.name``.

        Parameters
        ----------
        forecasts
            The base forecasts; handed to whichever reconciliation routine is selected
        mse
            Errors of the base models; only forwarded to ``optimal_combination``
        nodes
            The hierarchy tree; only used by the AHP, PHA and FP methods. For AHP/PHA with a
            transformer configured, each node's ``item[key]`` column is inverse-transformed
            in place before proportions are computed

        Returns
        -------
        The reconciled forecasts, in whatever form the selected routine returns

        Raises
        ------
        InvalidArgumentException
            If ``self.name`` is not one of the known revision method names
        """
        if self.name == MethodsT.NONE.name:
            return y_hat_matrix(forecasts=forecasts)

        if self.name in [
                MethodsT.OLS.name, MethodsT.WLSS.name, MethodsT.WLSV.name
        ]:
            # Forward the configured method so WLSS/WLSV are not silently run as OLS.
            return optimal_combination(forecasts=forecasts,
                                       sum_mat=self.sum_mat,
                                       method=self.name,
                                       mse=mse)

        if self.name == MethodsT.BU.name:
            y_hat = self._y_hat_matrix(forecasts)
            return self._new_mat(y_hat)

        if self.name in [MethodsT.AHP.name, MethodsT.PHA.name]:
            if self.transformer:
                # Undo the fit-time transformation on the stored series (mutates the nodes).
                for node in make_iterable(nodes, prop=None):
                    node.item[node.key] = self.transformer.inverse_transform(
                        node.item[node.key])
            y_hat = proportions(nodes=nodes,
                                forecasts=forecasts,
                                sum_mat=self.sum_mat,
                                method=self.name)
            return self._new_mat(y_hat)

        if self.name == MethodsT.FP.name:
            return forecast_proportions(forecasts, nodes)

        raise InvalidArgumentException('Revision model name is invalid')
Example #5
0
 def __validate_steps_ahead(self, exogenous_df: pandas.DataFrame,
                            steps_ahead: int) -> int:
     """
     Work out how many steps to predict and check the exogenous columns.

     Without ``exogenous_df`` the given ``steps_ahead`` is returned, falling back to 1
     when it is falsy. With ``exogenous_df`` the number of steps is its length, and
     every node's exogenous columns must be selectable from it.

     Raises
     ------
     MissingRegressorException
         If selecting a node's exogenous columns from ``exogenous_df`` raises ``KeyError``
     """
     if exogenous_df is None:
         if not steps_ahead:
             logger.info(
                 "No arguments passed for 'steps_ahead', defaulting to predicting 1-step-ahead"
             )
             steps_ahead = 1
         return steps_ahead

     steps_ahead = len(exogenous_df)
     for node in make_iterable(self.nodes, prop=None):
         exog_cols = node.exogenous
         try:
             # Selection is done only for validation; the result is discarded.
             _ = exogenous_df[exog_cols]
         except KeyError as err:
             # Chain explicitly so the original lookup failure stays attached as the cause.
             raise MissingRegressorException(
                 f"Node {node.key} has as exogenous variables {node.exogenous} but "
                 f"these columns were not found in 'exogenous_df'") from err
     return steps_ahead
Example #6
0
 def __init_predict_step(self, exogenous_df: pandas.DataFrame,
                         steps_ahead: int):
     """
     Prepare the tree for prediction and return the number of steps to predict.

     Without ``exogenous_df`` the given ``steps_ahead`` is returned, falling back to 1
     when it is falsy. With ``exogenous_df`` the number of steps is its length, and each
     node's ``item`` is replaced by that node's exogenous columns taken from it.

     Raises
     ------
     MissingRegressorException
         If ``self.exogenous`` is set but no ``exogenous_df`` is given, or if selecting
         and assigning a node's exogenous columns raises ``KeyError``
     """
     # Compare against None explicitly: the truth value of a DataFrame is ambiguous
     # and evaluating it raises ValueError.
     if exogenous_df is None:
         if self.exogenous:
             raise MissingRegressorException(
                 "Exogenous variables were provided at fit step, hence are required at "
                 "predict step. Please pass the 'exogenous_df' variable to predict "
                 "function")
         if not steps_ahead:
             logger.info(
                 "No arguments passed for 'steps_ahead', defaulting to predicting 1-step-ahead"
             )
             steps_ahead = 1
         return steps_ahead

     steps_ahead = len(exogenous_df)
     for node in make_iterable(self.nodes, prop=None):
         exog_cols = node.exogenous
         try:
             node.item = exogenous_df[exog_cols]
         except KeyError as err:
             # Chain explicitly so the original lookup failure stays attached as the cause.
             raise MissingRegressorException(
                 f"Node {node.key} has as exogenous variables {node.exogenous} but "
                 f"these columns were not found in 'exogenous_df'") from err
     return steps_ahead
Example #7
0
    def fit(
        self,
        df: Optional[pandas.DataFrame] = None,
        nodes: Optional[NodesT] = None,
        tree: Optional[HierarchyTree] = None,
        exogenous: Optional[ExogT] = None,
        root: str = "total",
        distributor: Optional[DistributorBaseClass] = None,
        disable_progressbar=defaults.DISABLE_PROGRESSBAR,
        show_warnings=defaults.SHOW_WARNINGS,
        **fit_kwargs: Any,
    ) -> "HTSRegressor":
        """
        Fit one model per node of the hierarchy.

        The hierarchy is given either as a dataframe plus a ``nodes`` specification, or as a
        pre-built :class:`hts.hierarchy.HierarchyTree`. Exogenous variables may be supplied as
        a dict of (string, list): the string is a node key and the list holds the names of the
        columns to use as exogenous variables for that node.

        Parameters
        ----------
        df : pandas.DataFrame
            A Dataframe of time series with a DateTimeIndex, one column per node of the
            hierarchy. Ignored if the ``tree`` argument is passed
        nodes : Dict[str, List[str]]
            The hierarchy defined as a dict of (string, list), as specified in
             :py:func:`HierarchyTree.from_nodes <hts.hierarchy.HierarchyTree.from_nodes>`
        tree : HierarchyTree
            A pre-built HierarchyTree. Ignored if ``df`` and ``nodes`` are passed, as the tree
            will be built from these
        exogenous : Dict[str, List[str]] or None
            Node key mapping to columns that contain the exogenous variable for that node
        root : str
            The name of the root node
        distributor : Optional[DistributorBaseClass]
            A distributor, for parallel/distributed processing
        disable_progressbar : Bool
            Disable or enable progressbar
        show_warnings : Bool
            Forwarded to the fitting routine to control warnings
        fit_kwargs : Any
            Any arguments to be passed to the underlying forecasting model's fit function

        Returns
        -------
        HTSRegressor
            This instance, after fitting
        """

        self.__init_hts(
            nodes=nodes, df=df, tree=tree, root=root, exogenous=exogenous
        )

        node_iter = make_iterable(self.nodes, prop=None)

        # Settings shared by every per-node fit call.
        shared_kwargs = dict(
            fit_kwargs=fit_kwargs,
            low_memory=self.low_memory,
            tmp_dir=self.tmp_dir,
            model_instance=self.model_instance,
            model_args=self.model_args,
            transform=self.transform,
        )

        fit_results = _do_fit(
            nodes=node_iter,
            function_kwargs=shared_kwargs,
            n_jobs=self.n_jobs,
            disable_progressbar=disable_progressbar,
            show_warnings=show_warnings,
            distributor=distributor,
        )

        for fitted in fit_results:
            # Results are either ready-made tuples or model objects that carry their node.
            self.hts_result.models = (
                fitted if isinstance(fitted, tuple) else (fitted.node.key, fitted)
            )
        return self