def test_sum_mat_hierarchical():
    """Check the summing matrix and row labels for a small two-level hierarchy."""
    hierarchy = {"total": ["A", "B"], "A": ["A_X", "A_Y", "A_Z"], "B": ["B_X", "B_Y"]}
    series_names = ["total", "A", "B", "A_X", "A_Y", "A_Z", "B_X", "B_Y"]
    # Only the column names are supplied; every column is an empty list.
    hier_df = pandas.DataFrame(data={name: [] for name in series_names})

    tree = hts.hierarchy.HierarchyTree.from_nodes(hierarchy, hier_df)
    sum_mat, sum_mat_labels = to_sum_mat(tree)

    expected_rows = [
        [1, 1, 1, 1, 1],  # total
        [0, 0, 0, 1, 1],  # B
        [1, 1, 1, 0, 0],  # A
        [1, 0, 0, 0, 0],  # A_X
        [0, 1, 0, 0, 0],  # A_Y
        [0, 0, 1, 0, 0],  # A_Z
        [0, 0, 0, 1, 0],  # B_X
        [0, 0, 0, 0, 1],  # B_Y
    ]
    expected_labels = ["total", "B", "A", "A_X", "A_Y", "A_Z", "B_X", "B_Y"]

    numpy.testing.assert_array_equal(sum_mat, numpy.array(expected_rows))
    assert sum_mat_labels == expected_labels
def test_demo_unique_constraint():
    """Pin the known limitation for node names that contain each other.

    Example: https://otexts.com/fpp2/hts.html

    The summing matrix does not work when elements are named the same but
    represent different levels in the hierarchy. The expected matrix below
    records that (incorrect) output: the ``B`` row also picks up ``AB`` and
    the ``A`` row also picks up ``BA``.
    """
    hierarchy = {"total": ["A", "B"], "A": ["AA", "AB", "AC"], "B": ["BA", "BB"]}
    series_names = ["total", "A", "B", "AA", "AB", "AC", "BA", "BB"]
    # Only the column names are supplied; every column is an empty list.
    hier_df = pandas.DataFrame(data={name: [] for name in series_names})

    tree = hts.hierarchy.HierarchyTree.from_nodes(hierarchy, hier_df)
    sum_mat, _labels = to_sum_mat(tree)

    expected_rows = [
        [1, 1, 1, 1, 1],  # total
        [0, 1, 0, 1, 1],  # B, Incorrectly finds B in AB
        [1, 1, 1, 1, 0],  # A, Incorrectly finds A in BA
        [1, 0, 0, 0, 0],  # AA
        [0, 1, 0, 0, 0],  # AB
        [0, 0, 1, 0, 0],  # AC
        [0, 0, 0, 1, 0],  # BA
        [0, 0, 0, 0, 1],  # BB
    ]
    numpy.testing.assert_array_equal(sum_mat, numpy.array(expected_rows))
def __init_hts(
    self,
    nodes: Optional[NodesT] = None,
    df: Optional[pandas.DataFrame] = None,
    tree: Optional[HierarchyTree] = None,
    root: str = "root",
    exogenous: Optional[List[str]] = None,
):
    """
    Set up the hierarchy tree and everything derived from it.

    Either builds the tree from ``nodes`` and ``df`` or adopts a pre-built
    ``tree``, then stores the exogenous column names, computes the summing
    matrix and initialises the model instance and the revision method.

    Parameters
    ----------
    nodes : NodesT
        Hierarchy description forwarded to ``HierarchyTree.from_nodes``
    df : pandas.DataFrame
        Data forwarded to ``HierarchyTree.from_nodes``
    tree : HierarchyTree
        Pre-built tree, used only when neither ``nodes`` nor ``df`` is given
    root : str
        Name of the root node, forwarded to ``HierarchyTree.from_nodes``
    exogenous : List[str]
        Exogenous variable names, forwarded to ``HierarchyTree.from_nodes``
        and stored on the instance

    Raises
    ------
    InvalidArgumentException
        If neither ``nodes``/``df`` nor ``tree`` is provided
    """
    # ``not df`` raises ValueError ("truth value of a DataFrame is ambiguous")
    # for any DataFrame, so presence is tested against None instead.
    if not nodes and df is None:
        if not tree:
            raise InvalidArgumentException(
                "Either nodes and df must be passed, or a pre-built hierarchy tree"
            )
        self.nodes = tree
    else:
        self.nodes = HierarchyTree.from_nodes(
            nodes=nodes, df=df, exogenous=exogenous, root=root
        )
    self.exogenous = exogenous
    # to_sum_mat returns (matrix, labels); only the matrix is kept here.
    self.sum_mat, _ = to_sum_mat(self.nodes)
    self._set_model_instance()
    self._init_revision()
def revise_forecasts(
    method: str,
    forecasts: Dict[str, numpy.ndarray],
    errors: Optional[Dict[str, numpy.ndarray]] = None,
    residuals: Optional[Dict[str, numpy.ndarray]] = None,
    summing_matrix: Optional[numpy.ndarray] = None,
    nodes: NAryTreeT = None,
    transformer: TransformT = None,
):
    """
    Convenience function to get revised forecast for pre-computed base forecasts

    Parameters
    ----------
    method : str
        The reconciliation method to use
    forecasts : Dict[str, numpy.ndarray]
        A dict mapping key name to its forecasts (including in-sample forecasts). Required.
    errors : Dict[str, numpy.ndarray]
        A dict mapping key name to the in-sample errors. Required for methods: ``OLS``, ``WLSS``, ``WLSV``
    residuals : Dict[str, numpy.ndarray]
        A dict mapping key name to the residuals of in-sample forecasts. Required for methods:
        ``OLS``, ``WLSS``, ``WLSV``
    summing_matrix : numpy.ndarray
        Not required if ``nodes`` argument is passed, or if using ``BU`` approach
    nodes : NAryTreeT
        The tree of nodes as specified in :py:class:`HierarchyTree <hts.hierarchy.HierarchyTree>`.
        Required if using the ``AHP``, ``PHA``, ``FP`` methods, or if using the ``OLS``, ``WLSS``,
        ``WLSV`` methods and not passing the ``summing_matrix`` parameter
    transformer : TransformT
        A transform with the method: ``inv_func`` that will be applied to the forecasts

    Returns
    -------
    revised forecasts : pandas.DataFrame
        The revised forecasts

    Raises
    ------
    ValueError
        If ``AHP``, ``PHA`` or ``FP`` is requested without ``nodes``, or if ``OLS``, ``WLSS`` or
        ``WLSV`` is requested without forecasts, errors, residuals and a summing matrix
    """
    if nodes:
        # to_sum_mat returns (matrix, labels); only the matrix is needed here.
        summing_matrix, _ = to_sum_mat(nodes)

    if method in (MethodT.AHP.name, MethodT.PHA.name, MethodT.FP.name) and not nodes:
        raise ValueError(f"Method {method} requires an NAryTree to be passed")

    if method in (MethodT.OLS.name, MethodT.WLSS.name, MethodT.WLSV.name):
        # Compare against None: the truth value of a multi-element ndarray is ambiguous.
        if not all([forecasts, errors, residuals]) or summing_matrix is None:
            raise ValueError(
                f"Method {method} requires forecasts, errors, and residuals to be passed, as "
                f"well as an NAryTree or a summing matrix")

    revision = RevisionMethod(name=method, sum_mat=summing_matrix, transformer=transformer)
    revised = revision.revise(forecasts=forecasts, mse=errors, nodes=nodes)
    return pandas.DataFrame(revised, columns=list(forecasts.keys()))
def test_sum_mat_visnights_hier(visnights_hier):
    """Check the summing matrix built for the ``visnights_hier`` fixture hierarchy."""
    series_names = [
        "total",
        "VIC",
        "QLD",
        "SAU",
        "WAU",
        "OTH",
        "NSW",
        "NSW_Metro",
        "NSW_NthCo",
        "NSW_NthIn",
        "NSW_SthCo",
        "NSW_SthIn",
        "OTH_Metro",
        "OTH_NoMet",
        "QLD_Cntrl",
        "QLD_Metro",
        "QLD_NthCo",
        "SAU_Coast",
        "SAU_Inner",
        "SAU_Metro",
        "VIC_EstCo",
        "VIC_Inner",
        "VIC_Metro",
        "VIC_WstCo",
        "WAU_Coast",
        "WAU_Inner",
        "WAU_Metro",
    ]
    # Only the column names are supplied; every column is an empty list.
    hier_df = pandas.DataFrame(data={name: [] for name in series_names})

    tree = hts.hierarchy.HierarchyTree.from_nodes(visnights_hier, hier_df)
    sum_mat, _labels = to_sum_mat(tree)

    # Rows for the aggregated series: the total followed by the six regions.
    aggregate_rows = numpy.array(
        [
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],  # total
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],  # VIC
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0],  # QLD
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],  # SAU
            [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # WAU
            [0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # OTH
            [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # NSW
        ]
    )

    # Each bottom-level series occupies exactly one column, in this order,
    # so its rows form an identity block.
    bottom_level = [
        "NSW_Metro",
        "NSW_NthCo",
        "NSW_NthIn",
        "NSW_SthCo",
        "NSW_SthIn",
        "OTH_Metro",
        "OTH_NoMet",
        "WAU_Coast",
        "WAU_Inner",
        "WAU_Metro",
        "SAU_Coast",
        "SAU_Inner",
        "SAU_Metro",
        "QLD_Cntrl",
        "QLD_Metro",
        "QLD_NthCo",
        "VIC_EstCo",
        "VIC_Inner",
        "VIC_Metro",
        "VIC_WstCo",
    ]
    leaf_rows = numpy.identity(len(bottom_level), dtype=int)

    expected_sum_mat = numpy.vstack([aggregate_rows, leaf_rows])
    numpy.testing.assert_array_equal(sum_mat, expected_sum_mat)
def test_sum_mat_uv(uv_tree):
    """Check the type and shape of the summing matrix built from ``uv_tree``.

    The matrix must be a ``numpy.ndarray`` with ``num_nodes() + 1`` rows and
    ``leaf_sum()`` columns.
    """
    summing_matrix, _labels = to_sum_mat(uv_tree)
    assert isinstance(summing_matrix, numpy.ndarray)

    dims = summing_matrix.shape
    assert dims[0] == uv_tree.num_nodes() + 1
    assert dims[1] == uv_tree.leaf_sum()
def revise_forecasts(
    method: str,
    forecasts: Dict[str, ArrayLike],
    errors: Optional[Dict[str, float]] = None,
    residuals: Optional[Dict[str, ArrayLike]] = None,
    summing_matrix: Optional[numpy.ndarray] = None,
    nodes: NAryTreeT = None,
    transformer: TransformT = None,
):
    """
    Convenience function to get revised forecast for pre-computed base forecasts

    Parameters
    ----------
    method : str
        The reconciliation method to use
    forecasts : Dict[str, ArrayLike]
        A dict mapping key name to its forecasts (including in-sample forecasts). Required, can be
        of type ``numpy.ndarray`` of ``ndim == 1``, ``pandas.Series``, or single columned
        ``pandas.DataFrame``
    errors : Dict[str, float]
        A dict mapping key name to the in-sample errors. Required for methods: ``OLS``, ``WLSS``,
        ``WLSV`` if ``residuals`` is not passed
    residuals : Dict[str, ArrayLike]
        A dict mapping key name to the residuals of in-sample forecasts. Required for methods:
        ``OLS``, ``WLSS``, ``WLSV``, can be of type ``numpy.ndarray`` of ndim == 1,
        ``pandas.Series``, or single columned ``pandas.DataFrame``. If passing residuals,
        ``errors`` dict is not required and will instead be calculated using MSE metric:
        ``numpy.mean(numpy.array(residual) ** 2)``
    summing_matrix : numpy.ndarray
        Not required if ``nodes`` argument is passed, or if using ``BU`` approach
    nodes : NAryTreeT
        The tree of nodes as specified in :py:class:`HierarchyTree <hts.hierarchy.HierarchyTree>`.
        Required if using the ``AHP``, ``PHA``, ``FP`` methods, or if using the ``OLS``, ``WLSS``,
        ``WLSV`` methods and not passing the ``summing_matrix`` parameter
    transformer : TransformT
        A transform with the method: ``inv_func`` that will be applied to the forecasts

    Returns
    -------
    revised forecasts : ``pandas.DataFrame``
        The revised forecasts

    Raises
    ------
    ValueError
        If ``AHP``, ``PHA`` or ``FP`` is requested without ``nodes``, or if ``OLS``, ``WLSS`` or
        ``WLSV`` is requested without forecasts, errors (or residuals to derive them from) and a
        summing matrix
    """
    if nodes:
        # to_sum_mat returns (matrix, labels); only the matrix is needed here.
        summing_matrix, _ = to_sum_mat(nodes)

    if method in (MethodT.AHP.name, MethodT.PHA.name, MethodT.FP.name) and not nodes:
        raise ValueError(f"Method {method} requires an NAryTree to be passed")

    if method in (MethodT.OLS.name, MethodT.WLSS.name, MethodT.WLSV.name):
        errors = _calculate_errors(method=method, errors=errors, residuals=residuals)
        # Compare against None: the truth value of a multi-element ndarray is ambiguous.
        if not all([forecasts, errors]) or summing_matrix is None:
            raise ValueError(
                f"Method {method} requires forecasts, errors, and residuals to be passed, as "
                f"well as an NAryTree or a summing matrix")

    revision = RevisionMethod(name=method, sum_mat=summing_matrix, transformer=transformer)
    sanitized_forecasts = _sanitize_forecasts_dict(forecasts)
    revised = revision.revise(forecasts=sanitized_forecasts, mse=errors, nodes=nodes)
    return pandas.DataFrame(revised, columns=list(sanitized_forecasts.keys()))
def test_sum_mat_mv(mv_tree):
    """Check the type and shape of the summing matrix built from ``mv_tree``.

    The matrix must be a ``numpy.ndarray`` with ``num_nodes() + 1`` rows and
    ``leaf_sum()`` columns.
    """
    # to_sum_mat returns (matrix, labels); unpack so the checks below run
    # against the matrix rather than the tuple.
    mat, _labels = to_sum_mat(mv_tree)
    assert isinstance(mat, numpy.ndarray)
    shp = mat.shape
    assert shp[0] == mv_tree.num_nodes() + 1
    assert shp[1] == mv_tree.leaf_sum()