예제 #1
0
def test_sum_mat_hierarchical():
    """Check ``to_sum_mat`` on a two-level hierarchy with distinct node names."""
    hierarchy = {
        "total": ["A", "B"],
        "A": ["A_X", "A_Y", "A_Z"],
        "B": ["B_X", "B_Y"],
    }
    column_names = ["total", "A", "B", "A_X", "A_Y", "A_Z", "B_X", "B_Y"]
    # The frame has no rows; it only provides one (empty) column per node.
    hier_df = pandas.DataFrame(data={name: [] for name in column_names})

    tree = hts.hierarchy.HierarchyTree.from_nodes(hierarchy, hier_df)
    sum_mat, sum_mat_labels = to_sum_mat(tree)

    # One row per node; the columns line up with A_X, A_Y, A_Z, B_X, B_Y.
    expected_rows = [
        [1, 1, 1, 1, 1],  # total
        [0, 0, 0, 1, 1],  # B
        [1, 1, 1, 0, 0],  # A
        [1, 0, 0, 0, 0],  # A_X
        [0, 1, 0, 0, 0],  # A_Y
        [0, 0, 1, 0, 0],  # A_Z
        [0, 0, 0, 1, 0],  # B_X
        [0, 0, 0, 0, 1],  # B_Y
    ]
    expected_labels = ["total", "B", "A", "A_X", "A_Y", "A_Z", "B_X", "B_Y"]

    numpy.testing.assert_array_equal(sum_mat, numpy.array(expected_rows))
    assert sum_mat_labels == expected_labels
예제 #2
0
def test_demo_unique_constraint():
    """Pin the summing matrix produced when node names overlap across levels.

    The hierarchy follows the example at https://otexts.com/fpp2/hts.html.
    The construction does not work when elements share (part of) a name but
    sit at different levels of the hierarchy; the expected matrix below
    records the resulting incorrect rows.
    """
    hierarchy = {
        "total": ["A", "B"],
        "A": ["AA", "AB", "AC"],
        "B": ["BA", "BB"],
    }
    column_names = ["total", "A", "B", "AA", "AB", "AC", "BA", "BB"]
    # The frame has no rows; it only provides one (empty) column per node.
    hier_df = pandas.DataFrame(data={name: [] for name in column_names})

    tree = hts.hierarchy.HierarchyTree.from_nodes(hierarchy, hier_df)
    sum_mat, _ = to_sum_mat(tree)

    expected_rows = [
        [1, 1, 1, 1, 1],  # total
        [0, 1, 0, 1, 1],  # B, Incorrectly finds B in AB
        [1, 1, 1, 1, 0],  # A, Incorrectly finds A in BA
        [1, 0, 0, 0, 0],  # AA
        [0, 1, 0, 0, 0],  # AB
        [0, 0, 1, 0, 0],  # AC
        [0, 0, 0, 1, 0],  # BA
        [0, 0, 0, 0, 1],  # BB
    ]

    numpy.testing.assert_array_equal(sum_mat, numpy.array(expected_rows))
예제 #3
0
    def __init_hts(
        self,
        nodes: Optional[NodesT] = None,
        df: Optional[pandas.DataFrame] = None,
        tree: Optional[HierarchyTree] = None,
        root: str = "root",
        exogenous: Optional[List[str]] = None,
    ):
        """
        Set up the hierarchy tree, the summing matrix and the model/revision state.

        Parameters
        ----------
        nodes : NodesT, optional
            Hierarchy description forwarded to ``HierarchyTree.from_nodes``.
        df : pandas.DataFrame, optional
            Data forwarded to ``HierarchyTree.from_nodes``.
        tree : HierarchyTree, optional
            Pre-built tree; used only when neither ``nodes`` nor ``df`` is given.
        root : str
            Forwarded to ``HierarchyTree.from_nodes``.
        exogenous : List[str], optional
            Forwarded to ``HierarchyTree.from_nodes`` and stored on
            ``self.exogenous``.

        Raises
        ------
        InvalidArgumentException
            If ``nodes``, ``df`` and ``tree`` are all missing.
        """
        # ``not df`` raises ValueError for any DataFrame (its truth value is
        # ambiguous), so the frame is compared against None explicitly.
        if not nodes and df is None:
            if not tree:
                raise InvalidArgumentException(
                    "Either nodes and df must be passed, or a pre-built hierarchy tree"
                )
            self.nodes = tree
        else:
            self.nodes = HierarchyTree.from_nodes(
                nodes=nodes,
                df=df,
                exogenous=exogenous,
                root=root,
            )
        self.exogenous = exogenous
        # Only the matrix is kept; the labels returned with it are not used here.
        self.sum_mat, _ = to_sum_mat(self.nodes)
        self._set_model_instance()
        self._init_revision()
예제 #4
0
def revise_forecasts(
    method: str,
    forecasts: Dict[str, numpy.ndarray],
    errors: Optional[Dict[str, numpy.ndarray]] = None,
    residuals: Optional[Dict[str, numpy.ndarray]] = None,
    summing_matrix: Optional[numpy.ndarray] = None,
    nodes: NAryTreeT = None,
    transformer: TransformT = None,
):
    """
    Convenience function to get revised forecast for pre-computed base forecasts

    Parameters
    ----------
    method : str
        The reconciliation method to use
    forecasts : Dict[str, numpy.ndarray]
        A dict mapping key name to its forecasts (including in-sample forecasts). Required.
    errors : Dict[str, numpy.ndarray]
        A dict mapping key name to the in-sample errors. Required for methods: ``OLS``, ``WLSS``, ``WLSV``
    residuals : Dict[str, numpy.ndarray]
        A dict mapping key name to the residuals of in-sample forecasts. Required for methods: ``OLS``,
        ``WLSS``, ``WLSV``
    summing_matrix : numpy.ndarray
        Not required if ``nodes`` argument is passed, or if using ``BU`` approach
    nodes : NAryTreeT
        The tree of nodes as specified in :py:class:`HierarchyTree <hts.hierarchy.HierarchyTree>`. Required
        when using the ``AHP``, ``PHA``, ``FP`` methods, or when using the ``OLS``, ``WLSS``, ``WLSV``
        methods without passing the ``summing_matrix`` parameter
    transformer : TransformT
        A transform with the method: ``inv_func`` that will be applied to the forecasts

    Returns
    -------
    revised forecasts : pandas.DataFrame
        The revised forecasts

    Raises
    ------
    ValueError
        If ``method`` is ``AHP``, ``PHA`` or ``FP`` and ``nodes`` is not passed, or if ``method`` is
        ``OLS``, ``WLSS`` or ``WLSV`` and any of ``forecasts``, ``errors``, ``residuals`` is missing or
        no summing matrix is available.
    """

    if nodes:
        # A tree, when given, takes precedence over an explicitly passed matrix.
        # NOTE(review): the result is used as the matrix itself; confirm that
        # ``to_sum_mat`` does not return a (matrix, labels) pair in this version.
        summing_matrix = to_sum_mat(nodes)

    tree_methods = (MethodT.AHP.name, MethodT.PHA.name, MethodT.FP.name)
    if method in tree_methods and not nodes:
        raise ValueError(f"Method {method} requires an NAryTree to be passed")

    optimal_methods = (MethodT.OLS.name, MethodT.WLSS.name, MethodT.WLSV.name)
    if method in optimal_methods:
        # The matrix is compared against None rather than tested for truthiness:
        # ``not ndarray`` raises for arrays holding more than one element.
        if not all([forecasts, errors, residuals]) or summing_matrix is None:
            raise ValueError(
                f"Method {method} requires forecasts, errors, and residuals to be passed, as "
                f"well as an NAryTree or a summing matrix")

    revision = RevisionMethod(name=method,
                              sum_mat=summing_matrix,
                              transformer=transformer)

    revised = revision.revise(forecasts=forecasts, mse=errors, nodes=nodes)

    return pandas.DataFrame(revised, columns=list(forecasts.keys()))
예제 #5
0
def test_sum_mat_visnights_hier(visnights_hier):
    """Check ``to_sum_mat`` against the expected matrix for the ``visnights_hier`` fixture."""
    column_names = [
        "total",
        "VIC",
        "QLD",
        "SAU",
        "WAU",
        "OTH",
        "NSW",
        "NSW_Metro",
        "NSW_NthCo",
        "NSW_NthIn",
        "NSW_SthCo",
        "NSW_SthIn",
        "OTH_Metro",
        "OTH_NoMet",
        "QLD_Cntrl",
        "QLD_Metro",
        "QLD_NthCo",
        "SAU_Coast",
        "SAU_Inner",
        "SAU_Metro",
        "VIC_EstCo",
        "VIC_Inner",
        "VIC_Metro",
        "VIC_WstCo",
        "WAU_Coast",
        "WAU_Inner",
        "WAU_Metro",
    ]
    # The frame has no rows; it only provides one (empty) column per node.
    hier_df = pandas.DataFrame(data={name: [] for name in column_names})

    tree = hts.hierarchy.HierarchyTree.from_nodes(visnights_hier, hier_df)
    sum_mat, _ = to_sum_mat(tree)

    # Rows for the aggregated nodes (the total and the six regions).
    aggregate_rows = numpy.array(
        [
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],  # total
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],  # VIC
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0],  # QLD
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],  # SAU
            [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # WAU
            [0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # OTH
            [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # NSW
        ]
    )
    # The 20 bottom-level rows form an identity block, in this order:
    # NSW_Metro, NSW_NthCo, NSW_NthIn, NSW_SthCo, NSW_SthIn, OTH_Metro,
    # OTH_NoMet, WAU_Coast, WAU_Inner, WAU_Metro, SAU_Coast, SAU_Inner,
    # SAU_Metro, QLD_Cntrl, QLD_Metro, QLD_NthCo, VIC_EstCo, VIC_Inner,
    # VIC_Metro, VIC_WstCo.
    leaf_rows = numpy.eye(20, dtype=int)
    expected_sum_mat = numpy.vstack([aggregate_rows, leaf_rows])

    numpy.testing.assert_array_equal(sum_mat, expected_sum_mat)
예제 #6
0
def test_sum_mat_uv(uv_tree):
    """Check the type and shape of the summing matrix built from ``uv_tree``."""
    matrix, _ = to_sum_mat(uv_tree)
    assert isinstance(matrix, numpy.ndarray)
    dims = matrix.shape
    # Rows are compared with ``num_nodes() + 1``, columns with ``leaf_sum()``.
    assert dims[0] == uv_tree.num_nodes() + 1
    assert dims[1] == uv_tree.leaf_sum()
예제 #7
0
def revise_forecasts(
    method: str,
    forecasts: Dict[str, ArrayLike],
    errors: Optional[Dict[str, float]] = None,
    residuals: Optional[Dict[str, ArrayLike]] = None,
    summing_matrix: Optional[numpy.ndarray] = None,
    nodes: NAryTreeT = None,
    transformer: TransformT = None,
):
    """
    Convenience function to get revised forecast for pre-computed base forecasts

    Parameters
    ----------
    method : str
        The reconciliation method to use
    forecasts : Dict[str, ArrayLike]
        A dict mapping key name to its forecasts (including in-sample forecasts). Required, can be
        of type ``numpy.ndarray`` of ``ndim == 1``, ``pandas.Series``, or single columned ``pandas.DataFrame``
    errors : Dict[str, float]
        A dict mapping key name to the in-sample errors. Required for methods: ``OLS``, ``WLSS``, ``WLSV`` if
        ``residuals`` is not passed
    residuals : Dict[str, ArrayLike]
        A dict mapping key name to the residuals of in-sample forecasts. Required for methods: ``OLS``, ``WLSS``,
        ``WLSV``, can be of type ``numpy.ndarray`` of ndim == 1, ``pandas.Series``, or single columned
        ``pandas.DataFrame``. If passing residuals, ``errors`` dict is not required and will instead be calculated
        using MSE metric: ``numpy.mean(numpy.array(residual) ** 2)``
    summing_matrix : numpy.ndarray
        Not required if ``nodes`` argument is passed, or if using ``BU`` approach
    nodes : NAryTreeT
        The tree of nodes as specified in :py:class:`HierarchyTree <hts.hierarchy.HierarchyTree>`. Required
        when using the ``AHP``, ``PHA``, ``FP`` methods, or when using the ``OLS``, ``WLSS``, ``WLSV``
        methods without passing the ``summing_matrix`` parameter
    transformer : TransformT
        A transform with the method: ``inv_func`` that will be applied to the forecasts

    Returns
    -------
    revised forecasts : ``pandas.DataFrame``
        The revised forecasts

    Raises
    ------
    ValueError
        If ``method`` is ``AHP``, ``PHA`` or ``FP`` and ``nodes`` is not passed, or if ``method`` is
        ``OLS``, ``WLSS`` or ``WLSV`` and the forecasts or errors are missing or no summing matrix is
        available.
    """

    if nodes:
        # A tree, when given, takes precedence over an explicitly passed matrix;
        # the labels returned with the matrix are not needed here.
        summing_matrix, _ = to_sum_mat(nodes)

    tree_methods = (MethodT.AHP.name, MethodT.PHA.name, MethodT.FP.name)
    if method in tree_methods and not nodes:
        raise ValueError(f"Method {method} requires an NAryTree to be passed")

    optimal_methods = (MethodT.OLS.name, MethodT.WLSS.name, MethodT.WLSV.name)
    if method in optimal_methods:
        errors = _calculate_errors(method=method,
                                   errors=errors,
                                   residuals=residuals)
        # The matrix is compared against None rather than tested for truthiness:
        # ``not ndarray`` raises for arrays holding more than one element.
        if not all([forecasts, errors]) or summing_matrix is None:
            raise ValueError(
                f"Method {method} requires forecasts, errors, and residuals to be passed, as "
                f"well as an NAryTree or a summing matrix")

    revision = RevisionMethod(name=method,
                              sum_mat=summing_matrix,
                              transformer=transformer)
    sanitized_forecasts = _sanitize_forecasts_dict(forecasts)
    revised = revision.revise(forecasts=sanitized_forecasts,
                              mse=errors,
                              nodes=nodes)

    return pandas.DataFrame(revised, columns=list(sanitized_forecasts.keys()))
예제 #8
0
def test_sum_mat_mv(mv_tree):
    """Check the type and shape of the summing matrix built from ``mv_tree``."""
    # NOTE(review): the return value is used directly as the matrix here; confirm
    # that ``to_sum_mat`` does not return a (matrix, labels) pair in this version.
    matrix = to_sum_mat(mv_tree)
    assert isinstance(matrix, numpy.ndarray)
    dims = matrix.shape
    # Rows are compared with ``num_nodes() + 1``, columns with ``leaf_sum()``.
    assert dims[0] == mv_tree.num_nodes() + 1
    assert dims[1] == mv_tree.leaf_sum()