Ejemplo n.º 1
0
def test_get_duration(n_timepoints, index_type):
    """Check `_get_duration` for an index built by `_make_index`.

    For the "timedelta" index type, building the index is itself expected
    to raise a ValueError, so only that error is verified.
    """
    if index_type == "timedelta":
        # unsupported index class: construction must fail with this message
        with pytest.raises(
            ValueError, match="index_class: timedelta is not supported"
        ):
            _make_index(n_timepoints, index_type)
        return

    time_index = _make_index(n_timepoints, index_type)

    # default call: the result has to be one of the duration-like types
    raw_duration = _get_duration(time_index)
    duration_types = (
        pd.Timedelta,
        pd.tseries.offsets.BaseOffset,
        int,
        np.integer,
    )
    assert isinstance(raw_duration, duration_types)

    # coerced call: the result has to be an integer, one less than the
    # number of time points in the index
    int_duration = _get_duration(time_index, coerce_to_int=True)
    assert isinstance(int_duration, (int, np.integer))
    assert int_duration == n_timepoints - 1
Ejemplo n.º 2
0
def test_get_duration(n_timepoints, index_type):
    """Check the type and integer value returned by `_get_duration`."""
    time_index = _make_index(n_timepoints, index_type)

    # default call: the result has to be one of the duration-like types
    raw_duration = _get_duration(time_index)
    duration_types = (
        pd.Timedelta,
        pd.tseries.offsets.BaseOffset,
        int,
        np.integer,
    )
    assert isinstance(raw_duration, duration_types)

    # coerced call: the result has to be an integer, one less than the
    # number of time points in the index
    int_duration = _get_duration(time_index, coerce_to_int=True)
    assert isinstance(int_duration, (int, np.integer))
    assert int_duration == n_timepoints - 1
Ejemplo n.º 3
0
def _make_hierarchical(
    hierarchy_levels: Tuple = (2, 4),
    max_timepoints: int = 12,
    min_timepoints: int = 12,
    same_cutoff: bool = True,
    n_columns: int = 1,
    all_positive: bool = True,
    index_type: str = None,
    random_state: Union[int, np.random.RandomState] = None,
    add_nan: bool = False,
) -> pd.DataFrame:
    """Generate hierarchical multiindex mtype for testing.

    The result is indexed by one level per entry of ``hierarchy_levels``
    (named ``"h0"``, ``"h1"``, ...) plus a final ``"time"`` level, and holds
    normally distributed random values.

    Parameters
    ----------
    hierarchy_levels : Tuple, optional
        the number of groups at each hierarchy level, by default (2, 4)
    max_timepoints : int, optional
        maximum time points a series can have, by default 12.
        If it differs from ``min_timepoints``, the length of each series is
        drawn with ``rng.randint(low=min_timepoints, high=max_timepoints)``,
        whose upper bound is exclusive.
    min_timepoints : int, optional
        minimum time points a series can have, by default 12.
        If equal to ``max_timepoints``, every series has exactly that many
        time points.
    same_cutoff : bool, optional
        Only relevant when series lengths are drawn randomly. If True, each
        series uses the trailing part of an index of length
        ``max_timepoints``, so all series end at the same time point;
        otherwise each series gets its own index of the drawn length.
        By default True
    n_columns : int, optional
        number of columns in the output dataframe, by default 1
    all_positive : bool, optional
        If True, every column is shifted so that its smallest generated
        value equals 1, i.e. all non-NaN values are positive,
        by default True
    index_type : str, optional
        type of index, by default None. Forwarded to ``_make_index``.
        Supported types are "period", "datetime", "range" or "int".
        If it's not provided, "datetime" is selected.
    random_state : int, np.random.RandomState or None
        Controls the randomness of the generated data, by default None
    add_nan : bool, optional
        If True, the first, middle and last rows of the data are set to NaN,
        by default False

    Returns
    -------
    pd.DataFrame
        hierarchical mtype dataframe with columns ``"c0"``, ``"c1"``, ...
    """
    level_names = [f"h{i}" for i in range(len(hierarchy_levels))]
    levels = [
        [f"h{i}_{j}" for j in range(n_groups)]
        for i, n_groups in enumerate(hierarchy_levels)
    ]
    index_names = level_names + ["time"]
    rng = check_random_state(random_state)

    if min_timepoints == max_timepoints:
        # equal-length series: plain cartesian product of groups and time
        time_index = _make_index(max_timepoints, index_type)
        index = pd.MultiIndex.from_product(levels + [time_index],
                                           names=index_names)
    else:
        # unequal-length series: build one frame of index columns per leaf
        df_list = []
        for levels_tuple in product(*levels):
            # NOTE: `high` is exclusive, so max_timepoints itself is not drawn
            n_timepoints = rng.randint(low=min_timepoints, high=max_timepoints)
            if same_cutoff:
                # keep the tail so that every series shares the last time point
                time_index = _make_index(max_timepoints,
                                         index_type)[-n_timepoints:]
            else:
                time_index = _make_index(n_timepoints, index_type)
            d = dict(zip(level_names, levels_tuple))
            d["time"] = time_index
            df_list.append(pd.DataFrame(d))
        index = pd.MultiIndex.from_frame(pd.concat(df_list),
                                         names=index_names)

    data = rng.normal(size=(len(index), n_columns))
    if all_positive:
        # Shift before any NaN is injected: np.min propagates NaN, so doing
        # this afterwards would turn every value of the frame into NaN.
        data -= np.min(data, axis=0) - 1
    if add_nan:
        # blank out the first, middle and last rows
        data[len(data) // 2] = np.nan
        data[0] = np.nan
        data[-1] = np.nan

    return pd.DataFrame(data=data,
                        index=index,
                        columns=[f"c{i}" for i in range(n_columns)])