Example #1
0
def make_and_compare_predictions(
    gathered_stats: TimeSeries,
    predictions_to_make: ModelsToMake,
    prediction_duration_past: pd.Timedelta = _ONE_DAY,
    prediction_duration_future: pd.Timedelta = None,
    metric: Metric = metrics.coefficient_of_variation,
    transform: bool = False,
) -> PredictionEvaluations:
    """Forecast over a held-out tail of the series and score each forecast.

    The series is split at ``end_time() - prediction_duration_past``; the
    first part is used for training and the second part is kept as the
    reference that forecasts are compared against.

    Parameters
    ----------
    gathered_stats
        The full series to split into training and reference parts.
    predictions_to_make
        Passed through unchanged to the forecasting helper.
    prediction_duration_past
        How far back from the end of the series the split point lies.
    prediction_duration_future
        Optional extra duration to forecast; when truthy it is converted to
        a whole number of steps by dividing by ``gathered_stats.freq()``
        and added to the forecast length.
    metric
        Passed to ``compare_predictions`` together with the reference part
        and the forecasts.
    transform
        When true, ``make_forecasts_ensure_positive`` is used instead of
        ``make_forecasts``.

    Returns
    -------
    PredictionEvaluations
        The forecasts together with the result of ``compare_predictions``.
    """
    split_point = gathered_stats.end_time() - prediction_duration_past
    train, actual = gathered_stats.split_after(split_point)

    # Forecast at least as many steps as the held-out part contains, plus
    # any additional steps requested beyond the end of the series.
    horizon: int = len(actual)
    if prediction_duration_future:
        extra_steps = int(prediction_duration_future / gathered_stats.freq())
        horizon += extra_steps

    # Both helpers take the same keyword arguments, so pick one and call once.
    forecaster = make_forecasts_ensure_positive if transform else make_forecasts
    forecasts = forecaster(
        train=train,
        n_pred=horizon,
        predictions_to_make=predictions_to_make,
    )

    return PredictionEvaluations(
        predictions=forecasts,
        evaluations=compare_predictions(actual, forecasts, metric),
    )
Example #2
0
def extract_subseries(series: TimeSeries, min_gap_size: Optional[int] = 1) -> List[TimeSeries]:
    """
    Partitions the series into a sequence of sub-series by using significant gaps of missing values

    Parameters
    ----------
    series
        The TimeSeries to partition into sub-series

    min_gap_size
        The minimum number of contiguous missing values to consider a gap as significant. Defaults to 1.
        Passing None is treated the same as the default of 1.

    Returns
    -------
    subseries
        A list of TimeSeries, sub-series without significant gaps of missing values.
        If the stripped series contains no missing values at all, the list holds
        that single stripped series.
    """

    # The signature allows None, but None cannot be used in the gap-size
    # filter below (it would be rendered as "gap_size>=None"), so fall back
    # to the documented default.
    if min_gap_size is None:
        min_gap_size = 1

    # Remove null values from the series extremes
    series = series.strip()
    freq = series.freq

    # Nothing to split on when there are no missing values left.
    if series.pd_dataframe().isna().sum().sum() == 0:
        return [series]

    # Get start/end times of sub-series without gaps of missing values:
    # each sub-series starts one step after a significant gap ends and
    # finishes one step before the next significant gap starts.
    gaps_df = series.gaps().query(f'gap_size>={min_gap_size}')
    start_times = [series.start_time()] + (gaps_df['gap_end'] + freq).to_list()
    end_times = (gaps_df['gap_start'] - freq).to_list() + [series.end_time() + freq]

    return [series[start:end] for start, end in zip(start_times, end_times)]