Example #1
0
def pipeline_override_d(data):
    """Fit a grouped AutoARIMA pipeline with pre-computed differencing terms.

    The differencing values are obtained from ``PmdarimaAnalyzer`` via
    ``calculate_ndiffs`` and ``calculate_nsdiffs`` and are then passed to
    ``GroupedPmdarima.fit`` through its ``ndiffs`` and ``nsdiffs`` arguments.

    :param data: object exposing ``df`` (passed as the ``df`` argument) and
                 ``key_columns`` (passed as ``group_key_columns``).
    :return: the result of ``GroupedPmdarima(...).fit(...)``.
    """
    # Single-step pipeline wrapping the AutoARIMA estimator.
    arima_step = ("arima", AutoARIMA(out_of_sample_size=30))
    template = Pipeline(steps=[arima_step])

    # The analyzer and the fit call describe the same data set, so the
    # shared keyword arguments are built once and unpacked into both.
    frame_spec = {
        "df": data.df,
        "group_key_columns": data.key_columns,
        "y_col": "y",
        "datetime_col": "ds",
    }

    analyzer = PmdarimaAnalyzer(**frame_spec)
    differencing = {
        "ndiffs": analyzer.calculate_ndiffs(alpha=0.2, test="kpss", max_d=7),
        "nsdiffs": analyzer.calculate_nsdiffs(m=7, test="ocsb", max_D=7),
    }

    grouped = GroupedPmdarima(template)
    return grouped.fit(**frame_spec, **differencing, silence_warnings=True)
                    suppress_warnings=True,
                    error_action="ignore",
                ),
            )
        ]
    )

    diff_analyzer = PmdarimaAnalyzer(
        df=training_data,
        group_key_columns=group_key_columns,
        y_col="y",
        datetime_col="ds",
    )
    ndiff = diff_analyzer.calculate_ndiffs(
        alpha=0.05,
        test="kpss",
        max_d=4,
    )

    grouped_model = GroupedPmdarima(model_template=pipeline).fit(
        df=training_data,
        group_key_columns=group_key_columns,
        y_col="y",
        datetime_col="ds",
        ndiffs=ndiff,
        silence_warnings=True,
    )

    # Save to local directory
    save_dir = "/tmp/group_pmdarima/pipeline_override.gpmd"
    grouped_model.save(save_dir)
    analyzer = PmdarimaAnalyzer(
        df=training_data,
        group_key_columns=group_key_columns,
        y_col="y",
        datetime_col="ds",
    )

    # Decompose the trends of each group
    decomposed_trends = analyzer.decompose_groups(m=7, type_="additive")

    print("Decomposed trend data for the groups")
    print("-" * 100, "\n")
    print(decomposed_trends[:50].to_string())

    # Calculate optimal differencing for ARMA terms
    ndiffs = analyzer.calculate_ndiffs(alpha=0.1, test="kpss", max_d=5)

    _print_dict(ndiffs, "Differencing")

    # Calculate seasonal differencing
    nsdiffs = analyzer.calculate_nsdiffs(m=365, test="ocsb", max_D=5)

    _print_dict(nsdiffs, "Seasonal Differencing")

    # Get the autocorrelation function for each group
    group_acf = analyzer.calculate_acf(unbiased=True,
                                       nlags=120,
                                       qstat=True,
                                       fft=True,
                                       alpha=0.05,
                                       adjusted=True)