def ljungbox(
    vdf: vDataFrame,
    column: str,
    ts: str,
    by: list = [],
    p: int = 1,
    alpha: float = 0.05,
    box_pierce: bool = False,
):
    """
    ---------------------------------------------------------------------------
    Ljung–Box test (whether any of a group of autocorrelations of a time series
    are different from zero).

    Parameters
    ----------
    vdf: vDataFrame
        Input vDataFrame.
    column: str
        Input vcolumn to test.
    ts: str
        vcolumn used as timeline. It is used to order the data. It can be a
        numerical or a date-like (date, datetime, timestamp...) vcolumn.
    by: list, optional
        vcolumns used in the partition.
    p: int, optional
        Number of lags to consider in the test. Must be an integer as it
        drives the number of rows of the result.
    alpha: float, optional
        Significance level. A row is flagged as serially correlated when its
        p-value is strictly lower than alpha.
    box_pierce: bool, optional
        If set to True, the Box-Pierce statistic will be used.

    Returns
    -------
    tablesample
        An object containing the result: one row per index from 1 to p with
        the cumulative test statistic, its chi-squared p-value (degrees of
        freedom equal to the index) and the "Serial Correlation" flag.
        For more information, see utilities.tablesample.
    """
    check_types(
        [
            ("ts", ts, [str]),
            ("column", column, [str]),
            ("by", by, [list]),
            # range(p) below needs an integer, so a float p is not accepted.
            ("p", p, [int]),
            ("alpha", alpha, [int, float]),
            ("box_pierce", box_pierce, [bool]),
            ("vdf", vdf, [vDataFrame]),
        ],
    )
    columns_check([column] + [ts] + by, vdf)
    column = vdf_columns_names([column], vdf)[0]
    ts = vdf_columns_names([ts], vdf)[0]
    by = vdf_columns_names(by, vdf)
    acf = vdf.acf(column=column, ts=ts, by=by, p=p, show=False)
    # With p >= 2 the ACF comes back as a table; otherwise the returned object
    # is used directly as the single coefficient.
    # NOTE(review): the loop below treats acf[k] as the autocorrelation of lag
    # k + 1 -- confirm that the first element returned by vdf.acf is not lag 0.
    if p >= 2:
        acf = acf.values["value"]
    else:
        acf = [acf]
    n = vdf[column].count()
    name = "Box-Pierce Test Statistic" if box_pierce else "Ljung–Box Test Statistic"
    result = tablesample(
        {"index": [], name: [], "p_value": [], "Serial Correlation": []}
    )
    # The multiplier does not depend on the lag: n for Box-Pierce,
    # n * (n + 2) for Ljung-Box.
    mult = n if box_pierce else n * (n + 2)
    Q = 0
    for k in range(p):
        # Ljung-Box weights each squared coefficient by 1 / (n - lag).
        div = 1 if box_pierce else n - k - 1
        Q += mult * acf[k] ** 2 / div
        pvalue = chi2.sf(Q, k + 1)
        result.values["index"].append(k + 1)
        result.values[name].append(Q)
        result.values["p_value"].append(pvalue)
        # bool() keeps a plain Python boolean in the result.
        result.values["Serial Correlation"].append(bool(pvalue < alpha))
    return result
def _plot_acf_pacf_panel(ax, x, y, confidence, color, param, tmp_style, title):
    """Draw one correlogram panel (bars, markers, zero line, band) on ax."""
    ax.bar(x, y, width=0.007 * len(x), color="#444444", zorder=1, linewidth=0)
    # User-supplied style options are merged into the default marker style.
    ax.scatter(x, y, **updated_dict(param, tmp_style))
    # Horizontal zero line extended by one unit on each side of the points.
    ax.plot(
        [-1] + x + [x[-1] + 1], [0] * (len(x) + 2), color=color, zorder=0,
    )
    # Shaded area between zero and +/- the confidence values.
    ax.fill_between(x, confidence, color="#FE5016", alpha=0.1)
    ax.fill_between(x, [-elem for elem in confidence], color="#FE5016", alpha=0.1)
    ax.set_title(title)


def plot_acf_pacf(
    vdf: vDataFrame,
    column: str,
    ts: str,
    by: list = [],
    p: (int, list) = 15,
    **style_kwds,
):
    """
    ---------------------------------------------------------------------------
    Draws the ACF and PACF Charts.

    The function sets the global Matplotlib "axes.facecolor" rcParam and
    displays the figure with plt.show() before returning.

    Parameters
    ----------
    vdf: vDataFrame
        Input vDataFrame.
    column: str
        Response column.
    ts: str
        vcolumn used as timeline. It is used to order the data. It can be a
        numerical or a date-like (date, datetime, timestamp...) vcolumn.
    by: list, optional
        vcolumns used in the partition.
    p: int/list, optional
        Int equal to the maximum number of lags to consider during the
        computation or List of the different lags to include during the
        computation. p must be positive or a list of positive integers.
    **style_kwds
        Any optional parameter to pass to the Matplotlib functions. "color"
        is used for the markers and the zero line; "colors" is ignored.

    Returns
    -------
    tablesample
        An object containing the result (columns "index", "acf", "pacf" and
        "confidence"). For more information, see utilities.tablesample.
    """
    check_types(
        [
            ("column", column, [str]),
            ("ts", ts, [str]),
            ("by", by, [list]),
            # list is accepted because p may be a list of lags; float is
            # kept so that previously accepted inputs still pass validation.
            ("p", p, [int, float, list]),
            ("vdf", vdf, [vDataFrame]),
        ]
    )
    # Colour options are handled separately from the other style options.
    tmp_style = {
        key: value
        for key, value in style_kwds.items()
        if key not in ("color", "colors")
    }
    color = style_kwds["color"] if "color" in style_kwds else gen_colors()[0]
    columns_check([column, ts] + by, vdf)
    by = vdf_columns_names(by, vdf)
    column, ts = vdf_columns_names([column, ts], vdf)
    acf = vdf.acf(ts=ts, column=column, by=by, p=p, show=False)
    pacf = vdf.pacf(ts=ts, column=column, by=by, p=p, show=False)
    # NOTE(review): the confidence values come from the PACF result only and
    # are drawn on both panels -- confirm this is intended for the ACF panel.
    result = tablesample(
        {
            "index": list(range(len(acf.values["value"]))),
            "acf": acf.values["value"],
            "pacf": pacf.values["value"],
            "confidence": pacf.values["confidence"],
        },
    )
    fig = plt.figure(figsize=(10, 6))
    plt.rcParams["axes.facecolor"] = "#FCFCFC"
    x = result.values["index"]
    confidence = result.values["confidence"]
    param = {
        "s": 90,
        "marker": "o",
        "facecolors": color,
        "edgecolors": "black",
        "zorder": 2,
    }
    ax1 = fig.add_subplot(211)
    # Applies to the current axes, i.e. the ACF panel that was just created.
    plt.xlim(-1, x[-1] + 1)
    _plot_acf_pacf_panel(
        ax1, x, result.values["acf"], confidence, color, param, tmp_style,
        "Autocorrelation",
    )
    ax2 = fig.add_subplot(212)
    _plot_acf_pacf_panel(
        ax2, x, result.values["pacf"], confidence, color, param, tmp_style,
        "Partial Autocorrelation",
    )
    plt.show()
    return result