Python vDataFrame.acf Examples

Programming Language: Python

Namespace/Package Name: verticapy

Class/Type: vDataFrame

Method/Function: acf

Examples at hotexamples.com: 2

Python vDataFrame.acf - 2 examples found. These are the top rated real world Python examples of verticapy.vDataFrame.acf extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

__genSQL__(7)

shape(4)

are_namecols_in(3)

__executeSQL__(2)

acf(2)

copy(2)

format_colnames(2)

asfreq(1)

pacf(1)

search(1)

Example #1

Show file

File: tools.py Project: bryanherger/Vertica-ML-Python

def ljungbox(
    vdf: vDataFrame,
    column: str,
    ts: str,
    by: list = [],
    p: int = 1,
    alpha: float = 0.05,
    box_pierce: bool = False,
):
    """
---------------------------------------------------------------------------
Ljung–Box test (whether any of a group of autocorrelations of a time series 
are different from zero).

Parameters
----------
vdf: vDataFrame
    Input vDataFrame.
column: str
    Input vcolumn to test.
ts: str
    vcolumn used as timeline. It will be to use to order the data. It can be
    a numerical or type date like (date, datetime, timestamp...) vcolumn.
by: list, optional
    vcolumns used in the partition.
p: int, optional
    Number of lags to consider in the test.
alpha: float, optional
    Significance Level. Probability to accept H0.
box_pierce: bool
    If set to True, the Box-Pierce statistic will be used.

Returns
-------
tablesample
    An object containing the result. For more information, see
    utilities.tablesample.
    """
    check_types(
        [
            ("ts", ts, [str],),
            ("column", column, [str],),
            ("by", by, [list],),
            ("p", p, [int, float],),
            ("alpha", alpha, [int, float],),
            ("box_pierce", box_pierce, [bool],),
            ("vdf", vdf, [vDataFrame,],),
        ],
    )
    columns_check([column] + [ts] + by, vdf)
    column = vdf_columns_names([column], vdf)[0]
    ts = vdf_columns_names([ts], vdf)[0]
    by = vdf_columns_names(by, vdf)
    acf = vdf.acf(column=column, ts=ts, by=by, p=p, show=False)
    if p >= 2:
        acf = acf.values["value"]
    else:
        acf = [acf]
    n = vdf[column].count()
    name = (
        "Ljung–Box Test Statistic" if not (box_pierce) else "Box-Pierce Test Statistic"
    )
    result = tablesample(
        {"index": [], name: [], "p_value": [], "Serial Correlation": []}
    )
    Q = 0
    for k in range(p):
        div = n - k - 1 if not (box_pierce) else 1
        mult = n * (n + 2) if not (box_pierce) else n
        Q += mult * acf[k] ** 2 / div
        pvalue = chi2.sf(Q, k + 1)
        result.values["index"] += [k + 1]
        result.values[name] += [Q]
        result.values["p_value"] += [pvalue]
        result.values["Serial Correlation"] += [True if pvalue < alpha else False]
    return result

Example #2

Show file

File: tools.py Project: miaviles/VerticaPy

def plot_acf_pacf(
    vdf: vDataFrame,
    column: str,
    ts: str,
    by: list = [],
    p: (int, list) = 15,
    **style_kwds,
):
    """
---------------------------------------------------------------------------
Draws the ACF and PACF Charts.

Parameters
----------
vdf: vDataFrame
    Input vDataFrame.
column: str
    Response column.
ts: str
    vcolumn used as timeline. It will be to use to order the data. 
    It can be a numerical or type date like (date, datetime, timestamp...) 
    vcolumn.
by: list, optional
    vcolumns used in the partition.
p: int/list, optional
    Int equals to the maximum number of lag to consider during the computation
    or List of the different lags to include during the computation.
    p must be positive or a list of positive integers.
**style_kwds
    Any optional parameter to pass to the Matplotlib functions.

Returns
-------
tablesample
    An object containing the result. For more information, see
    utilities.tablesample.
    """
    check_types([
        (
            "column",
            column,
            [str],
        ),
        (
            "ts",
            ts,
            [str],
        ),
        (
            "by",
            by,
            [list],
        ),
        (
            "p",
            p,
            [int, float],
        ),
        (
            "vdf",
            vdf,
            [
                vDataFrame,
            ],
        ),
    ])
    tmp_style = {}
    for elem in style_kwds:
        if elem not in ("color", "colors"):
            tmp_style[elem] = style_kwds[elem]
    if "color" in style_kwds:
        color = style_kwds["color"]
    else:
        color = gen_colors()[0]
    columns_check([column, ts] + by, vdf)
    by = vdf_columns_names(by, vdf)
    column, ts = vdf_columns_names([column, ts], vdf)
    acf = vdf.acf(ts=ts, column=column, by=by, p=p, show=False)
    pacf = vdf.pacf(ts=ts, column=column, by=by, p=p, show=False)
    result = tablesample(
        {
            "index": [i for i in range(0, len(acf.values["value"]))],
            "acf": acf.values["value"],
            "pacf": pacf.values["value"],
            "confidence": pacf.values["confidence"],
        }, )
    fig = plt.figure(figsize=(10,
                              6)) if isnotebook() else plt.figure(figsize=(10,
                                                                           6))
    plt.rcParams["axes.facecolor"] = "#FCFCFC"
    ax1 = fig.add_subplot(211)
    x, y, confidence = (
        result.values["index"],
        result.values["acf"],
        result.values["confidence"],
    )
    plt.xlim(-1, x[-1] + 1)
    ax1.bar(
        x,
        y,
        width=0.007 * len(x),
        color="#444444",
        zorder=1,
        linewidth=0,
    )
    param = {
        "s": 90,
        "marker": "o",
        "facecolors": color,
        "edgecolors": "black",
        "zorder": 2,
    }
    ax1.scatter(
        x,
        y,
        **updated_dict(
            param,
            tmp_style,
        ),
    )
    ax1.plot(
        [-1] + x + [x[-1] + 1],
        [0 for elem in range(len(x) + 2)],
        color=color,
        zorder=0,
    )
    ax1.fill_between(x, confidence, color="#FE5016", alpha=0.1)
    ax1.fill_between(x, [-elem for elem in confidence],
                     color="#FE5016",
                     alpha=0.1)
    ax1.set_title("Autocorrelation")
    y = result.values["pacf"]
    ax2 = fig.add_subplot(212)
    ax2.bar(x, y, width=0.007 * len(x), color="#444444", zorder=1, linewidth=0)
    ax2.scatter(
        x,
        y,
        **updated_dict(
            param,
            tmp_style,
        ),
    )
    ax2.plot(
        [-1] + x + [x[-1] + 1],
        [0 for elem in range(len(x) + 2)],
        color=color,
        zorder=0,
    )
    ax2.fill_between(x, confidence, color="#FE5016", alpha=0.1)
    ax2.fill_between(x, [-elem for elem in confidence],
                     color="#FE5016",
                     alpha=0.1)
    ax2.set_title("Partial Autocorrelation")
    plt.show()
    return result