Beispiel #1
0
def _plot_and_print_results(
    df_stock, ns_parser, df_future, df_pred, model_name, s_ticker
):
    """Plot a model's price prediction and print the predicted values.

    A first figure shows the historical adjusted close followed by the
    prediction. When ``ns_parser.s_end_date`` is set (backtesting), the real
    prices from ``df_future`` are overlaid on that figure, a second figure
    compares real and predicted prices together with the percentage error,
    and a Real/Prediction table plus KPIs are printed. Without an end date
    only the prediction (and, for several loops, its quantiles) is printed.

    Parameters
    ----------
    df_stock : pd.DataFrame
        Historical prices; only the "5. adjusted close" column is read.
    ns_parser
        Parsed arguments object; ``n_loops``, ``n_days`` and ``s_end_date``
        are read.
    df_future : pd.DataFrame
        Real prices of the predicted period ("5. adjusted close" column).
        Only accessed when ``ns_parser.s_end_date`` is set.
    df_pred : pd.DataFrame
        Predictions indexed by date. With ``n_loops == 1`` only the first
        column is used; otherwise the 10%/50%/90% quantiles across columns
        are plotted (presumably one column per loop -- confirm with caller).
        The caller's frame is left unmodified.
    model_name : str
        Model name shown in the plot title.
    s_ticker : str
        Ticker shown in the plot title.
    """
    adj_close = "5. adjusted close"
    is_backtesting = bool(ns_parser.s_end_date)
    single_loop = ns_parser.n_loops == 1

    # Plotting
    plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
    plt.plot(df_stock.index, df_stock[adj_close], lw=3)

    # Last known point: every dashed "connector" segment starts here.
    last_day = df_stock.index[-1]
    last_close = df_stock[adj_close].values[-1]

    title = f"{model_name} on {s_ticker} - {ns_parser.n_days} days prediction"
    if is_backtesting:
        title = f"BACKTESTING: {title}"
    if not single_loop:
        title = f"{ns_parser.n_loops} loops - {title}"
    plt.title(title)

    plt.xlim(df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1])
    plt.xlabel("Time")
    plt.ylabel("Share Price ($)")
    # The on/off flag is passed positionally: its keyword was renamed from
    # ``b`` to ``visible`` in Matplotlib 3.5, so neither spelling is portable.
    plt.grid(True, which="major", color="#666666", linestyle="-")
    plt.minorticks_on()
    plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)

    if single_loop:
        # Single run: the first column is the prediction itself.
        pred_center = df_pred[df_pred.columns[0]]
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_close, pred_center.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, pred_center, lw=2, c="tab:green")
    else:
        # Several runs: summarise them per date by median and a 10%-90% band.
        df_quantiles = pd.DataFrame()
        df_quantiles["Quantile 10%"] = df_pred.quantile(0.1, axis=1)
        df_quantiles["Median"] = df_pred.quantile(0.5, axis=1)
        df_quantiles["Quantile 90%"] = df_pred.quantile(0.9, axis=1)
        quantile_low = df_quantiles["Quantile 10%"]
        quantile_high = df_quantiles["Quantile 90%"]
        pred_center = df_quantiles["Median"]

        plt.plot(df_pred.index, pred_center, lw=2, c="tab:green")
        plt.fill_between(
            df_pred.index,
            quantile_low,
            quantile_high,
            alpha=0.30,
            color="tab:green",
            interpolate=True,
        )
        # Fan the band out from the last known price to the first prediction.
        plt.fill_between(
            [last_day, df_pred.index[0]],
            [last_close, quantile_low.values[0]],
            [last_close, quantile_high.values[0]],
            alpha=0.30,
            color="tab:green",
            interpolate=True,
        )

    plt.axvspan(last_day, df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
    _, _, ymin, ymax = plt.axis()
    plt.vlines(
        last_day,
        ymin,
        ymax,
        colors="k",
        linewidth=3,
        linestyle="--",
    )

    # BACKTESTING
    if is_backtesting:
        future_close = df_future[adj_close]
        plt.plot(df_future.index, future_close, lw=2, c="tab:blue", ls="--")
        plt.plot(
            [last_day, df_future.index[0]],
            [last_close, future_close.values[0]],
            lw=1,
            c="tab:blue",
            linestyle="--",
        )

    if gtff.USE_ION:
        plt.ion()

    plt.show()

    if not is_backtesting:
        patch_pandas_text_adjustment()
        # Print prediction data
        print_pretty_prediction_nn(df_pred, last_close)
        print("")

        if ns_parser.n_loops > 1:
            print("Prediction Stats:")
            print(df_quantiles.round(2).to_string())
            print("")
        return

    # BACKTESTING: second figure, real vs. predicted prices and their error.
    xlim_right = df_pred.index[-1] + datetime.timedelta(days=1)

    plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
    plt.subplot(211)
    # The two legend labels below are matched to artists in creation order,
    # so the real-data line and the prediction line must be plotted first.
    plt.plot(df_future.index, future_close, lw=2, c="tab:blue", ls="--")
    plt.plot(pred_center.index, pred_center, lw=2, c="green")
    plt.scatter(df_future.index, future_close, c="tab:blue", lw=3)
    plt.plot(
        [last_day, df_future.index[0]],
        [last_close, future_close.values[0]],
        lw=2,
        c="tab:blue",
        ls="--",
    )
    plt.scatter(pred_center.index, pred_center, lw=3, c="green")
    plt.plot(
        [last_day, pred_center.index[0]],
        [last_close, pred_center.values[0]],
        lw=2,
        c="green",
        ls="--",
    )
    if not single_loop:
        plt.fill_between(
            df_pred.index,
            quantile_low,
            quantile_high,
            alpha=0.30,
            color="tab:green",
            interpolate=True,
        )

    plt.title("BACKTESTING: Real data price versus Prediction")
    plt.xlim(last_day, xlim_right)
    plt.xticks([last_day, xlim_right], visible=True)
    plt.ylabel("Share Price ($)")
    plt.grid(True, which="major", color="#666666", linestyle="-")
    plt.minorticks_on()
    plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
    plt.legend(["Real data", "Prediction data"])
    plt.xticks([])

    plt.subplot(212)
    real_values = future_close.values
    # Signed prediction error relative to the real price, in percent.
    error_pct = 100 * (pred_center.values - real_values) / real_values

    plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
    plt.plot(df_future.index, error_pct, lw=2, c="red")
    plt.scatter(df_future.index, error_pct, c="red", lw=5)
    plt.plot(
        [last_day, df_future.index[0]],
        [0, error_pct[0]],
        lw=2,
        ls="--",
        c="red",
    )
    if not single_loop:
        plt.fill_between(
            df_pred.index,
            100 * (quantile_low.values - real_values) / real_values,
            100 * (quantile_high.values - real_values) / real_values,
            alpha=0.30,
            color="red",
            interpolate=True,
        )
    plt.title("BACKTESTING: Error between Real data and Prediction [%]")

    plt.xlim(last_day, xlim_right)
    plt.xticks([last_day, xlim_right], visible=True)
    plt.xlabel("Time")
    plt.ylabel("Prediction Error (%)")
    plt.grid(True, which="major", color="#666666", linestyle="-")
    plt.minorticks_on()
    plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
    # NOTE(review): on this subplot the first two artists appear to be the
    # zero line and the error curve, so these labels look mismatched -- confirm.
    plt.legend(["Real data", "Prediction data"])

    if gtff.USE_ION:
        plt.ion()

    plt.show()

    # Refactor prediction dataframe for backtesting print. A new frame is
    # built in both branches so the caller's ``df_pred`` is never mutated.
    if single_loop:
        df_pred = df_pred.rename(columns={df_pred.columns[0]: "Prediction"})
    else:
        df_pred = pd.DataFrame()
        df_pred["Prediction"] = df_quantiles["Median"]

    df_pred["Real"] = df_future[adj_close]

    if gtff.USE_COLOR:
        patch_pandas_text_adjustment()

        print("Time         Real [$]  x  Prediction [$]")
        print(df_pred.apply(price_prediction_backtesting_color, axis=1).to_string())
    else:
        print(df_pred[["Real", "Prediction"]].round(2).to_string())

    print("")
    print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)
    print("")
def arima(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    ARIMA prediction

    Parses the command arguments, fits an ARIMA model on the
    "5. adjusted close" column (a fixed order when -o/--order is given,
    otherwise ``pmdarima.auto_arima``), then plots and prints the forecast
    for the requested number of days. When -e/--end is given the data is cut
    at that date and the forecast is compared with the real prices that
    follow it (backtesting): an extra figure, a Real/Prediction table and
    KPIs are produced.

    Any exception raised after the parser has been built is caught and
    printed instead of propagated.

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        ticker
    df_stock: pd.DataFrame
        Dataframe of prices

    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="arima",
        description="""
            In statistics and econometrics, and in particular in time series analysis, an
            autoregressive integrated moving average (ARIMA) model is a generalization of an
            autoregressive moving average (ARMA) model. Both of these models are fitted to time
            series data either to better understand the data or to predict future points in the
            series (forecasting). ARIMA(p,d,q) where parameters p, d, and q are non-negative
            integers, p is the order (number of time lags) of the autoregressive model, d is the
            degree of differencing (the number of times the data have had past values subtracted),
            and q is the order of the moving-average model.
        """,
    )

    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i",
        "--ic",
        action="store",
        dest="s_ic",
        type=str,
        default="aic",
        choices=["aic", "aicc", "bic", "hqic", "oob"],
        help="information criteria.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store_true",
        default=False,
        dest="b_seasonal",
        help="Use weekly seasonal data.",
    )
    parser.add_argument(
        "-o",
        "--order",
        action="store",
        dest="s_order",
        type=str,
        help="arima model order (p,d,q) in format: p,d,q.",
    )
    parser.add_argument(
        "-r",
        "--results",
        action="store_true",
        dest="b_results",
        default=False,
        help="results about ARIMA summary flag.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        # BACKTESTING
        if ns_parser.s_end_date:

            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            # Require some history before the end date: 5 days plus the
            # prediction horizon, counted from the first available day.
            earliest_end_date = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=5 + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end_date:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=ns_parser.n_days,
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Real prices of the predicted period, then cut the training data.
            df_future = df_stock[future_index[0] : future_index[-1]]
            df_stock = df_stock[: ns_parser.s_end_date]

        close_values = df_stock["5. adjusted close"].values

        # Machine Learning model
        if ns_parser.s_order:
            t_order = tuple(int(term) for term in ns_parser.s_order.split(","))
            model = ARIMA(close_values, order=t_order).fit()
            # NOTE(review): starting at len + 1 looks like it may skip the
            # first out-of-sample step, and unlike the auto_arima branch the
            # values are not clamped at 0 -- confirm against the ARIMA class
            # actually imported by this file before changing.
            l_predictions = model.predict(
                start=len(close_values) + 1,
                end=len(close_values) + ns_parser.n_days,
            )
        else:
            # pmdarima names this argument ``information_criterion``; the
            # previous spelling ``information_criteria`` was not that
            # parameter, so -i/--ic never selected the criterion.
            auto_arima_kwargs = {
                "error_action": "ignore",
                "seasonal": ns_parser.b_seasonal,
                "information_criterion": ns_parser.s_ic,
            }
            if ns_parser.b_seasonal:
                # Seasonal period of 5 (the option is described as weekly).
                auto_arima_kwargs["m"] = 5
            model = pmdarima.auto_arima(close_values, **auto_arima_kwargs)
            # Negative forecasts are replaced by 0.
            l_predictions = [
                i if i > 0 else 0
                for i in model.predict(n_periods=ns_parser.n_days)
            ]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=df_stock["5. adjusted close"].index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Last known point: every dashed "connector" segment starts here.
        last_day = df_stock.index[-1]
        last_close = df_stock["5. adjusted close"].values[-1]

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, df_stock["5. adjusted close"], lw=2)

        order_label = t_order if ns_parser.s_order else model.order
        title = f"ARIMA {order_label} on {s_ticker} - {ns_parser.n_days} days prediction"
        if ns_parser.s_end_date:
            title = f"BACKTESTING: {title}"
        plt.title(title)

        plt.xlim(
            df_stock.index[0],
            get_next_stock_market_days(df_pred.index[-1], 1)[-1],
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally: its keyword was renamed from
        # ``b`` to ``visible`` in Matplotlib 3.5, so neither spelling is portable.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_close, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(last_day, df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(last_day, ymin, ymax, linewidth=1, linestyle="--", color="k")

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, df_future["5. adjusted close"].values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if ns_parser.s_end_date:
            future_close = df_future["5. adjusted close"]
            xlim_right = df_pred.index[-1] + datetime.timedelta(days=1)

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            # The two legend labels below are matched to artists in creation
            # order, so the real-data and prediction lines are plotted first.
            plt.plot(df_future.index, future_close, lw=2, c="tab:blue", ls="--")
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, future_close, c="tab:blue", lw=3)
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, future_close.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_day, df_pred.index[0]],
                [last_close, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(last_day, xlim_right)
            plt.xticks([last_day, xlim_right], visible=True)
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            plt.subplot(212)
            # Signed prediction error relative to the real price, in percent.
            error_pct = (
                100 * (df_pred.values - future_close.values) / future_close.values
            )
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(df_future.index, error_pct, lw=2, c="red")
            plt.scatter(df_future.index, error_pct, c="red", lw=5)
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [last_day, df_future.index[0]],
                [0, error_pct[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(last_day, xlim_right)
            plt.xticks([last_day, xlim_right], visible=True)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            # NOTE(review): on this subplot the first two artists appear to be
            # the zero line and the error curve, so these labels look
            # mismatched -- confirm.
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = future_close

            if gtff.USE_COLOR:

                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(
                        price_prediction_backtesting_color, axis=1
                    ).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(
                df_pred["Real"].values, df_pred["Prediction"].values
            )

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_close)
        print("")

    except Exception as e:
        print(e, "\n")
Beispiel #3
0
def k_nearest_neighbors(l_args, s_ticker, df_stock):
    """
    K nearest neighbors prediction

    Parses the command arguments, trains a ``KNeighborsRegressor`` on windows
    of the "5. adjusted close" column, predicts the next ``n_days`` values
    from the last ``n_inputs`` prices, then plots and prints the prediction.
    When -e/--end is given the data is cut at that date and the prediction
    is compared with the real prices that follow it (backtesting): an extra
    figure, a Real/Prediction table and KPIs are produced.

    Any exception raised after the parser has been built is caught and
    printed instead of propagated.

    Parameters
    ----------
    l_args: List[str]
        Argparse arguments
    s_ticker: str
        ticker, used in the plot titles
    df_stock: pd.DataFrame
        Dataframe of prices

    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="knn",
        description="""
            K nearest neighbors is a simple algorithm that stores all
            available cases and predict the numerical target based on a similarity measure
            (e.g. distance functions).
        """,
    )

    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-n",
        "--neighbors",
        action="store",
        dest="n_neighbors",
        type=check_positive,
        default=20,
        help="number of neighbors to use on the algorithm.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        # BACKTESTING
        if ns_parser.s_end_date:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            # Require at least one input window plus the prediction horizon
            # of history before the end date.
            earliest_end_date = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=ns_parser.n_inputs + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end_date:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=ns_parser.n_days,
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Real prices of the predicted period, then cut the training data.
            df_future = df_stock[future_index[0] : future_index[-1]]
            df_stock = df_stock[: ns_parser.s_end_date]

        close_values = df_stock["5. adjusted close"].values

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            close_values,
            ns_parser.n_inputs,
            ns_parser.n_days,
            ns_parser.n_jumps,
        )

        # NOTE(review): this truthiness test assumes split_train returns
        # plain lists; it would raise on a multi-element NumPy array -- confirm.
        if not stock_x:
            print("Given the model parameters more training data is needed.\n")
            return

        # Machine Learning model
        knn = neighbors.KNeighborsRegressor(n_neighbors=ns_parser.n_neighbors)
        knn.fit(stock_x, stock_y)

        # Prediction data: feed the last n_inputs prices as a single sample.
        l_predictions = knn.predict(
            close_values[-ns_parser.n_inputs :].reshape(1, -1)
        )[0]
        l_pred_days = get_next_stock_market_days(
            last_stock_day=df_stock["5. adjusted close"].index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Last known point: every dashed "connector" segment starts here.
        last_day = df_stock.index[-1]
        last_close = close_values[-1]

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, df_stock["5. adjusted close"], lw=2)
        s_knn = f"{ns_parser.n_neighbors}-Nearest Neighbors on {s_ticker}"
        # BACKTESTING
        if ns_parser.s_end_date:
            plt.title(f"BACKTESTING: {s_knn} - {ns_parser.n_days} days prediction")
        else:
            plt.title(f"{s_knn} - {ns_parser.n_days} days prediction")
        plt.xlim(
            df_stock.index[0],
            get_next_stock_market_days(df_pred.index[-1], 1)[-1],
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally: its keyword was renamed from
        # ``b`` to ``visible`` in Matplotlib 3.5, so neither spelling is portable.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_close, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(last_day, df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(last_day, ymin, ymax, linewidth=1, linestyle="--", color="k")

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, df_future["5. adjusted close"].values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if ns_parser.s_end_date:
            future_close = df_future["5. adjusted close"]
            xlim_right = df_pred.index[-1] + datetime.timedelta(days=1)

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            # The two legend labels below are matched to artists in creation
            # order, so the real-data and prediction lines are plotted first.
            plt.plot(df_future.index, future_close, lw=2, c="tab:blue", ls="--")
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, future_close, c="tab:blue", lw=3)
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, future_close.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_day, df_pred.index[0]],
                [last_close, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(last_day, xlim_right)
            plt.xticks([last_day, xlim_right])
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            plt.subplot(212)
            # Signed prediction error relative to the real price, in percent.
            error_pct = (
                100 * (df_pred.values - future_close.values) / future_close.values
            )
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(df_future.index, error_pct, lw=2, c="red")
            plt.scatter(df_future.index, error_pct, c="red", lw=5)
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [last_day, df_future.index[0]],
                [0, error_pct[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(last_day, xlim_right)
            plt.xticks([last_day, xlim_right])
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            # NOTE(review): on this subplot the first two artists appear to be
            # the zero line and the error curve, so these labels look
            # mismatched -- confirm.
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = future_close

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(
                        price_prediction_backtesting_color, axis=1
                    ).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(
                df_pred["Real"].values, df_pred["Prediction"].values
            )

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_close)
        print("")

    except Exception as e:
        print(e)
        print("")
Beispiel #4
0
def regression(l_args, s_ticker, df_stock, polynomial):
    """Fit a regression on the adjusted close, then plot and print its forecast.

    Training samples are built by ``splitTrain.split_train`` from the
    "5. adjusted close" column. The fitted model predicts the next ``n_days``
    market days from the last ``n_inputs`` closes. When an end date is given
    (``-e``), the rows after that date are held back and the forecast is
    compared against them (backtesting) in a second figure and a printed table.

    Parameters
    ----------
    l_args
        Command arguments handed to ``parse_known_args_and_warn`` together
        with the ``regression`` parser built here.
    s_ticker
        Ticker symbol; only used in the plot title.
    df_stock
        Price history with a "5. adjusted close" column. Its index is compared
        with the parsed end date, so it is assumed to hold datetimes.
    polynomial
        ``LINEAR`` selects ``LinearRegression``; ``USER_INPUT`` makes the
        ``-p/--polynomial`` argument required and uses its value; any other
        value is used directly as the degree of a polynomial Ridge pipeline.

    Returns
    -------
    None
        Everything is plotted or printed. Exceptions are caught and printed.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="regression",
        description="""
            Regression attempts to model the relationship between
            two variables by fitting a linear/quadratic/cubic/other equation to
            observed data. One variable is considered to be an explanatory variable,
            and the other is considered to be a dependent variable.
        """,
    )

    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    if polynomial == USER_INPUT:
        parser.add_argument(
            "-p",
            "--polynomial",
            action="store",
            dest="n_polynomial",
            type=check_positive,
            required=True,
            help="polynomial associated with regression.",
        )

    def _draw_grid():
        """Draw the major grid and a fainter minor grid on the current axes."""
        # The on/off flag is passed positionally: its keyword was renamed from
        # ``b`` to ``visible`` in Matplotlib 3.5 and ``b`` was removed later,
        # so neither keyword spelling works on every Matplotlib version.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        backtesting = bool(ns_parser.s_end_date)

        if backtesting:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            # The end date must leave room for one full input window plus the
            # prediction horizon after the first historical day.
            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=ns_parser.n_inputs + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=ns_parser.n_days,
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Hold back the real prices of the prediction window and train
            # only on the data up to the end date.
            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]
            real = df_future["5. adjusted close"]

        closes = df_stock["5. adjusted close"]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            closes.values,
            ns_parser.n_inputs,
            ns_parser.n_days,
            ns_parser.n_jumps,
        )

        if not stock_x:
            print("Given the model parameters more training data is needed.\n")
            return

        # Machine Learning model
        if polynomial == LINEAR:
            model = linear_model.LinearRegression(n_jobs=-1)
        else:
            if polynomial == USER_INPUT:
                polynomial = ns_parser.n_polynomial
            model = pipeline.make_pipeline(
                preprocessing.PolynomialFeatures(polynomial),
                linear_model.Ridge(),
            )

        model.fit(stock_x, stock_y)
        # Predict from the most recent ``n_inputs`` closes, as a single sample.
        latest_window = closes.values[-ns_parser.n_inputs:].reshape(1, -1)
        l_predictions = model.predict(latest_window)[0]

        last_day = df_stock.index[-1]
        last_close = closes.values[-1]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=last_day,
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        title = (
            f"Regression (polynomial {polynomial}) on {s_ticker}"
            f" - {ns_parser.n_days} days prediction"
        )
        if backtesting:
            title = f"BACKTESTING: {title}"

        # Main figure: history followed by the forecast
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, closes, lw=2)
        plt.title(title)
        plt.xlim(
            df_stock.index[0],
            get_next_stock_market_days(df_pred.index[-1], 1)[-1],
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        _draw_grid()
        # Dashed connector from the last known close to the first prediction.
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_close, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            last_day, df_pred.index[-1], facecolor="tab:orange", alpha=0.2
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            last_day, ymin, ymax, linewidth=1, linestyle="--", color="k"
        )

        if backtesting:
            plt.plot(df_future.index, real, lw=2, c="tab:blue", ls="--")
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, real.values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        if backtesting:
            right_edge = df_pred.index[-1] + datetime.timedelta(days=1)

            # Upper panel: real prices versus predicted prices
            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(df_future.index, real, lw=2, c="tab:blue", ls="--")
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real, c="tab:blue", lw=3)
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, real.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_day, df_pred.index[0]],
                [last_close, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(last_day, right_edge)
            plt.xticks([last_day, right_edge], visible=True)
            plt.ylabel("Share Price ($)")
            _draw_grid()
            plt.legend(["Real data", "Prediction data"])
            # Clears the major x ticks set above for this panel.
            plt.xticks([])

            # Lower panel: relative prediction error in percent
            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            pct_error = 100 * (df_pred.values - real.values) / real.values
            plt.plot(df_future.index, pct_error, lw=2, c="red")
            plt.scatter(df_future.index, pct_error, c="red", lw=5)
            plt.title(
                "BACKTESTING: Error between Real data and Prediction [%]"
            )
            # Connector from zero error on the last known day to the first error.
            plt.plot(
                [last_day, df_future.index[0]],
                [0, pct_error[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(last_day, right_edge)
            plt.xticks([last_day, right_edge], visible=True)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            _draw_grid()
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = real

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(
                        price_prediction_backtesting_color, axis=1
                    ).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(
                df_pred["Real"].values, df_pred["Prediction"].values
            )

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_close)
        print("")

    except SystemExit:
        print("")
    except Exception as e:
        print(e)
        print("")
Beispiel #5
0
def exponential_smoothing(l_args, s_ticker, df_stock):
    """Fit an exponential smoothing model, then plot and print its forecast.

    The model comes from ``get_exponential_smoothing_model`` applied to the
    "5. adjusted close" column with the parsed trend, seasonal and
    seasonal-period settings. If the fit did not converge, or the forecast
    contains NaN, a warning line is printed and nothing is plotted. When an
    end date is given (``-e``), the rows after that date are held back and
    the forecast is compared against them (backtesting) in a second figure
    and a printed table.

    Parameters
    ----------
    l_args
        Command arguments handed to ``parse_known_args_and_warn`` together
        with the ``ets`` parser built here.
    s_ticker
        Ticker symbol; only used in the plot title.
    df_stock
        Price history with a "5. adjusted close" column. Its index is compared
        with the parsed end date, so it is assumed to hold datetimes.

    Returns
    -------
    None
        Everything is plotted or printed. Exceptions are caught and printed.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="ets",
        description="""
            Exponential Smoothing, see https://otexts.com/fpp2/taxonomy.html

            Trend='N',  Seasonal='N': Simple Exponential Smoothing
            Trend='N',  Seasonal='A': Exponential Smoothing
            Trend='N',  Seasonal='M': Exponential Smoothing
            Trend='A',  Seasonal='N': Holt’s linear method
            Trend='A',  Seasonal='A': Additive Holt-Winters’ method
            Trend='A',  Seasonal='M': Multiplicative Holt-Winters’ method
            Trend='Ad', Seasonal='N': Additive damped trend method
            Trend='Ad', Seasonal='A': Exponential Smoothing
            Trend='Ad', Seasonal='M': Holt-Winters’ damped method

            Trend component: N: None, A: Additive, Ad: Additive Damped
            Seasonality component: N: None, A: Additive, M: Multiplicative
        """,
    )

    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-t",
        "--trend",
        action="store",
        dest="trend",
        type=check_valid_trend,
        default="N",
        help="Trend component: N: None, A: Additive, Ad: Additive Damped.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store",
        dest="seasonal",
        type=check_valid_seasonal,
        default="N",
        help="Seasonality component: N: None, A: Additive, M: Multiplicative.",
    )
    parser.add_argument(
        "-p",
        "--periods",
        action="store",
        dest="seasonal_periods",
        type=check_positive,
        default=5,
        help="Seasonal periods.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    def _draw_grid():
        """Draw the major grid and a fainter minor grid on the current axes."""
        # The on/off flag is passed positionally: its keyword was renamed from
        # ``b`` to ``visible`` in Matplotlib 3.5 and ``b`` was removed later,
        # so neither keyword spelling works on every Matplotlib version.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        backtesting = bool(ns_parser.s_end_date)

        if backtesting:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            # Require at least 5 market days plus the prediction horizon
            # between the first historical day and the end date.
            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=5 + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=ns_parser.n_days,
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Hold back the real prices of the prediction window and fit
            # only on the data up to the end date.
            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]
            real = df_future["5. adjusted close"]

        closes = df_stock["5. adjusted close"]

        # Get ETS model
        model, title = get_exponential_smoothing_model(
            closes.values,
            ns_parser.trend,
            ns_parser.seasonal,
            ns_parser.seasonal_periods,
        )

        if not model.mle_retvals.success:
            print("ConvergenceWarning: Optimization failed to converge.")
            return

        forecast = model.forecast(ns_parser.n_days)

        last_day = closes.index[-1]
        l_pred_days = get_next_stock_market_days(
            last_stock_day=last_day,
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(forecast, index=l_pred_days, name="Price")

        # Logical test instead of bitwise ``~``: inverting a plain Python bool
        # with ``~`` yields -1 or -2, both truthy, which would disable the check.
        if np.isnan(forecast).any():
            print(
                "RuntimeWarning: invalid value encountered in double_scalars."
            )
            return

        print(f"\n{title}")
        print("\nFit model parameters:")
        for key, value in model.params.items():
            print(f"{key} {' '*(18-len(key))}: {value}")

        print("\nAssess fit model:")
        print(f"AIC: {round(model.aic, 2)}")
        print(f"BIC: {round(model.bic, 2)}")
        print(f"SSE: {round(model.sse, 2)}\n")

        last_close = closes.values[-1]

        # Main figure: history followed by the forecast
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, closes, lw=2)
        if backtesting:
            plt.title(f"BACKTESTING: {title} on {s_ticker}")
        else:
            plt.title(f"{title} on {s_ticker}")

        plt.xlim(
            df_stock.index[0],
            get_next_stock_market_days(df_pred.index[-1], 1)[-1],
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        _draw_grid()
        # Dashed connector from the last known close to the first prediction.
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_close, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            last_day, df_pred.index[-1], facecolor="tab:orange", alpha=0.2
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            last_day, ymin, ymax, linewidth=1, linestyle="--", color="k"
        )

        if backtesting:
            plt.plot(df_future.index, real, lw=2, c="tab:blue", ls="--")
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, real.values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        if backtesting:
            right_edge = df_pred.index[-1] + datetime.timedelta(days=1)

            # Upper panel: real prices versus predicted prices
            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(df_future.index, real, lw=2, c="tab:blue", ls="--")
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real, c="tab:blue", lw=3)
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, real.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_day, df_pred.index[0]],
                [last_close, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(last_day, right_edge)
            plt.ylabel("Share Price ($)")
            _draw_grid()
            plt.legend(["Real data", "Prediction data"])
            # Removes the major x ticks from this panel.
            plt.xticks([])

            # Lower panel: relative prediction error in percent
            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            pct_error = 100 * (df_pred.values - real.values) / real.values
            plt.plot(df_future.index, pct_error, lw=2, c="red")
            plt.scatter(df_future.index, pct_error, c="red", lw=5)
            plt.title(
                "BACKTESTING: Error between Real data and Prediction [%]"
            )
            # Connector from zero error on the last known day to the first error.
            plt.plot(
                [last_day, df_future.index[0]],
                [0, pct_error[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(last_day, right_edge)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            _draw_grid()
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = real

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(
                        price_prediction_backtesting_color, axis=1
                    ).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(
                df_pred["Real"].values, df_pred["Prediction"].values
            )

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_close)
        print("")

    except Exception as e:
        print(e)
        print("")
Beispiel #6
0
def simple_moving_average(l_args, s_ticker, df_stock):
    """Forecast with a simple moving average, then plot and print the result.

    Each predicted day is the mean of the ``n_length`` most recent values,
    where "recent values" are the tail of the "5. adjusted close" column
    followed by the predictions made so far. When an end date is given
    (``-e``), the rows after that date are held back and the forecast is
    compared against them (backtesting) in a second figure and a printed table.

    Parameters
    ----------
    l_args
        Command arguments handed to ``parse_known_args_and_warn`` together
        with the ``sma`` parser built here.
    s_ticker
        Ticker symbol; only used in the plot title.
    df_stock
        Price history with a "5. adjusted close" column. Its index is compared
        with the parsed end date, so it is assumed to hold datetimes.

    Returns
    -------
    None
        Everything is plotted or printed. Exceptions are caught and printed.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="sma",
        description="""
            Moving Averages are used to smooth the data in an array to
            help eliminate noise and identify trends. The Simple Moving Average is literally
            the simplest form of a moving average. Each output value is the average of the
            previous n values. In a Simple Moving Average, each value in the time period carries
            equal weight, and values outside of the time period are not included in the average.
            This makes it less responsive to recent changes in the data, which can be useful for
            filtering out those changes.
        """,
    )

    parser.add_argument(
        "-l",
        "--length",
        action="store",
        dest="n_length",
        type=check_positive,
        default=20,
        help="length of SMA window.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    def _draw_grid():
        """Draw the major grid and a fainter minor grid on the current axes."""
        # The on/off flag is passed positionally: its keyword was renamed from
        # ``b`` to ``visible`` in Matplotlib 3.5 and ``b`` was removed later,
        # so neither keyword spelling works on every Matplotlib version.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        backtesting = bool(ns_parser.s_end_date)

        if backtesting:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            # Require at least 5 market days plus the prediction horizon
            # between the first historical day and the end date.
            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=5 + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=ns_parser.n_days,
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Hold back the real prices of the prediction window and average
            # only over the data up to the end date.
            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]
            real = df_future["5. adjusted close"]

        closes = df_stock["5. adjusted close"]

        # Prediction data
        l_predictions = []
        for pred_day in range(ns_parser.n_days):
            # Number of historical closes still inside the window.
            n_history = ns_parser.n_length - pred_day
            if n_history > 0:
                window = np.append(closes.values[-n_history:], l_predictions)
            else:
                # The window is now filled by predictions only; keep just the
                # latest ``n_length`` of them so it never grows past the
                # requested SMA length.
                window = l_predictions[-ns_parser.n_length:]
            l_predictions.append(np.mean(window))

        last_day = df_stock.index[-1]
        last_close = closes.values[-1]

        l_pred_days = get_next_stock_market_days(
            last_stock_day=last_day,
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        title = (
            f"{ns_parser.n_length} Moving Average on {s_ticker}"
            f" - {ns_parser.n_days} days prediction"
        )
        if backtesting:
            title = f"BACKTESTING: {title}"

        # Main figure: history, its rolling mean, and the forecast
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, closes, lw=2)
        plt.title(title)
        plt.xlim(
            df_stock.index[0],
            get_next_stock_market_days(df_pred.index[-1], 1)[-1],
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        _draw_grid()
        df_ma = closes.rolling(window=ns_parser.n_length).mean()
        plt.plot(df_ma.index, df_ma, lw=2, linestyle="--", c="tab:orange")
        # Dashed connector from the last known close to the first prediction.
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_close, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            last_day, df_pred.index[-1], facecolor="tab:orange", alpha=0.2
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            last_day, ymin, ymax, linewidth=1, linestyle="--", color="k"
        )

        if backtesting:
            plt.plot(df_future.index, real, lw=2, c="tab:blue", ls="--")
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, real.values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        if backtesting:
            right_edge = df_pred.index[-1] + datetime.timedelta(days=1)

            # Upper panel: real prices versus predicted prices
            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(df_future.index, real, lw=2, c="tab:blue", ls="--")
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real, c="tab:blue", lw=3)
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, real.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_day, df_pred.index[0]],
                [last_close, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(last_day, right_edge)
            plt.xticks([last_day, right_edge], visible=True)
            plt.ylabel("Share Price ($)")
            _draw_grid()
            plt.legend(["Real data", "Prediction data"])
            # Clears the major x ticks set above for this panel.
            plt.xticks([])

            # Lower panel: relative prediction error in percent
            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            pct_error = 100 * (df_pred.values - real.values) / real.values
            plt.plot(df_future.index, pct_error, lw=2, c="red")
            plt.scatter(df_future.index, pct_error, c="red", lw=5)
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            # Connector from zero error on the last known day to the first error.
            plt.plot(
                [last_day, df_future.index[0]],
                [0, pct_error[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(last_day, right_edge)
            plt.xticks([last_day, right_edge], visible=True)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            _draw_grid()
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = real

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(
                        price_prediction_backtesting_color, axis=1
                    ).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(
                df_pred["Real"].values, df_pred["Prediction"].values
            )

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_close)
        print("")

    except Exception as e:
        print(e)
        print("")
Beispiel #7
0
def fbprophet(l_args, s_ticker, df_stock):
    """Forecast the adjusted close price of a ticker with Facebook Prophet.

    Parses the command arguments, fits a Prophet model on the
    "5. adjusted close" column of ``df_stock``, plots the fitted history
    together with the forecast and prints the predicted prices.

    When an end date is supplied (``-e/--end``) the function runs in
    backtesting mode: the model is trained only on data up to that date and
    the forecast is compared (plots, table and KPIs) against the real prices
    that followed.

    Parameters
    ----------
    l_args : list
        Command-line style arguments; ``-d/--days`` (prediction days,
        default 5) and ``-e/--end`` (backtesting end date) are recognised.
    s_ticker : str
        Ticker name, used only in the plot titles.
    df_stock : pd.DataFrame
        Historical prices with an index level named "date" and a
        "5. adjusted close" column. The index is compared against the parsed
        end date, so it is assumed to be datetime-like and oldest-first.

    Returns
    -------
    None
        Results are plotted and printed. Any exception raised while running
        is caught and printed instead of being propagated.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="fbprophet",
        description="""
            Facebook Prophet is a forecasting procedure that is fast and provides
            completely automated forecasts that can be tuned by hand by data scientists
            and analysts. It was developed by Facebook's data science team and is open
            source.
        """,
    )

    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        # BACKTESTING: validate the end date and split real "future" data
        # from the training data.
        if ns_parser.s_end_date:

            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            # Require a minimum amount of history before the end date so the
            # model has something to train on.
            min_end_date = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=5 + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < min_end_date:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=ns_parser.n_days,
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

            # Without any real data after the end date there is nothing to
            # compare against; fail early with a clear message instead of an
            # IndexError further down.
            if df_future.empty:
                print(
                    "Backtesting not allowed, since there is no historical data after End Date\n"
                )
                return

        # Reshape the history into the two-column ("ds", "y") frame that
        # Prophet expects.
        df_stock = df_stock.sort_index(ascending=True)
        df_stock.reset_index(level=0, inplace=True)
        df_stock = df_stock[["date", "5. adjusted close"]]
        df_stock = df_stock.rename(columns={
            "date": "ds",
            "5. adjusted close": "y"
        })
        df_stock["ds"] = pd.to_datetime(df_stock["ds"])

        model = Prophet(yearly_seasonality=False, daily_seasonality=False)
        model.fit(df_stock)

        last_train_day = df_stock["ds"].values[-1]
        l_pred_days = get_next_stock_market_days(
            last_stock_day=pd.to_datetime(last_train_day),
            n_next_days=ns_parser.n_days,
        )

        # Predict on the history followed by the actual market days.
        # make_future_dataframe() would append consecutive calendar days
        # (weekends included), whose forecasts would then be mislabelled when
        # the index is replaced by the market days below.
        close_prices = pd.concat(
            [
                df_stock[["ds"]],
                pd.DataFrame({"ds": pd.to_datetime(l_pred_days)}),
            ],
            ignore_index=True,
        )
        forecast = model.predict(close_prices)

        # The last n_days rows of the forecast are the prediction horizon.
        df_pred = forecast["yhat"][-ns_parser.n_days:]
        df_pred.index = l_pred_days

        # Figure 1: fitted history plus forecast.
        _, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
        model.plot(
            forecast[:-ns_parser.n_days],
            ax=ax,
            xlabel="Time",
            ylabel="Share Price ($)",
        )
        _, _, ymin, ymax = ax.axis()
        # Mark where the training data ends and shade the forecast window.
        ax.vlines(
            last_train_day,
            ymin,
            ymax,
            linewidth=2,
            linestyle="--",
            color="k",
        )
        plt.axvspan(
            last_train_day,
            l_pred_days[-1],
            facecolor="tab:orange",
            alpha=0.2,
        )
        plt.ylim(ymin, ymax)
        plt.xlim(df_stock["ds"].values[0],
                 get_next_stock_market_days(l_pred_days[-1], 1)[-1])

        if ns_parser.s_end_date:
            plt.title(
                f"BACKTESTING: Fb Prophet on {s_ticker} - {ns_parser.n_days} days prediction"
            )
            # Overlay the real prices and join them to the training data.
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [last_train_day, df_future.index[0]],
                [
                    df_stock["y"].values[-1],
                    df_future["5. adjusted close"].values[0],
                ],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )
        else:
            plt.title(
                f"Fb Prophet on {s_ticker} - {ns_parser.n_days} days prediction"
            )

        plt.plot(df_pred.index, df_pred.values, lw=2, c="green")

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING: second figure (real vs prediction, and error in %),
        # followed by the comparison table and KPIs.
        if ns_parser.s_end_date:
            real_close = df_future["5. adjusted close"]
            last_train_price = df_stock["y"].values[-1]
            x_limits = (
                last_train_day,
                df_pred.index[-1] + datetime.timedelta(days=1),
            )
            error_pct = (100 * (df_pred.values - real_close.values) /
                         real_close.values)

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)

            # Top panel: real data price versus prediction.
            plt.subplot(211)
            plt.plot(df_future.index, real_close, lw=2, c="tab:blue", ls="--")
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real_close, c="tab:blue", lw=3)
            plt.plot(
                [last_train_day, df_future.index[0]],
                [last_train_price, real_close.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_train_day, df_pred.index[0]],
                [last_train_price, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(*x_limits)
            plt.ylabel("Share Price ($)")
            # The grid flag is passed positionally: its keyword was renamed
            # from `b` to `visible` in Matplotlib 3.5, so neither spelling
            # works across all versions.
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True,
                     which="minor",
                     color="#999999",
                     linestyle="-",
                     alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            # Bottom panel: relative prediction error.
            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)

            plt.plot(df_future.index, error_pct, lw=2, c="red")
            plt.scatter(df_future.index, error_pct, c="red", lw=5)
            plt.title(
                "BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [last_train_day, df_future.index[0]],
                [0, error_pct[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(*x_limits)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True,
                     which="minor",
                     color="#999999",
                     linestyle="-",
                     alpha=0.2)
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = real_close

            if gtff.USE_COLOR:

                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(price_prediction_backtesting_color,
                                  axis=1).to_string())
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values,
                                  df_pred["Prediction"].values)
        else:
            print("")
            print("Predicted share price:")
            print(df_pred.to_string())
        print("")

    except Exception as e:
        # Command boundary: report the problem to the user rather than
        # crashing the calling session.
        print(e)
        print("")