예제 #1
0
def _preprocess_split(df_stock, ns_parser):
    """Preprocess the adjusted-close series and split it into training samples.

    The "5. adjusted close" column of ``df_stock`` is reshaped into a single
    feature column and scaled according to ``ns_parser.s_preprocessing``:
    "standardization" uses ``StandardScaler``, "normalization" uses
    ``MinMaxScaler`` and any other value leaves the data untouched. The
    resulting array is passed to ``splitTrain.split_train`` together with
    ``ns_parser.n_inputs``, ``ns_parser.n_days`` and ``ns_parser.n_jumps``.

    :param df_stock: DataFrame holding a "5. adjusted close" column.
    :param ns_parser: parsed arguments providing ``s_preprocessing``,
        ``n_inputs``, ``n_days`` and ``n_jumps``.
    :return: (scaler, stock_train_data, stock_x, stock_y). ``scaler`` is the
        fitted scaler, or ``None`` when no pre-processing was requested.
    :raises Exception: if more training data is needed."""
    # Single-feature column vector of closing prices
    closes = np.array(df_stock["5. adjusted close"].values.reshape(-1, 1))

    # Pre-process data. ``scaler`` stays None when no scaling is requested so
    # that the return statement below never hits an unbound local.
    scaler = None
    if ns_parser.s_preprocessing == "standardization":
        scaler = StandardScaler()
    elif ns_parser.s_preprocessing == "normalization":
        scaler = MinMaxScaler()

    if scaler is None:
        stock_train_data = closes
    else:
        stock_train_data = scaler.fit_transform(closes)

    # Split training data for the neural network
    stock_x, stock_y = splitTrain.split_train(
        stock_train_data,
        ns_parser.n_inputs,
        ns_parser.n_days,
        numJumps=ns_parser.n_jumps,
    )
    if not stock_x:
        raise Exception("Given the model parameters more training data is needed.")

    return scaler, stock_train_data, np.array(stock_x), np.array(stock_y)
예제 #2
0
def regression(l_args, s_ticker, s_interval, df_stock, polynomial):
    """Fit a linear/polynomial regression on the adjusted close and plot a forecast.

    The last ``n_inputs`` values of the "5. adjusted close" column are fed to
    the fitted model to predict the next ``n_days`` prices. The prediction is
    drawn after the historical series and printed to stdout.

    :param l_args: command-line style arguments: -i/--input, -d/--days,
        -j/--jumps and, when ``polynomial == USER_INPUT``, the required
        -p/--polynomial.
    :param s_ticker: ticker symbol; only used in the plot title.
    :param s_interval: not used by this function.
    :param df_stock: DataFrame with a "5. adjusted close" column; its index
        provides the x-axis and the last stock day.
    :param polynomial: ``LINEAR``, ``USER_INPUT`` or a degree handed to
        ``PolynomialFeatures``.
    :return: None. Returns early when unknown arguments are supplied or when
        no training samples could be built.
    """
    parser = argparse.ArgumentParser(
        prog='regression',
        description="""Regression attempts to model the relationship between 
                                     two variables by fitting a linear/quadratic/cubic/other equation to 
                                     observed data. One variable is considered to be an explanatory variable, 
                                     and the other is considered to be a dependent variable. """
    )

    parser.add_argument('-i',
                        "--input",
                        action="store",
                        dest="n_inputs",
                        type=check_positive,
                        default=40,
                        help='number of days to use for prediction.')
    parser.add_argument('-d',
                        "--days",
                        action="store",
                        dest="n_days",
                        type=check_positive,
                        default=5,
                        help='prediction days.')
    parser.add_argument('-j',
                        "--jumps",
                        action="store",
                        dest="n_jumps",
                        type=check_positive,
                        default=1,
                        help='number of jumps in training data.')

    # The polynomial degree is only a CLI option when the caller asks for it
    if polynomial == USER_INPUT:
        parser.add_argument('-p',
                            "--polynomial",
                            action="store",
                            dest="n_polynomial",
                            type=check_positive,
                            required=True,
                            help='polynomial associated with regression.')

    (ns_parser, l_unknown_args) = parser.parse_known_args(l_args)

    if l_unknown_args:
        print(
            f"The following args couldn't be interpreted: {l_unknown_args}\n")
        return

    closes = df_stock['5. adjusted close']

    # Split training data
    stock_x, stock_y = splitTrain.split_train(closes.values,
                                              ns_parser.n_inputs,
                                              ns_parser.n_days,
                                              ns_parser.n_jumps)

    # Without this guard an empty training set surfaces as an obscure error
    # from inside model.fit().
    if not stock_x:
        print("Given the model parameters more training data is needed.\n")
        return

    # Machine Learning model
    if polynomial == LINEAR:
        model = linear_model.LinearRegression(n_jobs=-1)
    else:
        if polynomial == USER_INPUT:
            polynomial = ns_parser.n_polynomial
        model = pipeline.make_pipeline(
            preprocessing.PolynomialFeatures(polynomial), linear_model.Ridge())

    model.fit(stock_x, stock_y)
    # Predict from the most recent n_inputs prices (one sample, n_inputs features)
    last_window = closes.values[-ns_parser.n_inputs:].reshape(1, -1)
    l_predictions = model.predict(last_window)[0]

    # Prediction data
    l_pred_days = get_next_stock_market_days(last_stock_day=closes.index[-1],
                                             n_next_days=ns_parser.n_days)
    df_pred = pd.Series(l_predictions, index=l_pred_days, name='Price')

    last_day = df_stock.index[-1]

    # Plotting
    plt.plot(df_stock.index, closes, lw=2)
    plt.title(
        f"Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
    )
    plt.xlim(df_stock.index[0],
             get_next_stock_market_days(df_pred.index[-1], 1)[-1])
    plt.xlabel('Time')
    plt.ylabel('Share Price ($)')
    # The on/off flag is passed positionally: its keyword name changed from
    # ``b`` to ``visible`` across matplotlib releases.
    plt.grid(True, which='major', color='#666666', linestyle='-')
    plt.minorticks_on()
    plt.grid(True, which='minor', color='#999999', linestyle='-', alpha=0.2)
    # Dashed connector between the last real price and the first prediction
    plt.plot([last_day, df_pred.index[0]],
             [closes.values[-1], df_pred.values[0]],
             lw=1,
             c='tab:green',
             linestyle='--')
    plt.plot(df_pred.index, df_pred, lw=2, c='tab:green')
    plt.axvspan(last_day,
                df_pred.index[-1],
                facecolor='tab:orange',
                alpha=0.2)
    _, _, ymin, ymax = plt.axis()
    plt.vlines(last_day,
               ymin,
               ymax,
               linewidth=1,
               linestyle='--',
               color='k')
    plt.show()

    # Print prediction data
    print("Predicted share price:")
    df_pred = df_pred.apply(lambda x: f"{x:.2f} $")
    print(df_pred.to_string())
    print("")
def mlp(l_args, s_ticker, s_interval, df_stock):
    """Train a multilayer perceptron on the adjusted close and plot a forecast.

    The "5. adjusted close" column is optionally scaled, split into training
    samples with ``splitTrain.split_train`` and used to fit the model built
    from ``cfg_nn_models.MultiLayer_Perceptron``. The last ``n_inputs``
    (scaled) prices are then used to predict the next ``n_days`` prices, which
    are scaled back, plotted and printed.

    :param l_args: command-line style arguments: -d/--days, -i/--input,
        -e/--epochs, -j/--jumps, -p/--pp, -o/--optimizer and -l/--loss.
    :param s_ticker: ticker symbol; only used in the plot title.
    :param s_interval: not used by this function.
    :param df_stock: DataFrame with a "5. adjusted close" column; its index
        provides the x-axis and the last stock day.
    :return: None. Returns early when unknown arguments are supplied or when
        no training samples could be built.
    """
    parser = argparse.ArgumentParser(prog='mlp',
                                     description="""Multilayer Perceptron. """)

    parser.add_argument('-d', "--days", action="store", dest="n_days",
                        type=check_positive, default=5,
                        help='prediction days.')
    parser.add_argument('-i', "--input", action="store", dest="n_inputs",
                        type=check_positive, default=40,
                        help='number of days to use for prediction.')
    parser.add_argument('-e', "--epochs", action="store", dest="n_epochs",
                        type=check_positive, default=200,
                        help='number of training epochs.')
    parser.add_argument('-j', "--jumps", action="store", dest="n_jumps",
                        type=check_positive, default=1,
                        help='number of jumps in training data.')
    parser.add_argument('-p', "--pp", action="store", dest="s_preprocessing",
                        default='normalization',
                        choices=['normalization', 'standardization', 'none'],
                        help='pre-processing data.')
    parser.add_argument('-o', "--optimizer", action="store",
                        dest="s_optimizer", default='adam',
                        choices=['adam', 'adagrad', 'adadelta', 'adamax',
                                 'ftrl', 'nadam', 'optimizer', 'rmsprop',
                                 'sgd'],
                        help='optimization technique.')
    parser.add_argument('-l', "--loss", action="store", dest="s_loss",
                        default='mae',
                        choices=['mae', 'mape', 'mse', 'msle'],
                        help='loss function.')

    (ns_parser, l_unknown_args) = parser.parse_known_args(l_args)

    if l_unknown_args:
        print(f"The following args couldn't be interpreted: {l_unknown_args}\n")
        return

    closes = df_stock['5. adjusted close']

    # Pre-process data; ``scaler`` stays None when no scaling is requested
    raw_data = np.array(closes.values.reshape(-1, 1))
    scaler = None
    if ns_parser.s_preprocessing == 'standardization':
        scaler = StandardScaler()
    elif ns_parser.s_preprocessing == 'normalization':
        scaler = MinMaxScaler()

    if scaler is None:
        stock_train_data = raw_data
    else:
        stock_train_data = scaler.fit_transform(raw_data)

    # Split training data for the neural network
    stock_x, stock_y = splitTrain.split_train(stock_train_data,
                                              ns_parser.n_inputs,
                                              ns_parser.n_days,
                                              numJumps=ns_parser.n_jumps)

    # An empty split would otherwise fail below with an IndexError on
    # ``shape[1]``; report the real problem instead.
    if not stock_x:
        print("Given the model parameters more training data is needed.\n")
        return

    # Drop the trailing feature axis: (samples, steps, 1) -> (samples, steps)
    stock_x = np.array(stock_x)
    stock_x = np.reshape(stock_x, (stock_x.shape[0], stock_x.shape[1]))
    stock_y = np.array(stock_y)
    stock_y = np.reshape(stock_y, (stock_y.shape[0], stock_y.shape[1]))

    # Build Neural Network model
    model = build_neural_network_model(cfg_nn_models.MultiLayer_Perceptron,
                                       ns_parser.n_inputs, ns_parser.n_days)
    model.compile(optimizer=ns_parser.s_optimizer, loss=ns_parser.s_loss)

    # Train our model
    model.fit(stock_x, stock_y, epochs=ns_parser.n_epochs, verbose=1)
    print("")

    # summary() prints the table itself and returns None, so wrapping it in
    # print() would add a stray "None" line.
    model.summary()
    print("")

    # Prediction from the most recent n_inputs (scaled) prices
    last_window = stock_train_data[-ns_parser.n_inputs:].reshape(
        1, ns_parser.n_inputs)
    yhat = model.predict(last_window, verbose=0)

    # Re-scale the data back
    if scaler is not None:
        y_pred_test_t = scaler.inverse_transform(yhat.tolist())
    else:
        y_pred_test_t = yhat

    l_pred_days = get_next_stock_market_days(last_stock_day=closes.index[-1],
                                             n_next_days=ns_parser.n_days)
    df_pred = pd.Series(y_pred_test_t[0].tolist(), index=l_pred_days,
                        name='Price')

    last_day = df_stock.index[-1]

    # Plotting
    plt.plot(df_stock.index, closes, lw=3)
    plt.title(f"MLP on {s_ticker} - {ns_parser.n_days} days prediction")
    plt.xlim(df_stock.index[0],
             get_next_stock_market_days(df_pred.index[-1], 1)[-1])
    plt.xlabel('Time')
    plt.ylabel('Share Price ($)')
    # The on/off flag is passed positionally: its keyword name changed from
    # ``b`` to ``visible`` across matplotlib releases.
    plt.grid(True, which='major', color='#666666', linestyle='-')
    plt.minorticks_on()
    plt.grid(True, which='minor', color='#999999', linestyle='-', alpha=0.2)
    # Dashed connector between the last real price and the first prediction
    plt.plot([last_day, df_pred.index[0]],
             [closes.values[-1], df_pred.values[0]],
             lw=1, c='tab:green', linestyle='--')
    plt.plot(df_pred.index, df_pred, lw=2, c='tab:green')
    plt.axvspan(last_day, df_pred.index[-1], facecolor='tab:orange', alpha=0.2)
    _, _, ymin, ymax = plt.axis()
    plt.vlines(last_day, ymin, ymax, colors='k', linewidth=3, linestyle='--')
    plt.show()

    # Print prediction data
    print("Predicted share price:")
    df_pred = df_pred.apply(lambda x: f"{x:.2f} $")
    print(df_pred.to_string())
    print("")
예제 #4
0
def k_nearest_neighbors(l_args, s_ticker, df_stock):
    """Forecast prices with a k-nearest-neighbors regressor, optionally backtesting.

    The last ``n_inputs`` values of the "5. adjusted close" column are fed to
    a fitted ``KNeighborsRegressor`` to predict the next ``n_days`` prices.
    When an end date is given (-e/--end) the history is cut at that date and
    the prediction is compared against the real prices that followed it, in
    an extra figure and in the printed table.

    :param l_args: command-line style arguments: -i/--input, -d/--days,
        -j/--jumps, -n/--neighbors and -e/--end.
    :param s_ticker: ticker symbol; only used in the plot title.
    :param df_stock: DataFrame with a "5. adjusted close" column; its index
        provides the x-axis and is compared against the end date.
    :return: None. Any exception raised along the way is printed rather than
        propagated.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="knn",
        description="""
            K nearest neighbors is a simple algorithm that stores all
            available cases and predict the numerical target based on a similarity measure
            (e.g. distance functions).
        """,
    )

    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-n",
        "--neighbors",
        action="store",
        dest="n_neighbors",
        type=check_positive,
        default=20,
        help="number of neighbors to use on the algorithm.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        backtesting = bool(ns_parser.s_end_date)

        # BACKTESTING: validate the end date, then cut the history there and
        # keep the real prices that followed for comparison.
        if backtesting:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=ns_parser.n_inputs + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=ns_parser.n_days)

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        closes = df_stock["5. adjusted close"]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            closes.values,
            ns_parser.n_inputs,
            ns_parser.n_days,
            ns_parser.n_jumps,
        )

        if not stock_x:
            print("Given the model parameters more training data is needed.\n")
            return

        # Machine Learning model
        knn = neighbors.KNeighborsRegressor(n_neighbors=ns_parser.n_neighbors)
        knn.fit(stock_x, stock_y)

        # Prediction data, from the most recent n_inputs prices
        last_window = closes.values[-ns_parser.n_inputs:].reshape(1, -1)
        l_predictions = knn.predict(last_window)[0]
        l_pred_days = get_next_stock_market_days(
            last_stock_day=closes.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        last_day = df_stock.index[-1]
        last_close = closes.values[-1]

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, closes, lw=2)
        s_knn = f"{ns_parser.n_neighbors}-Nearest Neighbors on {s_ticker}"
        if backtesting:
            plt.title(
                f"BACKTESTING: {s_knn} - {ns_parser.n_days} days prediction")
        else:
            plt.title(f"{s_knn} - {ns_parser.n_days} days prediction")
        plt.xlim(df_stock.index[0],
                 get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally: its keyword name changed
        # from ``b`` to ``visible`` across matplotlib releases.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True,
                 which="minor",
                 color="#999999",
                 linestyle="-",
                 alpha=0.2)
        # Dashed connector between the last real price and the first prediction
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_close, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(last_day,
                    df_pred.index[-1],
                    facecolor="tab:orange",
                    alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(last_day,
                   ymin,
                   ymax,
                   linewidth=1,
                   linestyle="--",
                   color="k")

        # BACKTESTING: overlay the real prices that followed the end date
        if backtesting:
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, df_future["5. adjusted close"].values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING: second figure comparing real data and prediction
        if backtesting:
            real_values = df_future["5. adjusted close"].values
            # Relative prediction error in percent, reused by the error plots
            error_pct = 100 * (df_pred.values - real_values) / real_values
            x_right = df_pred.index[-1] + datetime.timedelta(days=1)

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)

            # Top panel: real prices versus predicted prices
            plt.subplot(211)
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index,
                        df_future["5. adjusted close"],
                        c="tab:blue",
                        lw=3)
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, real_values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_day, df_pred.index[0]],
                [last_close, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(last_day, x_right)
            plt.xticks([last_day, x_right])
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True,
                     which="minor",
                     color="#999999",
                     linestyle="-",
                     alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            # Clears the x ticks that were set just above for this panel
            plt.xticks([])

            # Bottom panel: prediction error in percent
            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(
                df_future.index,
                error_pct,
                lw=2,
                c="red",
            )
            plt.scatter(
                df_future.index,
                error_pct,
                c="red",
                lw=5,
            )
            plt.title(
                "BACKTESTING: Error between Real data and Prediction [%]")
            # Connector from zero error on the last real day to the first error
            plt.plot(
                [last_day, df_future.index[0]],
                [0, error_pct[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(last_day, x_right)
            plt.xticks([last_day, x_right])
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True,
                     which="minor",
                     color="#999999",
                     linestyle="-",
                     alpha=0.2)
            # NOTE(review): these labels repeat the top panel's although this
            # panel plots the error -- confirm whether that is intended.
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = df_future["5. adjusted close"]

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(price_prediction_backtesting_color,
                                  axis=1).to_string())
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values,
                                  df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_close)
        print("")

    except Exception as e:
        # Command boundary: report the problem instead of crashing the caller
        print(e)
        print("")
예제 #5
0
def k_nearest_neighbors(l_args, s_ticker, s_interval, df_stock):
    """Forecast prices with a k-nearest-neighbors regressor and plot the result.

    The last ``n_inputs`` values of the "5. adjusted close" column are fed to
    a fitted ``KNeighborsRegressor`` to predict the next ``n_days`` prices.
    The prediction is drawn after the historical series and printed to stdout.

    :param l_args: command-line style arguments: -i/--input, -d/--days,
        -j/--jumps and -n/--neighbors.
    :param s_ticker: ticker symbol; only used in the plot title.
    :param s_interval: not used by this function.
    :param df_stock: DataFrame with a "5. adjusted close" column; its index
        provides the x-axis and the last stock day.
    :return: None. Returns early when unknown arguments are supplied or when
        no training samples could be built.
    """
    parser = argparse.ArgumentParser(
        prog='knn',
        description=
        """ K nearest neighbors is a simple algorithm that stores all 
                                     available cases and predict the numerical target based on a similarity measure 
                                     (e.g. distance functions). """)

    parser.add_argument('-i',
                        "--input",
                        action="store",
                        dest="n_inputs",
                        type=check_positive,
                        default=40,
                        help='number of days to use for prediction.')
    parser.add_argument('-d',
                        "--days",
                        action="store",
                        dest="n_days",
                        type=check_positive,
                        default=5,
                        help='prediction days.')
    parser.add_argument('-j',
                        "--jumps",
                        action="store",
                        dest="n_jumps",
                        type=check_positive,
                        default=1,
                        help='number of jumps in training data.')
    parser.add_argument('-n',
                        "--neighbors",
                        action="store",
                        dest="n_neighbors",
                        type=check_positive,
                        default=20,
                        help='number of neighbors to use on the algorithm.')

    (ns_parser, l_unknown_args) = parser.parse_known_args(l_args)

    if l_unknown_args:
        print(
            f"The following args couldn't be interpreted: {l_unknown_args}\n")
        return

    closes = df_stock['5. adjusted close']

    # Split training data
    stock_x, stock_y = splitTrain.split_train(closes.values,
                                              ns_parser.n_inputs,
                                              ns_parser.n_days,
                                              ns_parser.n_jumps)

    # Without this guard an empty training set surfaces as an obscure error
    # from inside knn.fit().
    if not stock_x:
        print("Given the model parameters more training data is needed.\n")
        return

    # Machine Learning model
    knn = neighbors.KNeighborsRegressor(n_neighbors=ns_parser.n_neighbors)
    knn.fit(stock_x, stock_y)

    # Prediction data, from the most recent n_inputs prices
    last_window = closes.values[-ns_parser.n_inputs:].reshape(1, -1)
    l_predictions = knn.predict(last_window)[0]
    l_pred_days = get_next_stock_market_days(last_stock_day=closes.index[-1],
                                             n_next_days=ns_parser.n_days)
    df_pred = pd.Series(l_predictions, index=l_pred_days, name='Price')

    last_day = df_stock.index[-1]

    # Plotting
    plt.plot(df_stock.index, closes, lw=2)
    plt.title(
        f"{ns_parser.n_neighbors}-Nearest Neighbors on {s_ticker} - {ns_parser.n_days} days prediction"
    )
    plt.xlim(df_stock.index[0],
             get_next_stock_market_days(df_pred.index[-1], 1)[-1])
    plt.xlabel('Time')
    plt.ylabel('Share Price ($)')
    # The on/off flag is passed positionally: its keyword name changed from
    # ``b`` to ``visible`` across matplotlib releases.
    plt.grid(True, which='major', color='#666666', linestyle='-')
    plt.minorticks_on()
    plt.grid(True, which='minor', color='#999999', linestyle='-', alpha=0.2)
    # Dashed connector between the last real price and the first prediction
    plt.plot([last_day, df_pred.index[0]],
             [closes.values[-1], df_pred.values[0]],
             lw=1,
             c='tab:green',
             linestyle='--')
    plt.plot(df_pred.index, df_pred, lw=2, c='tab:green')
    plt.axvspan(last_day,
                df_pred.index[-1],
                facecolor='tab:orange',
                alpha=0.2)
    _, _, ymin, ymax = plt.axis()
    plt.vlines(last_day,
               ymin,
               ymax,
               linewidth=1,
               linestyle='--',
               color='k')
    plt.show()

    # Print prediction data
    print("Predicted share price:")
    df_pred = df_pred.apply(lambda x: f"{x:.2f} $")
    print(df_pred.to_string())
    print("")
예제 #6
0
def regression(l_args, s_ticker, df_stock, polynomial):
    """Forecast prices with a linear/polynomial regression, optionally backtesting.

    The last ``n_inputs`` values of the "5. adjusted close" column are fed to
    the fitted model to predict the next ``n_days`` prices. When an end date
    is given (-e/--end) the history is cut at that date and the prediction is
    compared against the real prices that followed it, in an extra figure and
    in the printed table.

    :param l_args: command-line style arguments: -i/--input, -d/--days,
        -j/--jumps, -e/--end and, when ``polynomial == USER_INPUT``, the
        required -p/--polynomial.
    :param s_ticker: ticker symbol; only used in the plot title.
    :param df_stock: DataFrame with a "5. adjusted close" column; its index
        provides the x-axis and is compared against the end date.
    :param polynomial: ``LINEAR``, ``USER_INPUT`` or a degree handed to
        ``PolynomialFeatures``.
    :return: None. ``SystemExit`` and any other exception raised along the
        way are swallowed (the latter after being printed).
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="regression",
        description="""
            Regression attempts to model the relationship between
            two variables by fitting a linear/quadratic/cubic/other equation to
            observed data. One variable is considered to be an explanatory variable,
            and the other is considered to be a dependent variable.
        """,
    )

    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    # The polynomial degree is only a CLI option when the caller asks for it
    if polynomial == USER_INPUT:
        parser.add_argument(
            "-p",
            "--polynomial",
            action="store",
            dest="n_polynomial",
            type=check_positive,
            required=True,
            help="polynomial associated with regression.",
        )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        backtesting = bool(ns_parser.s_end_date)

        # BACKTESTING: validate the end date, then cut the history there and
        # keep the real prices that followed for comparison.
        if backtesting:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=ns_parser.n_inputs + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=ns_parser.n_days)

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        closes = df_stock["5. adjusted close"]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            closes.values,
            ns_parser.n_inputs,
            ns_parser.n_days,
            ns_parser.n_jumps,
        )

        if not stock_x:
            print("Given the model parameters more training data is needed.\n")
            return

        # Machine Learning model
        if polynomial == LINEAR:
            model = linear_model.LinearRegression(n_jobs=-1)
        else:
            if polynomial == USER_INPUT:
                polynomial = ns_parser.n_polynomial
            model = pipeline.make_pipeline(
                preprocessing.PolynomialFeatures(polynomial),
                linear_model.Ridge())

        model.fit(stock_x, stock_y)
        # Predict from the most recent n_inputs prices
        last_window = closes.values[-ns_parser.n_inputs:].reshape(1, -1)
        l_predictions = model.predict(last_window)[0]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=closes.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        last_day = df_stock.index[-1]
        last_close = closes.values[-1]

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, closes, lw=2)
        if backtesting:
            plt.title(
                f"BACKTESTING: Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        plt.xlim(df_stock.index[0],
                 get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally: its keyword name changed
        # from ``b`` to ``visible`` across matplotlib releases.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True,
                 which="minor",
                 color="#999999",
                 linestyle="-",
                 alpha=0.2)
        # Dashed connector between the last real price and the first prediction
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_close, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(last_day,
                    df_pred.index[-1],
                    facecolor="tab:orange",
                    alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(last_day,
                   ymin,
                   ymax,
                   linewidth=1,
                   linestyle="--",
                   color="k")

        # BACKTESTING: overlay the real prices that followed the end date
        if backtesting:
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, df_future["5. adjusted close"].values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING: second figure comparing real data and prediction
        if backtesting:
            real_values = df_future["5. adjusted close"].values
            # Relative prediction error in percent, reused by the error plots
            error_pct = 100 * (df_pred.values - real_values) / real_values
            x_right = df_pred.index[-1] + datetime.timedelta(days=1)

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)

            # Top panel: real prices versus predicted prices
            plt.subplot(211)
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index,
                        df_future["5. adjusted close"],
                        c="tab:blue",
                        lw=3)
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, real_values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_day, df_pred.index[0]],
                [last_close, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(last_day, x_right)
            plt.xticks(
                [last_day, x_right],
                visible=True,
            )
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True,
                     which="minor",
                     color="#999999",
                     linestyle="-",
                     alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            # Clears the x ticks that were set just above for this panel
            plt.xticks([])

            # Bottom panel: prediction error in percent
            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(
                df_future.index,
                error_pct,
                lw=2,
                c="red",
            )
            plt.scatter(
                df_future.index,
                error_pct,
                c="red",
                lw=5,
            )
            plt.title(
                "BACKTESTING: Error between Real data and Prediction [%]")
            # Connector from zero error on the last real day to the first error
            plt.plot(
                [last_day, df_future.index[0]],
                [0, error_pct[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(last_day, x_right)
            plt.xticks(
                [last_day, x_right],
                visible=True,
            )
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True,
                     which="minor",
                     color="#999999",
                     linestyle="-",
                     alpha=0.2)
            # NOTE(review): these labels repeat the top panel's although this
            # panel plots the error -- confirm whether that is intended.
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = df_future["5. adjusted close"]

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(price_prediction_backtesting_color,
                                  axis=1).to_string())
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values,
                                  df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_close)
        print("")

    except SystemExit:
        # Keeps a SystemExit raised inside the block from ending the caller
        print("")
    except Exception as e:
        # Command boundary: report the problem instead of crashing the caller
        print(e)
        print("")
예제 #7
0
def k_nearest_neighbors(l_args, s_ticker, df_stock):
    """Predict the next prices with a k-nearest-neighbors regressor, then plot and print them.

    The adjusted-close series is cut into (input window, target window) training
    pairs by ``splitTrain.split_train``, a ``KNeighborsRegressor`` is fitted on
    them, and the last ``n_inputs`` prices are used to predict the next
    ``n_days`` market days.

    :param l_args: command-line style argument list for this command
        (``-i/--input``, ``-d/--days``, ``-j/--jumps``, ``-n/--neighbors``).
    :param s_ticker: ticker symbol; only used in the plot title.
    :param df_stock: dataframe whose ``"5. adjusted close"`` column holds the
        price series and whose index is used as the time axis.
    :return: None. Results are shown in a matplotlib figure and printed.
        Any ``Exception`` raised while parsing, fitting or plotting is printed
        instead of being propagated.
    """
    parser = argparse.ArgumentParser(
        prog="knn",
        description="""
            K nearest neighbors is a simple algorithm that stores all
            available cases and predict the numerical target based on a similarity measure
            (e.g. distance functions).
        """,
    )

    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-n",
        "--neighbors",
        action="store",
        dest="n_neighbors",
        type=check_positive,
        default=20,
        help="number of neighbors to use on the algorithm.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)

        adj_close = df_stock["5. adjusted close"]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            adj_close.values,
            ns_parser.n_inputs,
            ns_parser.n_days,
            ns_parser.n_jumps,
        )
        # Fail with a readable message instead of an opaque estimator error
        # when the series is too short for the requested windows.
        if len(stock_x) == 0:
            raise ValueError(
                "Given the model parameters more training data is needed.")

        # Machine Learning model
        knn = neighbors.KNeighborsRegressor(n_neighbors=ns_parser.n_neighbors)
        knn.fit(stock_x, stock_y)

        # Prediction data: feed the most recent window as a single sample
        latest_window = adj_close.values[-ns_parser.n_inputs:].reshape(1, -1)
        l_predictions = knn.predict(latest_window)[0]
        l_pred_days = get_next_stock_market_days(
            last_stock_day=adj_close.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Plotting
        plt.figure()
        plt.plot(df_stock.index, adj_close, lw=2)
        plt.title(
            f"{ns_parser.n_neighbors}-Nearest Neighbors on {s_ticker} - {ns_parser.n_days} days prediction"
        )
        plt.xlim(df_stock.index[0],
                 get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally: Matplotlib renamed the
        # keyword from `b` to `visible`, so neither keyword works everywhere.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True,
                 which="minor",
                 color="#999999",
                 linestyle="-",
                 alpha=0.2)
        # Dashed connector between the last real price and the first prediction
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [adj_close.values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        # Shade the prediction horizon
        plt.axvspan(df_stock.index[-1],
                    df_pred.index[-1],
                    facecolor="tab:orange",
                    alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(df_stock.index[-1],
                   ymin,
                   ymax,
                   linewidth=1,
                   linestyle="--",
                   color="k")
        plt.ion()
        plt.show()

        # Print prediction data
        print_pretty_prediction(df_pred, adj_close.values[-1])
        print("")

    except Exception as e:
        print(e)
        print("")
예제 #8
0
def regression(l_args, s_ticker, df_stock, polynomial):
    """Predict the next prices with a (polynomial) regression, then plot and print them.

    The adjusted-close series is cut into (input window, target window) training
    pairs by ``splitTrain.split_train``; the fitted model then predicts the next
    ``n_days`` market days from the last ``n_inputs`` prices.

    :param l_args: command-line style argument list for this command
        (``-i/--input``, ``-d/--days``, ``-j/--jumps`` and, when ``polynomial``
        is ``USER_INPUT``, the required ``-p/--polynomial``).
    :param s_ticker: ticker symbol; only used in the plot title.
    :param df_stock: dataframe whose ``"5. adjusted close"`` column holds the
        price series and whose index is used as the time axis.
    :param polynomial: ``LINEAR`` selects a plain ``LinearRegression``;
        ``USER_INPUT`` takes the degree from ``-p``; any other value is used
        directly as the ``PolynomialFeatures`` degree in front of a ``Ridge``.
    :return: None. Results are shown in a matplotlib figure and printed.
        Any ``Exception`` raised while parsing, fitting or plotting is printed
        instead of being propagated.
    """
    parser = argparse.ArgumentParser(
        prog="regression",
        description="""
            Regression attempts to model the relationship between
            two variables by fitting a linear/quadratic/cubic/other equation to
            observed data. One variable is considered to be an explanatory variable,
            and the other is considered to be a dependent variable.
        """,
    )

    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )

    # The degree is only a CLI option when the caller did not fix it.
    if polynomial == USER_INPUT:
        parser.add_argument(
            "-p",
            "--polynomial",
            action="store",
            dest="n_polynomial",
            type=check_positive,
            required=True,
            help="polynomial associated with regression.",
        )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)

        adj_close = df_stock["5. adjusted close"]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            adj_close.values,
            ns_parser.n_inputs,
            ns_parser.n_days,
            ns_parser.n_jumps,
        )
        # Fail with a readable message instead of an opaque estimator error
        # when the series is too short for the requested windows.
        if len(stock_x) == 0:
            raise ValueError(
                "Given the model parameters more training data is needed.")

        # Machine Learning model
        if polynomial == LINEAR:
            model = linear_model.LinearRegression(n_jobs=-1)
        else:
            if polynomial == USER_INPUT:
                # From here on `polynomial` is the numeric degree (also
                # shown in the plot title below).
                polynomial = ns_parser.n_polynomial
            model = pipeline.make_pipeline(
                preprocessing.PolynomialFeatures(polynomial),
                linear_model.Ridge())

        model.fit(stock_x, stock_y)
        # Feed the most recent window as a single sample
        latest_window = adj_close.values[-ns_parser.n_inputs:].reshape(1, -1)
        l_predictions = model.predict(latest_window)[0]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=adj_close.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Plotting
        plt.figure()
        plt.plot(df_stock.index, adj_close, lw=2)
        plt.title(
            f"Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
        )
        plt.xlim(df_stock.index[0],
                 get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally: Matplotlib renamed the
        # keyword from `b` to `visible`, so neither keyword works everywhere.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True,
                 which="minor",
                 color="#999999",
                 linestyle="-",
                 alpha=0.2)
        # Dashed connector between the last real price and the first prediction
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [adj_close.values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        # Shade the prediction horizon
        plt.axvspan(df_stock.index[-1],
                    df_pred.index[-1],
                    facecolor="tab:orange",
                    alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(df_stock.index[-1],
                   ymin,
                   ymax,
                   linewidth=1,
                   linestyle="--",
                   color="k")
        plt.ion()
        plt.show()

        # Print prediction data
        print_pretty_prediction(df_pred, adj_close.values[-1])
        print("")

    except Exception as e:
        print(e)
        print("")
예제 #9
0
def mlp(l_args, s_ticker, df_stock):
    """Predict the next prices with a multilayer perceptron, then plot and print them.

    The adjusted-close series is optionally scaled, cut into training windows by
    ``splitTrain.split_train``, and used to train the network built from
    ``cfg_nn_models.MultiLayer_Perceptron``. The last ``n_inputs`` (scaled)
    prices are then used to predict the next ``n_days`` market days, and the
    prediction is scaled back to prices.

    :param l_args: command-line style argument list for this command
        (``-d/--days``, ``-i/--input``, ``-e/--epochs``, ``-j/--jumps``,
        ``-p/--pp``, ``-o/--optimizer``, ``-l/--loss``).
    :param s_ticker: ticker symbol; only used in the plot title.
    :param df_stock: dataframe whose ``"5. adjusted close"`` column holds the
        price series and whose index is used as the time axis.
    :return: None. Results are shown in a matplotlib figure and printed.
        Any ``Exception`` raised while parsing, training or plotting is printed
        instead of being propagated.
    """
    parser = argparse.ArgumentParser(prog="mlp",
                                     description="""Multilayer Perceptron. """)

    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-e",
        "--epochs",
        action="store",
        dest="n_epochs",
        type=check_positive,
        default=200,
        help="number of training epochs.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-p",
        "--pp",
        action="store",
        dest="s_preprocessing",
        default="normalization",
        choices=["normalization", "standardization", "none"],
        help="pre-processing data.",
    )
    # NOTE(review): "optimizer" looks like the Keras base-class name rather
    # than a usable optimizer identifier - confirm it is a valid choice.
    parser.add_argument(
        "-o",
        "--optimizer",
        action="store",
        dest="s_optimizer",
        default="adam",
        choices=[
            "adam",
            "adagrad",
            "adadelta",
            "adamax",
            "ftrl",
            "nadam",
            "optimizer",
            "rmsprop",
            "sgd",
        ],
        help="optimization technique.",
    )
    parser.add_argument(
        "-l",
        "--loss",
        action="store",
        dest="s_loss",
        default="mae",
        choices=["mae", "mape", "mse", "msle"],
        help="loss function.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)

        adj_close = df_stock["5. adjusted close"]

        # Pre-process data; `scaler` stays None when no scaling is requested
        # so the prediction is only scaled back when it was scaled at all.
        raw_prices = np.array(adj_close.values.reshape(-1, 1))
        if ns_parser.s_preprocessing == "standardization":
            scaler = StandardScaler()
        elif ns_parser.s_preprocessing == "normalization":
            scaler = MinMaxScaler()
        else:  # No pre-processing
            scaler = None

        if scaler is None:
            stock_train_data = raw_prices
        else:
            stock_train_data = scaler.fit_transform(raw_prices)

        # Split training data for the neural network
        stock_x, stock_y = splitTrain.split_train(
            stock_train_data,
            ns_parser.n_inputs,
            ns_parser.n_days,
            numJumps=ns_parser.n_jumps,
        )
        # Without this guard an empty split fails below with an unhelpful
        # "tuple index out of range" from the shape lookup.
        if len(stock_x) == 0:
            raise ValueError(
                "Given the model parameters more training data is needed.")

        # Drop the trailing single-feature axis: (samples, window, 1) -> (samples, window)
        stock_x = np.array(stock_x)
        stock_x = np.reshape(stock_x, (stock_x.shape[0], stock_x.shape[1]))
        stock_y = np.array(stock_y)
        stock_y = np.reshape(stock_y, (stock_y.shape[0], stock_y.shape[1]))

        # Build Neural Network model
        model = build_neural_network_model(cfg_nn_models.MultiLayer_Perceptron,
                                           ns_parser.n_inputs,
                                           ns_parser.n_days)
        model.compile(optimizer=ns_parser.s_optimizer, loss=ns_parser.s_loss)

        # Train our model
        model.fit(stock_x, stock_y, epochs=ns_parser.n_epochs, verbose=1)
        print("")

        # summary() prints the table itself and returns None, so wrapping it
        # in print() only added a stray "None" line.
        model.summary()
        print("")

        # Prediction from the most recent window, as a single sample
        yhat = model.predict(
            stock_train_data[-ns_parser.n_inputs:].reshape(
                1, ns_parser.n_inputs),
            verbose=0,
        )

        # Re-scale the data back
        if scaler is not None:
            y_pred_test_t = scaler.inverse_transform(yhat.tolist())
        else:
            y_pred_test_t = yhat

        l_pred_days = get_next_stock_market_days(
            last_stock_day=adj_close.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(y_pred_test_t[0].tolist(),
                            index=l_pred_days,
                            name="Price")

        # Plotting
        plt.figure()
        plt.plot(df_stock.index, adj_close, lw=3)
        plt.title(f"MLP on {s_ticker} - {ns_parser.n_days} days prediction")
        plt.xlim(df_stock.index[0],
                 get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally: Matplotlib renamed the
        # keyword from `b` to `visible`, so neither keyword works everywhere.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True,
                 which="minor",
                 color="#999999",
                 linestyle="-",
                 alpha=0.2)
        # Dashed connector between the last real price and the first prediction
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [adj_close.values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        # Shade the prediction horizon
        plt.axvspan(df_stock.index[-1],
                    df_pred.index[-1],
                    facecolor="tab:orange",
                    alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        # The colour is given once; the original passed both `colors` and `color`.
        plt.vlines(
            df_stock.index[-1],
            ymin,
            ymax,
            colors="k",
            linewidth=3,
            linestyle="--",
        )
        plt.ion()
        plt.show()

        # Print prediction data
        print_pretty_prediction(df_pred, adj_close.values[-1])
        print("")

    except Exception as e:
        print(e)
        print("")