def _preprocess_split(df_stock, ns_parser):
    """Preprocess and split training data.

    The "5. adjusted close" column of ``df_stock`` is reshaped into a single
    feature column, scaled according to ``ns_parser.s_preprocessing``
    ("standardization" -> ``StandardScaler``, "normalization" ->
    ``MinMaxScaler``, anything else -> left untouched) and then passed to
    ``splitTrain.split_train`` together with ``ns_parser.n_inputs``,
    ``ns_parser.n_days`` and ``ns_parser.n_jumps``.

    :param df_stock: data frame exposing a "5. adjusted close" column.
    :param ns_parser: parsed arguments providing ``s_preprocessing``,
        ``n_inputs``, ``n_days`` and ``n_jumps``.
    :return: (scaler, stock_train_data, stock_x, stock_y). ``scaler`` is
        ``None`` when no pre-processing was requested.
    :raises Exception: if more training data is needed.
    """
    closes = np.array(df_stock["5. adjusted close"].values.reshape(-1, 1))

    # Pre-process data
    if ns_parser.s_preprocessing == "standardization":
        scaler = StandardScaler()
    elif ns_parser.s_preprocessing == "normalization":
        scaler = MinMaxScaler()
    else:
        # No pre-processing. The scaler must still be bound, otherwise the
        # return statement below fails with UnboundLocalError.
        scaler = None

    if scaler is None:
        stock_train_data = closes
    else:
        stock_train_data = scaler.fit_transform(closes)

    # Split training data for the neural network
    stock_x, stock_y = splitTrain.split_train(
        stock_train_data,
        ns_parser.n_inputs,
        ns_parser.n_days,
        numJumps=ns_parser.n_jumps,
    )

    if not stock_x:
        raise Exception("Given the model parameters more training data is needed.")

    stock_x = np.array(stock_x)
    stock_y = np.array(stock_y)

    return scaler, stock_train_data, stock_x, stock_y
def regression(l_args, s_ticker, s_interval, df_stock, polynomial):
    """Fit a regression model on the adjusted close, then plot and print it.

    :param l_args: command-line style arguments handed to the argparse parser.
    :param s_ticker: ticker symbol, only used in the plot title.
    :param s_interval: not used by this function; kept for the call signature.
    :param df_stock: data frame exposing a "5. adjusted close" column; its
        index is used as the time axis.
    :param polynomial: ``LINEAR`` selects ``LinearRegression``; ``USER_INPUT``
        makes ``-p/--polynomial`` a required argument; any other value is
        passed to ``PolynomialFeatures`` directly.
    :return: None. Results are plotted and printed.
    """
    parser = argparse.ArgumentParser(
        prog="regression",
        description=(
            "Regression attempts to model the relationship between two "
            "variables by fitting a linear/quadratic/cubic/other equation to "
            "observed data. One variable is considered to be an explanatory "
            "variable, and the other is considered to be a dependent variable. "
        ),
    )
    parser.add_argument(
        "-i", "--input", action="store", dest="n_inputs",
        type=check_positive, default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d", "--days", action="store", dest="n_days",
        type=check_positive, default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j", "--jumps", action="store", dest="n_jumps",
        type=check_positive, default=1,
        help="number of jumps in training data.",
    )
    if polynomial == USER_INPUT:
        parser.add_argument(
            "-p", "--polynomial", action="store", dest="n_polynomial",
            type=check_positive, required=True,
            help="polynomial associated with regression.",
        )

    (ns_parser, l_unknown_args) = parser.parse_known_args(l_args)

    if l_unknown_args:
        print(f"The following args couldn't be interpreted: {l_unknown_args}\n")
        return

    close = df_stock["5. adjusted close"]

    # Split training data
    stock_x, stock_y = splitTrain.split_train(
        close.values, ns_parser.n_inputs, ns_parser.n_days, ns_parser.n_jumps
    )
    # Without this guard an empty training set only surfaces as an opaque
    # error raised from inside model.fit().
    if not stock_x:
        print("Given the model parameters more training data is needed.\n")
        return

    # Machine Learning model
    if polynomial == LINEAR:
        model = linear_model.LinearRegression(n_jobs=-1)
    else:
        if polynomial == USER_INPUT:
            polynomial = ns_parser.n_polynomial
        model = pipeline.make_pipeline(
            preprocessing.PolynomialFeatures(polynomial), linear_model.Ridge()
        )

    model.fit(stock_x, stock_y)
    l_predictions = model.predict(
        close.values[-ns_parser.n_inputs:].reshape(1, -1)
    )[0]

    # Prediction data
    l_pred_days = get_next_stock_market_days(
        last_stock_day=close.index[-1], n_next_days=ns_parser.n_days
    )
    df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

    # Plotting
    plt.plot(df_stock.index, close, lw=2)
    plt.title(
        f"Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
    )
    plt.xlim(df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1])
    plt.xlabel("Time")
    plt.ylabel("Share Price ($)")
    # Grid visibility is passed positionally: the ``b=`` keyword was renamed
    # to ``visible`` in newer Matplotlib, the positional form works everywhere.
    plt.grid(True, which="major", color="#666666", linestyle="-")
    plt.minorticks_on()
    plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
    # Dashed connector between the last known close and the first prediction.
    plt.plot(
        [df_stock.index[-1], df_pred.index[0]],
        [close.values[-1], df_pred.values[0]],
        lw=1, c="tab:green", linestyle="--",
    )
    plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
    plt.axvspan(df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
    _, _, ymin, ymax = plt.axis()
    plt.vlines(df_stock.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k")
    plt.show()

    # Print prediction data
    print("Predicted share price:")
    df_pred = df_pred.apply(lambda x: f"{x:.2f} $")
    print(df_pred.to_string())
    print("")
def mlp(l_args, s_ticker, s_interval, df_stock):
    """Train a multilayer perceptron on the adjusted close and plot/print it.

    :param l_args: command-line style arguments handed to the argparse parser.
    :param s_ticker: ticker symbol, only used in the plot title.
    :param s_interval: not used by this function; kept for the call signature.
    :param df_stock: data frame exposing a "5. adjusted close" column; its
        index is used as the time axis.
    :return: None. Results are plotted and printed.
    """
    parser = argparse.ArgumentParser(
        prog="mlp", description="""Multilayer Perceptron. """
    )
    parser.add_argument(
        "-d", "--days", action="store", dest="n_days",
        type=check_positive, default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i", "--input", action="store", dest="n_inputs",
        type=check_positive, default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-e", "--epochs", action="store", dest="n_epochs",
        type=check_positive, default=200,
        help="number of training epochs.",
    )
    parser.add_argument(
        "-j", "--jumps", action="store", dest="n_jumps",
        type=check_positive, default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-p", "--pp", action="store", dest="s_preprocessing",
        default="normalization",
        choices=["normalization", "standardization", "none"],
        help="pre-processing data.",
    )
    parser.add_argument(
        "-o", "--optimizer", action="store", dest="s_optimizer",
        default="adam",
        choices=[
            "adam", "adagrad", "adadelta", "adamax", "ftrl",
            "nadam", "optimizer", "rmsprop", "sgd",
        ],
        help="optimization technique.",
    )
    parser.add_argument(
        "-l", "--loss", action="store", dest="s_loss",
        default="mae", choices=["mae", "mape", "mse", "msle"],
        help="loss function.",
    )

    (ns_parser, l_unknown_args) = parser.parse_known_args(l_args)

    if l_unknown_args:
        print(f"The following args couldn't be interpreted: {l_unknown_args}\n")
        return

    close = df_stock["5. adjusted close"]
    is_scaled = ns_parser.s_preprocessing in ("standardization", "normalization")

    # Pre-process data
    stock_train_data = np.array(close.values.reshape(-1, 1))
    if ns_parser.s_preprocessing == "standardization":
        scaler = StandardScaler()
        stock_train_data = scaler.fit_transform(stock_train_data)
    elif ns_parser.s_preprocessing == "normalization":
        scaler = MinMaxScaler()
        stock_train_data = scaler.fit_transform(stock_train_data)
    # else: no pre-processing, the raw column is used as is

    # Split training data for the neural network
    stock_x, stock_y = splitTrain.split_train(
        stock_train_data,
        ns_parser.n_inputs,
        ns_parser.n_days,
        numJumps=ns_parser.n_jumps,
    )
    # An empty split would otherwise crash on ``shape[1]`` just below.
    if not stock_x:
        print("Given the model parameters more training data is needed.\n")
        return

    # Drop the trailing feature axis so the samples are 2-D.
    stock_x = np.array(stock_x)
    stock_x = np.reshape(stock_x, (stock_x.shape[0], stock_x.shape[1]))
    stock_y = np.array(stock_y)
    stock_y = np.reshape(stock_y, (stock_y.shape[0], stock_y.shape[1]))

    # Build Neural Network model
    model = build_neural_network_model(
        cfg_nn_models.MultiLayer_Perceptron, ns_parser.n_inputs, ns_parser.n_days
    )
    model.compile(optimizer=ns_parser.s_optimizer, loss=ns_parser.s_loss)

    # Train our model
    model.fit(stock_x, stock_y, epochs=ns_parser.n_epochs, verbose=1)
    print("")

    print(model.summary())
    print("")

    # Prediction
    yhat = model.predict(
        stock_train_data[-ns_parser.n_inputs:].reshape(1, ns_parser.n_inputs),
        verbose=0,
    )

    # Re-scale the data back
    if is_scaled:
        y_pred_test_t = scaler.inverse_transform(yhat.tolist())
    else:
        y_pred_test_t = yhat

    l_pred_days = get_next_stock_market_days(
        last_stock_day=close.index[-1], n_next_days=ns_parser.n_days
    )
    df_pred = pd.Series(y_pred_test_t[0].tolist(), index=l_pred_days, name="Price")

    # Plotting
    plt.plot(df_stock.index, close, lw=3)
    plt.title(f"MLP on {s_ticker} - {ns_parser.n_days} days prediction")
    plt.xlim(df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1])
    plt.xlabel("Time")
    plt.ylabel("Share Price ($)")
    # Grid visibility is passed positionally: the ``b=`` keyword was renamed
    # to ``visible`` in newer Matplotlib, the positional form works everywhere.
    plt.grid(True, which="major", color="#666666", linestyle="-")
    plt.minorticks_on()
    plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
    # Dashed connector between the last known close and the first prediction.
    plt.plot(
        [df_stock.index[-1], df_pred.index[0]],
        [close.values[-1], df_pred.values[0]],
        lw=1, c="tab:green", linestyle="--",
    )
    plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
    plt.axvspan(df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
    _, _, ymin, ymax = plt.axis()
    # The colour is given once; the original passed both ``colors`` and ``color``.
    plt.vlines(df_stock.index[-1], ymin, ymax, colors="k", linewidth=3, linestyle="--")
    plt.show()

    # Print prediction data
    print("Predicted share price:")
    df_pred = df_pred.apply(lambda x: f"{x:.2f} $")
    print(df_pred.to_string())
    print("")
def k_nearest_neighbors(l_args, s_ticker, df_stock):
    """Predict with a k-nearest-neighbours regressor, optionally backtesting.

    When ``-e/--end`` is given, the data is cut at that date, the model is
    trained on the part up to it, and the prediction is compared with the
    real prices that follow (second figure plus a printed table and KPIs).

    :param l_args: command-line style arguments handed to the argparse parser.
    :param s_ticker: ticker symbol, only used in the plot title.
    :param df_stock: data frame exposing a "5. adjusted close" column; its
        index is used as the time axis.
    :return: None. Results are plotted and printed; any exception raised
        inside the body is printed rather than propagated.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="knn",
        description=(
            " K nearest neighbors is a simple algorithm that stores all "
            "available cases and predict the numerical target based on a "
            "similarity measure (e.g. distance functions). "
        ),
    )
    parser.add_argument(
        "-i", "--input", action="store", dest="n_inputs",
        type=check_positive, default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d", "--days", action="store", dest="n_days",
        type=check_positive, default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j", "--jumps", action="store", dest="n_jumps",
        type=check_positive, default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-n", "--neighbors", action="store", dest="n_neighbors",
        type=check_positive, default=20,
        help="number of neighbors to use on the algorithm.",
    )
    parser.add_argument(
        "-e", "--end", action="store", type=valid_date,
        dest="s_end_date", default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        # BACKTESTING
        if ns_parser.s_end_date:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=ns_parser.n_inputs + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date, n_next_days=ns_parser.n_days
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Real prices for the predicted window, then cut the training data.
            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        close = df_stock["5. adjusted close"]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            close.values, ns_parser.n_inputs, ns_parser.n_days, ns_parser.n_jumps
        )

        if not stock_x:
            print("Given the model parameters more training data is needed.\n")
            return

        # Machine Learning model
        knn = neighbors.KNeighborsRegressor(n_neighbors=ns_parser.n_neighbors)
        knn.fit(stock_x, stock_y)

        # Prediction data
        l_predictions = knn.predict(
            close.values[-ns_parser.n_inputs:].reshape(1, -1)
        )[0]
        l_pred_days = get_next_stock_market_days(
            last_stock_day=close.index[-1], n_next_days=ns_parser.n_days
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, close, lw=2)
        s_knn = f"{ns_parser.n_neighbors}-Nearest Neighbors on {s_ticker}"
        # BACKTESTING
        if ns_parser.s_end_date:
            plt.title(f"BACKTESTING: {s_knn} - {ns_parser.n_days} days prediction")
        else:
            plt.title(f"{s_knn} - {ns_parser.n_days} days prediction")
        plt.xlim(df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # Grid visibility is passed positionally: the ``b=`` keyword was
        # renamed to ``visible`` in newer Matplotlib, positional works everywhere.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known close and the first prediction.
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [close.values[-1], df_pred.values[0]],
            lw=1, c="tab:green", linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(df_stock.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k")

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index, df_future["5. adjusted close"],
                lw=2, c="tab:blue", ls="--",
            )
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [close.values[-1], df_future["5. adjusted close"].values[0]],
                lw=1, c="tab:blue", linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if ns_parser.s_end_date:
            real = df_future["5. adjusted close"]
            x_window = [
                df_stock.index[-1],
                df_pred.index[-1] + datetime.timedelta(days=1),
            ]

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)

            # Upper panel: real prices versus prediction.
            plt.subplot(211)
            plt.plot(df_future.index, real, lw=2, c="tab:blue", ls="--")
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real, c="tab:blue", lw=3)
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [close.values[-1], real.values[0]],
                lw=2, c="tab:blue", ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [df_stock.index[-1], df_pred.index[0]],
                [close.values[-1], df_pred.values[0]],
                lw=2, c="green", ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(x_window[0], x_window[1])
            plt.xticks(x_window)
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            # Lower panel: relative prediction error in percent.
            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            pct_error = 100 * (df_pred.values - real.values) / real.values
            plt.plot(df_future.index, pct_error, lw=2, c="red")
            plt.scatter(df_future.index, pct_error, c="red", lw=5)
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [0, 100 * (df_pred.values[0] - real.values[0]) / real.values[0]],
                lw=2, ls="--", c="red",
            )
            plt.xlim(x_window[0], x_window[1])
            plt.xticks(x_window)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = df_future["5. adjusted close"]

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time Real [$] x Prediction [$]")
                print(
                    df_pred.apply(price_prediction_backtesting_color, axis=1).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, close.values[-1])
        print("")

    except Exception as e:
        # Command boundary: report the problem instead of propagating it.
        print(e)
        print("")
def k_nearest_neighbors(l_args, s_ticker, s_interval, df_stock):
    """Predict with a k-nearest-neighbours regressor, then plot and print it.

    :param l_args: command-line style arguments handed to the argparse parser.
    :param s_ticker: ticker symbol, only used in the plot title.
    :param s_interval: not used by this function; kept for the call signature.
    :param df_stock: data frame exposing a "5. adjusted close" column; its
        index is used as the time axis.
    :return: None. Results are plotted and printed.
    """
    parser = argparse.ArgumentParser(
        prog="knn",
        description=(
            " K nearest neighbors is a simple algorithm that stores all "
            "available cases and predict the numerical target based on a "
            "similarity measure (e.g. distance functions). "
        ),
    )
    parser.add_argument(
        "-i", "--input", action="store", dest="n_inputs",
        type=check_positive, default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d", "--days", action="store", dest="n_days",
        type=check_positive, default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j", "--jumps", action="store", dest="n_jumps",
        type=check_positive, default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-n", "--neighbors", action="store", dest="n_neighbors",
        type=check_positive, default=20,
        help="number of neighbors to use on the algorithm.",
    )

    (ns_parser, l_unknown_args) = parser.parse_known_args(l_args)

    if l_unknown_args:
        print(f"The following args couldn't be interpreted: {l_unknown_args}\n")
        return

    close = df_stock["5. adjusted close"]

    # Split training data
    stock_x, stock_y = splitTrain.split_train(
        close.values, ns_parser.n_inputs, ns_parser.n_days, ns_parser.n_jumps
    )
    # Without this guard an empty training set only surfaces as an opaque
    # error raised from inside knn.fit().
    if not stock_x:
        print("Given the model parameters more training data is needed.\n")
        return

    # Machine Learning model
    knn = neighbors.KNeighborsRegressor(n_neighbors=ns_parser.n_neighbors)
    knn.fit(stock_x, stock_y)

    # Prediction data
    l_predictions = knn.predict(
        close.values[-ns_parser.n_inputs:].reshape(1, -1)
    )[0]
    l_pred_days = get_next_stock_market_days(
        last_stock_day=close.index[-1], n_next_days=ns_parser.n_days
    )
    df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

    # Plotting
    plt.plot(df_stock.index, close, lw=2)
    plt.title(
        f"{ns_parser.n_neighbors}-Nearest Neighbors on {s_ticker} - {ns_parser.n_days} days prediction"
    )
    plt.xlim(df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1])
    plt.xlabel("Time")
    plt.ylabel("Share Price ($)")
    # Grid visibility is passed positionally: the ``b=`` keyword was renamed
    # to ``visible`` in newer Matplotlib, the positional form works everywhere.
    plt.grid(True, which="major", color="#666666", linestyle="-")
    plt.minorticks_on()
    plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
    # Dashed connector between the last known close and the first prediction.
    plt.plot(
        [df_stock.index[-1], df_pred.index[0]],
        [close.values[-1], df_pred.values[0]],
        lw=1, c="tab:green", linestyle="--",
    )
    plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
    plt.axvspan(df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
    _, _, ymin, ymax = plt.axis()
    plt.vlines(df_stock.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k")
    plt.show()

    # Print prediction data
    print("Predicted share price:")
    df_pred = df_pred.apply(lambda x: f"{x:.2f} $")
    print(df_pred.to_string())
    print("")
def regression(l_args, s_ticker, df_stock, polynomial):
    """Fit a regression model on the adjusted close, optionally backtesting.

    When ``-e/--end`` is given, the data is cut at that date, the model is
    trained on the part up to it, and the prediction is compared with the
    real prices that follow (second figure plus a printed table and KPIs).

    :param l_args: command-line style arguments handed to the argparse parser.
    :param s_ticker: ticker symbol, only used in the plot title.
    :param df_stock: data frame exposing a "5. adjusted close" column; its
        index is used as the time axis.
    :param polynomial: ``LINEAR`` selects ``LinearRegression``; ``USER_INPUT``
        makes ``-p/--polynomial`` a required argument; any other value is
        passed to ``PolynomialFeatures`` directly.
    :return: None. Results are plotted and printed; ``SystemExit`` and any
        other exception raised inside the body are absorbed (the latter is
        printed).
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="regression",
        description=(
            " Regression attempts to model the relationship between two "
            "variables by fitting a linear/quadratic/cubic/other equation to "
            "observed data. One variable is considered to be an explanatory "
            "variable, and the other is considered to be a dependent variable. "
        ),
    )
    parser.add_argument(
        "-i", "--input", action="store", dest="n_inputs",
        type=check_positive, default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d", "--days", action="store", dest="n_days",
        type=check_positive, default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j", "--jumps", action="store", dest="n_jumps",
        type=check_positive, default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-e", "--end", action="store", type=valid_date,
        dest="s_end_date", default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )
    if polynomial == USER_INPUT:
        parser.add_argument(
            "-p", "--polynomial", action="store", dest="n_polynomial",
            type=check_positive, required=True,
            help="polynomial associated with regression.",
        )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        # BACKTESTING
        if ns_parser.s_end_date:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=ns_parser.n_inputs + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date, n_next_days=ns_parser.n_days
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Real prices for the predicted window, then cut the training data.
            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        close = df_stock["5. adjusted close"]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            close.values, ns_parser.n_inputs, ns_parser.n_days, ns_parser.n_jumps
        )

        if not stock_x:
            print("Given the model parameters more training data is needed.\n")
            return

        # Machine Learning model
        if polynomial == LINEAR:
            model = linear_model.LinearRegression(n_jobs=-1)
        else:
            if polynomial == USER_INPUT:
                polynomial = ns_parser.n_polynomial
            model = pipeline.make_pipeline(
                preprocessing.PolynomialFeatures(polynomial), linear_model.Ridge()
            )

        model.fit(stock_x, stock_y)
        l_predictions = model.predict(
            close.values[-ns_parser.n_inputs:].reshape(1, -1)
        )[0]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=close.index[-1], n_next_days=ns_parser.n_days
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, close, lw=2)
        # BACKTESTING
        if ns_parser.s_end_date:
            plt.title(
                f"BACKTESTING: Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        plt.xlim(df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # Grid visibility is passed positionally: the ``b=`` keyword was
        # renamed to ``visible`` in newer Matplotlib, positional works everywhere.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known close and the first prediction.
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [close.values[-1], df_pred.values[0]],
            lw=1, c="tab:green", linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(df_stock.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k")

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index, df_future["5. adjusted close"],
                lw=2, c="tab:blue", ls="--",
            )
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [close.values[-1], df_future["5. adjusted close"].values[0]],
                lw=1, c="tab:blue", linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if ns_parser.s_end_date:
            real = df_future["5. adjusted close"]
            x_window = [
                df_stock.index[-1],
                df_pred.index[-1] + datetime.timedelta(days=1),
            ]

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)

            # Upper panel: real prices versus prediction.
            plt.subplot(211)
            plt.plot(df_future.index, real, lw=2, c="tab:blue", ls="--")
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real, c="tab:blue", lw=3)
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [close.values[-1], real.values[0]],
                lw=2, c="tab:blue", ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [df_stock.index[-1], df_pred.index[0]],
                [close.values[-1], df_pred.values[0]],
                lw=2, c="green", ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(x_window[0], x_window[1])
            plt.xticks(x_window, visible=True)
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            # Lower panel: relative prediction error in percent.
            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            pct_error = 100 * (df_pred.values - real.values) / real.values
            plt.plot(df_future.index, pct_error, lw=2, c="red")
            plt.scatter(df_future.index, pct_error, c="red", lw=5)
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [0, 100 * (df_pred.values[0] - real.values[0]) / real.values[0]],
                lw=2, ls="--", c="red",
            )
            plt.xlim(x_window[0], x_window[1])
            plt.xticks(x_window, visible=True)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = df_future["5. adjusted close"]

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time Real [$] x Prediction [$]")
                print(
                    df_pred.apply(price_prediction_backtesting_color, axis=1).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, close.values[-1])
        print("")

    except SystemExit:
        # Raised by argparse (e.g. a missing required -p); stay in the caller.
        print("")
    except Exception as e:
        # Command boundary: report the problem instead of propagating it.
        print(e)
        print("")
def k_nearest_neighbors(l_args, s_ticker, df_stock):
    """Predict with a k-nearest-neighbours regressor, then plot and print it.

    :param l_args: command-line style arguments handed to the argparse parser.
    :param s_ticker: ticker symbol, only used in the plot title.
    :param df_stock: data frame exposing a "5. adjusted close" column; its
        index is used as the time axis.
    :return: None. Results are plotted and printed; any exception raised
        inside the body is printed rather than propagated.
    """
    parser = argparse.ArgumentParser(
        prog="knn",
        description=(
            " K nearest neighbors is a simple algorithm that stores all "
            "available cases and predict the numerical target based on a "
            "similarity measure (e.g. distance functions). "
        ),
    )
    parser.add_argument(
        "-i", "--input", action="store", dest="n_inputs",
        type=check_positive, default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d", "--days", action="store", dest="n_days",
        type=check_positive, default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j", "--jumps", action="store", dest="n_jumps",
        type=check_positive, default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-n", "--neighbors", action="store", dest="n_neighbors",
        type=check_positive, default=20,
        help="number of neighbors to use on the algorithm.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        # A falsy result means there is nothing to run; without this check the
        # user would only see an AttributeError message from the handler below.
        if not ns_parser:
            return

        close = df_stock["5. adjusted close"]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            close.values, ns_parser.n_inputs, ns_parser.n_days, ns_parser.n_jumps
        )
        # Give a readable message instead of an opaque error from knn.fit().
        if not stock_x:
            print("Given the model parameters more training data is needed.\n")
            return

        # Machine Learning model
        knn = neighbors.KNeighborsRegressor(n_neighbors=ns_parser.n_neighbors)
        knn.fit(stock_x, stock_y)

        # Prediction data
        l_predictions = knn.predict(
            close.values[-ns_parser.n_inputs:].reshape(1, -1)
        )[0]
        l_pred_days = get_next_stock_market_days(
            last_stock_day=close.index[-1], n_next_days=ns_parser.n_days
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Plotting
        plt.figure()
        plt.plot(df_stock.index, close, lw=2)
        plt.title(
            f"{ns_parser.n_neighbors}-Nearest Neighbors on {s_ticker} - {ns_parser.n_days} days prediction"
        )
        plt.xlim(df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # Grid visibility is passed positionally: the ``b=`` keyword was
        # renamed to ``visible`` in newer Matplotlib, positional works everywhere.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known close and the first prediction.
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [close.values[-1], df_pred.values[0]],
            lw=1, c="tab:green", linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(df_stock.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k")
        plt.ion()
        plt.show()

        # Print prediction data
        print_pretty_prediction(df_pred, close.values[-1])
        print("")

    except Exception as e:
        # Command boundary: report the problem instead of propagating it.
        print(e)
        print("")
def regression(l_args, s_ticker, df_stock, polynomial):
    """Fit a regression model on the adjusted close, then plot and print it.

    :param l_args: command-line style arguments handed to the argparse parser.
    :param s_ticker: ticker symbol, only used in the plot title.
    :param df_stock: data frame exposing a "5. adjusted close" column; its
        index is used as the time axis.
    :param polynomial: ``LINEAR`` selects ``LinearRegression``; ``USER_INPUT``
        makes ``-p/--polynomial`` a required argument; any other value is
        passed to ``PolynomialFeatures`` directly.
    :return: None. Results are plotted and printed; any exception raised
        inside the body is printed rather than propagated.
    """
    parser = argparse.ArgumentParser(
        prog="regression",
        description=(
            " Regression attempts to model the relationship between two "
            "variables by fitting a linear/quadratic/cubic/other equation to "
            "observed data. One variable is considered to be an explanatory "
            "variable, and the other is considered to be a dependent variable. "
        ),
    )
    parser.add_argument(
        "-i", "--input", action="store", dest="n_inputs",
        type=check_positive, default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d", "--days", action="store", dest="n_days",
        type=check_positive, default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j", "--jumps", action="store", dest="n_jumps",
        type=check_positive, default=1,
        help="number of jumps in training data.",
    )
    if polynomial == USER_INPUT:
        parser.add_argument(
            "-p", "--polynomial", action="store", dest="n_polynomial",
            type=check_positive, required=True,
            help="polynomial associated with regression.",
        )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        # A falsy result means there is nothing to run; without this check the
        # user would only see an AttributeError message from the handler below.
        if not ns_parser:
            return

        close = df_stock["5. adjusted close"]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            close.values, ns_parser.n_inputs, ns_parser.n_days, ns_parser.n_jumps
        )
        # Give a readable message instead of an opaque error from model.fit().
        if not stock_x:
            print("Given the model parameters more training data is needed.\n")
            return

        # Machine Learning model
        if polynomial == LINEAR:
            model = linear_model.LinearRegression(n_jobs=-1)
        else:
            if polynomial == USER_INPUT:
                polynomial = ns_parser.n_polynomial
            model = pipeline.make_pipeline(
                preprocessing.PolynomialFeatures(polynomial), linear_model.Ridge()
            )

        model.fit(stock_x, stock_y)
        l_predictions = model.predict(
            close.values[-ns_parser.n_inputs:].reshape(1, -1)
        )[0]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=close.index[-1], n_next_days=ns_parser.n_days
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Plotting
        plt.figure()
        plt.plot(df_stock.index, close, lw=2)
        plt.title(
            f"Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
        )
        plt.xlim(df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # Grid visibility is passed positionally: the ``b=`` keyword was
        # renamed to ``visible`` in newer Matplotlib, positional works everywhere.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known close and the first prediction.
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [close.values[-1], df_pred.values[0]],
            lw=1, c="tab:green", linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(df_stock.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k")
        plt.ion()
        plt.show()

        # Print prediction data
        print_pretty_prediction(df_pred, close.values[-1])
        print("")

    except Exception as e:
        # Command boundary: report the problem instead of propagating it.
        print(e)
        print("")
def mlp(l_args, s_ticker, df_stock):
    """Train a multilayer perceptron on the adjusted close and plot/print it.

    :param l_args: command-line style arguments handed to the argparse parser.
    :param s_ticker: ticker symbol, only used in the plot title.
    :param df_stock: data frame exposing a "5. adjusted close" column; its
        index is used as the time axis.
    :return: None. Results are plotted and printed; any exception raised
        inside the body is printed rather than propagated.
    """
    parser = argparse.ArgumentParser(
        prog="mlp", description="""Multilayer Perceptron. """
    )
    parser.add_argument(
        "-d", "--days", action="store", dest="n_days",
        type=check_positive, default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i", "--input", action="store", dest="n_inputs",
        type=check_positive, default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-e", "--epochs", action="store", dest="n_epochs",
        type=check_positive, default=200,
        help="number of training epochs.",
    )
    parser.add_argument(
        "-j", "--jumps", action="store", dest="n_jumps",
        type=check_positive, default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-p", "--pp", action="store", dest="s_preprocessing",
        default="normalization",
        choices=["normalization", "standardization", "none"],
        help="pre-processing data.",
    )
    parser.add_argument(
        "-o", "--optimizer", action="store", dest="s_optimizer",
        default="adam",
        choices=[
            "adam", "adagrad", "adadelta", "adamax", "ftrl",
            "nadam", "optimizer", "rmsprop", "sgd",
        ],
        help="optimization technique.",
    )
    parser.add_argument(
        "-l", "--loss", action="store", dest="s_loss",
        default="mae", choices=["mae", "mape", "mse", "msle"],
        help="loss function.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        # A falsy result means there is nothing to run; without this check the
        # user would only see an AttributeError message from the handler below.
        if not ns_parser:
            return

        close = df_stock["5. adjusted close"]
        is_scaled = ns_parser.s_preprocessing in ("standardization", "normalization")

        # Pre-process data
        stock_train_data = np.array(close.values.reshape(-1, 1))
        if ns_parser.s_preprocessing == "standardization":
            scaler = StandardScaler()
            stock_train_data = scaler.fit_transform(stock_train_data)
        elif ns_parser.s_preprocessing == "normalization":
            scaler = MinMaxScaler()
            stock_train_data = scaler.fit_transform(stock_train_data)
        # else: no pre-processing, the raw column is used as is

        # Split training data for the neural network
        stock_x, stock_y = splitTrain.split_train(
            stock_train_data,
            ns_parser.n_inputs,
            ns_parser.n_days,
            numJumps=ns_parser.n_jumps,
        )
        # An empty split would otherwise crash on ``shape[1]`` just below.
        if not stock_x:
            print("Given the model parameters more training data is needed.\n")
            return

        # Drop the trailing feature axis so the samples are 2-D.
        stock_x = np.array(stock_x)
        stock_x = np.reshape(stock_x, (stock_x.shape[0], stock_x.shape[1]))
        stock_y = np.array(stock_y)
        stock_y = np.reshape(stock_y, (stock_y.shape[0], stock_y.shape[1]))

        # Build Neural Network model
        model = build_neural_network_model(
            cfg_nn_models.MultiLayer_Perceptron, ns_parser.n_inputs, ns_parser.n_days
        )
        model.compile(optimizer=ns_parser.s_optimizer, loss=ns_parser.s_loss)

        # Train our model
        model.fit(stock_x, stock_y, epochs=ns_parser.n_epochs, verbose=1)
        print("")

        print(model.summary())
        print("")

        # Prediction
        yhat = model.predict(
            stock_train_data[-ns_parser.n_inputs:].reshape(1, ns_parser.n_inputs),
            verbose=0,
        )

        # Re-scale the data back
        if is_scaled:
            y_pred_test_t = scaler.inverse_transform(yhat.tolist())
        else:
            y_pred_test_t = yhat

        l_pred_days = get_next_stock_market_days(
            last_stock_day=close.index[-1], n_next_days=ns_parser.n_days
        )
        df_pred = pd.Series(y_pred_test_t[0].tolist(), index=l_pred_days, name="Price")

        # Plotting
        plt.figure()
        plt.plot(df_stock.index, close, lw=3)
        plt.title(f"MLP on {s_ticker} - {ns_parser.n_days} days prediction")
        plt.xlim(df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # Grid visibility is passed positionally: the ``b=`` keyword was
        # renamed to ``visible`` in newer Matplotlib, positional works everywhere.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known close and the first prediction.
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [close.values[-1], df_pred.values[0]],
            lw=1, c="tab:green", linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        # The colour is given once; the original passed both ``colors`` and ``color``.
        plt.vlines(df_stock.index[-1], ymin, ymax, colors="k", linewidth=3, linestyle="--")
        plt.ion()
        plt.show()

        # Print prediction data
        print_pretty_prediction(df_pred, close.values[-1])
        print("")

    except Exception as e:
        # Command boundary: report the problem instead of propagating it.
        print(e)
        print("")