def _setup_backtesting(df_stock, ns_parser): """Set up backtesting if enabled :return: (df_stock, df_future), where df_future is None if s_end_date is not set. :raises Exception: if configuration is invalid""" df_future = None if ns_parser.s_end_date: if ns_parser.s_end_date < df_stock.index[0]: raise Exception( "Backtesting not allowed, since End Date is older than Start Date of historical data" ) if ns_parser.s_end_date < get_next_stock_market_days( last_stock_day=df_stock.index[0], n_next_days=ns_parser.n_inputs + ns_parser.n_days, )[-1]: raise Exception( "Backtesting not allowed, since End Date is too close to Start Date to train model" ) future_index = get_next_stock_market_days( last_stock_day=ns_parser.s_end_date, n_next_days=ns_parser.n_days ) if future_index[-1] > datetime.datetime.now(): raise Exception( "Backtesting not allowed, since End Date + Prediction days is in the future" ) df_future = df_stock[future_index[0] : future_index[-1]] df_stock = df_stock[: ns_parser.s_end_date] return df_stock, df_future
def _rescale_data(df_stock, ns_parser, scaler, yhat, idx_loop):
    """Undo the preprocessing scaling of a prediction and index it by date.

    When ns_parser.s_preprocessing is "standardization" or "normalization" the
    prediction is passed through scaler.inverse_transform; otherwise it is used
    as is. The first row of the result becomes a pandas Series named
    "Price [<idx_loop + 1>]", indexed by the n_days market days that follow the
    last date of the "5. adjusted close" column.
    """
    was_scaled = ns_parser.s_preprocessing in ("standardization", "normalization")
    rescaled = scaler.inverse_transform(yhat.tolist()) if was_scaled else yhat

    last_known_day = df_stock["5. adjusted close"].index[-1]
    prediction_days = get_next_stock_market_days(
        last_stock_day=last_known_day,
        n_next_days=ns_parser.n_days,
    )

    return pd.Series(
        rescaled[0].tolist(),
        index=prediction_days,
        name=f"Price [{idx_loop+1}]",
    )
def arima(l_args, s_ticker, df_stock):
    """Fit an ARIMA model on the adjusted close, then plot and print the forecast.

    Parameters
    ----------
    l_args : list
        Command arguments handed to the argument parser.
    s_ticker : str
        Ticker symbol, used in the plot title.
    df_stock : pd.DataFrame
        Historical data; the "5. adjusted close" column and the index are used.

    Any exception raised while fitting or plotting is printed, not propagated.
    """
    parser = argparse.ArgumentParser(
        prog="arima",
        description="""
            In statistics and econometrics, and in particular in time series analysis, an autoregressive
            integrated moving average (ARIMA) model is a generalization of an autoregressive moving average (ARMA)
            model. Both of these models are fitted to time series data either to better understand the data or to
            predict future points in the series (forecasting). ARIMA(p,d,q) where parameters p, d, and q are
            non-negative integers, p is the order (number of time lags) of the autoregressive model, d is the degree
            of differencing (the number of times the data have had past values subtracted), and q is the order of
            the moving-average model.
        """,
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i",
        "--ic",
        action="store",
        dest="s_ic",
        type=str,
        default="aic",
        choices=["aic", "aicc", "bic", "hqic", "oob"],
        help="information criteria.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store_true",
        default=False,
        dest="b_seasonal",
        help="Use weekly seasonal data.",
    )
    parser.add_argument(
        "-o",
        "--order",
        action="store",
        dest="s_order",
        type=str,
        help="arima model order (p,d,q) in format: pdq.",
    )
    parser.add_argument(
        "-r",
        "--results",
        action="store_true",
        dest="b_results",
        default=False,
        help="results about ARIMA summary flag.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        # Same guard as the sibling commands: nothing to do if parsing failed.
        if not ns_parser:
            return

        closes = df_stock["5. adjusted close"]

        # Machine Learning model
        if ns_parser.s_order:
            # Accept the documented compact "pdq" form as well as "p,d,q"
            # (the latter also allows multi-digit orders).
            raw_order = ns_parser.s_order
            order_parts = raw_order.split(",") if "," in raw_order else list(raw_order)
            t_order = tuple(int(part) for part in order_parts)

            model = ARIMA(closes.values, order=t_order).fit()
            # NOTE(review): predict indices look zero-based, in which case
            # start=len(closes) would be the first out-of-sample step and this
            # skips it; left unchanged, confirm against the ARIMA class in use.
            l_predictions = model.predict(
                start=len(closes) + 1,
                end=len(closes) + ns_parser.n_days,
            )
        else:
            seasonal_kwargs = (
                {"seasonal": True, "m": 5}
                if ns_parser.b_seasonal
                else {"seasonal": False}
            )
            # The pmdarima keyword is `information_criterion`; the previous
            # spelling `information_criteria` was not a model-selection
            # argument, so the -i option never selected the criterion.
            model = pmdarima.auto_arima(
                closes.values,
                error_action="ignore",
                information_criterion=ns_parser.s_ic,
                **seasonal_kwargs,
            )
            l_predictions = model.predict(n_periods=ns_parser.n_days)

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=closes.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        if ns_parser.b_results:
            print(model.summary())
            print("")

        # Plotting
        plt.figure()
        plt.plot(df_stock.index, closes, lw=2)
        if ns_parser.s_order:
            plt.title(
                f"ARIMA {str(t_order)} on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"ARIMA {model.order} on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally because its keyword name
        # differs between matplotlib releases (`b` vs `visible`).
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known close and the first prediction.
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [closes.values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            df_stock.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k"
        )
        plt.ion()
        plt.show()

        # Print prediction data
        print_pretty_prediction(df_pred, closes.values[-1])
        print("")

    except Exception as e:
        print(e)
        print("")
def insider_activity(
    stock: pd.DataFrame,
    ticker: str,
    start: str,
    interval: str,
    num: int,
    raw: bool,
    export: str = "",
):
    """Display insider activity. [Source: Business Insider]

    Parameters
    ----------
    stock : pd.DataFrame
        Stock dataframe
    ticker : str
        Due diligence ticker symbol
    start : str
        Start date of the stock data
    interval : str
        Stock data interval
    num : int
        Number of latest days of inside activity
    raw: bool
        Print to console
    export : str
        Export dataframe data to csv,json,xlsx file
    """
    df_ins = businessinsider_model.get_insider_activity(ticker)

    if start:
        df_insider = df_ins[start:].copy()  # type: ignore
    else:
        df_insider = df_ins.copy()

    if raw:
        df_insider.index = pd.to_datetime(df_insider.index).date
        print(
            tabulate(
                df_insider.sort_index(ascending=False)
                .head(n=num)
                .applymap(lambda x: x.replace(".00", "").replace(",", "")),
                headers=df_insider.columns,
                showindex=True,
                tablefmt="fancy_grid",
            )
        )
    elif df_insider.empty:
        # Nothing to draw; plotting would fail on df_insider.index[0] below.
        print("No insider activity found.")
    else:
        _, ax = plt.subplots()

        # Daily data is plotted from the adjusted close, intraday from the close.
        price_column = "Adj Close" if interval == "1440min" else "Close"
        prices = stock[price_column]
        plt.plot(stock.index, prices.values, lw=3)

        # The title used to read "Price Target", although this chart shows
        # insider trades.
        plt.title(f"{ticker.upper()} (Time Series) and Insider Activity")
        plt.xlabel("Time")
        plt.ylabel("Share Price")

        # Signed share count: sells are negative, everything else positive.
        df_insider["Trade"] = df_insider.apply(
            lambda row: (-1 if row["Type"] == "Sell" else 1)
            * float(row["Shares Traded"].replace(",", "")),
            axis=1,
        )

        plt.xlim(df_insider.index[0], stock.index[-1])
        min_price, max_price = ax.get_ylim()
        price_range = max_price - min_price

        # Per-date totals are computed once per side instead of once per bar.
        buys = (
            df_insider[df_insider["Type"] == "Buy"].groupby(by=["Date"])["Trade"].sum()
        )
        sells = (
            df_insider[df_insider["Type"] == "Sell"].groupby(by=["Date"])["Trade"].sum()
        )

        # A missing side contributes 0, so the scale stays finite when there
        # are only buys or only sells.
        largest_buy = buys.max() if not buys.empty else 0.0
        largest_sell = sells.min() if not sells.empty else 0.0
        shares_range = largest_buy - largest_sell
        n_proportion = price_range / shares_range if shares_range else 0.0

        _plot_insider_trade_bars(sells, prices, n_proportion, "red")
        _plot_insider_trade_bars(buys, prices, n_proportion, "green")

        # The on/off flag is passed positionally because its keyword name
        # differs between matplotlib releases (`b` vs `visible`).
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.gcf().autofmt_xdate()

        if gtff.USE_ION:
            plt.ion()

        plt.show()
    print("")

    export_data(
        export,
        os.path.dirname(os.path.abspath(__file__)),
        "act",
        df_insider,
    )


def _plot_insider_trade_bars(trades, prices, n_proportion, color):
    """Draw one vertical bar per trade date, anchored at that date's share price.

    trades maps a date to the signed number of shares traded on it, prices is
    the price series of the chart, and n_proportion converts shares into price
    units so that the bars fit the current y-range.
    """
    for trade_date, shares in trades.items():
        if trade_date in prices.index:
            plot_date = trade_date
        else:
            # No price on that date: move the bar to the next market day.
            plot_date = get_next_stock_market_days(trade_date, 1)[0]
            if plot_date not in prices.index:
                # Still outside the plotted price series; nothing to anchor to.
                continue

        price = prices[plot_date]
        bar_end = price + n_proportion * float(shares)
        plt.vlines(
            x=plot_date,
            ymin=min(price, bar_end),
            ymax=max(price, bar_end),
            colors=color,
            ls="-",
            lw=5,
        )
def call_arima(self, other_args: List[str]):
    """Process arima command

    Parses other_args, validates the optional backtesting end date against
    self.data and, when everything is valid, delegates to
    arima_view.display_arima. Nothing is displayed when parsing fails or when
    a backtesting check is violated.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        prog="arima",
        description="""
            In statistics and econometrics, and in particular in time series analysis, an autoregressive
            integrated moving average (ARIMA) model is a generalization of an autoregressive moving average (ARMA)
            model. Both of these models are fitted to time series data either to better understand the data or to
            predict future points in the series (forecasting). ARIMA(p,d,q) where parameters p, d, and q are
            non-negative integers, p is the order (number of time lags) of the autoregressive model, d is the degree
            of differencing (the number of times the data have had past values subtracted), and q is the order of
            the moving-average model.
        """,
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-i",
        "--ic",
        action="store",
        dest="s_ic",
        type=str,
        default="aic",
        choices=arima_model.ICS,
        help="information criteria.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store_true",
        default=False,
        dest="b_seasonal",
        help="Use weekly seasonal data.",
    )
    parser.add_argument(
        "-o",
        "--order",
        action="store",
        dest="s_order",
        default="",
        type=str,
        help="arima model order (p,d,q) in format: p,d,q.",
    )
    parser.add_argument(
        "-r",
        "--results",
        action="store_true",
        dest="b_results",
        default=False,
        help="results about ARIMA summary flag.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )
    ns_parser = parse_known_args_and_warn(
        parser, other_args, export_allowed=EXPORT_ONLY_FIGURES_ALLOWED
    )
    if not ns_parser:
        return

    # BACKTESTING CHECK
    if ns_parser.s_end_date:
        if ns_parser.s_end_date < self.data.index[0]:
            console.print(
                "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
            )
            # Stop here: previously the message was printed and the model
            # was run anyway.
            return

        if ns_parser.s_end_date < get_next_stock_market_days(
            last_stock_day=self.data.index[0],
            n_next_days=5 + ns_parser.n_days,
        )[-1]:
            console.print(
                "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
            )
            return

    arima_view.display_arima(
        dataset=self.coin,
        values=self.data[self.target],
        arima_order=ns_parser.s_order,
        n_predict=ns_parser.n_days,
        seasonal=ns_parser.b_seasonal,
        ic=ns_parser.s_ic,
        results=ns_parser.b_results,
        s_end_date=ns_parser.s_end_date,
        export=ns_parser.export,
        time_res=self.resolution,
    )
def call_ets(self, other_args: List[str]):
    """Process ets command

    Parses other_args, validates the optional backtesting end date against
    self.data and, when everything is valid, delegates to
    ets_view.display_exponential_smoothing. Nothing is displayed when parsing
    fails or when a backtesting check is violated.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        prog="ets",
        description="""
            Exponential Smoothing, see https://otexts.com/fpp2/taxonomy.html

            Trend='N', Seasonal='N': Simple Exponential Smoothing
            Trend='N', Seasonal='A': Exponential Smoothing
            Trend='N', Seasonal='M': Exponential Smoothing
            Trend='A', Seasonal='N': Holt’s linear method
            Trend='A', Seasonal='A': Additive Holt-Winters’ method
            Trend='A', Seasonal='M': Multiplicative Holt-Winters’ method
            Trend='Ad', Seasonal='N': Additive damped trend method
            Trend='Ad', Seasonal='A': Exponential Smoothing
            Trend='Ad', Seasonal='M': Holt-Winters’ damped method
            Trend component: N: None, A: Additive, Ad: Additive Damped
            Seasonality component: N: None, A: Additive, M: Multiplicative
        """,
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-t",
        "--trend",
        action="store",
        dest="trend",
        choices=ets_model.TRENDS,
        default="N",
        help="Trend component: N: None, A: Additive, Ad: Additive Damped.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store",
        dest="seasonal",
        choices=ets_model.SEASONS,
        default="N",
        help="Seasonality component: N: None, A: Additive, M: Multiplicative.",
    )
    parser.add_argument(
        "-p",
        "--periods",
        action="store",
        dest="seasonal_periods",
        type=check_positive,
        default=5,
        help="Seasonal periods.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )
    ns_parser = parse_known_args_and_warn(
        parser, other_args, export_allowed=EXPORT_ONLY_FIGURES_ALLOWED
    )
    if not ns_parser:
        return

    if ns_parser.s_end_date:
        if ns_parser.s_end_date < self.data.index[0]:
            console.print(
                "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
            )
            # Stop here: previously the message was printed and the model
            # was run anyway.
            return

        if ns_parser.s_end_date < get_next_stock_market_days(
            last_stock_day=self.data.index[0],
            n_next_days=5 + ns_parser.n_days,
        )[-1]:
            console.print(
                "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
            )
            return

    ets_view.display_exponential_smoothing(
        ticker=self.coin,
        values=self.data[self.target],
        n_predict=ns_parser.n_days,
        trend=ns_parser.trend,
        seasonal=ns_parser.seasonal,
        seasonal_periods=ns_parser.seasonal_periods,
        s_end_date=ns_parser.s_end_date,
        export=ns_parser.export,
        time_res=self.resolution,
    )
def get_knn_model_data(
    data: Union[pd.Series, pd.DataFrame],
    n_input_days: int,
    n_predict_days: int,
    n_neighbors: int,
    test_size: float,
    end_date: str,
    no_shuffle: bool,
) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray, np.ndarray, Any]:
    """Fit a k-nearest-neighbors regressor on the data and forecast with it

    Parameters
    ----------
    data : Union[pd.Series, pd.DataFrame]
        Data to fit
    n_input_days : int
        Length of input series
    n_predict_days : int
        Number of days to predict
    n_neighbors : int
        Number of neighbors for nn
    test_size : float
        Fraction of data for testing
    end_date : str
        End date for backtesting
    no_shuffle : bool
        Flag to not shuffle train/test data

    Returns
    -------
    pd.DataFrame:
        Dataframe of predictions
    np.array:
        Array of validation predictions
    np.array:
        Array of validation data
    np.array:
        Array of validation dates
    Any:
        Scaler for processing data

    When data preparation reports an error, an empty dataframe, three
    zero-valued arrays and None are returned instead.
    """

    def _as_2d(arr):
        # Keep only the first two dimensions, as expected by the regressor.
        return arr.reshape(arr.shape[0], arr.shape[1])

    (
        train_x,
        valid_x,
        train_y,
        valid_y,
        _,
        _,
        _,
        valid_dates,
        forecast_input,
        forecast_input_dates,
        scaler,
        preparation_failed,
    ) = prepare_scale_train_valid_test(
        data, n_input_days, n_predict_days, test_size, end_date, no_shuffle
    )
    if preparation_failed:
        return pd.DataFrame(), np.array(0), np.array(0), np.array(0), None

    forecast_dates = get_next_stock_market_days(
        forecast_input_dates[-1], n_next_days=n_predict_days
    )

    console.print(
        f"Training on {train_x.shape[0]} sequences of length {train_x.shape[1]}.  Using {valid_x.shape[0]} sequences "
        f" of length {valid_x.shape[1]} for validation"
    )

    # Machine Learning model
    regressor = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors)
    regressor.fit(_as_2d(train_x), _as_2d(train_y))

    validation_predictions = regressor.predict(_as_2d(valid_x))

    # Forecast from the most recent input window and map it back to price scale.
    scaled_forecast = regressor.predict(forecast_input.reshape(1, -1))
    forecast = scaler.inverse_transform(scaled_forecast.reshape(1, -1))
    forecast_df = pd.DataFrame(list(forecast.T), index=forecast_dates)

    return forecast_df, validation_predictions, valid_y, valid_dates, scaler
def k_nearest_neighbors(l_args, s_ticker, df_stock):
    """Forecast the adjusted close with a k-nearest-neighbors regressor.

    Parameters
    ----------
    l_args : list
        Command arguments handed to the argument parser.
    s_ticker : str
        Ticker symbol, used in the plot title.
    df_stock : pd.DataFrame
        Historical data; the "5. adjusted close" column and the index are used.

    The forecast is plotted and printed. When an end date is given (-e), the
    data is truncated at that date and the forecast is compared against the
    real prices that follow it (backtesting). Exceptions raised along the way
    are printed, not propagated.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="knn",
        description="""
            K nearest neighbors is a simple algorithm that stores all
            available cases and predict the numerical target based on a similarity measure
            (e.g. distance functions).
        """,
    )
    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-n",
        "--neighbors",
        action="store",
        dest="n_neighbors",
        type=check_positive,
        default=20,
        help="number of neighbors to use on the algorithm.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        # BACKTESTING
        if ns_parser.s_end_date:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            if ns_parser.s_end_date < get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=ns_parser.n_inputs + ns_parser.n_days,
            )[-1]:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date, n_next_days=ns_parser.n_days
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            df_future = df_stock[future_index[0] : future_index[-1]]
            df_stock = df_stock[: ns_parser.s_end_date]

            if df_future.empty:
                # Without real prices after the end date the comparison plots
                # would fail with an IndexError further down.
                print(
                    "Backtesting not allowed, since there is no historical data after End Date\n"
                )
                return

        closes = df_stock["5. adjusted close"]
        last_day = df_stock.index[-1]
        last_close = closes.values[-1]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            closes.values,
            ns_parser.n_inputs,
            ns_parser.n_days,
            ns_parser.n_jumps,
        )

        # len() works for lists and arrays alike; truth-testing an array
        # with several elements raises.
        if len(stock_x) == 0:
            print("Given the model parameters more training data is needed.\n")
            return

        # Machine Learning model
        knn = neighbors.KNeighborsRegressor(n_neighbors=ns_parser.n_neighbors)
        knn.fit(stock_x, stock_y)

        # Prediction data
        l_predictions = knn.predict(
            closes.values[-ns_parser.n_inputs :].reshape(1, -1)
        )[0]
        l_pred_days = get_next_stock_market_days(
            last_stock_day=closes.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, closes, lw=2)
        s_knn = f"{ns_parser.n_neighbors}-Nearest Neighbors on {s_ticker}"
        # BACKTESTING
        if ns_parser.s_end_date:
            plt.title(f"BACKTESTING: {s_knn} - {ns_parser.n_days} days prediction")
        else:
            plt.title(f"{s_knn} - {ns_parser.n_days} days prediction")
        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally because its keyword name
        # differs between matplotlib releases (`b` vs `visible`).
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known close and the first prediction.
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_close, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(last_day, df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(last_day, ymin, ymax, linewidth=1, linestyle="--", color="k")

        # BACKTESTING
        if ns_parser.s_end_date:
            real = df_future["5. adjusted close"]
            plt.plot(
                df_future.index,
                real,
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, real.values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if ns_parser.s_end_date:
            real = df_future["5. adjusted close"]
            x_end = df_pred.index[-1] + datetime.timedelta(days=1)

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(
                df_future.index,
                real,
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real, c="tab:blue", lw=3)
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, real.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_day, df_pred.index[0]],
                [last_close, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(last_day, x_end)
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            # The upper panel shows no major x ticks (an earlier explicit
            # tick assignment was dropped because this call overrode it).
            plt.xticks([])

            # Relative prediction error, computed once for line and markers.
            error_pct = 100 * (df_pred.values - real.values) / real.values

            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(
                df_future.index,
                error_pct,
                lw=2,
                c="red",
            )
            plt.scatter(
                df_future.index,
                error_pct,
                c="red",
                lw=5,
            )
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [last_day, df_future.index[0]],
                [
                    0,
                    100 * (df_pred.values[0] - real.values[0]) / real.values[0],
                ],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(last_day, x_end)
            plt.xticks([last_day, x_end])
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            # NOTE(review): these labels are attached to the zero line and the
            # error line of this panel; confirm the wording is intended.
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = real

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time Real [$] x Prediction [$]")
                print(
                    df_pred.apply(
                        price_prediction_backtesting_color, axis=1
                    ).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_close)
        print("")

    except Exception as e:
        print(e)
        print("")
def regression(l_args, s_ticker, df_stock, polynomial):
    """Forecast the adjusted close with a linear or polynomial regression.

    Parameters
    ----------
    l_args : list
        Command arguments handed to the argument parser.
    s_ticker : str
        Ticker symbol, used in the plot title.
    df_stock : pd.DataFrame
        Historical data; the "5. adjusted close" column and the index are used.
    polynomial : int
        LINEAR selects a plain linear regression; USER_INPUT adds a required
        -p/--polynomial argument whose value is used; any other value is
        passed to PolynomialFeatures as is.

    The forecast is plotted and printed. When an end date is given (-e), the
    data is truncated at that date and the forecast is compared against the
    real prices that follow it (backtesting). Exceptions raised along the way
    are printed, not propagated.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="regression",
        description="""
            Regression attempts to model the relationship between
            two variables by fitting a linear/quadratic/cubic/other equation to
            observed data. One variable is considered to be an explanatory variable,
            and the other is considered to be a dependent variable.
        """,
    )

    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    if polynomial == USER_INPUT:
        parser.add_argument(
            "-p",
            "--polynomial",
            action="store",
            dest="n_polynomial",
            type=check_positive,
            required=True,
            help="polynomial associated with regression.",
        )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        # BACKTESTING
        if ns_parser.s_end_date:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            if ns_parser.s_end_date < get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=ns_parser.n_inputs + ns_parser.n_days,
            )[-1]:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date, n_next_days=ns_parser.n_days
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            df_future = df_stock[future_index[0] : future_index[-1]]
            df_stock = df_stock[: ns_parser.s_end_date]

            if df_future.empty:
                # Without real prices after the end date the comparison plots
                # would fail with an IndexError further down.
                print(
                    "Backtesting not allowed, since there is no historical data after End Date\n"
                )
                return

        closes = df_stock["5. adjusted close"]
        last_day = df_stock.index[-1]
        last_close = closes.values[-1]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            closes.values,
            ns_parser.n_inputs,
            ns_parser.n_days,
            ns_parser.n_jumps,
        )

        # len() works for lists and arrays alike; truth-testing an array
        # with several elements raises.
        if len(stock_x) == 0:
            print("Given the model parameters more training data is needed.\n")
            return

        # Machine Learning model
        if polynomial == LINEAR:
            model = linear_model.LinearRegression(n_jobs=-1)
        else:
            if polynomial == USER_INPUT:
                polynomial = ns_parser.n_polynomial
            model = pipeline.make_pipeline(
                preprocessing.PolynomialFeatures(polynomial), linear_model.Ridge()
            )

        model.fit(stock_x, stock_y)
        l_predictions = model.predict(
            closes.values[-ns_parser.n_inputs :].reshape(1, -1)
        )[0]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=closes.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, closes, lw=2)
        # BACKTESTING
        if ns_parser.s_end_date:
            plt.title(
                f"BACKTESTING: Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally because its keyword name
        # differs between matplotlib releases (`b` vs `visible`).
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known close and the first prediction.
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_close, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(last_day, df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(last_day, ymin, ymax, linewidth=1, linestyle="--", color="k")

        # BACKTESTING
        if ns_parser.s_end_date:
            real = df_future["5. adjusted close"]
            plt.plot(
                df_future.index,
                real,
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, real.values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if ns_parser.s_end_date:
            real = df_future["5. adjusted close"]
            x_end = df_pred.index[-1] + datetime.timedelta(days=1)

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(
                df_future.index,
                real,
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real, c="tab:blue", lw=3)
            plt.plot(
                [last_day, df_future.index[0]],
                [last_close, real.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_day, df_pred.index[0]],
                [last_close, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(last_day, x_end)
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            # The upper panel shows no major x ticks (an earlier explicit
            # tick assignment was dropped because this call overrode it).
            plt.xticks([])

            # Relative prediction error, computed once for line and markers.
            error_pct = 100 * (df_pred.values - real.values) / real.values

            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(
                df_future.index,
                error_pct,
                lw=2,
                c="red",
            )
            plt.scatter(
                df_future.index,
                error_pct,
                c="red",
                lw=5,
            )
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [last_day, df_future.index[0]],
                [
                    0,
                    100 * (df_pred.values[0] - real.values[0]) / real.values[0],
                ],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(last_day, x_end)
            plt.xticks(
                [last_day, x_end],
                visible=True,
            )
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            # NOTE(review): these labels are attached to the zero line and the
            # error line of this panel; confirm the wording is intended.
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = real

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time Real [$] x Prediction [$]")
                print(
                    df_pred.apply(
                        price_prediction_backtesting_color, axis=1
                    ).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_close)
        print("")

    except SystemExit:
        print("")
    except Exception as e:
        print(e)
        print("")
def exponential_smoothing(l_args, s_ticker, df_stock):
    """Fit an exponential smoothing model on the adjusted close and forecast.

    Parameters
    ----------
    l_args : list
        Command arguments handed to the argument parser.
    s_ticker : str
        Ticker symbol, used in the plot title.
    df_stock : pd.DataFrame
        Historical data; the "5. adjusted close" column and the index are used.

    On success the fitted parameters, AIC/BIC/SSE, a plot and the forecast are
    shown. A message is printed instead when the optimisation did not converge
    or the forecast contains NaN. Exceptions are printed, not propagated.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="ets",
        description="""
            Exponential Smoothing, see https://otexts.com/fpp2/taxonomy.html

            Trend='N', Seasonal='N': Simple Exponential Smoothing
            Trend='N', Seasonal='A': Exponential Smoothing
            Trend='N', Seasonal='M': Exponential Smoothing
            Trend='A', Seasonal='N': Holt’s linear method
            Trend='A', Seasonal='A': Additive Holt-Winters’ method
            Trend='A', Seasonal='M': Multiplicative Holt-Winters’ method
            Trend='Ad', Seasonal='N': Additive damped trend method
            Trend='Ad', Seasonal='A': Exponential Smoothing
            Trend='Ad', Seasonal='M': Holt-Winters’ damped method
            Trend component: N: None, A: Additive, Ad: Additive Damped
            Seasonality component: N: None, A: Additive, M: Multiplicative
        """,
    )

    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-t",
        "--trend",
        action="store",
        dest="trend",
        type=check_valid_trend,
        default="N",
        help="Trend component: N: None, A: Additive, Ad: Additive Damped.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store",
        dest="seasonal",
        type=check_valid_seasonal,
        default="N",
        help="Seasonality component: N: None, A: Additive, M: Multiplicative.",
    )
    parser.add_argument(
        "-p",
        "--periods",
        action="store",
        dest="seasonal_periods",
        type=check_positive,
        default=5,
        help="Seasonal periods.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        closes = df_stock["5. adjusted close"]

        model, title = get_exponential_smoothing_model(
            closes.values,
            ns_parser.trend,
            ns_parser.seasonal,
            ns_parser.seasonal_periods,
        )

        if not model.mle_retvals.success:
            print("ConvergenceWarning: Optimization failed to converge.")
            return

        forecast = model.forecast(ns_parser.n_days)

        l_pred_days = get_next_stock_market_days(
            last_stock_day=closes.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(forecast, index=l_pred_days, name="Price")

        # `not` instead of `~`: bitwise inversion of a plain Python bool is
        # always truthy, so `~` only worked while the result was a numpy bool.
        if np.isnan(forecast).any():
            print("RuntimeWarning: invalid value encountered in double_scalars.")
            return

        print(f"\n{title}")
        print("\nFit model parameters:")
        for key, value in model.params.items():
            print(f"{key} {' '*(18-len(key))}: {value}")

        print("\nAssess fit model:")
        print(f"AIC: {round(model.aic, 2)}")
        print(f"BIC: {round(model.bic, 2)}")
        print(f"SSE: {round(model.sse, 2)}\n")

        # Plotting
        plt.figure()
        plt.plot(df_stock.index, closes, lw=2)
        plt.title(f"{title} on {s_ticker}")
        plt.xlim(
            df_stock.index[0],
            get_next_stock_market_days(df_pred.index[-1], 1)[-1],
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally because its keyword name
        # differs between matplotlib releases (`b` vs `visible`).
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        # Dashed connector between the last known close and the first prediction.
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [closes.values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            df_stock.index[-1],
            df_pred.index[-1],
            facecolor="tab:orange",
            alpha=0.2,
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            df_stock.index[-1],
            ymin,
            ymax,
            linewidth=1,
            linestyle="--",
            color="k",
        )
        plt.ion()
        plt.show()

        # Print prediction data
        print_pretty_prediction(df_pred, closes.values[-1])
        print("")

    except Exception as e:
        print(e)
        print("")
def display_arima(
    dataset: str,
    values: Union[pd.DataFrame, pd.Series],
    arima_order: str,
    n_predict: int,
    seasonal: bool,
    ic: str,
    results: bool,
    s_end_date: str = "",
    export: str = "",
    time_res: str = "",
    external_axes: Optional[List[plt.Axes]] = None,
):
    """View fit ARIMA model

    Parameters
    ----------
    dataset : str
        String indicating dataset (for plot title)
    values : Union[pd.DataFrame, pd.Series]
        Data to fit
    arima_order : str
        Comma separated ARIMA order, e.g. "p,d,q". When empty, the order
        reported by the fitted model is shown in the title instead.
    n_predict : int
        Days to predict
    seasonal : bool
        Flag to use seasonal model
    ic : str
        Information Criteria for model evaluation
    results : bool
        Flag to display model summary
    s_end_date : str, optional
        Specified end date for backtesting comparisons
    export : str, optional
        Format to export image
    time_res : str
        Resolution for data, allowing for predicting outside of standard market days
    external_axes : Optional[List[plt.Axes]], optional
        External axes (1 axis is expected in the list, 3 when backtesting),
        by default None
    """
    # Parsed up-front so a malformed order fails before any model is fitted.
    t_order = (
        tuple(int(term) for term in arima_order.split(",")) if arima_order else None
    )

    if s_end_date:
        if not time_res:
            future_index = get_next_stock_market_days(
                last_stock_day=s_end_date, n_next_days=n_predict
            )
        else:
            future_index = pd.date_range(
                s_end_date, periods=n_predict + 1, freq=time_res
            )[1:]

        if future_index[-1] > datetime.datetime.now():
            console.print(
                "Backtesting not allowed, since End Date + Prediction days is in the future\n"
            )
            return

        # Hold back the "future" slice for comparison and fit on the rest.
        df_future = values[future_index[0] : future_index[-1]]  # noqa: E203
        values = values[:s_end_date]  # type: ignore

    l_predictions, model = arima_model.get_arima_model(
        values, arima_order, n_predict, seasonal, ic
    )

    # Prediction data
    if not time_res:
        l_pred_days = get_next_stock_market_days(
            last_stock_day=values.index[-1],
            n_next_days=n_predict,
        )
    else:
        l_pred_days = pd.date_range(
            values.index[-1], periods=n_predict + 1, freq=time_res
        )[1:]

    df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

    if results:
        console.print(model.summary())
        console.print("")

    # This plot has 1 axes
    if external_axes is None:
        _, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    else:
        expected_axes = 3 if s_end_date else 1
        if len(external_axes) != expected_axes:
            logger.error(
                "Expected list of 1 axis item or 3 axis items when backtesting"
            )
            console.print(
                "[red]Expected list of 1 axis item "
                + "or 3 axis items when backtesting.\n[/red]"
            )
            return
        ax = external_axes[0]

    ax.plot(values.index, values)

    # pylint:disable=no-member
    order_label = str(t_order) if arima_order else model.order
    title_prefix = "BACKTESTING: " if s_end_date else ""
    # Always title the axes being drawn on; plt.title would hit whatever the
    # current axes is, which need not be ``ax`` when external axes are given.
    ax.set_title(
        f"{title_prefix}ARIMA {order_label} on {dataset} - {n_predict} step prediction"
    )
    ax.set_xlim(values.index[0], l_pred_days[-1])
    ax.set_ylabel("Value")
    ax.plot(
        [values.index[-1], df_pred.index[0]],
        [values.values[-1], df_pred.values[0]],
        color=theme.up_color,
        linestyle="--",
    )
    ax.plot(df_pred.index, df_pred, color=theme.up_color)
    ax.axvspan(values.index[-1], df_pred.index[-1], alpha=0.2)
    # Read the limits from ``ax`` itself for the same reason as the title.
    _, _, ymin, ymax = ax.axis()
    ax.vlines(values.index[-1], ymin, ymax, linestyle="--")

    # BACKTESTING
    if s_end_date:
        ax.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )

    theme.style_primary_axis(ax)

    if external_axes is None:
        theme.visualize_output()

    # BACKTESTING
    if s_end_date:
        # Two stacked axes: values comparison on top, % error below
        if external_axes is None:
            _, axes = plt.subplots(
                2, 1, sharex=True, figsize=plot_autoscale(), dpi=PLOT_DPI
            )
            (ax2, ax3) = axes
        else:
            # Length 3 was already verified above for the backtesting case.
            (_, ax2, ax3) = external_axes

        window_start = values.index[-1]
        window_end = df_pred.index[-1] + datetime.timedelta(days=1)
        pct_error = 100 * (df_pred.values - df_future.values) / df_future.values

        ax2.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax2.plot(df_pred.index, df_pred)
        ax2.scatter(
            df_future.index,
            df_future,
            color=theme.up_color,
        )
        ax2.plot(
            [window_start, df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )
        ax2.scatter(df_pred.index, df_pred)
        ax2.plot(
            [window_start, df_pred.index[0]],
            [values.values[-1], df_pred.values[0]],
            linestyle="--",
        )
        ax2.set_title("BACKTESTING: Values")
        ax2.set_xlim(window_start, window_end)
        ax2.set_ylabel("Value")
        ax2.legend(["Real data", "Prediction data"])
        theme.style_primary_axis(ax2)

        ax3.axhline(y=0, linestyle="--")
        ax3.plot(
            df_future.index,
            pct_error,
            color=theme.down_color,
        )
        ax3.scatter(
            df_future.index,
            pct_error,
            color=theme.down_color,
        )
        ax3.set_title("BACKTESTING: % Error")
        ax3.plot(
            [window_start, df_future.index[0]],
            [
                0,
                100
                * (df_pred.values[0] - df_future.values[0])
                / df_future.values[0],
            ],
            ls="--",
            color=theme.down_color,
        )
        ax3.set_xlim(window_start, window_end)
        ax3.set_ylabel("Prediction Error (%)")
        theme.style_primary_axis(ax3)

        if external_axes is None:
            theme.visualize_output()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future.values
        df_pred["Real"] = df_pred["Real"].astype(float)
        df_pred["Prediction"] = df_pred["Prediction"].astype(float)
        df_pred["Dif"] = 100 * (df_pred.Prediction - df_pred.Real) / df_pred.Real

        # A single table call for every colour setting: the former
        # non-colour branch read a non-existent "Predicted" column and
        # supplied four headers for the three columns built above.
        print_rich_table(
            df_pred,
            headers=["Predicted", "Actual", "% Difference"],
            index_name="Date",
            show_index=True,
            title="ARIMA Model",
        )

        console.print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, values.values[-1])

    export_data(export, os.path.dirname(os.path.abspath(__file__)), "arima")
    console.print("")
def exponential_smoothing(l_args, s_ticker, df_stock):
    """Fit an exponential smoothing (ETS) model, with optional backtesting.

    The arguments are parsed with argparse and a model is obtained from
    ``get_exponential_smoothing_model`` using the "5. adjusted close" column.
    When -e/--end is given, the data is cut at that date, the model is fitted
    on the earlier part and the forecast is compared (plots, table and KPIs)
    with the prices that followed.  Without it the forecast is plotted and
    printed.  Any exception raised on the way is printed instead of being
    propagated.

    Parameters
    ----------
    l_args
        Arguments handed to the parser (-d/--days, -t/--trend,
        -s/--seasonal, -p/--periods, -e/--end).
    s_ticker
        Ticker name, only used in plot titles.
    df_stock
        Price data; must provide a "5. adjusted close" column.  Its index is
        used as the time axis and is compared against the end date.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="ets",
        description=(
            " Exponential Smoothing, see https://otexts.com/fpp2/taxonomy.html"
            " Trend='N', Seasonal='N': Simple Exponential Smoothing"
            " Trend='N', Seasonal='A': Exponential Smoothing"
            " Trend='N', Seasonal='M': Exponential Smoothing"
            " Trend='A', Seasonal='N': Holt’s linear method"
            " Trend='A', Seasonal='A': Additive Holt-Winters’ method"
            " Trend='A', Seasonal='M': Multiplicative Holt-Winters’ method"
            " Trend='Ad', Seasonal='N': Additive damped trend method"
            " Trend='Ad', Seasonal='A': Exponential Smoothing"
            " Trend='Ad', Seasonal='M': Holt-Winters’ damped method"
            " Trend component: N: None, A: Additive, Ad: Additive Damped"
            " Seasonality component: N: None, A: Additive, M: Multiplicative "
        ),
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-t",
        "--trend",
        action="store",
        dest="trend",
        type=check_valid_trend,
        default="N",
        help="Trend component: N: None, A: Additive, Ad: Additive Damped.",
    )
    parser.add_argument(
        "-s",
        "--seasonal",
        action="store",
        dest="seasonal",
        type=check_valid_seasonal,
        default="N",
        help="Seasonality component: N: None, A: Additive, M: Multiplicative.",
    )
    parser.add_argument(
        "-p",
        "--periods",
        action="store",
        dest="seasonal_periods",
        type=check_positive,
        default=5,
        help="Seasonal periods.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        backtesting = bool(ns_parser.s_end_date)

        # BACKTESTING
        if backtesting:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=5 + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date, n_next_days=ns_parser.n_days
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Keep the prices after the end date for comparison, fit on the rest
            df_future = df_stock[future_index[0] : future_index[-1]]
            df_stock = df_stock[: ns_parser.s_end_date]

        prices = df_stock["5. adjusted close"]

        # Get ETS model
        model, title = get_exponential_smoothing_model(
            prices.values,
            ns_parser.trend,
            ns_parser.seasonal,
            ns_parser.seasonal_periods,
        )

        if not model.mle_retvals.success:
            print("ConvergenceWarning: Optimization failed to converge.")
            return

        forecast = model.forecast(ns_parser.n_days)
        # Plain truth test instead of bitwise "~": the latter only behaves as
        # a logical not for NumPy booleans.
        if np.isnan(forecast).any():
            print("RuntimeWarning: invalid value encountered in double_scalars.")
            return

        l_pred_days = get_next_stock_market_days(
            last_stock_day=prices.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(forecast, index=l_pred_days, name="Price")

        print(f"\n{title}")
        print("\nFit model parameters:")
        for key, value in model.params.items():
            print(f"{key} {' '*(18-len(key))}: {value}")

        print("\nAssess fit model:")
        print(f"AIC: {round(model.aic, 2)}")
        print(f"BIC: {round(model.bic, 2)}")
        print(f"SSE: {round(model.sse, 2)}\n")

        last_day = df_stock.index[-1]
        last_price = prices.values[-1]

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, prices, lw=2)
        if backtesting:
            plt.title(f"BACKTESTING: {title} on {s_ticker}")
        else:
            plt.title(f"{title} on {s_ticker}")

        plt.xlim(
            df_stock.index[0],
            get_next_stock_market_days(df_pred.index[-1], 1)[-1],
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally: its keyword was renamed
        # from "b" to "visible" across Matplotlib releases.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_price, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(
            last_day,
            df_pred.index[-1],
            facecolor="tab:orange",
            alpha=0.2,
        )
        _, _, ymin, ymax = plt.axis()
        plt.vlines(
            last_day,
            ymin,
            ymax,
            linewidth=1,
            linestyle="--",
            color="k",
        )

        # BACKTESTING
        if backtesting:
            real = df_future["5. adjusted close"]
            plt.plot(
                df_future.index,
                real,
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [last_day, df_future.index[0]],
                [last_price, real.values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if backtesting:
            window_end = df_pred.index[-1] + datetime.timedelta(days=1)
            pct_error = 100 * (df_pred.values - real.values) / real.values

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)

            # Upper panel: real prices against predicted prices
            plt.subplot(211)
            plt.plot(
                df_future.index,
                real,
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(
                df_future.index,
                real,
                c="tab:blue",
                lw=3,
            )
            plt.plot(
                [last_day, df_future.index[0]],
                [last_price, real.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_day, df_pred.index[0]],
                [last_price, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(last_day, window_end)
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            # Lower panel: percentage error of the prediction
            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(
                df_future.index,
                pct_error,
                lw=2,
                c="red",
            )
            plt.scatter(
                df_future.index,
                pct_error,
                c="red",
                lw=5,
            )
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [last_day, df_future.index[0]],
                [
                    0,
                    100 * (df_pred.values[0] - real.values[0]) / real.values[0],
                ],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(last_day, window_end)
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = real

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time Real [$] x Prediction [$]")
                print(
                    df_pred.apply(price_prediction_backtesting_color, axis=1).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_price)
        print("")

    except Exception as e:
        print(e)
        print("")
def display_exponential_smoothing(
    ticker: str,
    values: Union[pd.DataFrame, pd.Series],
    n_predict: int,
    trend: str = "N",
    seasonal: str = "N",
    seasonal_periods: int = 5,
    s_end_date: str = "",
    export: str = "",
    time_res: str = "",
    external_axes: Optional[List[plt.Axes]] = None,
):
    """Perform exponential smoothing

    Parameters
    ----------
    ticker : str
        Dataset being smoothed
    values : Union[pd.DataFrame, pd.Series]
        Raw data
    n_predict : int
        Days to predict
    trend : str, optional
        Trend variable, by default "N"
    seasonal : str, optional
        Seasonal variable, by default "N"
    seasonal_periods : int, optional
        Number of seasonal periods, by default 5
    s_end_date : str, optional
        End date for backtesting, by default ""
    export : str, optional
        Format to export data, by default ""
    time_res : str
        Resolution for data, allowing for predicting outside of standard market days
    external_axes : Optional[List[plt.Axes]], optional
        External axes (1 axis is expected in the list, 3 when backtesting),
        by default None
    """
    if s_end_date:
        if not time_res:
            future_index = get_next_stock_market_days(
                last_stock_day=s_end_date, n_next_days=n_predict
            )
        else:
            future_index = pd.date_range(
                s_end_date, periods=n_predict + 1, freq=time_res
            )[1:]

        if future_index[-1] > datetime.datetime.now():
            console.print(
                "Backtesting not allowed, since End Date + Prediction days is in the future\n"
            )
            return

        # Hold back the "future" slice for comparison and fit on the rest.
        df_future = values[future_index[0] : future_index[-1]]  # noqa: E203
        values = values[:s_end_date]  # type: ignore

    # Get ETS model
    model, title, forecast = ets_model.get_exponential_smoothing_model(
        values, trend, seasonal, seasonal_periods, n_predict
    )

    # Explicit emptiness test: ``not forecast`` would raise for a NumPy array
    # holding more than one element.
    if forecast is None or len(forecast) == 0:
        console.print("No forecast made. Model did not converge.\n")
        return

    if np.isnan(forecast).any():
        console.print("Model predicted NaN values. Runtime Error.\n")
        return

    if not time_res:
        l_pred_days = get_next_stock_market_days(
            last_stock_day=values.index[-1],
            n_next_days=n_predict,
        )
    else:
        l_pred_days = pd.date_range(
            values.index[-1], periods=n_predict + 1, freq=time_res
        )[1:]

    df_pred = pd.Series(forecast, index=l_pred_days, name="Price")

    console.print(f"\n{title}")
    console.print("\nFit model parameters:")
    for key, value in model.params.items():
        console.print(f"{key} {' '*(18-len(key))}: {value}")

    console.print("\nAssess fit model:")
    console.print(f"AIC: {round(model.aic, 2)}")
    console.print(f"BIC: {round(model.bic, 2)}")
    console.print(f"SSE: {round(model.sse, 2)}\n")

    # Plotting

    # This plot has 1 axes
    if external_axes is None:
        _, ax1 = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    else:
        expected_axes = 3 if s_end_date else 1
        if len(external_axes) != expected_axes:
            console.print(
                "[red]Expected list of 1 axis item "
                + "or 3 axis items when backtesting.\n[/red]"
            )
            return
        ax1 = external_axes[0]

    ax1.plot(values.index, values.values)

    # BACKTESTING
    if s_end_date:
        ax1.set_title(f"BACKTESTING: {title} on {ticker}", fontsize=12)
    else:
        ax1.set_title(f"{title} on {ticker}", fontsize=12)

    ax1.set_xlim(
        values.index[0],
        get_next_stock_market_days(df_pred.index[-1], 1)[-1],
    )
    ax1.set_ylabel("Value")
    ax1.plot(
        [values.index[-1], df_pred.index[0]],
        [values.values[-1], df_pred.values[0]],
        color=theme.down_color,
        linestyle="--",
    )
    ax1.plot(df_pred.index, df_pred, color=theme.down_color)
    ax1.axvspan(
        values.index[-1],
        df_pred.index[-1],
        facecolor=theme.down_color,
        alpha=0.2,
    )
    # Read the limits from ``ax1`` itself; plt.axis() reports the current
    # axes, which need not be ``ax1`` when external axes are given.
    _, _, ymin, ymax = ax1.axis()
    ax1.vlines(
        values.index[-1],
        ymin,
        ymax,
        linestyle="--",
        color=theme.get_colors(reverse=True)[0],
    )

    # BACKTESTING
    if s_end_date:
        ax1.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax1.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )

    theme.style_primary_axis(ax1)

    if external_axes is None:
        theme.visualize_output()

    # BACKTESTING
    if s_end_date:
        # Two stacked axes: values comparison on top, % error below
        if external_axes is None:
            _, axes = plt.subplots(
                2, 1, sharex=True, figsize=plot_autoscale(), dpi=PLOT_DPI
            )
            (ax2, ax3) = axes
        else:
            # Length 3 was already verified above for the backtesting case.
            (_, ax2, ax3) = external_axes

        window_start = values.index[-1]
        window_end = df_pred.index[-1] + datetime.timedelta(days=1)
        pct_error = 100 * (df_pred.values - df_future.values) / df_future.values

        ax2.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax2.plot(df_pred.index, df_pred)
        ax2.scatter(
            df_future.index,
            df_future,
            color=theme.up_color,
        )
        ax2.plot(
            [window_start, df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )
        ax2.scatter(df_pred.index, df_pred)
        ax2.plot(
            [window_start, df_pred.index[0]],
            [values.values[-1], df_pred.values[0]],
            linestyle="--",
        )
        ax2.set_title("BACKTESTING: Values")
        ax2.set_xlim(window_start, window_end)
        ax2.set_ylabel("Value")
        ax2.legend(["Real data", "Prediction data"])
        theme.style_primary_axis(ax2)

        ax3.axhline(y=0, linestyle="--")
        ax3.plot(
            df_future.index,
            pct_error,
            color=theme.down_color,
        )
        ax3.scatter(
            df_future.index,
            pct_error,
            color=theme.down_color,
        )
        ax3.set_title("BACKTESTING: % Error")
        ax3.plot(
            [window_start, df_future.index[0]],
            [
                0,
                100
                * (df_pred.values[0] - df_future.values[0])
                / df_future.values[0],
            ],
            ls="--",
            color=theme.down_color,
        )
        ax3.set_xlim(window_start, window_end)
        ax3.set_ylabel("Prediction Error (%)")
        theme.style_primary_axis(ax3)

        if external_axes is None:
            theme.visualize_output()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future

        if gtff.USE_COLOR:
            patch_pandas_text_adjustment()

            console.print("Time Real [$] x Prediction [$]")
            console.print(
                df_pred.apply(
                    lambda_price_prediction_backtesting_color, axis=1
                ).to_string()
            )
        else:
            console.print(df_pred[["Real", "Prediction"]].round(2).to_string())

        console.print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, values.values[-1])

    export_data(export, os.path.dirname(os.path.abspath(__file__)), "ets")
def k_nearest_neighbors(l_args, s_ticker, df_stock):
    """Forecast prices with a k-nearest-neighbours regressor and plot them.

    The "5. adjusted close" values are split into training samples with
    ``splitTrain.split_train``, a ``KNeighborsRegressor`` is fitted on them and
    asked to predict from the last ``n_inputs`` values.  The forecast is
    plotted after the history and printed.  Any exception raised on the way
    is printed instead of being propagated.

    Parameters
    ----------
    l_args
        Arguments handed to the parser (-i/--input, -d/--days, -j/--jumps,
        -n/--neighbors).
    s_ticker
        Ticker name, only used in the plot title.
    df_stock
        Price data; must provide a "5. adjusted close" column.  Its index is
        used as the time axis of the plot.
    """
    parser = argparse.ArgumentParser(
        prog="knn",
        description=(
            " K nearest neighbors is a simple algorithm that stores all"
            " available cases and predict the numerical target based on a"
            " similarity measure (e.g. distance functions). "
        ),
    )
    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-n",
        "--neighbors",
        action="store",
        dest="n_neighbors",
        type=check_positive,
        default=20,
        help="number of neighbors to use on the algorithm.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        # Same guard as the other prediction commands: nothing to do when
        # parsing produced no namespace.
        if not ns_parser:
            return

        prices = df_stock["5. adjusted close"]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            prices.values,
            ns_parser.n_inputs,
            ns_parser.n_days,
            ns_parser.n_jumps,
        )

        # Machine Learning model
        knn = neighbors.KNeighborsRegressor(n_neighbors=ns_parser.n_neighbors)
        knn.fit(stock_x, stock_y)

        # Prediction data: one sample made of the latest n_inputs prices
        latest_window = prices.values[-ns_parser.n_inputs :].reshape(1, -1)
        l_predictions = knn.predict(latest_window)[0]
        l_pred_days = get_next_stock_market_days(
            last_stock_day=prices.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        last_day = df_stock.index[-1]
        last_price = prices.values[-1]

        # Plotting
        plt.figure()
        plt.plot(df_stock.index, prices, lw=2)
        plt.title(
            f"{ns_parser.n_neighbors}-Nearest Neighbors on {s_ticker} - {ns_parser.n_days} days prediction"
        )
        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally: its keyword was renamed
        # from "b" to "visible" across Matplotlib releases.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_price, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(last_day, df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(last_day, ymin, ymax, linewidth=1, linestyle="--", color="k")
        plt.ion()
        plt.show()

        # Print prediction data
        print_pretty_prediction(df_pred, last_price)
        print("")

    except Exception as e:
        print(e)
        print("")
def simple_moving_average(l_args, s_ticker, df_stock):
    """Forecast prices with a simple moving average, with optional backtesting.

    Each predicted value is the mean of the latest ``n_length`` values, where
    earlier predictions are fed back into the window.  When -e/--end is given,
    the data is cut at that date and the forecast is compared (plots, table
    and KPIs) with the prices that followed.  Without it the forecast is
    plotted and printed.  Any exception raised on the way is printed instead
    of being propagated.

    Parameters
    ----------
    l_args
        Arguments handed to the parser (-l/--length, -d/--days, -e/--end).
    s_ticker
        Ticker name, only used in plot titles.
    df_stock
        Price data; must provide a "5. adjusted close" column.  Its index is
        used as the time axis and is compared against the end date.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="sma",
        description=(
            " Moving Averages are used to smooth the data in an array to help"
            " eliminate noise and identify trends. The Simple Moving Average is"
            " literally the simplest form of a moving average. Each output"
            " value is the average of the previous n values. In a Simple Moving"
            " Average, each value in the time period carries equal weight, and"
            " values outside of the time period are not included in the"
            " average. This makes it less responsive to recent changes in the"
            " data, which can be useful for filtering out those changes. "
        ),
    )
    parser.add_argument(
        "-l",
        "--length",
        action="store",
        dest="n_length",
        type=check_positive,
        default=20,
        help="length of SMA window.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        backtesting = bool(ns_parser.s_end_date)

        # BACKTESTING
        if backtesting:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            earliest_end = get_next_stock_market_days(
                last_stock_day=df_stock.index[0],
                n_next_days=5 + ns_parser.n_days,
            )[-1]
            if ns_parser.s_end_date < earliest_end:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date, n_next_days=ns_parser.n_days
            )

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Keep the prices after the end date for comparison, use the rest
            df_future = df_stock[future_index[0] : future_index[-1]]
            df_stock = df_stock[: ns_parser.s_end_date]

        prices = df_stock["5. adjusted close"]

        # Prediction data
        # The window always holds the most recent n_length values, whether
        # they are real prices or earlier predictions.  Slicing on every step
        # keeps the window from growing once more than n_length days have
        # been predicted.
        window = list(prices.values[-ns_parser.n_length :])
        l_predictions = []
        for _ in range(ns_parser.n_days):
            next_value = np.mean(window[-ns_parser.n_length :])
            l_predictions.append(next_value)
            window.append(next_value)

        l_pred_days = get_next_stock_market_days(
            last_stock_day=prices.index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        last_day = df_stock.index[-1]
        last_price = prices.values[-1]

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, prices, lw=2)
        if backtesting:
            plt.title(
                f"BACKTESTING: {ns_parser.n_length} Moving Average on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"{ns_parser.n_length} Moving Average on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        plt.xlim(
            df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1]
        )
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is passed positionally: its keyword was renamed
        # from "b" to "visible" across Matplotlib releases.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        df_ma = prices.rolling(window=ns_parser.n_length).mean()
        plt.plot(df_ma.index, df_ma, lw=2, linestyle="--", c="tab:orange")
        plt.plot(
            [last_day, df_pred.index[0]],
            [last_price, df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(last_day, df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(last_day, ymin, ymax, linewidth=1, linestyle="--", color="k")

        # BACKTESTING
        if backtesting:
            real = df_future["5. adjusted close"]
            plt.plot(
                df_future.index,
                real,
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [last_day, df_future.index[0]],
                [last_price, real.values[0]],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if backtesting:
            window_end = df_pred.index[-1] + datetime.timedelta(days=1)
            pct_error = 100 * (df_pred.values - real.values) / real.values

            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)

            # Upper panel: real prices against predicted prices
            plt.subplot(211)
            plt.plot(
                df_future.index,
                real,
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index, real, c="tab:blue", lw=3)
            plt.plot(
                [last_day, df_future.index[0]],
                [last_price, real.values[0]],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [last_day, df_pred.index[0]],
                [last_price, df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(last_day, window_end)
            plt.xticks(
                [last_day, window_end],
                visible=True,
            )
            plt.ylabel("Share Price ($)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            # Lower panel: percentage error of the prediction
            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(
                df_future.index,
                pct_error,
                lw=2,
                c="red",
            )
            plt.scatter(
                df_future.index,
                pct_error,
                c="red",
                lw=5,
            )
            plt.title("BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [last_day, df_future.index[0]],
                [
                    0,
                    100 * (df_pred.values[0] - real.values[0]) / real.values[0],
                ],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(last_day, window_end)
            plt.xticks(
                [last_day, window_end],
                visible=True,
            )
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = real

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time Real [$] x Prediction [$]")
                print(
                    df_pred.apply(
                        price_prediction_backtesting_color, axis=1
                    ).to_string()
                )
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, last_price)
        print("")

    except Exception as e:
        print(e)
        print("")
def fbprophet(l_args, s_ticker, df_stock):
    """Forecast prices with Prophet and plot/print the prediction.

    The "date" and "5. adjusted close" data are renamed to the "ds"/"y"
    columns Prophet is fitted on, the model is asked to predict the history
    plus the next market days, and the result is plotted and printed.  Any
    exception raised on the way is printed instead of being propagated.

    Parameters
    ----------
    l_args
        Arguments handed to the parser (-d/--days).
    s_ticker
        Ticker name, only used in the plot title.
    df_stock
        Price data; must provide a "5. adjusted close" column and, after its
        index is reset, a "date" column.
    """
    parser = argparse.ArgumentParser(
        prog="fbprophet",
        description=(
            " Facebook Prophet is a forecasting procedure that is fast and"
            " provides completely automated forecasts that can be tuned by"
            " hand by data scientists and analysts. It was developed by"
            " Facebook's data science team and is open source. "
        ),
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        # Same guard as the other prediction commands: nothing to do when
        # parsing produced no namespace.
        if not ns_parser:
            return

        df_stock = df_stock.sort_index(ascending=True)
        df_stock.reset_index(level=0, inplace=True)
        df_stock = df_stock[["date", "5. adjusted close"]]
        df_stock = df_stock.rename(columns={"date": "ds", "5. adjusted close": "y"})
        df_stock["ds"] = pd.to_datetime(df_stock["ds"])

        model = Prophet(yearly_seasonality=False, daily_seasonality=False)
        model.fit(df_stock)

        l_pred_days = get_next_stock_market_days(
            last_stock_day=pd.to_datetime(df_stock["ds"].values[-1]),
            n_next_days=ns_parser.n_days,
        )

        # Predict on the history followed by the next *market* days, so the
        # last n_days rows of the forecast are the same dates they are
        # labelled with below.  A default future frame advances by calendar
        # day and would include weekends.
        close_prices = pd.DataFrame(
            {"ds": pd.to_datetime(list(df_stock["ds"]) + list(l_pred_days))}
        )
        forecast = model.predict(close_prices)

        first_day = df_stock["ds"].values[0]
        last_day = df_stock["ds"].values[-1]

        _, ax = plt.subplots()
        model.plot(forecast, ax=ax, xlabel="Time", ylabel="Share Price ($)")
        _, _, ymin, ymax = ax.axis()
        ax.vlines(
            last_day,
            ymin,
            ymax,
            linewidth=2,
            linestyle="--",
            color="k",
        )
        plt.axvspan(
            last_day,
            l_pred_days[-1],
            facecolor="tab:orange",
            alpha=0.2,
        )
        plt.ylim(ymin, ymax)
        plt.xlim(first_day, get_next_stock_market_days(l_pred_days[-1], 1)[-1])
        plt.title(f"Fb Prophet on {s_ticker} - {ns_parser.n_days} days prediction")
        plt.ion()
        plt.show()

        print("")
        print("Predicted share price:")
        df_pred = forecast["yhat"][-ns_parser.n_days :].apply(lambda x: f"{x:.2f} $")
        df_pred.index = l_pred_days
        print(df_pred.to_string())
        print("")

    except Exception as e:
        print(e)
        print("")
def conv1d_model(
    data: Union[pd.Series, pd.DataFrame],
    n_input: int,
    n_predict: int,
    learning_rate: float,
    epochs: int,
    batch_size: int,
    test_size: float,
    n_loops: int,
    no_shuffle: bool,
) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray, np.ndarray, Any]:
    """Fit a 1D convolutional network ``n_loops`` times and forecast with each fit.

    Parameters
    ----------
    data : Union[pd.Series, pd.DataFrame]
        Data to fit
    n_input : int
        Length of input sequence
    n_predict : int
        Length of output to predict
    learning_rate : float
        Learning rate for optimizer
    epochs : int
        Number of training epochs
    batch_size : int
        Model batch size
    test_size : float
        Fraction of test size
    n_loops : int
        Number of loops to train model
    no_shuffle : bool
        Flag to not shuffle data

    Returns
    -------
    pd.DataFrame
        Forecasts, one column per loop, indexed by the future dates
    np.array
        Validation predictions of every loop
    np.array
        Validation targets
    np.array
        Dates of the validation targets
    Any
        Scaler used for data
    """
    prepared = prepare_scale_train_valid_test(
        data, n_input, n_predict, test_size, "", no_shuffle
    )
    (
        X_train,
        X_valid,
        y_train,
        y_valid,
        _,
        _,
        _,
        y_dates_valid,
        forecast_data_input,
        dates_forecast_input,
        scaler,
        is_error,
    ) = prepared

    if is_error:
        return pd.DataFrame(), np.array(0), np.array(0), np.array(0), None

    n_train, train_len = X_train.shape[0], X_train.shape[1]
    n_valid, valid_len = X_valid.shape[0], X_valid.shape[1]

    console.print(
        f"Training on {n_train} sequences of length {train_len}. Using {n_valid} sequences "
        f" of length {valid_len} for validation. Model will run {n_loops} loops"
    )

    future_dates = get_next_stock_market_days(
        dates_forecast_input[-1], n_next_days=n_predict
    )

    preds = np.zeros((n_loops, n_valid, n_predict))
    forecast_data = np.zeros((n_loops, n_predict))

    # The network consumes (samples, timesteps, channels) with a single channel.
    train_inputs = X_train.reshape(n_train, train_len, 1)
    valid_inputs = X_valid.reshape(n_valid, valid_len, 1)

    for loop_idx in range(n_loops):
        # A fresh network is built and trained on every loop
        model = build_neural_network_model(
            cfg_nn_models.Convolutional,
            n_input,
            n_predict,
        )
        optimizer = optimizers[cfg_nn_models.Optimizer](learning_rate=learning_rate)
        model.compile(optimizer=optimizer, loss=cfg_nn_models.Loss)

        model.fit(
            train_inputs,
            y_train,
            epochs=epochs,
            verbose=True,
            batch_size=batch_size,
            validation_data=(valid_inputs, y_valid),
            callbacks=[es],
        )

        valid_predictions = model.predict(valid_inputs)
        preds[loop_idx] = valid_predictions.reshape(n_valid, n_predict)

        loop_forecast = forecast(forecast_data_input, future_dates, model, scaler)
        forecast_data[loop_idx] = loop_forecast.values.flat

    forecast_data_df = pd.DataFrame(forecast_data.T, index=future_dates)
    return forecast_data_df, preds, y_valid, y_dates_valid, scaler
def display_mc_forecast(
    data: Union[pd.Series, np.ndarray],
    n_future: int,
    n_sims: int,
    use_log=True,
    export: str = "",
    time_res: str = "",
):
    """Plot Monte Carlo simulated paths and the distribution of their end values.

    Parameters
    ----------
    data : Union[pd.Series, np.array]
        Data to forecast
    n_future : int
        Number of days to forecast
    n_sims : int
        Number of simulations to run
    use_log : bool, optional
        Flag to use lognormal, by default True
    export: str
        Format to export data
    time_res : str
        Resolution for data, allowing for predicting outside of standard market days
    """
    predicted_values = mc_model.get_mc_brownian(data, n_future, n_sims, use_log)
    last_timestamp = data.index[-1]  # type: ignore

    if time_res and time_res != "1D":
        # Custom resolution: regular grid that starts right after the last sample
        full_range = pd.date_range(last_timestamp, periods=n_future + 1, freq=time_res)
        future_index = full_range[1:]
    else:
        future_index = get_next_stock_market_days(last_timestamp, n_next_days=n_future)

    dateFmt = mdates.DateFormatter("%m/%d/%Y")
    fig, axes = plt.subplots(1, 2, figsize=plot_autoscale(), dpi=PLOT_DPI)
    paths_ax = axes[0]
    hist_ax = axes[1]

    # Left panel: history followed by every simulated path
    paths_ax.plot(data)
    paths_ax.plot(future_index, predicted_values, alpha=0.3)
    paths_ax.set_title("Data Predictions")
    paths_ax.xaxis.set_major_formatter(dateFmt)
    paths_ax.tick_params(axis="x", labelrotation=45)
    paths_ax.grid("on")

    # Right panel: histogram of the simulated values at the final step
    final_values = predicted_values[-1, :]
    sns.histplot(final_values, ax=hist_ax, kde=True)
    hist_ax.set_xlabel("Price")
    hist_ax.axvline(
        x=data.values[-1], c="k", label="Last Value", lw=3, ls="-"  # type: ignore
    )
    hist_ax.set_title(f"Distribution of final values after {n_future} steps.")
    hist_ax.set_xlim(np.min(final_values), np.max(final_values))
    hist_ax.grid("on")
    hist_ax.legend()

    fig.tight_layout(pad=2)
    if gtff.USE_ION:
        plt.ion()
    plt.show()

    export_data(export, os.path.dirname(os.path.abspath(__file__)), "mc")
    print("")
def _plot_insider_trades(stock, trades, price_column, n_proportion, color):
    """Draw one vertical bar per trade date, anchored at that day's price."""
    for trade_date, traded_shares in trades.items():
        # Trades dated outside the price index are drawn on the next market day
        if trade_date in stock.index:
            ind_dt = trade_date
        else:
            ind_dt = get_next_stock_market_days(trade_date, 1)[0]

        n_stock_price = stock[price_column][ind_dt]
        bar_end = n_stock_price + n_proportion * float(traded_shares)
        plt.vlines(
            x=ind_dt,
            ymin=min(n_stock_price, bar_end),
            ymax=max(n_stock_price, bar_end),
            colors=color,
            ls="-",
            lw=5,
        )


def insider_activity(
    other_args: List[str], stock: DataFrame, ticker: str, start: str, interval: str
):
    """Display insider activity

    Scrapes the Business Insider page of the ticker, plots the price series
    with green (buy) / red (sell) bars scaled by traded shares, and prints the
    latest insider transactions.

    Parameters
    ----------
    other_args : List[str]
        argparse other args - ["-n", "10"]
    stock : DataFrame
        Due diligence stock dataframe
    ticker : str
        Due diligence ticker symbol
    start : str
        Start date of the stock data
    interval : str
        Stock data interval
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="ins",
        description="""Prints insider activity over time [Source: Business Insider]""",
    )
    parser.add_argument(
        "-n",
        "--num",
        action="store",
        dest="n_num",
        type=check_positive,
        default=10,
        help="number of latest insider activity.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        url_market_business_insider = (
            f"https://markets.businessinsider.com/stocks/{ticker.lower()}-stock"
        )
        text_soup_market_business_insider = BeautifulSoup(
            requests.get(
                url_market_business_insider, headers={"User-Agent": get_user_agent()}
            ).text,
            "lxml",
        )

        # Table cells arrive as a flat stream; every 6 consecutive cells form a row.
        d_insider = {}
        l_insider_vals = []
        for idx, insider_val in enumerate(
            text_soup_market_business_insider.findAll(
                "td", {"class": "table__td text-center"}
            )
        ):
            l_insider_vals.append(insider_val.text.strip())

            if (idx + 1) % 6 == 0:
                # A first cell without a date separator means the insider rows are over
                if "/" not in l_insider_vals[0]:
                    break
                d_insider[(idx + 1) // 6] = l_insider_vals
                l_insider_vals = []

        df_insider = pd.DataFrame.from_dict(
            d_insider,
            orient="index",
            columns=["Date", "Shares Traded", "Shares Held", "Price", "Type", "Option"],
        )

        # Attach the insider names while rows are still in page order. Doing it
        # after sorting / filtering by `start` pairs names with the wrong rows
        # or fails on a length mismatch.
        l_names = [
            s_name.text.strip()
            for s_name in text_soup_market_business_insider.findAll(
                "a", {"onclick": "silentTrackPI()"}
            )
        ]
        if len(l_names) == len(df_insider):
            df_insider["Insider"] = l_names
        else:
            print("Insider names could not be matched to transactions.")

        df_insider["Date"] = pd.to_datetime(df_insider["Date"])
        df_insider = df_insider.set_index("Date")
        df_insider = df_insider.sort_index(ascending=True)

        if start:
            df_insider = df_insider[start:].copy()  # type: ignore

        if df_insider.empty:
            print("No insider activity found.")
            print("")
            return

        # Daily data uses the adjusted close, intraday the plain close
        price_column = "5. adjusted close" if interval == "1440min" else "4. close"

        _, ax = plt.subplots()
        plt.plot(stock.index, stock[price_column].values, lw=3)
        plt.title(f"{ticker.upper()} (Time Series) and Price Target")
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")

        # Signed share count: sells are negative, everything else positive
        df_insider["Trade"] = df_insider.apply(
            lambda row: (-1 if row["Type"] == "Sell" else 1)
            * float(row["Shares Traded"].replace(",", "")),
            axis=1,
        )

        plt.xlim(df_insider.index[0], stock.index[-1])
        min_price, max_price = ax.get_ylim()
        price_range = max_price - min_price

        # Aggregate per day once, on the numeric column only
        buys = df_insider.loc[df_insider["Type"] == "Buy", "Trade"].groupby(level=0).sum()
        sells = (
            df_insider.loc[df_insider["Type"] == "Sell", "Trade"].groupby(level=0).sum()
        )

        # A missing side contributes nothing instead of turning the scale into NaN
        largest_buy = buys.max() if not buys.empty else 0.0
        largest_sell = sells.min() if not sells.empty else 0.0
        shares_range = largest_buy - largest_sell
        n_proportion = price_range / shares_range if shares_range else 0.0

        _plot_insider_trades(stock, sells, price_column, n_proportion, "red")
        _plot_insider_trades(stock, buys, price_column, n_proportion, "green")

        # Positional flag: the `b` keyword was renamed and later removed in Matplotlib
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)

        if gtff.USE_ION:
            plt.ion()
        plt.show()

        # Keep the printed column order: insider name last
        l_columns = [col for col in df_insider.columns if col != "Insider"]
        if "Insider" in df_insider.columns:
            l_columns.append("Insider")

        print(
            df_insider[l_columns]
            .sort_index(ascending=False)
            .head(n=ns_parser.n_num)
            .to_string()
        )
        print("")

    except Exception as e:
        print(e)
        print("")
def display_regression(
    dataset: str,
    values: Union[pd.Series, pd.DataFrame],
    poly_order: int,
    n_input: int,
    n_predict: int,
    n_jumps: int,
    s_end_date: str = "",
    export: str = "",
    time_res: str = "",
):
    """Display predictions for regression models

    Parameters
    ----------
    dataset : str
        Title for data
    values : Union[pd.Series, pd.DataFrame]
        Data to fit
    poly_order : int
        Order of polynomial to fit
    n_input : int
        Length of input sequence
    n_predict : int
        Length of prediction sequence
    n_jumps : int
        Number of jumps in data
    s_end_date : str, optional
        End date of the training data; when set, the function backtests
        against the data that follows it
    export : str, optional
        Format for exporting figures
    time_res : str
        Resolution for data, allowing for predicting outside of standard market days
    """
    # BACKTESTING: hold out the data after s_end_date and train on the rest
    if s_end_date:
        if not time_res:
            future_index = get_next_stock_market_days(
                last_stock_day=s_end_date, n_next_days=n_predict
            )
        else:
            future_index = pd.date_range(
                s_end_date, periods=n_predict + 1, freq=time_res
            )[1:]

        df_future = values[future_index[0] : future_index[-1]]
        values = values[:s_end_date]  # type: ignore

    l_predictions, _ = regression_model.get_regression_model(
        values, poly_order, n_input, n_predict, n_jumps
    )

    # Prediction data
    if not time_res:
        l_pred_days = get_next_stock_market_days(
            last_stock_day=values.index[-1],
            n_next_days=n_predict,
        )
    else:
        l_pred_days = pd.date_range(
            values.index[-1], periods=n_predict + 1, freq=time_res
        )[1:]
    df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

    # Plotting
    fig, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    ax.plot(values.index, values, lw=2)
    if s_end_date:
        ax.set_title(
            f"BACKTESTING: Regression (polynomial {poly_order}) on {dataset} - {n_predict} step prediction"
        )
    else:
        ax.set_title(
            f"Regression (polynomial {poly_order}) on {dataset} - {n_predict} step prediction"
        )
    ax.set_xlim(values.index[0], l_pred_days[-1])
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    # Positional flag: the `b` keyword was renamed and later removed in Matplotlib
    ax.grid(True, which="major", color="#666666", linestyle="-")
    ax.minorticks_on()
    ax.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
    # Dashed connector between the last observation and the first prediction
    ax.plot(
        [values.index[-1], df_pred.index[0]],
        [values.values[-1], df_pred.values[0]],
        lw=1,
        c="tab:green",
        linestyle="--",
    )
    ax.plot(df_pred.index, df_pred, lw=2, c="tab:green")
    ax.axvspan(values.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
    # Read the limits from this axes object rather than pyplot's current axes
    _, _, ymin, ymax = ax.axis()
    ax.vlines(values.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k")

    # BACKTESTING: overlay what actually happened
    if s_end_date:
        ax.plot(
            df_future.index,
            df_future,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            lw=1,
            c="tab:blue",
            linestyle="--",
        )

    fig.tight_layout()
    if gtff.USE_ION:
        plt.ion()

    plt.show()
    export_data(export, os.path.dirname(os.path.abspath(__file__)), "regression")
    console.print("")

    # BACKTESTING
    if s_end_date:
        # Relative prediction error in percent, shared by the line and the scatter
        pct_error = 100 * (df_pred.values - df_future.values) / df_future.values

        fig, ax = plt.subplots(1, 2, figsize=plot_autoscale(), dpi=PLOT_DPI)

        ax0 = ax[0]
        ax0.plot(
            df_future.index,
            df_future,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.plot(df_pred.index, df_pred, lw=2, c="green")
        ax0.scatter(df_future.index, df_future, c="tab:blue", lw=3)
        ax0.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.scatter(df_pred.index, df_pred, c="green", lw=3)
        ax0.plot(
            [values.index[-1], df_pred.index[0]],
            [values.values[-1], df_pred.values[0]],
            lw=2,
            c="green",
            ls="--",
        )
        ax0.set_title("BACKTESTING: Real data vs Prediction")
        ax0.set_xlim(values.index[-1], df_pred.index[-1])
        ax0.set_ylabel("Value")
        ax0.grid(True, which="major", color="#666666", linestyle="-")
        ax0.minorticks_on()
        ax0.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        ax0.legend(["Real data", "Prediction data"])
        # The left panel ends up without x ticks; an earlier set_xticks call
        # that this one overrode has been dropped.
        ax0.set_xticks([])

        ax1 = ax[1]
        ax1.axhline(y=0, color="k", linestyle="--", linewidth=2)
        ax1.plot(
            df_future.index,
            pct_error,
            lw=2,
            c="red",
        )
        ax1.scatter(
            df_future.index,
            pct_error,
            c="red",
            lw=5,
        )
        ax1.set_title("BACKTESTING: Error between Real data and Prediction [%]")
        ax1.plot(
            [values.index[-1], df_future.index[0]],
            [
                0,
                100 * (df_pred.values[0] - df_future.values[0]) / df_future.values[0],
            ],
            lw=2,
            ls="--",
            c="red",
        )
        ax1.set_xlim(values.index[-1], df_pred.index[-1])
        ax1.set_xticks([values.index[-1], df_pred.index[-1]])
        ax1.set_xlabel("Time")
        ax1.set_ylabel("Prediction Error (%)")
        ax1.grid(True, which="major", color="#666666", linestyle="-")
        ax1.minorticks_on()
        ax1.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        ax1.legend(["Real data", "Prediction data"])

        fig.tight_layout()
        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future

        if gtff.USE_COLOR:
            patch_pandas_text_adjustment()

            console.print("Time Real [$] x Prediction [$]")
            console.print(
                df_pred.apply(price_prediction_backtesting_color, axis=1).to_string()
            )
        else:
            console.print(df_pred[["Real", "Prediction"]].round(2).to_string())

        console.print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, values.values[-1])
    console.print("")
def display_exponential_smoothing(
    ticker: str,
    values: Union[pd.DataFrame, pd.Series],
    n_predict: int,
    trend: str = "N",
    seasonal: str = "N",
    seasonal_periods: int = 5,
    s_end_date: str = "",
    export: str = "",
    time_res: str = "",
):
    """Perform exponential smoothing

    Fits the model, prints its parameters and fit statistics, plots the
    forecast and, when ``s_end_date`` is given, backtests it against the data
    that follows that date.

    Parameters
    ----------
    ticker : str
        Dataset being smoothed
    values : Union[pd.DataFrame, pd.Series]
        Raw data
    n_predict : int
        Days to predict
    trend : str, optional
        Trend variable, by default "N"
    seasonal : str, optional
        Seasonal variable, by default "N"
    seasonal_periods : int, optional
        Number of seasonal periods, by default 5
    s_end_date : str, optional
        End date for backtesting, by default ""
    export : str, optional
        Format to export data, by default ""
    time_res : str
        Resolution for data, allowing for predicting outside of standard market days
    """
    # BACKTESTING: hold out the data after s_end_date and train on the rest
    if s_end_date:
        if not time_res:
            future_index = get_next_stock_market_days(
                last_stock_day=s_end_date, n_next_days=n_predict
            )
        else:
            future_index = pd.date_range(
                s_end_date, periods=n_predict + 1, freq=time_res
            )[1:]

        # There is nothing to compare against if the horizon is not history yet
        if future_index[-1] > datetime.datetime.now():
            console.print(
                "Backtesting not allowed, since End Date + Prediction days is in the future\n"
            )
            return

        df_future = values[future_index[0] : future_index[-1]]
        values = values[:s_end_date]  # type: ignore

    # Get ETS model
    model, title, forecast = ets_model.get_exponential_smoothing_model(
        values, trend, seasonal, seasonal_periods, n_predict
    )

    if not forecast:
        console.print("No forecast made. Model did not converge.\n")
        return

    if np.isnan(forecast).any():
        console.print("Model predicted NaN values. Runtime Error.\n")
        return

    if not time_res:
        l_pred_days = get_next_stock_market_days(
            last_stock_day=values.index[-1],
            n_next_days=n_predict,
        )
    else:
        l_pred_days = pd.date_range(
            values.index[-1], periods=n_predict + 1, freq=time_res
        )[1:]

    df_pred = pd.Series(forecast, index=l_pred_days, name="Price")

    console.print(f"\n{title}")
    console.print("\nFit model parameters:")
    for key, value in model.params.items():
        console.print(f"{key} {' '*(18-len(key))}: {value}")

    console.print("\nAssess fit model:")
    console.print(f"AIC: {round(model.aic, 2)}")
    console.print(f"BIC: {round(model.bic, 2)}")
    console.print(f"SSE: {round(model.sse, 2)}\n")

    # Plotting
    fig, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    ax.plot(values.index, values.values, lw=2)
    if s_end_date:
        ax.set_title(f"BACKTESTING: {title} on {ticker}")
    else:
        ax.set_title(f"{title} on {ticker}")
    ax.set_xlim(
        values.index[0],
        get_next_stock_market_days(df_pred.index[-1], 1)[-1],
    )
    ax.set_xlabel("Time")
    ax.set_ylabel("Share Price ($)")
    # Positional flag: the `b` keyword was renamed and later removed in Matplotlib
    ax.grid(True, which="major", color="#666666", linestyle="-")
    ax.minorticks_on()
    ax.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
    # Dashed connector between the last observation and the first prediction
    ax.plot(
        [values.index[-1], df_pred.index[0]],
        [values.values[-1], df_pred.values[0]],
        lw=1,
        c="tab:green",
        linestyle="--",
    )
    ax.plot(df_pred.index, df_pred, lw=2, c="tab:green")
    ax.axvspan(
        values.index[-1],
        df_pred.index[-1],
        facecolor="tab:orange",
        alpha=0.2,
    )
    # Read the limits from this axes object rather than pyplot's current axes
    _, _, ymin, ymax = ax.axis()
    ax.vlines(
        values.index[-1],
        ymin,
        ymax,
        linewidth=1,
        linestyle="--",
        color="k",
    )
    dateFmt = mdates.DateFormatter("%m/%d/%Y")
    ax.xaxis.set_major_formatter(dateFmt)
    ax.tick_params(axis="x", labelrotation=45)

    # BACKTESTING: overlay what actually happened
    if s_end_date:
        ax.plot(
            df_future.index,
            df_future,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            lw=1,
            c="tab:blue",
            linestyle="--",
        )

    if gtff.USE_ION:
        plt.ion()

    fig.tight_layout()
    plt.show()

    # BACKTESTING
    if s_end_date:
        # Relative prediction error in percent, shared by the line and the scatter
        pct_error = 100 * (df_pred.values - df_future.values) / df_future.values

        dateFmt = mdates.DateFormatter("%m-%d")
        fig, ax = plt.subplots(1, 2, figsize=plot_autoscale(), dpi=PLOT_DPI)

        ax0 = ax[0]
        ax0.plot(
            df_future.index,
            df_future,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.plot(df_pred.index, df_pred, lw=2, c="green")
        ax0.scatter(
            df_future.index,
            df_future,
            c="tab:blue",
            lw=3,
        )
        ax0.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.scatter(df_pred.index, df_pred, c="green", lw=3)
        ax0.plot(
            [values.index[-1], df_pred.index[0]],
            [values.values[-1], df_pred.values[0]],
            lw=2,
            c="green",
            ls="--",
        )
        ax0.set_title("BACKTESTING: Prices")
        ax0.set_xlim(
            values.index[-1],
            df_pred.index[-1] + datetime.timedelta(days=1),
        )
        ax0.set_ylabel("Share Price ($)")
        ax0.grid(True, which="major", color="#666666", linestyle="-")
        ax0.legend(["Real data", "Prediction data"])

        ax1 = ax[1]
        ax1.axhline(y=0, color="k", linestyle="--", linewidth=2)
        ax1.plot(
            df_future.index,
            pct_error,
            lw=2,
            c="red",
        )
        ax1.scatter(
            df_future.index,
            pct_error,
            c="red",
            lw=5,
        )
        ax1.set_title("BACKTESTING: % Error")
        ax1.plot(
            [values.index[-1], df_future.index[0]],
            [
                0,
                100 * (df_pred.values[0] - df_future.values[0]) / df_future.values[0],
            ],
            lw=2,
            ls="--",
            c="red",
        )
        ax1.set_xlim(
            values.index[-1],
            df_pred.index[-1] + datetime.timedelta(days=1),
        )
        ax1.set_xlabel("Time")
        ax1.set_ylabel("Prediction Error (%)")
        ax1.grid(True, which="major", color="#666666", linestyle="-")
        ax1.legend(["Real data", "Prediction data"])

        ax0.xaxis.set_major_formatter(dateFmt)
        ax0.tick_params(axis="x", labelrotation=45)
        ax1.xaxis.set_major_formatter(dateFmt)
        ax1.tick_params(axis="x", labelrotation=45)

        if gtff.USE_ION:
            plt.ion()
        fig.tight_layout()
        plt.show()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future

        if gtff.USE_COLOR:
            patch_pandas_text_adjustment()

            console.print("Time Real [$] x Prediction [$]")
            console.print(
                df_pred.apply(price_prediction_backtesting_color, axis=1).to_string()
            )
        else:
            console.print(df_pred[["Real", "Prediction"]].round(2).to_string())

        console.print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, values.values[-1])
    export_data(export, os.path.dirname(os.path.abspath(__file__)), "ets")
    console.print("")
def display_regression(
    dataset: str,
    values: Union[pd.Series, pd.DataFrame],
    poly_order: int,
    n_input: int,
    n_predict: int,
    n_jumps: int,
    s_end_date: str = "",
    export: str = "",
    time_res: str = "",
    external_axes: Optional[List[plt.Axes]] = None,
):
    """Display predictions for regression models

    Parameters
    ----------
    dataset : str
        Title for data
    values : Union[pd.Series, pd.DataFrame]
        Data to fit
    poly_order : int
        Order of polynomial to fit
    n_input : int
        Length of input sequence
    n_predict : int
        Length of prediction sequence
    n_jumps : int
        Number of jumps in data
    s_end_date : str, optional
        End date of the training data; when set, the function backtests
        against the data that follows it
    export : str, optional
        Format for exporting figures
    time_res : str
        Resolution for data, allowing for predicting outside of standard market days
    external_axes : Optional[List[plt.Axes]], optional
        External axes (1 axis is expected in the list, 3 when backtesting),
        by default None
    """
    # BACKTESTING: hold out the data after s_end_date and train on the rest
    if s_end_date:
        if not time_res:
            future_index = get_next_stock_market_days(
                last_stock_day=s_end_date, n_next_days=n_predict
            )
        else:
            future_index = pd.date_range(
                s_end_date, periods=n_predict + 1, freq=time_res
            )[1:]

        df_future = values[future_index[0] : future_index[-1]]  # noqa: E203
        values = values[:s_end_date]  # type: ignore

    l_predictions, _ = regression_model.get_regression_model(
        list(values.values), poly_order, n_input, n_predict, n_jumps
    )

    # Prediction data
    if not time_res:
        l_pred_days = get_next_stock_market_days(
            last_stock_day=values.index[-1],
            n_next_days=n_predict,
        )
    else:
        l_pred_days = pd.date_range(
            values.index[-1], periods=n_predict + 1, freq=time_res
        )[1:]
    df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

    # Plotting
    # This plot has 1 axes
    if external_axes is None:
        _, ax1 = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    else:
        if (not s_end_date and len(external_axes) != 1) or (
            s_end_date and len(external_axes) != 3
        ):
            logger.error("Expected list of 1 axis or 3 axes when backtesting.")
            console.print(
                "[red]Expected list of 1 axis or 3 axes when backtesting.\n[/red]"
            )
            return
        ax1 = external_axes[0]

    ax1.plot(values.index, values)

    if s_end_date:
        ax1.set_title(
            f"BACKTESTING: Regression (polynomial {poly_order}) on {dataset} - {n_predict} step prediction",
            fontsize=12,
        )
    else:
        ax1.set_title(
            f"Regression (polynomial {poly_order}) on {dataset} - {n_predict} step prediction"
        )
    ax1.set_xlim(values.index[0], l_pred_days[-1])
    ax1.set_ylabel("Value")
    # Dashed connector between the last observation and the first prediction
    ax1.plot(
        [values.index[-1], df_pred.index[0]],
        [values.values[-1], df_pred.values[0]],
        color=theme.down_color,
        linestyle="--",
    )
    ax1.plot(df_pred.index, df_pred, color=theme.down_color)
    ax1.axvspan(values.index[-1], df_pred.index[-1], alpha=0.2)
    # Query the limits of ax1 itself: with external axes pyplot's "current
    # axes" is not guaranteed to be the one being drawn on.
    _, _, ymin, ymax = ax1.axis()
    ax1.vlines(values.index[-1], ymin, ymax, linestyle="--")

    # BACKTESTING: overlay what actually happened
    if s_end_date:
        ax1.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax1.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )

    theme.style_primary_axis(ax1)

    if external_axes is None:
        theme.visualize_output()

    export_data(export, os.path.dirname(os.path.abspath(__file__)), "regression")
    console.print("")

    # BACKTESTING
    if s_end_date:
        # This plot has 2 axes
        if external_axes is None:
            _, axes = plt.subplots(
                2, 1, sharex=True, figsize=plot_autoscale(), dpi=PLOT_DPI
            )
            (ax2, ax3) = axes
        else:
            if len(external_axes) != 3:
                logger.error("Expected list of three axis items.")
                console.print("[red]Expected list of 3 axis items.\n[/red]")
                return
            (_, ax2, ax3) = external_axes

        ax2.plot(
            df_future.index,
            df_future,
            color=theme.up_color,
            linestyle="--",
        )
        ax2.plot(df_pred.index, df_pred, color=theme.down_color, marker="o")
        ax2.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            color=theme.up_color,
            linestyle="--",
        )
        ax2.plot(
            [values.index[-1], df_pred.index[0]],
            [values.values[-1], df_pred.values[0]],
            color=theme.down_color,
            linestyle="--",
            marker="o",
        )
        ax2.set_title("BACKTESTING: Real data vs Prediction", fontsize=12)
        ax2.set_xlim(values.index[-1], df_pred.index[-1])
        ax2.set_ylabel("Value")
        ax2.legend(["Real data", "Prediction data"])

        ax3.axhline(y=0, linestyle="--", color=theme.up_color)
        # Relative prediction error in percent
        ax3.plot(
            df_future.index,
            100 * (df_pred.values - df_future.values) / df_future.values,
            color=theme.down_color,
            marker="o",
        )
        ax3.set_title(
            "BACKTESTING: Error between Real data and Prediction [%]", fontsize=12
        )
        ax3.plot(
            [values.index[-1], df_future.index[0]],
            [
                0,
                100 * (df_pred.values[0] - df_future.values[0]) / df_future.values[0],
            ],
            linestyle="--",
            color=theme.down_color,
        )
        ax3.set_xlim(values.index[-1], df_pred.index[-1])
        ax3.set_xlabel("Time")
        ax3.set_ylabel("Error (%)")
        ax3.legend(["Real data", "Prediction data"])

        theme.style_primary_axis(ax2)
        theme.style_primary_axis(ax3)

        if external_axes is None:
            theme.visualize_output()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future

        if rich_config.USE_COLOR:
            patch_pandas_text_adjustment()

            console.print("Time Real [$] x Prediction [$]")
            console.print(
                df_pred.apply(
                    lambda_price_prediction_backtesting_color, axis=1
                ).to_string()
            )
        else:
            console.print(df_pred[["Real", "Prediction"]].round(2).to_string())

        console.print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, values.values[-1])
    console.print("")
def call_regression(self, other_args: List[str]):
    """Process regression command

    Parses the command arguments, validates the backtesting end date against
    the loaded data and displays the regression forecast.

    Parameters
    ----------
    other_args : List[str]
        Command arguments; a leading bare value is taken as the polynomial order
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="regression",
        description=(
            " Regression attempts to model the relationship between two variables"
            " by fitting a linear/quadratic/cubic/other equation to observed data."
            " One variable is considered to be an explanatory variable, and the"
            " other is considered to be a dependent variable. "
        ),
    )
    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )
    parser.add_argument(
        "-p",
        "--polynomial",
        action="store",
        dest="n_polynomial",
        type=check_positive,
        default=1,
        help="polynomial associated with regression.",
    )

    # Allow `regression 2` as shorthand for `regression -p 2`. Only a leading
    # bare value is rewritten: prepending "-p" in front of another flag (as the
    # previous `or` condition did) makes argparse reject the command.
    if other_args and not other_args[0].startswith("-"):
        other_args.insert(0, "-p")

    ns_parser = parse_known_args_and_warn(
        parser, other_args, export_allowed=EXPORT_ONLY_FIGURES_ALLOWED
    )
    if ns_parser:
        # BACKTESTING CHECK: abort instead of running a backtest that was
        # just reported as not allowed.
        if ns_parser.s_end_date:
            if ns_parser.s_end_date < self.data.index[0]:
                console.print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            if (
                ns_parser.s_end_date
                < get_next_stock_market_days(
                    last_stock_day=self.data.index[0],
                    n_next_days=5 + ns_parser.n_days,
                )[-1]
            ):
                console.print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

        regression_view.display_regression(
            dataset=self.coin,
            values=self.data[self.target],
            poly_order=ns_parser.n_polynomial,
            n_input=ns_parser.n_inputs,
            n_predict=ns_parser.n_days,
            n_jumps=ns_parser.n_jumps,
            s_end_date=ns_parser.s_end_date,
            export=ns_parser.export,
            time_res=self.resolution,
        )
def k_nearest_neighbors(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """Fit a K-nearest-neighbors regressor on the adjusted close and forecast with it.

    Prints the forecast and plots it together with the validation predictions.

    Parameters
    ----------
    other_args: List[str]
        List of argparse arguments
    s_ticker: str
        Ticker, used in the plot title
    df_stock: pd.DataFrame
        Dataframe of stock prices; the "5. adjusted close" column is used
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="knn",
        description=(
            " K nearest neighbors is a simple algorithm that stores all available"
            " cases and predict the numerical target based on a similarity measure"
            " (e.g. distance functions). "
        ),
    )

    # Positive-integer options share everything but their names, default and help
    positive_int_options = (
        ("-i", "--input", "n_inputs", 40, "number of days to use as input for prediction."),
        ("-d", "--days", "n_days", 5, "prediction days."),
        ("-j", "--jumps", "n_jumps", 1, "number of jumps in training data."),
        ("-n", "--neighbors", "n_neighbors", 20, "number of neighbors to use on the algorithm."),
    )
    for short_flag, long_flag, dest, default, help_text in positive_int_options:
        parser.add_argument(
            short_flag,
            long_flag,
            action="store",
            dest=dest,
            type=check_positive,
            default=default,
            help=help_text,
        )

    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select for testing",
    )
    parser.add_argument(
        "-t",
        "--test_size",
        default=0.2,
        dest="valid_split",
        type=float,
        help="Percentage of data to validate in sample",
    )
    parser.add_argument(
        "-p",
        "--pp",
        action="store",
        dest="s_preprocessing",
        default="none",
        choices=["normalization", "standardization", "minmax", "none"],
        help="pre-processing data.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        prepared = prepare_scale_train_valid_test(
            df_stock["5. adjusted close"], ns_parser
        )
        (
            X_train,
            X_valid,
            y_train,
            y_valid,
            _,
            _,
            _,
            y_dates_valid,
            forecast_data_input,
            dates_forecast_input,
            scaler,
            is_error,
        ) = prepared

        if is_error:
            print("Error preparing data")
            return

        n_train, train_len = X_train.shape[0], X_train.shape[1]
        n_valid, valid_len = X_valid.shape[0], X_valid.shape[1]
        print(
            f"Training on {n_train} sequences of length {train_len}. Using {n_valid} sequences "
            f" of length {valid_len} for validation"
        )

        future_dates = get_next_stock_market_days(
            dates_forecast_input[-1], n_next_days=ns_parser.n_days
        )

        # Machine Learning model: the regressor works on 2D (samples, features) arrays
        knn = neighbors.KNeighborsRegressor(n_neighbors=ns_parser.n_neighbors)
        train_features = X_train.reshape(n_train, train_len)
        train_targets = y_train.reshape(y_train.shape[0], y_train.shape[1])
        knn.fit(train_features, train_targets)

        preds = knn.predict(X_valid.reshape(n_valid, valid_len))
        forecast_data = knn.predict(forecast_data_input.reshape(1, -1))

        forecast_data_df = pd.DataFrame(forecast_data.T, index=future_dates)
        last_price = df_stock["5. adjusted close"].values[-1]
        print_pretty_prediction(forecast_data_df[0], last_price)

        plot_title = f"KNN Model with {ns_parser.n_neighbors} Neighbors on {s_ticker}"
        plot_data_predictions(
            df_stock,
            preds,
            y_valid,
            y_dates_valid,
            scaler,
            plot_title,
            forecast_data_df,
            1,
        )

    except Exception as e:
        print(e)
        print("")
def conv1d(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """Train a 1D Convolutional Neural Net (1D CNN) and show its forecast

    The network is rebuilt and refitted ``n_loops`` times; validation
    predictions are combined with a median across the loops.

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        Stock ticker, used in the plot title
    df_stock: pd.DataFrame
        Dataframe of stock prices; the "5. adjusted close" column is modelled
    """
    try:
        ns_parser = parse_args(
            prog="conv1d",
            description="""1D CNN.""",
            other_args=other_args,
        )
        if not ns_parser:
            return

        prepared = prepare_scale_train_valid_test(
            df_stock["5. adjusted close"], ns_parser
        )
        (
            x_train,
            x_valid,
            y_train,
            y_valid,
            _,
            _,
            _,
            y_dates_valid,
            forecast_data_input,
            dates_forecast_input,
            scaler,
            is_error,
        ) = prepared
        if is_error:
            return

        n_train, train_len = x_train.shape[0], x_train.shape[1]
        n_valid, valid_len = x_valid.shape[0], x_valid.shape[1]
        print(
            f"Training on {n_train} sequences of length {train_len}. Using {n_valid} sequences "
            f" of length {valid_len} for validation. Model will run {ns_parser.n_loops} loops"
        )

        future_dates = get_next_stock_market_days(
            dates_forecast_input[-1], n_next_days=ns_parser.n_days
        )

        # One slice per loop: validation predictions and the out-of-sample forecast
        preds = np.zeros((ns_parser.n_loops, n_valid, ns_parser.n_days))
        forecast_data = np.zeros((ns_parser.n_loops, ns_parser.n_days))

        for loop_idx in range(ns_parser.n_loops):
            # Build Neural Network model
            model = build_neural_network_model(
                cfg_nn_models.Convolutional,
                ns_parser.n_inputs,
                ns_parser.n_days,
            )
            optimizer = optimizers[cfg_nn_models.Optimizer](lr=ns_parser.lr)
            model.compile(optimizer=optimizer, loss=cfg_nn_models.Loss)

            # The model is fed 3-D input with a trailing axis of size 1
            train_input = x_train.reshape(n_train, train_len, 1)
            valid_input = x_valid.reshape(n_valid, valid_len, 1)
            model.fit(
                train_input,
                y_train,
                epochs=ns_parser.n_epochs,
                verbose=True,
                batch_size=ns_parser.n_batch_size,
                validation_data=(valid_input, y_valid),
                callbacks=[es],
            )

            loop_predictions = model.predict(valid_input)
            preds[loop_idx] = loop_predictions.reshape(n_valid, ns_parser.n_days)
            forecast_data[loop_idx] = forecast(
                forecast_data_input, future_dates, model, scaler
            ).values.flat

        forecast_data_df = pd.DataFrame(forecast_data.T, index=future_dates)

        # With several loops the median forecast is reported, otherwise the single run
        if ns_parser.n_loops > 1:
            forecast_data_df["Median"] = forecast_data_df.median(axis=1)
            summary_column = "Median"
        else:
            summary_column = 0
        print_pretty_prediction(
            forecast_data_df[summary_column],
            df_stock["5. adjusted close"].values[-1],
        )

        plot_data_predictions(
            df_stock,
            np.median(preds, axis=0),
            y_valid,
            y_dates_valid,
            scaler,
            f"Conv1D Model on {s_ticker}",
            forecast_data_df,
            ns_parser.n_loops,
        )
        print("")

    except Exception as e:
        print(e)
        traceback.print_exc()
        print("")

    finally:
        restore_env()
def display_arima(
    dataset: str,
    values: Union[pd.DataFrame, pd.Series],
    arima_order: str,
    n_predict: int,
    seasonal: bool,
    ic: str,
    results: bool,
    s_end_date: str = "",
    export: str = "",
):
    """View fit ARIMA model

    Fits the model, plots the history together with the forecast and prints
    the predicted values. When ``s_end_date`` is given the data after that
    date is held out, and a second figure plus a comparison table show the
    forecast against the held-out data (backtesting).

    Parameters
    ----------
    dataset : str
        String indicating dataset (for plot title)
    values : Union[pd.DataFrame, pd.Series]
        Data to fit
    arima_order : str
        String of ARIMA params in form "p,d,q"
    n_predict : int
        Days to predict
    seasonal : bool
        Flag to use seasonal model
    ic : str
        Information Criteria for model evaluation
    results : bool
        Flag to display model summary
    s_end_date : str, optional
        Specified end date for backtesting comparisons
    export : str, optional
        Format to export image
    """
    if arima_order:
        t_order = tuple(int(term) for term in arima_order.split(","))

    if s_end_date:
        future_index = get_next_stock_market_days(
            last_stock_day=s_end_date, n_next_days=n_predict
        )

        if future_index[-1] > datetime.datetime.now():
            print(
                "Backtesting not allowed, since End Date + Prediction days is in the future\n"
            )
            return

        # Hold out the days being predicted, then fit only on data up to the end date
        df_future = values[future_index[0] : future_index[-1]]
        values = values[:s_end_date]  # type: ignore

    l_predictions, model = arima_model.get_arima_model(
        values, arima_order, n_predict, seasonal, ic
    )

    # Prediction data
    l_pred_days = get_next_stock_market_days(
        last_stock_day=values.index[-1],
        n_next_days=n_predict,
    )
    df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

    if results:
        print(model.summary())
        print("")

    # Plotting
    fig, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
    ax.plot(values.index, values, lw=2)

    # pylint:disable=no-member
    # Show the user-supplied order when there is one, otherwise the fitted model's order
    order_label = str(t_order) if arima_order else model.order
    title_prefix = "BACKTESTING: " if s_end_date else ""
    ax.set_title(
        f"{title_prefix}ARIMA {order_label} on {dataset} - {n_predict} days prediction"
    )

    ax.set_xlim(values.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1])
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    # The on/off flag is positional: its keyword was renamed from `b` to
    # `visible` in Matplotlib, so passing `b=True` fails on recent releases.
    ax.grid(True, which="major", color="#666666", linestyle="-")
    ax.minorticks_on()
    ax.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
    ax.plot(
        [values.index[-1], df_pred.index[0]],
        [values.values[-1], df_pred.values[0]],
        lw=1,
        c="tab:green",
        linestyle="--",
    )
    ax.plot(df_pred.index, df_pred, lw=2, c="tab:green")
    ax.axvspan(values.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
    _, _, ymin, ymax = ax.axis()
    ax.vlines(values.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k")

    # BACKTESTING
    if s_end_date:
        ax.plot(
            df_future.index,
            df_future.values,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            lw=1,
            c="tab:blue",
            linestyle="--",
        )

    fig.tight_layout()
    if gtff.USE_ION:
        plt.ion()

    plt.show()

    # BACKTESTING
    if s_end_date:
        fig, ax = plt.subplots(1, 2, figsize=plot_autoscale(), dpi=PLOT_DPI)
        x_end = df_pred.index[-1] + datetime.timedelta(days=1)

        ax0 = ax[0]
        ax0.plot(
            df_future.index,
            df_future.values,
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.plot(df_pred.index, df_pred, lw=2, c="green")
        ax0.scatter(df_future.index, df_future, c="tab:blue", lw=3)
        ax0.plot(
            [values.index[-1], df_future.index[0]],
            [
                values.values[-1],
                df_future.values[0],
            ],
            lw=2,
            c="tab:blue",
            ls="--",
        )
        ax0.scatter(df_pred.index, df_pred, c="green", lw=3)
        ax0.plot(
            [values.index[-1], df_pred.index[0]],
            [values.values[-1], df_pred.values[0]],
            lw=2,
            c="green",
            ls="--",
        )
        ax0.set_title("BACKTESTING: Real data Prediction")
        ax0.set_xlim(values.index[-1], x_end)
        ax0.set_xticks([values.index[-1], x_end])
        ax0.set_ylabel("Value")
        ax0.grid(True, which="major", color="#666666", linestyle="-")
        ax0.minorticks_on()
        ax0.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        ax0.legend(["Real data", "Prediction data"])
        # Clears the x ticks set above on this panel
        ax0.set_xticks([])

        # Relative error of the forecast against the held-out data, in percent
        pct_error = 100 * (df_pred.values - df_future.values) / df_future.values

        ax1 = ax[1]
        ax1.axhline(y=0, color="k", linestyle="--", linewidth=2)
        ax1.plot(
            df_future.index,
            pct_error,
            lw=2,
            c="red",
        )
        ax1.scatter(
            df_future.index,
            pct_error,
            c="red",
            lw=5,
        )
        ax1.set_title("BACKTESTING: Error between Real data and Prediction [%]")
        ax1.plot(
            [values.index[-1], df_future.index[0]],
            [
                0,
                100 * (df_pred.values[0] - df_future.values[0]) / df_future.values[0],
            ],
            lw=2,
            ls="--",
            c="red",
        )
        ax1.set_xlim(values.index[-1], x_end)
        ax1.set_xticks([values.index[-1], x_end])
        ax1.set_xlabel("Time")
        ax1.set_ylabel("Prediction Error (%)")
        ax1.grid(True, which="major", color="#666666", linestyle="-")
        ax1.minorticks_on()
        ax1.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        ax1.legend(["Real data", "Prediction data"])

        fig.tight_layout()
        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # Refactor prediction dataframe for backtesting print
        df_pred.name = "Prediction"
        df_pred = df_pred.to_frame()
        df_pred["Real"] = df_future.values

        if gtff.USE_TABULATE_DF:
            # The tabulated output is the same with and without colour. The
            # non-colour branch used to read a non-existent "Predicted" column
            # and raised KeyError; both cases now share this code.
            df_pred["Real"] = df_pred["Real"].astype(float)
            df_pred["Prediction"] = df_pred["Prediction"].astype(float)
            df_pred["Dif"] = 100 * (df_pred.Prediction - df_pred.Real) / df_pred.Real
            print(
                tabulate(
                    df_pred,
                    headers=["Date", "Predicted", "Actual", "% Difference"],
                    showindex=True,
                    floatfmt=".2f",
                    tablefmt="fancy_grid",
                )
            )
        elif gtff.USE_COLOR:
            patch_pandas_text_adjustment()
            print("Time Real [$] x Prediction [$]")
            print(
                df_pred.apply(price_prediction_backtesting_color, axis=1).to_string()
            )
        else:
            print(df_pred[["Real", "Prediction"]].round(2).to_string())

        print("")
        print_prediction_kpis(df_pred["Real"].values, df_pred["Prediction"].values)

    else:
        # Print prediction data
        print_pretty_prediction(df_pred, values.values[-1])

    export_data(export, os.path.dirname(os.path.abspath(__file__)), "arima")
    print("")
def display_mc_forecast(
    data: Union[pd.DataFrame, pd.Series],
    n_future: int,
    n_sims: int,
    use_log=True,
    fig_title: str = "",
    export: str = "",
    time_res: str = "",
    external_axes: Optional[List[plt.Axes]] = None,
):
    """Display monte carlo forecasting

    Draws two panels: the data followed by every simulated path, and the
    distribution of the simulated values at the final step.

    Parameters
    ----------
    data : Union[pd.DataFrame, pd.Series]
        Data to forecast
    n_future : int
        Number of days to forecast
    n_sims : int
        Number of simulations to run
    use_log : bool, optional
        Flag to use lognormal, by default True
    fig_title : str
        Figure title
    export: str
        Format to export data
    time_res : str
        Resolution for data, allowing for predicting outside of standard market days
    external_axes : Optional[List[plt.Axes]], optional
        External axes (2 axis is expected in the list), by default None
    """
    predicted_values = mc_model.get_mc_brownian(data, n_future, n_sims, use_log)

    # Daily (or unspecified) resolution follows market days; any other
    # resolution is a regular date range starting after the last observation.
    if not time_res or time_res == "1D":
        future_index = get_next_stock_market_days(data.index[-1], n_next_days=n_future)  # type: ignore
    else:
        future_index = pd.date_range(data.index[-1], periods=n_future + 1, freq=time_res)[1:]  # type: ignore

    # This plot has 2 axes
    if external_axes is None:
        _, (ax1, ax2) = plt.subplots(2, 1, figsize=plot_autoscale(), dpi=PLOT_DPI)
    else:
        if len(external_axes) != 2:
            # The message used to ask for one axis and ended in a literal "/n"
            console.print("[red]Expected list of two axis items.\n[/red]")
            return
        (ax1, ax2) = external_axes

    ax1.plot(data)
    ax1.plot(future_index, predicted_values, alpha=0.3)
    start_timestamp = data.index[0]
    end_timestamp = future_index[-1]
    ax1.set_xlim(start_timestamp, end_timestamp)
    ax1.set_title(f"{fig_title} Data Predictions")

    # Values of every simulation at the last forecast step
    final_values = predicted_values[-1, :]
    sns.histplot(final_values, ax=ax2, kde=True)
    ax2.set_xlabel("Final Value")
    ax2.axvline(
        x=data.values[-1], color=theme.down_color, label="Last Value", linestyle="-"
    )
    ax2.set_title(f"Distribution of final values after {n_future} steps.")
    ax2.set_xlim(np.min(final_values), np.max(final_values))
    ax2.legend()

    theme.style_primary_axis(ax1)
    theme.style_primary_axis(ax2)

    if external_axes is None:
        theme.visualize_output()

    export_data(export, os.path.dirname(os.path.abspath(__file__)), "mc")
def simple_moving_average(l_args, s_ticker, df_stock):
    """Forecast the adjusted close with a simple moving average and plot it

    Each predicted day is the mean of the previous ``n_length`` values, where
    earlier predictions are fed back in as if they were observations.

    Parameters
    ----------
    l_args : List[str]
        Command arguments, parsed with argparse
    s_ticker : str
        Ticker, used in the plot title
    df_stock : pd.DataFrame
        Dataframe of stock prices; the "5. adjusted close" column is used
    """
    parser = argparse.ArgumentParser(
        prog="sma",
        description=(
            " Moving Averages are used to smooth the data in an array to help eliminate noise"
            " and identify trends. The Simple Moving Average is literally the simplest form of"
            " a moving average. Each output value is the average of the previous n values. In"
            " a Simple Moving Average, each value in the time period carries equal weight, and"
            " values outside of the time period are not included in the average. This makes it"
            " less responsive to recent changes in the data, which can be useful for filtering"
            " out those changes. "
        ),
    )

    parser.add_argument(
        "-l",
        "--length",
        action="store",
        dest="n_length",
        type=check_positive,
        default=20,
        help="length of SMA window.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, l_args)
        if not ns_parser:
            return

        # Prediction data
        # Rolling window over the observations followed by the predictions.
        # Only the last n_length entries are averaged, so the window keeps its
        # size even when n_days exceeds n_length (it used to grow to cover
        # every prediction made so far).
        window = list(df_stock["5. adjusted close"].values[-ns_parser.n_length :])
        l_predictions = []
        for _ in range(ns_parser.n_days):
            next_value = np.mean(window[-ns_parser.n_length :])
            l_predictions.append(next_value)
            window.append(next_value)

        l_pred_days = get_next_stock_market_days(
            last_stock_day=df_stock["5. adjusted close"].index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Plotting
        plt.plot(df_stock.index, df_stock["5. adjusted close"], lw=2)
        plt.title(
            f"{ns_parser.n_length} Moving Average on {s_ticker} - {ns_parser.n_days} days prediction"
        )
        plt.xlim(df_stock.index[0], get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        # The on/off flag is positional: its keyword was renamed from `b` to
        # `visible` in Matplotlib, so passing `b=True` fails on recent releases.
        plt.grid(True, which="major", color="#666666", linestyle="-")
        plt.minorticks_on()
        plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)
        df_ma = df_stock["5. adjusted close"].rolling(window=ns_parser.n_length).mean()
        plt.plot(df_ma.index, df_ma, lw=2, linestyle="--", c="tab:orange")
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [df_stock["5. adjusted close"].values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(df_stock.index[-1], df_pred.index[-1], facecolor="tab:orange", alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(df_stock.index[-1], ymin, ymax, linewidth=1, linestyle="--", color="k")
        plt.ion()
        plt.show()

        # Print prediction data
        print_pretty_prediction(df_pred, df_stock["5. adjusted close"].values[-1])
        print("")

    except Exception as e:
        print(e)
        print("")