def get_sharpe_ratio(symbol, start_date=config.start_date, end_date=config.end_date):
    """Computes the sharpe ratio of the given symbol from its daily total returns

    The total-return series is the closing price plus the cumulative
    dividends paid inside the requested window. No risk-free rate is
    subtracted.

    Parameters:
        symbol : str
        start_date : date, optional
        end_date : date, optional

    Returns:
        float
            Mean daily return divided by its standard deviation, scaled by
            sqrt(252) (presumably the number of trading days in a year)
    """

    price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
    if utils.refresh(price_path, refresh=False):
        prices.download_data_from_yahoo(symbol, start_date=start_date, end_date=end_date)
    history = pd.read_csv(price_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]

    # Closing price with every dividend paid so far added back in
    total_value = history["Close"].add(history["Dividends"].cumsum())
    daily_returns = total_value.pct_change()
    return daily_returns.mean() / daily_returns.std() * np.sqrt(252)
def get_performance(symbol, start_date=config.start_date, end_date=config.end_date):
    """Returns the overall performance of the given symbol

    Performance is expressed as a growth factor: the final close plus every
    dividend paid inside the window, divided by the first close.

    Parameters:
        symbol : str
        start_date : date, optional
        end_date : date, optional

    Returns:
        float
            The overall performance of the given symbol
    """

    price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
    if utils.refresh(price_path, refresh=False):
        prices.download_data_from_yahoo(symbol, start_date=start_date, end_date=end_date)
    df = pd.read_csv(price_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]

    total_value = df["Close"].add(df["Dividends"].cumsum())
    # The Series are indexed by date, so the first/last rows have to be taken
    # by position with .iloc; a bare integer inside [] is a label lookup that
    # pandas only treats as positional through a deprecated fallback.
    return total_value.iloc[-1] / df["Close"].iloc[0]
def get_cgar(symbol, start_date=config.start_date, end_date=config.end_date):
    """Returns the compound annual growth rate of the given symbol

    The value is returned as a growth factor (e.g. 1.07 for 7% a year); the
    usual "- 1" at the end of the formula is intentionally not applied.

    Parameters:
        symbol : str
        start_date : date, optional
        end_date : date, optional

    Returns:
        float
            The compound annual growth rate of the given symbol

    Raises:
        ZeroDivisionError
            If the first and last rows of the window are less than a day apart
    """

    price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
    if utils.refresh(price_path, refresh=False):
        prices.download_data_from_yahoo(symbol, start_date=start_date, end_date=end_date)
    df = pd.read_csv(price_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]

    # Positional access on a date-indexed Series must go through .iloc
    total_value = df["Close"].add(df["Dividends"].cumsum())
    growth = total_value.iloc[-1] / df["Close"].iloc[0]

    # The exponent is 1 / years. The span is measured in calendar days, so a
    # year is 365 of them. The previous expression `1 / days / 252` evaluated
    # to 1 / (days * 252), which pushed every result to roughly 1.
    elapsed_days = (df.index[-1] - df.index[0]).days
    return growth ** (365 / elapsed_days)
def get_price_on_date(self, symbol, date, time="Close"):
    """Gets the price of the given symbol on the given date

    If the date has no row in the price table, the most recent earlier
    business day that does have one is used instead. Price tables are cached
    in self.price_files after their first load, and the time spent here is
    accumulated into self.times[get_price_time].

    Parameters:
        symbol : str
        date : datetime
        time : str
            Which column to use to determine price. Valid times are "Open" and "Close"

    Returns:
        float
            The price of the given symbol on the given date

    Raises:
        IndexError
            If there is no price on or before the given date
    """

    start_time = timer()
    if symbol in self.price_files:
        df = self.price_files[symbol]
    else:
        price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
        if utils.refresh(price_path, refresh=False):
            prices.download_data_from_yahoo(symbol, start_date=self.start_date, end_date=self.end_date)
        df = pd.read_csv(price_path, index_col="Date", parse_dates=["Date"])[self.start_date:self.end_date]
        self.price_files[symbol] = df

    # Walk back one business day at a time until a priced day is found.
    # Stepping stops once the date falls before the first available row;
    # the earlier recursive version kept going until the interpreter's
    # recursion limit was hit.
    lookup_date = date
    while lookup_date not in df.index:
        if df.empty or lookup_date < df.index[0]:
            raise IndexError("No price available for {} on or before {}".format(symbol, date))
        lookup_date = utils.add_business_days(lookup_date, -1)
    price = df.loc[lookup_date][time]

    self.times[get_price_time] = self.times[get_price_time] + timer() - start_time
    return price
def get_num_conseq_increase_decrease(symbol, refresh=False, start_date=config.start_date, end_date=config.end_date):
    """Counts how often the close kept moving in the same direction or reversed

    Each day's change in close is compared with the previous day's change.

    Parameters:
        symbol : str
        refresh : bool, optional
        start_date : date, optional
        end_date : date, optional

    Returns:
        dict
            "ConseqIncrease": days that rose after a day that also rose
            "ConseqDecrease": days that fell after a day that also fell
            "Reversal": days that moved opposite to the previous day
    """

    price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
    if utils.refresh(price_path, refresh=refresh):
        prices.download_data_from_yahoo(symbol, start_date=start_date, end_date=end_date)
    df = pd.read_csv(price_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]

    change = df["Close"].diff()
    previous_change = change.shift(1)
    rose, fell = change > 0, change < 0
    rose_before, fell_before = previous_change > 0, previous_change < 0

    counts = {}
    counts["ConseqIncrease"] = (rose & rose_before).sum()
    counts["ConseqDecrease"] = (fell & fell_before).sum()
    counts["Reversal"] = (fell & rose_before).sum() + (rose & fell_before).sum()
    return counts
def get_dividends(self, symbol, date):
    """Adds dividends to the portfolio for the given symbol on the given date

    When a dividend is paid, it is added to self.cash and
    self.total_dividends, a note is appended to the actions column of
    self.log for that date, and the per-share dividend is subtracted from
    self.cost_basis[symbol]. The time spent here is accumulated into
    self.times[get_dividend_time].

    Parameters:
        symbol : str
        date : datetime

    Returns:
        float
            The dividends added
    """

    start_time = timer()
    if symbol in self.price_files:
        df = self.price_files[symbol]
    else:
        price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
        if utils.refresh(price_path, refresh=False):
            prices.download_data_from_yahoo(symbol, start_date=self.start_date, end_date=self.end_date)
        df = pd.read_csv(price_path, index_col="Date", parse_dates=["Date"])[self.start_date:self.end_date]
        self.price_files[symbol] = df

    # Shares held times the per-share dividend, or 0 when there is no row
    # for the date or the table has no Dividends column
    dividend = self.portfolio[symbol] * df.loc[date]["Dividends"] if date in df.index and "Dividends" in df.columns else 0
    if dividend != 0:
        self.cash += dividend
        self.total_dividends += dividend
        # Write through a single .loc[row, column] call. The chained form
        # `.loc[date][column] = ...` assigns into the intermediate row object,
        # which may be a copy, so the log itself could be left unchanged.
        self.log.loc[date, actions_column_name] = self.log.loc[date, actions_column_name] + "Dividend: {} {} Shares totaling {:.2f} ".format(symbol, self.portfolio[symbol], dividend)
        # TODO: move this into update_winners_losers
        self.cost_basis[symbol] -= df.loc[date]["Dividends"]

    self.times[get_dividend_time] = self.times[get_dividend_time] + timer() - start_time
    return dividend
def get_annualized_performance(symbol, start_date=config.start_date, end_date=config.end_date):
    """Returns the annualized performance of the given symbol

    The growth factor over the window (last close plus dividends, divided by
    the first close) is divided by the window length in years, taken as the
    number of rows / 252, and 1 is added.

    Parameters:
        symbol : str
        start_date : date, optional
        end_date : date, optional

    Returns:
        float
            The annualized performance of the given symbol
    """

    price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
    if utils.refresh(price_path, refresh=False):
        prices.download_data_from_yahoo(symbol, start_date=start_date, end_date=end_date)
    df = pd.read_csv(price_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]

    # Positional access on a date-indexed Series must go through .iloc; a bare
    # integer inside [] only works through a deprecated fallback.
    total_value = df["Close"].add(df["Dividends"].cumsum())
    growth = total_value.iloc[-1] / df["Close"].iloc[0]

    # NOTE(review): the original author was unsure about this formula and
    # noted that it normally has a -1 at the end. A geometric alternative
    # would raise the growth factor to the power (252 / len(df.index)).
    # The formula is left as it was pending confirmation.
    years = len(df.index) / 252
    return growth / years + 1
def rsi(symbol, period=default_period, refresh=False, start_date=config.start_date, end_date=config.end_date):
    """Calculates the relative strength index for the given symbol, saves this data in a .csv file, and returns this data

    The RSI is a leading momentum indicator. Gains and losses are smoothed
    with an exponentially weighted mean whose span is the period.

    Parameters:
        symbol : str
        period : int, optional
        refresh : bool, optional
        start_date : date, optional
        end_date : date, optional

    Returns:
        series
            The "RSI<period>" column for the given symbol
    """

    ta_path = utils.get_file_path(config.ta_data_path, table_filename, symbol=symbol)
    if utils.refresh(ta_path, refresh=refresh):
        # The indicator table is stale, so start again from closing prices
        price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
        if utils.refresh(price_path, refresh=refresh):
            prices.download_data_from_yahoo(symbol, start_date=start_date, end_date=end_date)
        df = pd.read_csv(price_path, usecols=["Date", "Close"], index_col="Date", parse_dates=["Date"])[start_date:end_date]
    else:
        df = pd.read_csv(ta_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]

    column_name = "RSI" + str(period)
    if column_name in df.columns:
        return df[column_name]

    # Split the daily change into its upward and downward parts
    delta = df["Close"].diff()
    gains = delta.copy()
    losses = delta.copy()
    gains[gains < 0] = 0
    losses[losses > 0] = 0

    # ema rsi (an sma variant would use .rolling(period).mean() instead)
    average_gain = gains.ewm(span=period, min_periods=period).mean()
    average_loss = losses.abs().ewm(span=period, min_periods=period).mean()
    df[column_name] = 100 - (100 / (1 + average_gain / average_loss))

    utils.debug(df[column_name])
    df.to_csv(ta_path)
    return df[column_name]
def ema(symbol, period, refresh=False, start_date=config.start_date, end_date=config.end_date):
    """Calculates the exponential moving average for the given symbol, saves this data in a .csv file, and returns this data

    The EMA is a lagging trend indicator.

    Parameters:
        symbol : str
        period : int
        refresh : bool, optional
        start_date : date, optional
        end_date : date, optional

    Returns:
        series
            The "EMA<period>" column for the given symbol
    """

    ta_path = utils.get_file_path(config.ta_data_path, table_filename, symbol=symbol)
    if utils.refresh(ta_path, refresh=refresh):
        # The indicator table is stale, so start again from closing prices
        price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
        if utils.refresh(price_path, refresh=refresh):
            prices.download_data_from_yahoo(symbol, start_date=start_date, end_date=end_date)
        df = pd.read_csv(price_path, usecols=["Date", "Close"], index_col="Date", parse_dates=["Date"])[start_date:end_date]
    else:
        df = pd.read_csv(ta_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]

    column_name = "EMA" + str(period)
    if column_name in df.columns:
        return df[column_name]

    smoothed_close = df["Close"].ewm(span=period, min_periods=period, adjust=False)
    df[column_name] = smoothed_close.mean()
    utils.debug(df[column_name])
    df.to_csv(ta_path)
    return df[column_name]
def get_beta(symbol_a, symbol_b, start_date=config.start_date, end_date=config.end_date):
    """Returns the beta of symbol_a to symbol_b

    Beta is the covariance of the two symbols' daily total returns divided
    by the variance of symbol_b's returns. Only dates present for both
    symbols are used.

    Parameters:
        symbol_a : str
        symbol_b : str
        start_date : date, optional
        end_date : date, optional

    Returns:
        float
            The beta of symbol_a to symbol_b
    """

    def _total_value(symbol):
        """Loads closing prices with cumulative dividends added back in"""
        price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
        if utils.refresh(price_path, refresh=False):
            prices.download_data_from_yahoo(symbol, start_date=start_date, end_date=end_date)
        df = pd.read_csv(price_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]
        return df["Close"].add(df["Dividends"].cumsum())

    # Keep only the dates both symbols share before computing returns.
    # np.cov needs equal-length inputs and pairs them by position, so
    # unaligned series would either raise or compare different days.
    values = pd.concat([_total_value(symbol_a), _total_value(symbol_b)], axis=1, join="inner", keys=["a", "b"])
    returns = values.pct_change().dropna()

    # Take both numbers from the same covariance matrix so they share one
    # normalization. np.cov defaults to ddof=1 while np.var defaults to
    # ddof=0, so mixing the two skewed beta by a factor of n / (n - 1).
    covariance = np.cov(returns["a"], returns["b"])
    beta = covariance[0][1] / covariance[1][1]
    return beta
def macd(symbol, period=default_periods, refresh=False, start_date=config.start_date, end_date=config.end_date):
    """Calculates the macd for the given symbol, saves this data in a .csv file, and returns this data

    The MACD is a lagging trend indicator. The macd line is the EMA of the
    close over period[1] minus the EMA over period[2]; the signal line is
    the EMA of the macd line over period[0].

    Parameters:
        symbol : str
        period : list of int, optional
            Must contain 3 values. First value is signal line, second is fast line, third is slow line.
        refresh : bool, optional
        start_date : date, optional
        end_date : date, optional

    Returns:
        dataframe
            A dataframe containing the macd and signal columns for the given symbol

    Raises:
        ValueError
            If period does not contain exactly 3 values
    """

    # Validate before touching the disk or the network
    if len(period) != 3:
        raise ValueError("MACD requires 3 periods")

    ta_path = utils.get_file_path(config.ta_data_path, table_filename, symbol=symbol)
    if not utils.refresh(ta_path, refresh=refresh):
        df = pd.read_csv(ta_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]
    else:
        price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
        if utils.refresh(price_path, refresh=refresh):
            prices.download_data_from_yahoo(symbol, start_date=start_date, end_date=end_date)
        df = pd.read_csv(price_path, usecols=["Date", "Close"], index_col="Date", parse_dates=["Date"])[start_date:end_date]

    macd_column_name = "MACD" + str(period[1]) + "-" + str(period[2])
    signal_column_name = "MACD" + str(period[0])
    if macd_column_name not in df.columns or signal_column_name not in df.columns:
        if macd_column_name not in df.columns:
            second_ema = df["Close"].ewm(span=period[1], min_periods=period[1], adjust=False).mean()
            third_ema = df["Close"].ewm(span=period[2], min_periods=period[2], adjust=False).mean()
            df[macd_column_name] = second_ema - third_ema
            utils.debug(df[macd_column_name])
        if signal_column_name not in df.columns:
            df[signal_column_name] = df[macd_column_name].ewm(span=period[0], min_periods=period[0], adjust=False).mean()
            utils.debug(df[signal_column_name])
        df.to_csv(ta_path)
    return df[[macd_column_name, signal_column_name]]
def plot_macd(symbol, period=default_periods, refresh=False, start_date=config.start_date, end_date=config.end_date):
    """Calculates the macd for the given symbol, saves this data in a .csv file, and plots this data

    The MACD is a lagging trend indicator. The figure has two panels: the
    closing price on top, and the macd line, signal line and their
    difference below. The figure is also saved to the graphs directory.

    Parameters:
        symbol : str
        period : list of int, optional
            Must contain 3 values. First value is signal line, second is fast line, third is slow line.
        refresh : bool, optional
        start_date : date, optional
        end_date : date, optional

    Returns:
        figure, axes
            A figure and axes containing the macd for the given symbol

    Raises:
        ValueError
            If period does not contain exactly 3 values
        ta.InsufficientDataException
            If there are fewer rows than the last period
    """

    # Validate before touching the disk or the network
    if len(period) != 3:
        raise ValueError("MACD requires 3 periods")

    ta_path = utils.get_file_path(config.ta_data_path, table_filename, symbol=symbol)
    if not utils.refresh(ta_path, refresh=refresh):
        df = pd.read_csv(ta_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]
    else:
        price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
        if utils.refresh(price_path, refresh=refresh):
            prices.download_data_from_yahoo(symbol, start_date=start_date, end_date=end_date)
        df = pd.read_csv(price_path, usecols=["Date", "Close"], index_col="Date", parse_dates=["Date"])[start_date:end_date]

    if len(df) < period[-1]:
        raise ta.InsufficientDataException("Not enough data to compute a period length of " + str(period))

    fig, ax = plt.subplots(2, figsize=config.figsize)
    ax[0].plot(df.index, df["Close"], label="Price")
    utils.prettify_ax(ax[0], title=symbol + "Price", start_date=start_date, end_date=end_date)

    macd_column_name = "MACD" + str(period[1]) + "-" + str(period[2])
    signal_column_name = "MACD" + str(period[0])
    # Join only the columns that are missing. macd() always returns both,
    # and DataFrame.join raises on overlapping column names, which happened
    # whenever exactly one of the two was already in the table.
    missing_columns = [name for name in (macd_column_name, signal_column_name) if name not in df.columns]
    if missing_columns:
        computed = macd(symbol, period, refresh=False, start_date=start_date, end_date=end_date)
        df = df.join(computed[missing_columns])

    ax[1].plot(df.index, df[macd_column_name], label="MACD")
    ax[1].plot(df.index, df[signal_column_name], label="Signal")
    # Can't overlay a histogram with line plots so the histogram has to also be a line plot
    ax[1].plot(df.index, (df[macd_column_name] - df[signal_column_name]), label="Histogram")
    utils.prettify_ax(ax[1], title=symbol + "MACD", center=True, start_date=start_date, end_date=end_date)

    utils.prettify_fig(fig)
    fig.savefig(utils.get_file_path(config.ta_graphs_path, get_signal_name(period) + graph_filename, symbol=symbol))
    utils.debug(fig)
    return fig, ax
def generate_signals(symbol=default_symbols, period=default_periods, refresh=False, start_date=config.start_date, end_date=config.end_date):
    """Generates trading signals for a short-volatility and a long-volatility symbol from the volatility forecast

    The forecast column is recomputed through volforecast() and read back
    from its table. A signal is produced on each date where the forecast
    crosses zero; the two symbols receive opposite signals. The signal
    column is written to each symbol's indicator table.

    Parameters:
        symbol : list of str, optional
            First value is the short volatility symbol, second is the long volatility symbol
        period : list of int, optional
        refresh : bool, optional
        start_date : date, optional
        end_date : date, optional

    Returns:
        series, series
            The signal column for the short volatility symbol and for the long volatility symbol
    """

    def _load_table(vol_symbol):
        """Loads the symbol's indicator table, or its closing prices when that table is stale"""
        ta_path = utils.get_file_path(config.ta_data_path, table_filename, symbol=vol_symbol)
        if not utils.refresh(ta_path, refresh=refresh):
            return pd.read_csv(ta_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]
        price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=vol_symbol)
        if utils.refresh(price_path, refresh=refresh):
            prices.download_data_from_yahoo(vol_symbol, start_date=start_date, end_date=end_date)
        return pd.read_csv(price_path, usecols=["Date", "Close"], index_col="Date", parse_dates=["Date"])[start_date:end_date]

    short_vol_symbol = symbol[0]
    long_vol_symbol = symbol[1]

    volforecast(period=period, refresh=refresh, start_date=start_date, end_date=end_date)
    df = pd.read_csv(utils.get_file_path(config.ta_data_path, table_filename, symbol=""), index_col="Date", parse_dates=["Date"])[start_date:end_date]

    short_vol = _load_table(short_vol_symbol)
    long_vol = _load_table(long_vol_symbol)

    signal_column_name = get_signal_name()
    if signal_column_name not in short_vol.columns or signal_column_name not in long_vol.columns:
        forecast = df[signal_column_name]
        # near term volatility crossed below expected volatility, short VIX
        crossed_below = (forecast.shift(1) > 0) & (forecast < 0)
        # near term volatility crossed above expected volatility, long VIX
        crossed_above = (forecast.shift(1) < 0) & (forecast > 0)

        # Conditions are listed in the same order as ta.signals
        short_vol_conditions = [crossed_below, crossed_above, False, False]
        long_vol_conditions = [crossed_above, crossed_below, False, False]

        # The conditions are evaluated on the forecast table's dates. Attach
        # that index and align on each symbol's own dates, because a raw
        # positional assignment fails when the tables differ in length and
        # silently shifts signals when they only differ in dates. Dates the
        # forecast does not cover get the default signal.
        short_signals = pd.Series(np.select(short_vol_conditions, ta.signals, default=ta.default_signal), index=df.index)
        long_signals = pd.Series(np.select(long_vol_conditions, ta.signals, default=ta.default_signal), index=df.index)
        short_vol[signal_column_name] = short_signals.reindex(short_vol.index, fill_value=ta.default_signal)
        long_vol[signal_column_name] = long_signals.reindex(long_vol.index, fill_value=ta.default_signal)

        utils.debug(short_vol[signal_column_name])
        utils.debug(long_vol[signal_column_name])
        short_vol.to_csv(utils.get_file_path(config.ta_data_path, table_filename, symbol=short_vol_symbol))
        long_vol.to_csv(utils.get_file_path(config.ta_data_path, table_filename, symbol=long_vol_symbol))
    return short_vol[signal_column_name], long_vol[signal_column_name]
def volforecast(period=default_periods, refresh=False, start_date=config.start_date, end_date=config.end_date):
    """Calculates the volatility forecast, saves this data in a .csv file, and returns this data

    The forecast is the rolling mean over period[1] rows of historical
    volatility minus implied volatility. Historical volatility is the
    rolling standard deviation over period[0] rows of the log returns of
    historical_vol_symbol, times 100 * sqrt(252). Implied volatility is the
    close of implied_vol_symbol.

    Parameters:
        period : list of int, optional
            First value is the historical volatility window, second is the forecast smoothing window
        refresh : bool, optional
        start_date : date, optional
        end_date : date, optional

    Returns:
        series
            The volatility forecast column
    """

    # Data availability notes from the original author:
    #   UVXY and SVXY had their leverage changes on Feb 27 2018, data before then will not apply now
    #   UVXY and SVXY inception on Oct 7 2011
    #   VIX3M inception on Sept 18 2009, VIX6M inception on Jan 3 2008

    forecast_path = utils.get_file_path(config.ta_data_path, table_filename, symbol="")
    if not utils.refresh(forecast_path, refresh=refresh):
        df = pd.read_csv(forecast_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]
    else:
        # Always start from an empty table when the saved one is stale.
        # `df` used to be created only if a second staleness check on a
        # symbol-less prices file passed; otherwise it was never assigned
        # and the column checks below failed on an unbound name.
        df = pd.DataFrame()

    # don't refresh any volatility indices, yahoo doesn't work for them
    iv_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=implied_vol_symbol)
    if implied_vol_symbol in config.broken_symbols or not utils.refresh(iv_path, refresh=refresh):
        iv = pd.read_csv(iv_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]
    else:
        prices.download_data_from_yahoo(implied_vol_symbol, start_date=start_date, end_date=end_date)
        iv = pd.read_csv(iv_path, usecols=["Date", "Close"], index_col="Date", parse_dates=["Date"])[start_date:end_date]

    # The staleness check has to look at the same prices file that is read
    # afterwards. It previously combined the prices directory with the
    # indicator table filename, a different file from the one downloaded
    # and read in the other branch.
    hv_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=historical_vol_symbol)
    if not utils.refresh(hv_path, refresh=refresh):
        hv = pd.read_csv(hv_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]
    else:
        prices.download_data_from_yahoo(historical_vol_symbol, start_date=start_date, end_date=end_date)
        hv = pd.read_csv(hv_path, usecols=["Date", "Close"], index_col="Date", parse_dates=["Date"])[start_date:end_date]

    log_returns = np.log(hv["Close"] / hv["Close"].shift(1))
    hv = pd.DataFrame({"Close": log_returns.rolling(period[0]).std() * 100 * np.sqrt(252)})

    # this get_signal_name() column name doesn't incorporate period[0] and period[1]
    signal_column_name = get_signal_name()
    implied_column_name = implied_vol_symbol + "SMA" + str(period[1])
    historical_column_name = "HistoricalVolatility" + "SMA" + str(period[0])
    if signal_column_name not in df.columns:
        df[signal_column_name] = (hv["Close"] - iv["Close"]).rolling(period[1]).mean()
    if implied_column_name not in df.columns:
        df[implied_column_name] = iv["Close"]
    if historical_column_name not in df.columns:
        df[historical_column_name] = hv["Close"]

    df.to_csv(forecast_path)
    return df[signal_column_name]
def get_longest_conseq_increase_decrease(symbol, refresh=False, start_date=config.start_date, end_date=config.end_date):
    """Returns the longest streaks of consecutive increases and decreases in the close

    A day counts towards an increasing streak when its close rose and the
    previous day's close also rose (likewise for decreases), so k rising
    days in a row give a streak of k - 1. Any day that does not qualify
    ends the current streak.

    Parameters:
        symbol : str
        refresh : bool, optional
        start_date : date, optional
        end_date : date, optional

    Returns:
        dict
            "LongestConseqIncrease": int, the longest increasing streak
            "LongestConseqDecrease": int, the longest decreasing streak
    """

    def _longest_run(flags):
        """Returns the length of the longest unbroken run of true values"""
        longest = current = 0
        for flag in flags:
            # A false value ends the run. The earlier loop never reset its
            # counter, so streaks separated by a reversal were added together.
            current = current + 1 if flag else 0
            longest = max(longest, current)
        return longest

    price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
    if utils.refresh(price_path, refresh=refresh):
        prices.download_data_from_yahoo(symbol, start_date=start_date, end_date=end_date)
    df = pd.read_csv(price_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]

    change = df["Close"].diff()
    previous_change = change.shift()
    return {
        "LongestConseqIncrease": _longest_run((change > 0) & (previous_change > 0)),
        "LongestConseqDecrease": _longest_run((change < 0) & (previous_change < 0)),
    }
def is_date_in_bounds(self, symbol, date):
    """Returns true if the date is in bounds for the symbol, else false

    A date is in bounds when it falls between the first and last dates of
    the symbol's price table, inclusive. It does not have to be a date that
    has a row of its own. Price tables are cached in self.price_files after
    their first load.

    Parameters:
        symbol : str
        date : datetime

    Returns:
        bool
            Returns true if the date is in bounds for the symbol, else false
    """

    if symbol in self.price_files:
        df = self.price_files[symbol]
    else:
        price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
        if utils.refresh(price_path, refresh=False):
            prices.download_data_from_yahoo(symbol, start_date=self.start_date, end_date=self.end_date)
        df = pd.read_csv(price_path, index_col="Date", parse_dates=["Date"])[self.start_date:self.end_date]
        self.price_files[symbol] = df

    # A table without rows has no bounds, so no date can be inside them
    if df.empty:
        return False
    return bool(df.index[0] <= date <= df.index[-1])
def plot_volforecast(symbol=default_symbols, period=default_periods, refresh=False, start_date=config.start_date, end_date=config.end_date):
    """Plots the volatility forecast together with the short and long volatility symbols

    The figure has three panels: the forecast column, the close of the
    short volatility symbol, and the close of the long volatility symbol.
    The figure is also saved to the graphs directory.

    Parameters:
        symbol : list of str, optional
            First value is the short volatility symbol, second is the long volatility symbol
        period : list of int, optional
        refresh : bool, optional
        start_date : date, optional
        end_date : date, optional

    Returns:
        figure, axes
            A figure and axes containing the volatility forecast and both symbols
    """

    def _load_close(vol_symbol):
        """Loads the symbol's closing prices, downloading them first when stale"""
        # Check the same prices file that is read afterwards. The check used
        # to combine the prices directory with the indicator table filename.
        price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=vol_symbol)
        if utils.refresh(price_path, refresh=refresh):
            prices.download_data_from_yahoo(vol_symbol, start_date=start_date, end_date=end_date)
        return pd.read_csv(price_path, usecols=["Date", "Close"], index_col="Date", parse_dates=["Date"])[start_date:end_date]

    short_vol_symbol = symbol[0]
    long_vol_symbol = symbol[1]

    # Recompute the forecast table when it is stale, then read it back
    forecast_path = utils.get_file_path(config.ta_data_path, table_filename, symbol="")
    if utils.refresh(forecast_path, refresh=refresh):
        volforecast(period=period, refresh=refresh, start_date=start_date, end_date=end_date)
    df = pd.read_csv(forecast_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]

    short_vol = _load_close(short_vol_symbol)
    long_vol = _load_close(long_vol_symbol)

    fig, ax = plt.subplots(3, figsize=config.figsize)
    ax[0].plot(df.index, df[get_signal_name()], label=get_signal_name())
    # Each series is drawn against its own dates. Using the forecast table's
    # index for all three raised a length mismatch whenever a symbol covered
    # a different set of dates.
    ax[1].plot(short_vol.index, short_vol["Close"], label=short_vol_symbol)
    ax[2].plot(long_vol.index, long_vol["Close"], label=long_vol_symbol)

    utils.prettify_ax(ax[0], title=get_signal_name(), center=True, start_date=start_date, end_date=end_date)
    utils.prettify_ax(ax[1], title=short_vol_symbol, start_date=start_date, end_date=end_date)
    utils.prettify_ax(ax[2], title=long_vol_symbol, start_date=start_date, end_date=end_date)

    utils.prettify_fig(fig)
    fig.savefig(utils.get_file_path(config.ta_graphs_path, graph_filename, symbol=get_signal_name()))
    utils.debug(fig)
    return fig, ax
def rebalance(symbol=default_symbols, refresh=False, start_date=config.start_date, end_date=config.end_date):
    """Finds the split between two symbols with the best sharpe ratio over the lookback window

    Every split from 0/increments to increments/increments of the first
    symbol is tried, with the remainder in the second symbol. Each split's
    ratio is printed, followed by the best split and the last date used.
    The names increments, lookback, modified, index and bonds are read from
    the enclosing module.

    Parameters:
        symbol : list of str, optional
            First value is the index symbol, second is the bonds symbol
        refresh : bool, optional
        start_date : date, optional
        end_date : date, optional

    Returns:
        float
            The fraction to hold in the first symbol. Negative (-1 / increments)
            if no split produced a comparable ratio, e.g. when every ratio is NaN
    """

    def _load(sym):
        """Loads close and dividends for the symbol, downloading them first when stale"""
        price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=sym)
        if utils.refresh(price_path, refresh=refresh):
            prices.download_data_from_yahoo(sym, start_date=start_date, end_date=end_date)
        return pd.read_csv(price_path, usecols=["Date", "Close", "Dividends"], index_col="Date", parse_dates=["Date"])[start_date:end_date]

    index_df = _load(symbol[0])
    bonds_df = _load(symbol[1])

    # Daily total returns over the lookback window; these do not depend on
    # the split, so they are computed once outside the loop
    index_returns = index_df["Close"].add(index_df["Dividends"].cumsum()).pct_change().tail(lookback)
    bonds_returns = bonds_df["Close"].add(bonds_df["Dividends"].cumsum()).pct_change().tail(lookback)

    # Start below every possible ratio. A fixed -100 could sit above all of
    # the real values, which are scaled by up to 1000000.
    best_sharpe_ratio = float("-inf")
    best_portfolio_balance = -1
    for i in range(0, increments + 1):
        # The weighted sum is already the portfolio's daily return, so the
        # mean and deviation are taken from it directly. Applying
        # pct_change() to it again measured the change of the returns
        # rather than the returns themselves.
        portfolio_returns = (index_returns * i / increments).add(bonds_returns * (increments - i) / increments)
        volatility = portfolio_returns.std() * np.sqrt(252)
        # the last multiplier is to make the numbers more readable
        sharpe_ratio = portfolio_returns.mean() / (volatility ** (2.5 if modified else 1)) * (1000000 if modified else 100)
        print("The ratio of {:.2f} {}, {:.2f} {} had a sharpe ratio of {:.10f}".format(i / increments, index, (increments - i) / increments, bonds, sharpe_ratio))
        if sharpe_ratio > best_sharpe_ratio:
            best_sharpe_ratio = sharpe_ratio
            best_portfolio_balance = i

    print("The best ratio was: {} {}, {} {} with a sharpe ratio of {}".format(best_portfolio_balance / increments, index, (increments - best_portfolio_balance) / increments, bonds, best_sharpe_ratio))
    print("Date was: " + index_df.last_valid_index().strftime("%Y-%m-%d"))
    return best_portfolio_balance / increments
def plot_ema(symbol, period=default_periods, refresh=False, start_date=config.start_date, end_date=config.end_date):
    """Calculates the exponential moving average for each period for the given symbol, saves this data in a .csv file, and plots this data

    The EMA is a lagging trend indicator. The closing price and one line per
    period are drawn on a single axes, and the figure is also saved to the
    graphs directory. The period argument itself is not modified.

    Parameters:
        symbol : str
        period : int or list of int, optional
        refresh : bool, optional
        start_date : date, optional
        end_date : date, optional

    Returns:
        figure, axes
            A figure and axes containing the exponential moving average for the given symbol

    Raises:
        ta.InsufficientDataException
            If there are fewer rows than the largest period
    """

    ta_path = utils.get_file_path(config.ta_data_path, table_filename, symbol=symbol)
    if not utils.refresh(ta_path, refresh=refresh):
        df = pd.read_csv(ta_path, index_col="Date", parse_dates=["Date"])[start_date:end_date]
    else:
        price_path = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
        if utils.refresh(price_path, refresh=refresh):
            prices.download_data_from_yahoo(symbol, start_date=start_date, end_date=end_date)
        df = pd.read_csv(price_path, usecols=["Date", "Close"], index_col="Date", parse_dates=["Date"])[start_date:end_date]

    if isinstance(period, int):
        period = [period]
    # sorted() builds a new list. Sorting in place reordered the caller's
    # list, including the module-level default shared by other functions.
    period = sorted(period)

    if len(df) < period[-1]:
        raise ta.InsufficientDataException("Not enough data to compute a period length of " + str(period[-1]))

    fig, ax = plt.subplots(figsize=config.figsize)
    ax.plot(df.index, df["Close"], label="Price")

    for p in period:
        column_name = "EMA" + str(p)
        if column_name not in df.columns:
            df = df.join(ema(symbol, p, refresh=False, start_date=start_date, end_date=end_date))
        ax.plot(df.index, df[column_name], label=column_name)

    period_label = "-".join(str(p) for p in period)
    utils.prettify_ax(ax, title=symbol + "EMA" + period_label, start_date=start_date, end_date=end_date)

    utils.prettify_fig(fig)
    fig.savefig(utils.get_file_path(config.ta_graphs_path, period_label + graph_filename, symbol=symbol))
    utils.debug(fig)
    return fig, ax
def test_download_data_from_yahoo_index(self):
    """Smoke test: calls the Yahoo download with the configured index symbol

    There are no assertions, so this only exercises the call itself.
    """
    index_symbol = config.index
    p.download_data_from_yahoo(index_symbol)
def update_dates_file(start_date=config.start_date, end_date=config.end_date):
    """Rewrites the simulation dates file from the dates in the SPY price table

    Downloads SPY prices for the given range, reads the price table back, and
    writes only its Date index (no data columns) to the dates table.

    Parameters:
        start_date : date, optional
        end_date : date, optional
    """

    reference_symbol = "SPY"
    prices.download_data_from_yahoo(reference_symbol, start_date=start_date, end_date=end_date)

    price_file = utils.get_file_path(config.prices_data_path, prices.price_table_filename,
                                     symbol=reference_symbol)
    price_table = pd.read_csv(price_file, index_col="Date", parse_dates=["Date"])
    in_range = price_table[start_date:end_date]

    dates_file = utils.get_file_path(config.simulation_data_path, dates_table_filename)
    # columns=[] drops every data column, leaving just the index in the output.
    in_range.to_csv(dates_file, columns=[])
def run(self):
    """Runs the simulation

    Steps, in order:
        1. Downloads price data for every symbol and benchmark whose price file
           needs refreshing. A symbol or benchmark whose download raises
           RemoteDataError is dropped from its own list.
        2. Generates signals and writes the initial row of the log.
        3. For every date: collects dividends, sells holdings whose date is out
           of bounds, acts on each symbol's signal, logs the state, and updates
           the purchase size and stop loss.
        4. Records the remaining holdings as winners/losers, plots against the
           benchmark, prints summary statistics, and writes the full log and
           an actions-only log to .csv files.

    Raises:
        IndexError
            If a non-default signal is read for a date that is out of bounds for the symbol
        AttributeError, KeyError
            Re-raised from the date loop unless self.fail_gracefully is set
    """

    start_time = timer()

    # Iterate over copies because failing entries are removed inside the loops.
    for symbol in self.symbols.copy():
        try:
            if utils.refresh(utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol), refresh=self.refresh):
                # print("Downloading data for " + symbol)
                prices.download_data_from_yahoo(symbol, start_date=self.start_date, end_date=self.end_date)
        except RemoteDataError:
            # print("Invalid symbol: " + symbol)
            self.symbols.remove(symbol)
    for bench in self.benchmark.copy():
        try:
            if utils.refresh(utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=bench), refresh=self.refresh):
                prices.download_data_from_yahoo(bench, start_date=self.start_date, end_date=self.end_date)
        except RemoteDataError:
            # print("Invalid symbol: " + bench)
            # Drop the benchmark from the list being iterated, not from self.symbols.
            self.benchmark.remove(bench)

    self.times[download_data_time] = self.times[download_data_time] + timer() - start_time

    self.generate_signals()

    self.log.loc[self.dates[0]][cash_column_name, portfolio_column_name, actions_column_name, portfolio_value_column_name] = [self.cash, self.portfolio, "Initial ", self.portfolio_value(self.dates[0])]

    try:
        for date in self.dates:
            # print(date, flush=True)
            for symbol in self.portfolio.copy():
                self.get_dividends(symbol, date)
                if not self.is_date_in_bounds(symbol, date):
                    self.sell(symbol, date, sell_size=0)
            for symbol in self.symbols:  # might need a .copy() here but probably not
                signal = self.read_signal(symbol, date)
                if not self.is_date_in_bounds(symbol, date) and signal != ta.default_signal:
                    # This should never happen
                    print("Read a signal when no price exists for this date: {} {} {} {}".format(symbol, date, self.signal_name, signal))
                    raise IndexError
                if signal == ta.buy_signal:
                    self.buy(symbol, date, self.purchase_size)
                if signal == ta.sell_signal:
                    self.sell(symbol, date, sell_size=0)
                # Soft signals are only acted on when self.soft_signals is set.
                if signal == ta.soft_buy_signal and self.soft_signals:
                    self.buy(symbol, date, self.purchase_size)
                if signal == ta.soft_sell_signal and self.soft_signals:
                    self.sell(symbol, date, sell_size=0)
            self.log.loc[date][cash_column_name, portfolio_column_name, portfolio_value_column_name, total_commission_column_name, total_dividend_column_name] \
                = [self.cash, self.format_portfolio(date), self.portfolio_value(date), self.total_commissions, self.total_dividends]
            # Why does this line require str(self.portfolio)?
            # self.log.loc[date][cash_column_name, portfolio_column_name, portfolio_value_column_name] = [self.cash, str(self.portfolio), self.total_value(date)]
            self.update_purchase_size(date)
            self.update_stop_loss(date)
    except (AttributeError, KeyError) as e:
        if self.fail_gracefully:
            print(e)
            # NOTE(review): self.log[self.log.index] selects columns by the index
            # labels; a comparison on self.log.index may have been intended - confirm.
            self.log = self.log.loc[self.log[self.log.index] < date]
        else:
            raise

    # From here on `date` is the last date the loop above reached.
    for symbol in self.portfolio.copy():
        self.update_winners_losers(symbol, date, Operation.Sell)

    self.plot_against_benchmark(self.log, self.benchmark)

    self.times[total_time] = self.times[total_time] + timer() - start_time

    print()
    print(self.filename)
    print("Times: {}".format(self.format_times()))
    print(self.get_price_on_date.cache_info())
    # Resident memory of this process, converted from bytes to megabytes.
    print(str(psutil.Process(os.getpid()).memory_info().rss / float(2 ** 20)) + "mb")
    print("Dividends: {:.2f}".format(self.total_dividends))
    print("Commissions: {:.2f}".format(self.total_commissions))
    print("Total trades: {}".format(self.total_trades))
    print("Performance: {}".format(self.get_performance()))
    print("Winners/Losers: {}".format(self.winners_losers))
    print("Max Drawdown {:.2f}%".format(self.get_max_drawdown()))
    print("Sharpe ratios: {}".format(self.get_sharpe_ratios()))
    print("Betas: {}".format(self.get_betas()))

    # The summary is also stored in the actions column of the last row.
    self.log.loc[date][cash_column_name, portfolio_column_name, portfolio_value_column_name, actions_column_name] = \
        [self.cash, self.format_portfolio(date), self.portfolio_value(date),
         "Final: Performance: {} Times: {} Cache: {} Total Dividends: {:.2f} Total Commissions: {:.2f} Total Trades: {} Winners/Losers {} Max Drawdown {:.2f}% Sharpe Ratio {} Beta: {}"
         .format(self.get_performance(), self.format_times(), self.get_price_on_date.cache_info(),
                 self.total_dividends, self.total_commissions, self.total_trades, self.winners_losers,
                 self.get_max_drawdown(), self.get_sharpe_ratios(), self.get_betas())]

    self.log.to_csv(utils.get_file_path(config.simulation_data_path, self.filename + simulation_table_filename))
    # Keep only rows with a non-empty action for the second file.
    self.log = self.log.loc[self.log[actions_column_name] != ""]
    # self.log.dropna(subset=[actions_column_name], inplace=True)
    self.log.to_csv(utils.get_file_path(config.simulation_data_path, self.filename + simulation_actions_only_table_filename))
def plot_bb(symbol, period=default_period, std=default_std, refresh=False, start_date=config.start_date, end_date=config.end_date):
    """Plots the closing price of the given symbol with its bollinger bands and saves the figure
    The BB is a lagging volatility indicator.

    Parameters:
        symbol : str
        period : int, optional
        std : int, optional
        refresh : bool, optional
        start_date : date, optional
        end_date : date, optional

    Returns:
        figure, axes
            A figure and axes containing the bollinger bands for the given symbol

    Raises:
        ta.InsufficientDataException
            If the loaded data has fewer rows than period
    """

    ta_file = utils.get_file_path(config.ta_data_path, table_filename, symbol=symbol)
    if utils.refresh(ta_file, refresh=refresh):
        # Fall back to the raw price table, downloading it first if needed.
        price_file = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
        if utils.refresh(price_file, refresh=refresh):
            prices.download_data_from_yahoo(symbol, start_date=start_date, end_date=end_date)
        table = pd.read_csv(price_file, usecols=["Date", "Close"], index_col="Date", parse_dates=["Date"])
    else:
        table = pd.read_csv(ta_file, index_col="Date", parse_dates=["Date"])
    table = table[start_date:end_date]

    if len(table) < period:
        message = "Not enough data to compute a period length of " + str(period)
        raise ta.InsufficientDataException(message)

    fig, ax = plt.subplots(figsize=config.figsize)
    ax.plot(table.index, table["Close"], label="Price")

    has_bands = "Lower" in table.columns and "Upper" in table.columns
    if not has_bands:
        bands = bb(symbol, period, std, refresh=False, start_date=start_date, end_date=end_date)
        table = table.join(bands)

    for band_name in ("Lower", "Upper"):
        ax.plot(table.index, table[band_name], label=band_name, color="skyblue")
    ax.fill_between(table.index, table["Lower"], table["Upper"], color='lightskyblue')

    utils.prettify_ax(ax, title=symbol + "BB", start_date=start_date, end_date=end_date)
    utils.prettify_fig(fig)

    graph_file = utils.get_file_path(config.ta_graphs_path,
                                     get_signal_name(period, std) + graph_filename,
                                     symbol=symbol)
    fig.savefig(graph_file)
    utils.debug(fig)
    return fig, ax
def bb(symbol, period=default_period, std=default_std, refresh=False, start_date=config.start_date, end_date=config.end_date):
    """Calculates the bollinger bands for the given symbol, saves this data in a .csv file, and returns the bands
    The BB is a lagging volatility indicator.

    The bands are the rolling mean of the close plus and minus std times the
    rolling standard deviation, both over period rows. They are only computed
    when the loaded table lacks a "Lower" or "Upper" column.

    Parameters:
        symbol : str
        period : int, optional
        std : int, optional
        refresh : bool, optional
        start_date : date, optional
        end_date : date, optional

    Returns:
        list of series
            The "Lower" and "Upper" columns, in that order
    """

    ta_file = utils.get_file_path(config.ta_data_path, table_filename, symbol=symbol)
    if utils.refresh(ta_file, refresh=refresh):
        # Fall back to the raw price table, downloading it first if needed.
        price_file = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
        if utils.refresh(price_file, refresh=refresh):
            prices.download_data_from_yahoo(symbol, start_date=start_date, end_date=end_date)
        table = pd.read_csv(price_file, usecols=["Date", "Close"], index_col="Date", parse_dates=["Date"])
    else:
        table = pd.read_csv(ta_file, index_col="Date", parse_dates=["Date"])
    table = table[start_date:end_date]

    if not ("Lower" in table.columns and "Upper" in table.columns):
        # min_periods=period leaves the first period - 1 rows empty.
        window = table["Close"].rolling(window=period, min_periods=period)
        table["Mid"] = window.mean()
        table["Std"] = window.std()
        band_width = std * table["Std"]
        table["Lower"] = table["Mid"] - band_width
        table["Upper"] = table["Mid"] + band_width

    for band_name in ("Lower", "Upper"):
        utils.debug(table[band_name])

    table.to_csv(ta_file)
    return [table["Lower"], table["Upper"]]
def plot_rsi(symbol, period=default_period, thresholds=default_thresholds, refresh=False, start_date=config.start_date, end_date=config.end_date):
    """Plots the closing price and the relative strength index of the given symbol on two axes and saves the figure
    The RSI is a leading momentum indicator.

    Parameters:
        symbol : str
        period : int, optional
        thresholds: dict
            Must contain keys "Low" and "High", both with a value between 0 and 100
        refresh : bool, optional
        start_date : date, optional
        end_date : date, optional

    Returns:
        figure, axes
            A figure and a pair of axes: the price on the first, the relative strength index on the second

    Raises:
        ta.InsufficientDataException
            If the loaded data has fewer rows than period
    """

    ta_file = utils.get_file_path(config.ta_data_path, table_filename, symbol=symbol)
    if utils.refresh(ta_file, refresh=refresh):
        # Fall back to the raw price table, downloading it first if needed.
        price_file = utils.get_file_path(config.prices_data_path, prices.price_table_filename, symbol=symbol)
        if utils.refresh(price_file, refresh=refresh):
            prices.download_data_from_yahoo(symbol, start_date=start_date, end_date=end_date)
        table = pd.read_csv(price_file, usecols=["Date", "Close"], index_col="Date", parse_dates=["Date"])
    else:
        table = pd.read_csv(ta_file, index_col="Date", parse_dates=["Date"])
    table = table[start_date:end_date]

    if len(table) < period:
        message = "Not enough data to compute a period length of " + str(period)
        raise ta.InsufficientDataException(message)

    fig, axes = plt.subplots(2, figsize=config.figsize)
    price_ax = axes[0]
    rsi_ax = axes[1]

    price_ax.plot(table.index, table["Close"], label="Price")
    utils.prettify_ax(price_ax, title=symbol + "Price", start_date=start_date, end_date=end_date)

    rsi_column = "RSI" + str(period)
    if rsi_column not in table.columns:
        table = table.join(
            rsi(symbol, period, refresh=False, start_date=start_date, end_date=end_date))

    rsi_ax.plot(table.index, table[rsi_column], label=rsi_column)

    # Horizontal reference lines drawn as constant series over the same dates.
    row_count = len(table.index)
    rsi_ax.plot(table.index, [thresholds["Low"]] * row_count, label="Oversold", color="red")
    rsi_ax.plot(table.index, [50] * row_count, color="black")
    rsi_ax.plot(table.index, [thresholds["High"]] * row_count, label="Overbought", color="red")

    utils.prettify_ax(rsi_ax, title=symbol + rsi_column, start_date=start_date, end_date=end_date)
    utils.prettify_fig(fig)

    graph_file = utils.get_file_path(config.ta_graphs_path,
                                     get_signal_name(period) + graph_filename,
                                     symbol=symbol)
    fig.savefig(graph_file)
    utils.debug(fig)
    return fig, axes