def get_price_data(ticker_ls, end_date, look_back_mths):
    """
    Return a dataframe of daily price data (adjusted close price), sourced from Yahoo Finance

    Args:
        ticker_ls: <list> of tickers
        end_date: <dt.datetime> of end date to apply look back months to
        look_back_mths: <int> number of months from end date to define the starting date to pull data from

    Returns:
        <pd.DataFrame> containing the price data, indexed by business day between the start and end date with
        one column per ticker; missing values are forward filled
    """
    # format both window bounds once: they are reused for the index and for every data pull
    start_date_str = (end_date - relativedelta(months=look_back_mths)).strftime(const.DATE_STR_FORMAT)
    end_date_str = end_date.strftime(const.DATE_STR_FORMAT)

    df_price_data = pd.DataFrame(
        index=pd.date_range(start=start_date_str, end=end_date_str, freq='B'),
        columns=ticker_ls)

    # column assignment aligns each pulled series onto the business day index
    for asset in ticker_ls:
        df_price_data[asset] = extract_data(asset, start_date_str, end_date_str)[const.ADJ_CLOSE]

    # DataFrame.ffill is the supported spelling of the deprecated fillna(method='ffill')
    df_price_data.ffill(inplace=True)
    return df_price_data
def __init__(self, ticker1, start_date, end_date=None):
    """
    Pull the price data of the given ticker and download the VSTOXX (V2TX) history to regress it against

    Args:
        ticker1: <str> ticker of stock to run regression against VSTOXX on
        start_date: <str> YYYY-MM-DD format
        end_date: <str> YYYY-MM-DD format, defaults to today
    """
    # resolve "today" at call time: a default written in the signature is evaluated only once, when the
    # module is imported, and would go stale in a long running process
    if end_date is None:
        end_date = dt.datetime.today().strftime('%Y-%m-%d')

    self.ticker1_data = extract_data(ticker1, start_date, end_date)

    vstoxx_url = r'http://www.stoxx.com/download/historical_values/h_vstoxx.txt'
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    vstoxx_file = os.path.join(cur_dir, 'data/vstoxx.txt')
    # urlretrieve does not create missing folders, so make sure the target folder exists first
    os.makedirs(os.path.dirname(vstoxx_file), exist_ok=True)
    urlretrieve(vstoxx_url, vstoxx_file)

    # header=2 takes the third line of the file as the column names; dates in the first column are day first
    vstoxx_data = pd.read_csv(vstoxx_file, index_col=0, header=2, parse_dates=True, sep=',', dayfirst=True)
    # only the V2TX column is kept, as a single column dataframe
    self.ticker2_data = vstoxx_data['V2TX'].to_frame()

    self.ticker1_nme = ticker1
    self.ticker2_nme = 'V2TX'
def test_simple_stock_data_plot(self):
    """
    Smoke test of SimpleStockDataPlot.py: each entry point only has to run without raising
    """
    from SimpleStockDataPlot import plot_bar_volume, PlotStockData, extract_data

    # (ticker, start date, end date) shared by every call below
    query = ('AAPL', '2020-03-01', '2021-03-01')

    plot_bar_volume(*query)

    # build the plot object, then invoke it
    plotter = PlotStockData(*query)
    plotter()

    # the pulled data is not inspected, the call only needs to succeed
    extract_data(*query)
def __init__(self, ticker, start_date, end_date=None, res_path=False):
    """
    Create candlestick plot of given stock

    Args:
        ticker: <str> stock ticker
        start_date: <str> YYYY-MM-DD start date for the data pull and display
        end_date: <str> YYYY-MM-DD end date for the data pull and display. Defaults to today if not provided.
        res_path: <str> path to results folder to save image of the resulting plot. If False, will not save image.
    """
    # resolve "today" at call time: a default written in the signature is evaluated only once, when the
    # module is imported, and would go stale in a long running process
    if end_date is None:
        end_date = dt.datetime.today().strftime('%Y-%m-%d')

    self.ticker = ticker
    self.start_date = start_date
    self.end_date = end_date
    self.res_path = res_path
    # the data is pulled once at construction time and stored on the instance
    self.data = extract_data(ticker, start_date, end_date)
def get_stock_return_data(self):
    """
    Get the return data of the self.ticker stock from Yahoo Finance

    Side effect: self.factor_data is trimmed so that it starts at the date of the first ticker return.

    Returns:
        <pd.DataFrame> with a single column named after self.ticker holding the month on month returns
    """
    # the last date covered by the factor data bounds the price pull; only that entry needs formatting
    end_date = self.factor_data.index[-1].strftime(const.DATE_STR_FORMAT)

    df = extract_data(self.ticker, start_date=False, end_date=end_date)[const.ADJ_CLOSE].to_frame()
    # DataFrame.ffill is the supported spelling of the deprecated fillna(method='ffill')
    df = df.ffill().rename(columns={const.ADJ_CLOSE: self.ticker})

    # monthly data only and last calendar date per month & calculate returns; the first row is dropped
    # because pct_change has no previous month to compare it against
    # NOTE(review): pandas >= 2.2 deprecates the 'M' alias in favour of 'ME'; kept as-is so older pandas
    # versions keep working - confirm the minimum supported pandas version before switching
    df = df.resample('M').last().pct_change()[1:]

    # only keep factor_data for dates following the earliest ticker return data date available
    first_return_pos = self.factor_data.index.get_loc(df.index[0])
    self.factor_data = self.factor_data[first_return_pos:]
    return df
def plot_corr_mat(ls_tickers, start_date, end_date, res_path=False):
    """
    Plot heatmap showing pearson's correlation matrix between inputted stocks

    Args:
        ls_tickers: <list> of tickers to produce correlation matrix
        start_date: <str> YYYY-MM-DD start date for data
        end_date: <str> YYYY-MM-DD end date for data
        res_path: <str> output path to save plot to, defaults to False i.e. not saved
    """
    df_res_ls = []
    # pull data, tagging every frame with its ticker so the stacked rows can be told apart
    for ticker in ls_tickers:
        df = extract_data(ticker, start_date, end_date)
        df[consts.TICKER] = ticker
        df_res_ls.append(df)
    df = pd.concat(df_res_ls)
    df.reset_index(inplace=True)

    # pivot to reformat data to have date as index, tickers as the columns, and adjusted close as the values;
    # the arguments are passed by keyword because pandas 2.x no longer accepts them positionally
    df = df.pivot(index=consts.DATE, columns=consts.TICKER, values=consts.ADJ_CLOSE)

    # calculate pearson correlation
    df = df.corr(method='pearson')

    # plotting
    plt.figure()
    seaborn.heatmap(df, cmap='RdYlGn', annot=True)
    if res_path:
        plt.savefig(os.path.join(res_path, 'corr_mat_{}_{}'.format(start_date, end_date)))
    # .show() must go after .savefig() or else saved figure will be blank
    plt.show()
def backtesting(benchmark_index, signal_tolerance, start_date, end_date=None):
    """
    Backtest performance of a stock if following a signal based strategy to buy if the monthly rolling average
    exceeds a signal tolerance over the annual rolling average. Rules for sell and hold follow similarly.

    Prints the count of each signal and shows two line plots: the daily signal values, and the cumulative
    returns of the stock itself ('Market') against those of the strategy ('Custom').

    Args:
        benchmark_index: <str> ticker for the stock of interest
        signal_tolerance: <int> tolerance controlling the buy, sell, hold actions
        start_date: <str> YYYY-MM-DD start date for data
        end_date: <str> YYYY-MM-DD end date for data. Defaults to today if not provided.
    """
    # resolve "today" at call time: a default written in the signature is evaluated only once, when the
    # module is imported, and would go stale in a long running process
    if end_date is None:
        end_date = datetime.today().strftime('%Y-%m-%d')

    data = extract_data(ticker=benchmark_index, start_date=start_date, end_date=end_date)

    # Get the rolling average annual and per month
    data[COL_NUM_TRADE_DAYS_YR_TREND] = data[CLOSE].rolling(NUM_TRADE_DAYS_PER_YR).mean()
    data[COL_NUM_TRADE_DAYS_MONTH_TREND] = data[CLOSE].rolling(NUM_TRADE_DAYS_PER_MONTH).mean()

    # work around the annual trend for standard deviation todo think about this, instead of using signal tolerance
    # standard_dev = np.floor(data[COL_NUM_TRADE_DAYS_YR_TREND].std())

    # trend difference: monthly rolling average minus annual rolling average
    diff_col = '{}-{}_difference'.format(NUM_TRADE_DAYS_PER_MONTH, NUM_TRADE_DAYS_PER_YR)
    data[diff_col] = data[COL_NUM_TRADE_DAYS_MONTH_TREND] - data[COL_NUM_TRADE_DAYS_YR_TREND]

    # assuming that we neglect transaction costs and market liquidity
    # signal is 0 inside the tolerance band, 1 above it and -1 below it; rows where the difference is NaN
    # (rolling windows not yet filled) match none of the conditions and keep a NaN signal
    data.loc[abs(data[diff_col]) <= signal_tolerance, SIGNAL_VAL] = 0
    data.loc[data[diff_col] > signal_tolerance, SIGNAL_VAL] = 1
    data.loc[data[diff_col] < -signal_tolerance, SIGNAL_VAL] = -1

    # exhaustive mapping for signal name
    data[SIGNAL_NAME] = data[SIGNAL_VAL].map(SIGNALS_DICT)

    print(data[SIGNAL_NAME].value_counts())
    show_line_plot(data[SIGNAL_VAL],
                   title='Trading Day Signals',
                   x_label='Date',
                   y_label='Signal',
                   plot_arr=True)

    # Compare the returns
    data, original_returns_col_name = log_return(data, CLOSE)

    # apply proposed trading signals, shift up one day, returns based on t-1
    data['Custom'] = data[original_returns_col_name] * data[SIGNAL_VAL].shift(1)

    # accumulate both return series so they can be compared on one plot
    return_cols = [original_returns_col_name, 'Custom']
    data[return_cols] = data[return_cols].cumsum()
    data.rename(columns={original_returns_col_name: 'Market'}, inplace=True)

    show_line_plot(data[['Market', 'Custom']],
                   title='Backtesting of {}'.format(benchmark_index),
                   x_label='Date',
                   y_label='Cumulative Returns',
                   plot_arr=True)