# NOTE: these listings assume module-level imports of datetime, os, numpy as np,
# pandas as pd, psycopg2 and seaborn as sns, plus the project's shared helper
# module aliased as cm (CM in the second listing); the import lines are not
# part of this section.

def main():
    # DB INFO FILE - host, user, password, db_name
    db_credential_info_p = "\\" + "database_info.txt"

    # create our instance variables for host, username, password and database name
    db_host, db_user, db_password, db_name = cm.load_db_credential_info(
        db_credential_info_p)
    conn = psycopg2.connect(host=db_host, database=db_name,
                            user=db_user, password=db_password)

    cur_path = os.getcwd()
    # these parameters impact the file name and sub-folder to gather data from
    params = "_TimeLimit_30"
    results_file = cur_path + "\\PairsResults" + params + "\\MasterResults.txt"

    # load results_file into a pandas dataframe
    df_res = pd.read_table(results_file, delimiter=",",
                           names=('Trade_Id', 'Entry_Date', 'Position',
                                  'Ticker1', 'Ticker2', 'Pos1', 'Pos2',
                                  'Ratio', 'Exit_Date', 'Avg_Day', 'Max_Day',
                                  'Min_Day', 'Tr_Length', 'Total_PnL'),
                           index_col=False)

    # TRADE STATISTICS - compute and output trade stats
    trade_statistics = trade_stats('trade', df_res, 'Total_PnL')

    # DAILY STATISTICS
    dly_stats_df = df_res[['Trade_Id', 'Entry_Date', 'Ticker1', 'Ticker2']]

    # back test start_date and end_date as datetime objects
    start_yr = 2006
    end_yr = 2017
    mth_ = 12
    start_dt_day = cm.fetch_last_day_mth(start_yr, mth_, conn)
    end_dt_day = cm.fetch_last_day_mth(end_yr, mth_, conn)
    start_dt = datetime.date(start_yr, mth_, start_dt_day)
    end_dt = datetime.date(end_yr, mth_, end_dt_day)

    daily_pnl, daily_statistics = daily_stats(dly_stats_df, start_dt, end_dt, params)

    # write our data to text files
    f_name = "daily_results" + params
    f_name2 = "model_daily_stats" + params
    f_name3 = "model_trade_stats" + params
    cm.write_results_text_file(f_name, daily_pnl)
    cm.write_results_text_file(f_name2, daily_statistics)
    cm.write_results_text_file(f_name3, trade_statistics)
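# The cm.load_db_credential_info helper used in every listing here isn't shown
# in this section. A minimal sketch of what it might look like follows -- the
# single-line, comma-separated file layout (host, user, password, db_name) is
# an assumption inferred from how the return value is unpacked above.
import os

def load_db_credential_info(credential_file_rel_path):
    """Return (host, user, password, db_name) read from a credentials file
    located relative to the current working directory."""
    full_path = os.getcwd() + credential_file_rel_path
    with open(full_path, 'r') as cred_file:
        # assumed format: one line of comma-separated values
        host, user, password, db_name = cred_file.readline().strip().split(',')
    return host, user, password, db_name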
def main():
    db_credential_info_p = "\\" + "database_info.txt"

    db_host, db_user, db_password, db_name = CM.load_db_credential_info(db_credential_info_p)
    conn = psycopg2.connect(host=db_host, database=db_name,
                            user=db_user, password=db_password)

    year_list = [2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016]

    for year_ in year_list:
        # find the last trading day for a given year
        curr_year = year_
        start_year = curr_year - 3
        last_tr_day_start = CM.fetch_last_day_mth(start_year, conn)
        last_tr_day_end = CM.fetch_last_day_mth(curr_year, conn)

        # create datetime objects of the date range
        start_date = datetime.date(start_year, 12, last_tr_day_start)
        end_date = datetime.date(curr_year, 12, last_tr_day_end)

        # # retrieve a unique list of our tickers
        # stock_tuple = CM.load_db_tickers_start_date(start_date, conn)
        # stock_list = []
        #
        # for row in stock_tuple:
        #     stock_list.append(row[0])
        #
        # # find SPY ETF index in our stock_list & remove it
        # spy_idx = stock_list.index('SPY')
        # stock_list.pop(spy_idx)

        # DEFINE OUR STOCK LIST FOR THE EFFICIENT FRONTIER
        # stock_list = ['SPY', 'IEF', 'GSG', 'AAPL', 'MSFT', 'MMM']
        stock_list = ['NOC', 'AAPL', 'MSFT', 'MMM']

        # load each ticker with the dates provided
        loaded_data = CM.load_df_stock_data_array_index_date(stock_list, start_date,
                                                             end_date, conn)

        # merge our list of pd dataframes (each index is a DF of a stock) into one df
        merged_df = pd.concat(loaded_data, axis=1)

        daily_rtns = merged_df.pct_change()
        daily_rtns_log = np.log(1 + daily_rtns)

        # correlation heatmap; renders inline in a notebook (call plt.show()
        # when running as a plain script)
        correlation_df = daily_rtns_log.corr()
        sns.heatmap(correlation_df, annot=True,
                    xticklabels=correlation_df.columns.values,
                    yticklabels=correlation_df.columns.values,
                    cmap="Greens")

        max_sharpe_weights, max_return_weights, max_volatility_weights, \
            min_volatility_weights = efficient_frontier(merged_df, stock_list,
                                                        curr_year, iterations=50000)

        # write the returned weight dicts to individual text files
        write_dict_to_text("max_sharpe/max_sharpe_weights_{}.txt".format(curr_year), max_sharpe_weights)
        write_dict_to_text("max_rtn/max_return_weights_{}.txt".format(curr_year), max_return_weights)
        write_dict_to_text("max_vol/max_volatility_weights_{}.txt".format(curr_year), max_volatility_weights)
        write_dict_to_text("min_vol/min_volatility_weights_{}.txt".format(curr_year), min_volatility_weights)
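# efficient_frontier is called above but not defined in this section. Below is
# a hedged sketch of the usual Monte Carlo approach implied by its signature
# and return values; the 252-day annualization factor and the use of log
# returns (matching daily_rtns_log above) are assumptions, and curr_year is
# kept only for signature parity (presumably used for labelling elsewhere).
import numpy as np

def efficient_frontier(merged_df, stock_list, curr_year, iterations=50000):
    """Sample random portfolio weights and return weight dicts for the
    max-Sharpe, max-return, max-volatility and min-volatility portfolios."""
    tickers = list(merged_df.columns)
    log_rtns = np.log(1 + merged_df.pct_change()).dropna()
    mean_rtns = log_rtns.mean().values * 252   # annualized mean log returns
    cov = log_rtns.cov().values * 252          # annualized covariance matrix

    candidates = []
    for _ in range(iterations):
        weights = np.random.random(len(tickers))
        weights /= weights.sum()               # normalize so weights sum to 1
        port_rtn = float(weights @ mean_rtns)
        port_vol = float(np.sqrt(weights @ cov @ weights))
        candidates.append((port_rtn / port_vol, port_rtn, port_vol, weights))

    def as_dict(entry):
        return dict(zip(tickers, entry[3]))

    return (as_dict(max(candidates, key=lambda c: c[0])),   # max Sharpe ratio
            as_dict(max(candidates, key=lambda c: c[1])),   # max return
            as_dict(max(candidates, key=lambda c: c[2])),   # max volatility
            as_dict(min(candidates, key=lambda c: c[2])))   # min volatility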
def backtest_momentum(ticker_dict_by_year, conn):
    """
    Return a dictionary where each key is a year and each value is the list of
    annual returns of the stocks held in that year's portfolio.

    args:
        ticker_dict_by_year: dict of keys [year] and values [list of tickers
                             to hold in that year's portfolio]
        conn: a Postgres DB connection object
    returns:
        dictionary where each key is a year and each value is a list of the
        annual returns of the stocks held
    """
    annual_collector = {}

    for key, value in ticker_dict_by_year.items():
        # find the last trading day for our years range
        year_start = key - 1
        year = key
        print("Working on {} momentum portfolio".format(year))
        last_tr_day_start = cm.fetch_last_day_mth(year_start, conn)
        last_tr_day_end = cm.fetch_last_day_mth(year, conn)
        mth = 12
        trd_start_dt = datetime.date(year_start, mth, last_tr_day_start)
        trd_end_dt = datetime.date(year, mth, last_tr_day_end)

        # need to convert the list of tickers to a tuple of tickers
        tuple_ticker_values = tuple(value)
        year_data = cm.load_df_stock_data_array(tuple_ticker_values,
                                                trd_start_dt, trd_end_dt, conn)

        for ticker_data in year_data:
            # column 0 is assumed to hold the date, column 1 the ticker's price
            ticker = ticker_data.columns[1]
            # annual return = (last price - first price) / first price
            annual_return = [(ticker_data[ticker].iloc[-1] - ticker_data[ticker].iloc[0])
                             / ticker_data[ticker].iloc[0]]
            print('Ticker {} annual return {}'.format(ticker, annual_return))
            if year not in annual_collector:
                annual_collector[year] = annual_return
            else:
                annual_collector[year] = annual_collector[year] + annual_return

    return annual_collector
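# fetch_last_day_mth is called throughout these scripts with (year, conn) to
# get the day-of-month of the last trading day in December of the given year
# (the first listing passes a month explicitly, suggesting a more general
# variant exists). A speculative sketch follows; the daily_data table and
# date_price column names are assumptions -- substitute your own schema.
import datetime

def fetch_last_day_mth(year, conn):
    """Return the day of the last trading date in December of `year`
    recorded in the database."""
    cur = conn.cursor()
    cur.execute("SELECT MAX(date_price) FROM daily_data "
                "WHERE date_price BETWEEN %s AND %s",
                (datetime.date(year, 12, 1), datetime.date(year, 12, 31)))
    last_date = cur.fetchone()[0]  # assumes at least one December row exists
    cur.close()
    return last_date.day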
def main():
    skip_etfs = True

    # create a path version of our text file
    db_credential_info_p = "\\" + "database_info.txt"

    # create our instance variables for host, username, password and database name
    db_host, db_user, db_password, db_name = cm.load_db_credential_info(
        db_credential_info_p)
    conn = psycopg2.connect(host=db_host, database=db_name,
                            user=db_user, password=db_password)

    year_array = list(range(2004, 2015))

    for yr in year_array:
        # create a pairs file for each two-year chunk in our range
        year = yr
        end_year = year + 2

        # find the last trading day for our years range
        last_tr_day_start = cm.fetch_last_day_mth(year, conn)
        last_tr_day_end = cm.fetch_last_day_mth(end_year, conn)

        # date range to pull data from
        start_dt = datetime.date(year, 12, last_tr_day_start)
        end_dt = datetime.date(end_year, 12, last_tr_day_end)
        start_dt_str = start_dt.strftime("%Y%m%d")
        end_dt_str = end_dt.strftime("%Y%m%d")

        # list of stocks and their sector
        list_of_stocks = cm.load_db_tickers_sectors(start_dt, conn)

        # dict: key = sector, values = array of all tickers pertaining to that sector
        sector_dict = cm.build_dict_of_arrays(list_of_stocks)

        # pairs that pass the cointegration test; written to text files later
        passed_pairs = {}
        # all_failed_pairs = []

        for sector, ticker_arr in sector_dict.items():
            if skip_etfs and sector != "ETF":
                # append SPY to each sub-array so that pairs flagged as
                # cointegrated aren't driven by a common third variable (the market)
                ticker_arr.append('SPY')
                data_array_of_dfs = cm.load_df_stock_data_array(
                    ticker_arr, start_dt, end_dt, conn)
                merged_data = cm.data_array_merge(data_array_of_dfs)
                scores, pvalues, pairs = cm.find_cointegrated_pairs(merged_data)

                # seaborn heatmap for each sector within each range of time;
                # uncomment this section to print out the heatmaps in an
                # iPython console
                # confidence_level = 1 - 0.01
                # m = [0, 0.2, 0.4, 0.6, 0.8, 1]
                # plt.figure(figsize=(min(10, len(pvalues)), min(10, len(pvalues))))
                # seaborn.heatmap(pvalues, xticklabels=ticker_arr,
                #                 yticklabels=ticker_arr, cmap='RdYlGn_r',
                #                 mask=(pvalues >= confidence_level))
                # plt.show()

                new_pairs = cm.remove_ticker('SPY', pairs)
                passed_pairs[sector] = new_pairs
                print("Complete sector {0} for date range: {1}-{2}".format(
                    sector, start_dt_str, end_dt_str))

        f_name = "coint_method_pairs_{0}".format(end_dt_str)
        cm.write_dict_text(f_name, passed_pairs)
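# cm.find_cointegrated_pairs is the core of the listing above but isn't shown.
# Below is a hedged sketch based on its call signature and the standard
# statsmodels Engle-Granger pairwise test; the 0.01 significance cutoff is an
# assumption matching the 99% confidence level in the commented-out heatmap
# code, and every column of merged_data is assumed to be a price series.
import numpy as np
from statsmodels.tsa.stattools import coint

def find_cointegrated_pairs(merged_data, significance=0.01):
    """Run a cointegration test on every column pair of a price dataframe;
    return the score matrix, p-value matrix and the passing ticker pairs."""
    n = merged_data.shape[1]
    keys = merged_data.columns
    scores = np.zeros((n, n))
    pvalues = np.ones((n, n))
    pairs = []
    for i in range(n):
        for j in range(i + 1, n):
            score, pvalue, _ = coint(merged_data[keys[i]], merged_data[keys[j]])
            scores[i, j] = score
            pvalues[i, j] = pvalue
            if pvalue < significance:
                pairs.append((keys[i], keys[j]))
    return scores, pvalues, pairs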
def main():
    # main function to build each year's momentum portfolio
    skip_etfs = True

    db_credential_info_p = "\\" + "database_info.txt"

    # create our instance variables for host, username, password and database name
    db_host, db_user, db_password, db_name = cm.load_db_credential_info(
        db_credential_info_p)
    conn = psycopg2.connect(host=db_host, database=db_name,
                            user=db_user, password=db_password)

    # original range
    year_array = list(range(2004, 2015))
    # year_array = list(range(2004, 2006))

    # collect each year's stocks to hold. key = year, values = list of tickers
    ticker_dict_by_year = {}

    for yr in year_array:
        # work through one-year chunks in our range
        year = yr
        end_year = year + 1

        # find the last trading day for our years range
        last_tr_day_start = cm.fetch_last_day_mth(year, conn)
        last_tr_day_end = cm.fetch_last_day_mth(end_year, conn)

        # date range to pull data from
        start_dt = datetime.date(year, 12, last_tr_day_start)
        end_dt = datetime.date(end_year, 12, last_tr_day_end)
        start_dt_str = start_dt.strftime("%Y%m%d")
        end_dt_str = end_dt.strftime("%Y%m%d")

        # list of stocks and their sector
        list_of_stocks = cm.load_db_tickers_sectors(start_dt, conn)

        # dict: key = sector, values = array of all tickers pertaining to that sector
        sector_dict = cm.build_dict_of_arrays(list_of_stocks)

        for sector, ticker_arr in sector_dict.items():
            if skip_etfs and sector != "ETF":
                # these picks form next year's portfolio
                next_year = end_year + 1
                data_array_of_dfs = cm.load_df_stock_data_array(
                    ticker_arr, start_dt, end_dt, conn)
                merged_data = cm.data_array_merge(data_array_of_dfs)

                # simple return of each stock over the look-back window
                return_data_series = (merged_data.iloc[-1] - merged_data.iloc[0]) / merged_data.iloc[0]
                # keep the five best performers per sector
                top_five = return_data_series.nlargest(5).index.tolist()

                if next_year not in ticker_dict_by_year:
                    ticker_dict_by_year[next_year] = top_five
                else:
                    ticker_dict_by_year[next_year] = ticker_dict_by_year[next_year] + top_five
                print("Done {}: {}".format(end_year, sector))

    # annual returns of all stocks per year
    portfolio_performance = backtest_momentum(ticker_dict_by_year, conn)

    # file name to output
    f_name = "factor_momentum_annual_results" + ".txt"

    # write one line per year: year, comma-separated stock returns
    with open(f_name, 'w') as file_to_write:
        for year, returns_arr in portfolio_performance.items():
            str_rtns_list = ','.join(str(e) for e in returns_arr)
            file_to_write.write('{},{}\n'.format(year, str_rtns_list))

    print("LETS CHECK PERFORMANCE")
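# Both sector-driven listings above rely on cm.build_dict_of_arrays to group
# tickers by sector. A minimal sketch, assuming load_db_tickers_sectors
# returns an iterable of (ticker, sector) rows:

def build_dict_of_arrays(list_of_stocks):
    """Group tickers by sector: {sector: [ticker, ticker, ...]}."""
    sector_dict = {}
    for ticker, sector in list_of_stocks:
        sector_dict.setdefault(sector, []).append(ticker)
    return sector_dict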