def lead_lag_disc_regression(returns_data, period, lookback, shift):
    """Run a rolling lead-lag regression on period-aggregated returns.

    The cumulative sum of ``returns_data`` is split into consecutive chunks
    of roughly ``period`` rows, each chunk is summed column-wise, and the
    first column is shifted by ``shift`` aggregated rows before being
    regressed (without intercept) on the remaining columns via
    ``rolling_lr``.

    Args:
        returns_data: DataFrame whose first column is the regression target
            and whose remaining columns are the predictors.
        period: Number of input rows per aggregation chunk.
        lookback: Window length forwarded to ``rolling_lr``.
        shift: Number of aggregated rows by which the target column is
            shifted relative to the predictors.

    Returns:
        The value returned by ``rolling_lr`` for the shifted target and the
        predictors.

    Raises:
        ValueError: Raised by ``np.array_split`` when ``returns_data`` has
            fewer than ``period`` rows (zero chunks requested).
    """
    # np.array_split silently truncates a float section count; make the
    # truncation explicit so the chunk count is obvious to the reader.
    n_chunks = int(returns_data.shape[0] / period)

    # NOTE(review): the chunks are sums of *cumulative* returns, not of the
    # raw returns -- confirm this is the intended aggregation.
    week_chunks = np.array_split(returns_data.copy().cumsum(), n_chunks)

    # One aggregated row per chunk, labelled by the chunk's first index value.
    chunk_starts = [week.index[0] for week in week_chunks]
    aggregated = pd.DataFrame(
        [], columns=returns_data.columns, index=chunk_starts)
    for week in week_chunks:
        aggregated.loc[week.index[0]] = week.sum()

    # Linear regression on aggregated returns with the target column shifted
    # by `shift` rows relative to the predictors.
    lagged = aggregated.copy()
    lagged.iloc[:, 0] = lagged.iloc[:, 0].shift(periods=shift)

    # Previously the regression output was computed and then discarded;
    # return it so callers can actually use the result.
    return rolling_lr(
        lagged.iloc[:, 0],
        lagged.iloc[:, 1:],
        lookback,
        intercept=False,
    )
# Split the daily percentage series into consecutive chunks (section count is
# the row count divided by 5) and sum each chunk column-wise.
week_chunks = np.array_split(
    simp_avg_daily_percent.copy(),
    simp_avg_daily_percent.shape[0] / 5,
)

# Empty frame with one row per chunk, labelled by the chunk's first index.
simp_avg_daily_pc_week_sum = pd.DataFrame(
    [],
    index=[week.index[0] for week in week_chunks],
    columns=simp_avg_daily_percent.columns,
)
for week in week_chunks:
    week_start = week.index[0]
    simp_avg_daily_pc_week_sum.loc[week_start] = week.sum()

# Linear regression on the aggregated returns: the first column (target) is
# shifted by one row relative to the remaining columns (predictors).
simp_avg_daily_pc_weekly_ll = simp_avg_daily_pc_week_sum.copy()
simp_avg_daily_pc_weekly_ll.iloc[:, 0] = (
    simp_avg_daily_pc_week_sum.iloc[:, 0].shift(1)
)

test = rolling_lr(
    simp_avg_daily_pc_weekly_ll.iloc[:, 0],
    simp_avg_daily_pc_weekly_ll.iloc[:, 1:],
    150,
    intercept=False,
)

# Alternative estimators, kept for experimentation:
# test = rolling_lasso(simp_avg_daily_pc_weekly_ll.iloc[:,0], simp_avg_daily_pc_weekly_ll.iloc[:,1:], 150, intercept = False, alph = 0)
# test = rolling_ridge(simp_avg_daily_pc_weekly_ll.iloc[:,0], simp_avg_daily_pc_weekly_ll.iloc[:,1:], 150, intercept = False, alph = 2)
# series_plot([simp_avg_daily_pc_weekly_ll.iloc[:,0], simp_avg_daily_pc_weekly_ll['WTI Crude Oil']],'Oil Price against commodities')

#%%
# Disabled diagnostic plots:
# series_plot([commodities_2013.mean(axis=1) - trimmed_dates['NYMEX WTI Crude Oil']],'Oil Price against commodities')
# series_plot([commodities_2013.mean(axis=1) - trimmed_dates['NYMEX WTI Crude Oil']],'Oil Price against commodities')
# series_plot([commodities_2013.mean(axis=1) - trimmed_dates['ICE US Dollar Index']],'Oil Price against commodities')
# series_plot([commodities_2013.mean(axis=1) - trimmed_dates['NYMEX WTI Crude Oil']],'Oil Price against commodities')
# Create column for the simple average of the commodity-currency basket
cur_commod_avg = pd.DataFrame(
    trimmed_dates[[
        'CME Australian Dollar AUD',
        'CME Mexican Peso',
        'CME Canadian Dollar CAD',
    ]].mean(axis=1),
    columns=['Commodity Currencies Simp Avg'])

# Create empty residual dataframe shaped like the commodities basket
residual_df = pd.DataFrame().reindex_like(commodities_2013)

# Perform rolling regression and fill the residual dataframe for each
# contract in the commodities basket
for i, contract in enumerate(commodities_2013):
    # Take the prediction (second element of the rolling_lr result) from a
    # no-intercept rolling regression of the contract on the currency average
    pred = rolling_lr(pd.DataFrame(commodities_2013[contract]),
                      cur_commod_avg,
                      lookback=200,
                      intercept=False)[1]

    # Residual = observed value minus the rolling prediction
    residual_df[contract] = commodities_2013[contract] - pred['Prediction']

    # Output progress
    print('{} residuals completed {}/{}'.format(contract, i + 1,
                                                commodities_2013.shape[1]))

# Forward-fill gaps. `fillna(method='ffill')` is deprecated in recent pandas;
# `.ffill()` is the equivalent supported spelling.
residual_df = residual_df.ffill()

#%% Trading strategy:
# Long bottom three negative residuals, short top three positive residuals.

# Create a signals df shaped like residual_df and initialised to zero
signal_df = pd.DataFrame([0]).reindex_like(residual_df)
signal_df = signal_df.fillna(0)
from lin_reg_analysis import rolling_lr, commodities_2013
from preprocessing import trimmed_dates, df_dict
from PlottingFunctions import series_plot
import pandas as pd
import numpy as np

#%%
# Create average currency column
cur_commod_avg = pd.DataFrame(
    trimmed_dates[[
        'CME Australian Dollar AUD',
        'CME Mexican Peso',
        'CME Canadian Dollar CAD',
    ]].mean(axis=1),
    columns=['Commodity Currencies Simp Avg'])

# Create empty beta df shaped like the commodities basket
beta_df = pd.DataFrame().reindex_like(commodities_2013)

# Perform rolling regression of each contract on the basket's cross-sectional
# mean and store the first element of the rolling_lr result as its beta
for i, contract in enumerate(commodities_2013):
    beta = rolling_lr(pd.DataFrame(commodities_2013[contract]),
                      pd.DataFrame(commodities_2013.mean(axis=1)),
                      lookback=150,
                      intercept=False)
    beta_df[contract] = beta[0]
    print('{} betas computed {}/{}'.format(contract, i + 1,
                                           commodities_2013.shape[1]))

# %%
# Initialise balance and price series.
# `fillna(method='backfill')` is deprecated in recent pandas; `.bfill()` is
# the equivalent supported spelling.
live_prices = df_dict['Close'].bfill()  # alternative: .copy().ffill()

# Split into monthly chunks
# NOTE(review): this produces 150 near-equal chunks regardless of calendar
# months -- confirm that 150 matches the number of months in the sample.
monthly_beta = np.array_split(beta_df.dropna(), 150)
month_index = [month.index[0] for month in monthly_beta]

# Create df of monthly signals for each contract