def stockdata(symbol="AMZN", start='2017-10-25', end='2017-10-27'):
    """Download adjusted closing prices and return their simple returns.

    Called without arguments this behaves as before: it requests "AMZN"
    from the 'yahoo' source for 2017-10-25 .. 2017-10-27.

    Args:
        symbol: Ticker handed to ``DataReader``.
        start: Start of the requested range, handed to ``DataReader``.
        end: End of the requested range, handed to ``DataReader``.

    Returns:
        The row-over-row simple returns ``p_t / p_(t-1) - 1`` of the
        'Adj Close' data, without the first row (it has no predecessor).
        Empty when fewer than two prices were downloaded.

    Side effects:
        Prints the downloaded prices.
    """
    # collecting stockdata
    stocks = DataReader(symbol, 'yahoo', start, end)['Adj Close']

    # calculating yields
    yields = stocks / stocks.shift(1) - 1

    # Positional slice instead of drop(yields.index[0]): the first row is
    # always the NaN one, and slicing does not raise on an empty download.
    df_yields = yields.iloc[1:]

    print(stocks)
    return df_yields
10).mean() + 2 * df['Close'].rolling(10).std()
# NOTE(review): the statement above starts outside the visible chunk; it is
# presumably the matching upper band ('mean + 2 * std') -- confirm.

# Lower band: 10-row rolling mean of 'Close' minus two rolling standard
# deviations over the same 10 rows.
df['Boll_Down_10_2'] = df['Close'].rolling(
    10).mean() - 2 * df['Close'].rolling(10).std()

# Donchian channels - rolling maximum of 'High' and rolling minimum of 'Low'
# for each listed period (presumably the same periods used for the moving
# averages computed earlier -- not visible here).
for channel_period in [5, 10, 20, 50, 100, 200]:
    up_name = 'Don_Ch_Up_%d' % (channel_period)
    down_name = 'Don_Ch_Down_%d' % (channel_period)
    df[up_name] = df['High'].rolling(channel_period).max()
    df[down_name] = df['Low'].rolling(channel_period).min()

# Lagged features: every column of df shifted down by 1..10 rows, renamed to
# '<column>_shift_by_<lag>' and appended next to the unshifted 'Close'.
# (Rows are presumably trading days -- confirm against how df is loaded.)
newdata = df['Close'].to_frame()
for lag in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
    shift = lag
    shifted = df.shift(shift)
    # The name is already fully built by the % operator; str.format receives
    # no further arguments here.
    shifted.columns = [
        str.format('%s_shift_by_%d' % (column, shift))
        for column in shifted.columns
    ]
    newdata = pd.concat((newdata, shifted), axis=1)

# Target: the 'Close' value forward_lag rows ahead of each row.
forward_lag = 5
newdata['target'] = newdata['Close'].shift(-forward_lag)
# The unshifted 'Close' is dropped, so only lagged columns remain as features.
newdata = newdata.drop('Close', axis=1)
# Remove the rows left incomplete by the backward and forward shifts.
newdata = newdata.dropna()
pprint.pprint(newdata, width=80)

# Feature matrix and target vector.
X = newdata.drop('target', axis=1)
Y = newdata['target']
def _abnormal_return(stock_ret, market_ret, loc, estimation_len):
    """Return the market-model abnormal return of a stock at row ``loc``.

    The model ``R_it = a_i + B_i*R_mt + e_it`` is fitted on the rows
    ``[loc - estimation_len, loc - 1)`` of both return series; the abnormal
    return is the observed return at ``loc`` minus the model's prediction.
    """
    est_stock = stock_ret.iloc[loc - estimation_len:loc - 1]
    est_market = market_ret.iloc[loc - estimation_len:loc - 1]
    u_i = est_stock.mean()
    u_m = est_market.mean()
    market_dev = est_market - u_m
    # Least-squares slope over the whole estimation window.  Deriving beta
    # from the single observation at ``loc`` makes alpha + beta * R_m equal
    # R_i exactly, so every abnormal return would collapse to zero.
    beta_i = ((est_stock - u_i) * market_dev).sum() / (market_dev ** 2).sum()
    alpha_i = u_i - beta_i * u_m
    return stock_ret.iloc[loc] - alpha_i - beta_i * market_ret.iloc[loc]


def _event_window_abnormal_returns(data_ret, symbol, event_dates, window,
                                   estimation_len):
    """Map each usable event date to an OrderedDict {row: abnormal return}."""
    stock_ret = data_ret[symbol]
    market_ret = data_ret['^GSPC']
    n_days = 2 * window + 1
    by_event = {}
    for event in event_dates:
        # Row at which the backward-looking part of the window starts.
        first_loc = data_ret.index.get_loc(event) - window
        # Skip events that lack a full estimation period before that row or
        # a full event window after it.
        if first_loc <= estimation_len or first_loc > len(data_ret) - n_days:
            continue
        by_event[event] = OrderedDict(
            (loc, _abnormal_return(stock_ret, market_ret, loc, estimation_len))
            for loc in range(first_loc, first_loc + n_days))
    return by_event


def _mean_abnormal_returns(events_by_symbol, symbols, window):
    """Average the abnormal returns over all events, per window day and stock."""
    table = pd.DataFrame(index=range(-window, window + 1), columns=symbols,
                         dtype=float)
    for symbol in symbols:
        events = events_by_symbol.get(symbol)
        if events:
            # list(...) keeps this working where dict.values() is a view.
            table[symbol] = np.mean(
                [list(ar.values()) for ar in events.values()], axis=0)
    return table


def _save_car_plot(car, labels, filename):
    """Plot a CAR table or series with the given legend labels and save it."""
    plt.clf()
    plt.plot(car)
    plt.legend(labels)
    plt.ylabel('CAR')
    plt.xlabel('Window')
    matplotlib.rcParams.update({'font.size': 8})
    plt.savefig(filename, format='png')


def EventStudies():
    """Run an event study on a fixed list of stocks against '^GSPC'.

    Steps:
      1. Download 'Adj Close' prices for 2012-01-01 .. 2015-01-01 with
         ``DataReader`` and turn them into simple daily returns.
      2. Mark a positive (negative) event on every day a stock's return is
         more than ``daily_diff`` above (below) the '^GSPC' return.
      3. For each event with enough surrounding data, compute market-model
         abnormal returns for the ``window`` rows before and after it.
      4. Average them per window day, cumulate them (CAR) and save plots.

    Side effects:
        Writes 'PositiveCAR_All.png', 'NegativeCAR_All.png',
        'PositiveCAR_SUM.png' and 'NegativeCAR_SUM.png' to the working
        directory and sets matplotlib's global 'font.size' to 8.
    """
    # Define list of stocks to conduct event analysis on.
    symbols_list = ['AES', 'AET', 'AFL', 'AVP', 'CLX', 'GM', '^GSPC']

    # Start and End dates
    dt_start = dt.datetime(2012, 1, 1)
    dt_end = dt.datetime(2015, 1, 1)

    # Download historical Adjusted Closing prices using Pandas downloader for Yahoo
    data = DataReader(symbols_list, 'yahoo', dt_start, dt_end)['Adj Close']

    # Daily simple returns
    data_ret = data / data.shift(1) - 1

    # Event threshold: required gap between stock return and market return
    daily_diff = 0.03

    # Events are tracked for every symbol except the market index itself.
    events_col = symbols_list[:]  # copy, so symbols_list stays untouched
    events_col.remove('^GSPC')

    # 1 for positive events, -1 for negative events, NaN otherwise.
    data_events = pd.DataFrame(index=data_ret.index, columns=events_col)
    for i in events_col:
        excess = data_ret[i] - data_ret['^GSPC']
        data_events[i] = np.where(
            excess > daily_diff, 1,
            np.where(excess < -daily_diff, -1, np.nan))

    # Estimation period length for the market model
    L1 = 30
    # Rows looked at before and after each event. Should be less than L1.
    window = 20

    # {symbol: {event date: OrderedDict {row: abnormal return}}}
    pos_dict = {}
    neg_dict = {}
    for s in events_col:
        pos_event_dates = data_events[s][data_events[s] == 1].index.tolist()
        neg_event_dates = data_events[s][data_events[s] == -1].index.tolist()
        pos_dict[s] = _event_window_abnormal_returns(
            data_ret, s, pos_event_dates, window, L1)
        neg_dict[s] = _event_window_abnormal_returns(
            data_ret, s, neg_event_dates, window, L1)

    # Mean abnormal return per window day (rows -window..window) and stock
    pos_data_abret = _mean_abnormal_returns(pos_dict, events_col, window)
    neg_data_abret = _mean_abnormal_returns(neg_dict, events_col, window)

    # Cumulative Abnormal Return (CAR) tables
    pos_CAR = pos_data_abret.cumsum()
    neg_CAR = neg_data_abret.cumsum()

    # One line per stock
    _save_car_plot(pos_CAR, list(pos_CAR.columns), 'PositiveCAR_All.png')
    _save_car_plot(neg_CAR, list(neg_CAR.columns), 'NegativeCAR_All.png')

    # Sum CAR over all stocks to plot only the aggregate CAR
    pos_CAR['SUM'] = pos_CAR.sum(axis=1)
    neg_CAR['SUM'] = neg_CAR.sum(axis=1)

    # Label the line by name; passing the series itself would use its
    # values as legend labels.
    _save_car_plot(pos_CAR['SUM'], ['SUM'], 'PositiveCAR_SUM.png')
    _save_car_plot(neg_CAR['SUM'], ['SUM'], 'NegativeCAR_SUM.png')
def _abnormal_return(stock_ret, market_ret, loc, estimation_len):
    """Return the market-model abnormal return of a stock at row ``loc``.

    The model ``R_it = a_i + B_i*R_mt + e_it`` is fitted on the rows
    ``[loc - estimation_len, loc - 1)`` of both return series; the abnormal
    return is the observed return at ``loc`` minus the model's prediction.
    """
    est_stock = stock_ret.iloc[loc - estimation_len:loc - 1]
    est_market = market_ret.iloc[loc - estimation_len:loc - 1]
    u_i = est_stock.mean()
    u_m = est_market.mean()
    market_dev = est_market - u_m
    # Least-squares slope over the whole estimation window.  Deriving beta
    # from the single observation at ``loc`` makes alpha + beta * R_m equal
    # R_i exactly, so every abnormal return would collapse to zero.
    beta_i = ((est_stock - u_i) * market_dev).sum() / (market_dev ** 2).sum()
    alpha_i = u_i - beta_i * u_m
    return stock_ret.iloc[loc] - alpha_i - beta_i * market_ret.iloc[loc]


def _event_window_abnormal_returns(data_ret, symbol, event_dates, window,
                                   estimation_len):
    """Map each usable event date to an OrderedDict {row: abnormal return}."""
    stock_ret = data_ret[symbol]
    market_ret = data_ret['^GSPC']
    n_days = 2 * window + 1
    by_event = {}
    for event in event_dates:
        # Row at which the backward-looking part of the window starts.
        first_loc = data_ret.index.get_loc(event) - window
        # Skip events that lack a full estimation period before that row or
        # a full event window after it.
        if first_loc <= estimation_len or first_loc > len(data_ret) - n_days:
            continue
        by_event[event] = OrderedDict(
            (loc, _abnormal_return(stock_ret, market_ret, loc, estimation_len))
            for loc in range(first_loc, first_loc + n_days))
    return by_event


def _mean_abnormal_returns(events_by_symbol, symbols, window):
    """Average the abnormal returns over all events, per window day and stock."""
    table = pd.DataFrame(index=range(-window, window + 1), columns=symbols,
                         dtype=float)
    for symbol in symbols:
        events = events_by_symbol.get(symbol)
        if events:
            # list(...) keeps this working where dict.values() is a view.
            table[symbol] = np.mean(
                [list(ar.values()) for ar in events.values()], axis=0)
    return table


def _save_car_plot(car, labels, filename):
    """Plot a CAR table or series with the given legend labels and save it."""
    plt.clf()
    plt.plot(car)
    plt.legend(labels)
    plt.ylabel('CAR')
    plt.xlabel('Window')
    matplotlib.rcParams.update({'font.size': 8})
    plt.savefig(filename, format='png')


def EventStudies():
    """Run an event study on a fixed list of stocks against '^GSPC'.

    Steps:
      1. Download 'Adj Close' prices for 2012-01-01 .. 2015-01-01 with
         ``DataReader`` and turn them into simple daily returns.
      2. Mark a positive (negative) event on every day a stock's return is
         more than ``daily_diff`` above (below) the '^GSPC' return.
      3. For each event with enough surrounding data, compute market-model
         abnormal returns for the ``window`` rows before and after it.
      4. Average them per window day, cumulate them (CAR) and save plots.

    Side effects:
        Writes 'PositiveCAR_All.png', 'NegativeCAR_All.png',
        'PositiveCAR_SUM.png' and 'NegativeCAR_SUM.png' to the working
        directory and sets matplotlib's global 'font.size' to 8.
    """
    # Define list of stocks to conduct event analysis on.
    symbols_list = ['AES', 'AET', 'AFL', 'AVP', 'CLX', 'GM', '^GSPC']

    # Start and End dates
    dt_start = dt.datetime(2012, 1, 1)
    dt_end = dt.datetime(2015, 1, 1)

    # Download historical Adjusted Closing prices using Pandas downloader for Yahoo
    data = DataReader(symbols_list, 'yahoo', dt_start, dt_end)['Adj Close']

    # Daily simple returns
    data_ret = data / data.shift(1) - 1

    # Event threshold: required gap between stock return and market return
    daily_diff = 0.03

    # Events are tracked for every symbol except the market index itself.
    events_col = symbols_list[:]  # copy, so symbols_list stays untouched
    events_col.remove('^GSPC')

    # 1 for positive events, -1 for negative events, NaN otherwise.
    data_events = pd.DataFrame(index=data_ret.index, columns=events_col)
    for i in events_col:
        excess = data_ret[i] - data_ret['^GSPC']
        data_events[i] = np.where(
            excess > daily_diff, 1,
            np.where(excess < -daily_diff, -1, np.nan))

    # Estimation period length for the market model
    L1 = 30
    # Rows looked at before and after each event. Should be less than L1.
    window = 20

    # {symbol: {event date: OrderedDict {row: abnormal return}}}
    pos_dict = {}
    neg_dict = {}
    for s in events_col:
        pos_event_dates = data_events[s][data_events[s] == 1].index.tolist()
        neg_event_dates = data_events[s][data_events[s] == -1].index.tolist()
        pos_dict[s] = _event_window_abnormal_returns(
            data_ret, s, pos_event_dates, window, L1)
        neg_dict[s] = _event_window_abnormal_returns(
            data_ret, s, neg_event_dates, window, L1)

    # Mean abnormal return per window day (rows -window..window) and stock
    pos_data_abret = _mean_abnormal_returns(pos_dict, events_col, window)
    neg_data_abret = _mean_abnormal_returns(neg_dict, events_col, window)

    # Cumulative Abnormal Return (CAR) tables
    pos_CAR = pos_data_abret.cumsum()
    neg_CAR = neg_data_abret.cumsum()

    # One line per stock
    _save_car_plot(pos_CAR, list(pos_CAR.columns), 'PositiveCAR_All.png')
    _save_car_plot(neg_CAR, list(neg_CAR.columns), 'NegativeCAR_All.png')

    # Sum CAR over all stocks to plot only the aggregate CAR
    pos_CAR['SUM'] = pos_CAR.sum(axis=1)
    neg_CAR['SUM'] = neg_CAR.sum(axis=1)

    # Label the line by name; passing the series itself would use its
    # values as legend labels.
    _save_car_plot(pos_CAR['SUM'], ['SUM'], 'PositiveCAR_SUM.png')
    _save_car_plot(neg_CAR['SUM'], ['SUM'], 'NegativeCAR_SUM.png')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from gurobipy import *
from pandas_datareader.data import DataReader
from datetime import datetime

stocks = ['ISP', 'CSV', 'RGC', 'WMS', 'GYB', 'KCC', 'BPL', 'WTW', 'GS', 'SPR']
ls_key = 'Adj Close'
start = datetime(2014, 1, 1)
end = datetime(2016, 12, 23)

# Fetch daily adjusted close and drop rows with missing prices
# (WMS went public on 2014-07-25, so earlier rows are incomplete).
price = DataReader(stocks, 'yahoo', start, end)[ls_key].dropna()[stocks]

# Calculate log return
rtn = np.log(price) - np.log(price.shift(1))
meanDailyReturns = rtn.mean()
covMatrix = rtn.cov()


# Calculate performance
def performance(weights, meanReturns, covMatrix):
    """Return annualised return, volatility and their ratio for a portfolio.

    Args:
        weights: Portfolio weights, one per asset.
        meanReturns: Mean per-period return of each asset.
        covMatrix: Covariance matrix of the per-period asset returns.

    Returns:
        A 3-tuple: the weighted mean return times 252, the portfolio
        standard deviation times sqrt(252), and the return-to-volatility
        ratio times sqrt(252) (a Sharpe ratio with no risk-free rate
        subtracted).
    """
    portReturn = np.sum(meanReturns * weights)
    portStdDev = np.sqrt(np.dot(weights.T, np.dot(covMatrix, weights)))
    return (portReturn * 252,
            portStdDev * np.sqrt(252),
            portReturn / portStdDev * np.sqrt(252))


# Visualize Efficient Frontier
numPortfolios = 10000
# Row 0: return, row 1: volatility, row 2: ratio; one column per portfolio.
results = np.zeros((3, numPortfolios))
# range instead of xrange: xrange does not exist on Python 3.
for i in range(numPortfolios):
    # Random positive weights, normalised to sum to one.
    weights = np.random.random(len(stocks))
    weights /= np.sum(weights)
    results[0, i], results[1, i], results[2, i] = performance(
        weights, meanDailyReturns, covMatrix)
plt.scatter(results[1,], results[0,], c=results[2,])
plt.legend(['price', 'quarter average'])

# %% shifting
# Three stacked panels on a shared x axis: the close as-is, then the close
# moved 365 rows forward and 365 rows backward.
fig, ax = plt.subplots(3, sharex=True)
amazon['Close'].plot(ax=ax[0])
for panel, periods in zip(ax[1:], (365, -365)):
    amazon['Close'].shift(periods).plot(ax=panel)
for panel, label in zip(ax, ('input', 'shift by 365', 'shift by -365')):
    panel.legend([label])

# %% ROI
# Percentage change between the value 16 rows earlier and the current value.
ROI = (amazon.shift(16) / amazon - 1) * 100
ROI['Close'].plot()

# %% rolling windows
# Sort by index first so the 120-row window runs over ordered data.
amazon = amazon.sort_index()
rolling = amazon['Close'].rolling(120)
rolling_mean = rolling.mean()
rolling_std = rolling.std()
df = pd.DataFrame({
    'input': amazon['Close'],
    'rolling_mean': rolling_mean,
    'rolling_std': rolling_std,
})

# Top panel: close and its rolling mean; bottom panel: rolling std.
fig, ax = plt.subplots(2, sharex=True)
amazon['Close'].plot(ax=ax[0])
rolling.mean().plot(ax=ax[0], logy=True)
rolling.std().plot(ax=ax[1], logy=True)
# Mean of every column per 'BM' resampling bin.
resampled_uber = raw_uber.resample('BM').mean()

#%% Raw and resampled close on the same axes
raw_uber['Close'].plot()
resampled_uber['Close'].plot(style='--', color='green')

#%% Raw close next to copies shifted by +100 and -100 rows
fig, ax = plt.subplots(3, sharex=True)
raw_uber['Close'].plot(ax=ax[0])
for panel, periods in zip(ax[1:], (100, -100)):
    raw_uber['Close'].shift(periods).plot(ax=panel)
for panel, label in zip(
        ax, ('Input', 'Shift by 100 days', 'Shift by -100 days')):
    panel.legend([label])

#%% ROI index: percentage change between the value 15 rows earlier and now
ROI = (raw_uber.shift(15) / raw_uber - 1) * 100

#%% Plot ROI index
ROI['Close'].plot()

#%% Close with its 30-row rolling mean (top) and the 16-row rolling std of
#   the row-over-row percentage change (bottom)
fig, ax = plt.subplots(2, sharex=True)
raw_uber['Close'].plot(ax=ax[0])
close_mean_30 = raw_uber['Close'].rolling(window=30).mean()
close_mean_30.plot(ax=ax[0])
pct_change_std_16 = raw_uber['Close'].pct_change().rolling(16).std()
pct_change_std_16.plot(ax=ax[1])
for panel, labels in zip(ax, (['price', 'rolling mean'], ['rolling_std'])):
    panel.legend(labels)