def ks_statistic_calc(fund_ts_past, fund_ts_month):
    '''
    @summary: Runs a two-sample Kolmogorov-Smirnov test on two fund series
              after passing each through tsu.returnize0.
    @param fund_ts_past: object exposing .values (the reference sample)
    @param fund_ts_month: object exposing .values (the sample compared to it)
    @return: tuple (ks, p) as unpacked from scst.ks_2samp
    '''
    # Work on deep copies: returnize0 is called for its effect on the array
    # it is handed, and the callers' series must stay untouched.
    na_past = deepcopy(fund_ts_past.values)
    na_month = deepcopy(fund_ts_month.values)

    for na_sample in (na_past, na_month):
        tsu.returnize0(na_sample)

    ks, p = scst.ks_2samp(na_past, na_month)
    return (ks, p)
def dataFeatures(dData, sym, count=100):
    '''
    @summary: Builds a trailing-window feature matrix from one symbol's
              close prices.
    @param dData: dictionary of data; only dData['close'][sym] is read
    @param sym: symbol whose close series is used
    @param count: window length, i.e. how many preceding closes go in a row
    @return: ndarray of shape (number of rows, count). Row i holds the closes
             at positions i-count .. i-1; the first `count` rows are zeros
             because they have no full window behind them.
    '''
    # NOTE(review): the original also ran tsu.returnize0 on a copy of the
    # series and never read the result, so the features have always been raw
    # close prices. That dead computation is dropped here; switching to
    # return-based features would be a separate, behavior-changing decision.
    naClose = dData['close'][sym].values
    lRows = len(naClose)

    retval = np.zeros((lRows, count))
    # The original hard-coded 100 for both the start row and the window,
    # which broke every call with count != 100.
    for i in range(count, lRows):
        retval[i, :] = naClose[(i - count):i]
    return retval
def getMarketRel(dData, sRel='$SPX'):
    '''
    @summary: Calculates market relative data.
    @param dData - Dictionary containing data to be used, requires specific
                   naming: open/high/low/close/volume
    @param sRel - Stock ticker to make the data relative to, $SPX is default.
    @return: Dictionary of market relative values
    '''
    if not (sRel in dData['close'].columns):
        raise KeyError('Market relative stock %s not found in getMR()' % sRel)

    dRet = {}
    for sKey in dData.keys():
        if sKey != 'volume':
            dfRel = dData[sKey].copy()

            # Turn the copied values into returns (done on the value array).
            tsu.returnize0(dfRel.values)

            # Remove the market's return, shift to a 1-based factor, seed the
            # first row with 100 and compound down the rows.
            dfExcess = dfRel - dfRel[sRel]
            dfRel = dfExcess + 1.
            dfRel.ix[0, :] = 100.
            dfRel = dfRel.cumprod(axis=0)

            # The market column itself is returned unchanged.
            dfRel[sRel] = dData[sKey][sRel]
            dRet[sKey] = dfRel
        else:
            # Volume is never made market relative, only passed along.
            dRet['volume'] = dData['volume']

    return dRet
def simulate(symbols, allocations, startday, endday):
    """
    Evaluate a fixed-allocation portfolio over a date range.

    @symbols: list of symbols
    @allocations: list of weights, one per symbol; they are rescaled to sum
                  to 1, so any consistent scale (fractions, percentages,
                  integer parts) is accepted
    @startday: first day handed to du.getNYSEdays
    @endday: last day handed to du.getNYSEdays
    @return: dict with the keys "sharpe", "cumulative_return", "average"
             and "stddev"
    """
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)

    dataobj = da.DataAccess('Yahoo')
    close = dataobj.get_data(timestamps, symbols, "close", verbose=False)
    close = close.values

    # Express every price relative to its first row.
    norm_close = close / close[0, :]

    # Force floating point before normalising: with integer weights the
    # original division could floor every weight to 0 under Python 2.
    weights = np.asarray(allocations, dtype=float)
    weights = weights / np.sum(weights)

    portfolio_value = np.dot(norm_close, weights)

    # returnize0 is applied to a copy so the value series stays available
    # for the cumulative return below.
    portfolio_return = portfolio_value.copy()
    tsu.returnize0(portfolio_return)

    sharpe = tsu.get_sharpe_ratio(portfolio_return)
    accum = portfolio_value[-1] / portfolio_value[0]
    average = np.mean(portfolio_return)
    stddev = np.std(portfolio_return)

    result = {"sharpe": sharpe,
              "cumulative_return": accum,
              "average": average,
              "stddev": stddev}
    return result
def ks_statistic_calc(fund_ts_past, fund_ts_month):
    '''
    @summary: Compares two fund series with a two-sample Kolmogorov-Smirnov
              test, after each has been passed through tsu.returnize0.
    @param fund_ts_past: object exposing .values (first sample)
    @param fund_ts_month: object exposing .values (second sample)
    @return: tuple (ks, p) as unpacked from scipy.stats.ks_2samp
    '''
    # Deep copies first, so the conversion below never reaches the inputs.
    samples = [deepcopy(fund_ts.values)
               for fund_ts in (fund_ts_past, fund_ts_month)]

    # returnize0 is used for its effect on the array passed in.
    for sample in samples:
        tsu.returnize0(sample)

    first_sample, second_sample = samples
    (ks, p) = scipy.stats.ks_2samp(first_sample, second_sample)
    return ks, p
def getMarketRel( dData, sRel='$SPX' ):
    '''
    @summary: Calculates market relative data.
    @param dData - Dictionary containing data to be used, requires specific
                   naming: open/high/low/close/volume
    @param sRel - Stock ticker to make the data relative to, $SPX is default.
    @return: Dictionary of market relative values. Every non-volume frame has
             the same index/columns as its input; each stock starts at 100 on
             its first non-null row and compounds (return - market return + 1)
             from there. Rows before that first value are NaN. The sRel
             column and the volume frame are passed through unchanged.
    @raise KeyError: if sRel is not a column of dData['close']
    '''
    if sRel not in dData['close'].columns:
        raise KeyError( 'Market relative stock %s not found in getMR()'%sRel )

    dRet = {}

    # Make all data market relative, except for volume
    for sKey in dData.keys():

        # Don't calculate market relative volume, but still copy it over
        if sKey == 'volume':
            dRet['volume'] = dData['volume']
            continue

        dfAbsolute = dData[sKey]
        naAbsolute = dfAbsolute.values
        lsColumns = list(dfAbsolute.columns)

        # Get returns and strip off the market returns
        naRets = naAbsolute.copy()
        tsu.returnize0( naRets )
        naMarkRets = naRets[:, lsColumns.index(sRel)]

        # Results are assembled in a plain ndarray and wrapped in a DataFrame
        # at the end. The original wrote through DataFrame.values, which only
        # works while pandas happens to return a view.
        naRelative = np.zeros(naAbsolute.shape)

        for i, sStock in enumerate(lsColumns):

            # Don't change the 'market' stock
            if sStock == sRel:
                naRelative[:, i] = naAbsolute[:, i]
                continue

            naMarkRel = (naRets[:, i] - naMarkRets) + 1.0

            # Everything before the first non-null price stays NaN; a column
            # with no prices at all (or a frame with no rows) is all NaN.
            naRelative[:, i] = np.nan
            naValid = np.flatnonzero(~np.asarray(pand.isnull(naAbsolute[:, i])))
            if len(naValid) == 0:
                continue
            iFirst = naValid[0]

            # Start the price at 100 and fill out using the market relative
            # returns. Seeding the factor array with 100 keeps the exact
            # multiplication order of the original row-by-row loop.
            naFactors = naMarkRel[iFirst:].astype(np.float64)
            naFactors[0] = 100.0
            naRelative[iFirst:, i] = np.cumprod(naFactors)

        # Add dataFrame to dictionary to return, move to next key
        dRet[sKey] = pand.DataFrame( naRelative, index=dfAbsolute.index,
                                     columns=dfAbsolute.columns )

    return dRet
def getMarketRel(dData, sRel='$SPX'):
    '''
    @summary: Calculates market relative data.
    @param dData - Dictionary containing data to be used, requires specific
                   naming: open/high/low/close/volume
    @param sRel - Stock ticker to make the data relative to, $SPX is default.
    @return: Dictionary of market relative values. Every non-volume frame has
             the same index/columns as its input; each stock starts at 100 on
             its first non-null row and compounds (return - market return + 1)
             from there. Rows before that first value are NaN. The sRel
             column and the volume frame are passed through unchanged.
    @raise KeyError: if sRel is not a column of dData['close']
    '''
    if sRel not in dData['close'].columns:
        raise KeyError('Market relative stock %s not found in getMR()' % sRel)

    dRet = {}

    # Make all data market relative, except for volume
    for sKey in dData.keys():

        # Don't calculate market relative volume, but still copy it over
        if sKey == 'volume':
            dRet['volume'] = dData['volume']
            continue

        dfAbsolute = dData[sKey]
        naAbsolute = dfAbsolute.values
        lsColumns = list(dfAbsolute.columns)

        # Get returns and strip off the market returns
        naRets = naAbsolute.copy()
        tsu.returnize0(naRets)
        naMarkRets = naRets[:, lsColumns.index(sRel)]

        # Build the result in an ndarray and wrap it afterwards: assigning
        # through DataFrame.values (as the original did) silently depends on
        # pandas handing back a view of its internal storage.
        naRelative = np.zeros(naAbsolute.shape)

        for i, sStock in enumerate(lsColumns):

            # Don't change the 'market' stock
            if sStock == sRel:
                naRelative[:, i] = naAbsolute[:, i]
                continue

            naMarkRel = (naRets[:, i] - naMarkRets) + 1.0

            # Rows ahead of the first non-null price are NaN; so is the whole
            # column when it holds no price (this also covers empty frames,
            # where the original hit an unbound loop variable).
            naRelative[:, i] = np.nan
            naValid = np.flatnonzero(~np.asarray(pand.isnull(naAbsolute[:, i])))
            if len(naValid) == 0:
                continue
            iFirst = naValid[0]

            # 100 at the first valid row, then the running product of the
            # market relative factors, multiplied in the original order.
            naFactors = naMarkRel[iFirst:].astype(np.float64)
            naFactors[0] = 100.0
            naRelative[iFirst:, i] = np.cumprod(naFactors)

        # Add dataFrame to dictionary to return, move to next key
        dRet[sKey] = pand.DataFrame(naRelative, index=dfAbsolute.index,
                                    columns=dfAbsolute.columns)

    return dRet
def findEvents(symbols, startday, endday, marketSymbol, verbose=False):
    """
    Build an event matrix that flags quarter-end dates for every symbol.

    @param symbols: symbols to read; the events are written into their columns
    @param startday: first day handed to du.getNSEdays
    @param endday: last day handed to du.getNSEdays
    @param marketSymbol: column subtracted from all columns to obtain the
                         market-relative frame the event matrix is copied from
    @param verbose: print progress messages when true
    @return: copy of the market-relative frame in which, for the requested
             symbols, every timestamp is NaN except the dates returned by
             getQuarterEndDates, which are 1.0
    """
    # Trading days, stamped at 16:00.
    closingTime = dt.timedelta(hours=16)
    timestamps = du.getNSEdays(startday, endday, closingTime)
    endOfQuarter = getQuarterEndDates(timestamps)

    dataobj = da.DataAccess('NSEData')
    if verbose:
        print(__name__ + " reading data")

    # Read the prices and fill the gaps forward, then backward.
    close = dataobj.get_data(timestamps, symbols, closefield)
    close = close.fillna(method='ffill')
    close = close.fillna(method='backfill')

    # Convert the price values to daily returns.
    tsu.returnize0(close.values)

    # Return of each stock relative to the market: a stock up 5% on a day the
    # market rose 3% has a market-relative return of 2%.
    mktneutDM = close - close[marketSymbol]

    # Start from a copy of that frame and blank every cell.
    np_eventmat = copy.deepcopy(mktneutDM)
    for sym in symbols:
        for stamp in timestamps:
            np_eventmat[sym][stamp] = np.nan

    if verbose:
        print(__name__ + " finding events")

    # Event: the quarter-end dates, marked for every symbol.
    for symbol in symbols:
        for quarterEnd in endOfQuarter:
            np_eventmat[symbol][quarterEnd] = 1.0

    return np_eventmat
def findEvents(data, startday, endday, marketSymbol, verbose=False):
    """
    Build an event matrix from per-stock event dates supplied in `data`.

    @param data: event description; passed to getSymbols and getDates
    @param startday: first day handed to du.getNSEdays
    @param endday: last day handed to du.getNSEdays
    @param marketSymbol: column subtracted from all columns to obtain the
                         market-relative frame the event matrix is copied from
    @param verbose: print progress messages when true
    @return: copy of the market-relative frame in which, for the symbols
             read, every timestamp is NaN except the listed event dates,
             which are 1.0
    """
    # Trading days, stamped at 16:00.
    closingTime = dt.timedelta(hours=16)
    timestamps = du.getNSEdays(startday, endday, closingTime)

    dataobj = da.DataAccess('NSEData')
    if verbose:
        print(__name__ + " reading data")

    # Symbols and event dates both come from the supplied data.
    symbols = getSymbols(data, marketSymbol)
    eventDetails = getDates(data)

    # Read the prices and fill the gaps forward, then backward.
    close = dataobj.get_data(timestamps, symbols, closefield)
    close = close.fillna(method='ffill')
    close = close.fillna(method='backfill')

    # Convert the price values to daily returns.
    tsu.returnize0(close.values)

    # Return of each stock relative to the market: a stock up 5% on a day the
    # market rose 3% has a market-relative return of 2%.
    mktneutDM = close - close[marketSymbol]

    # Start from a copy of that frame and blank every cell.
    np_eventmat = copy.deepcopy(mktneutDM)
    for sym in symbols:
        for stamp in timestamps:
            np_eventmat[sym][stamp] = np.nan

    if verbose:
        print(__name__ + " finding events")

    # Each entry of eventDetails is read as [stock name, date, date, ...];
    # every listed date is marked as an event for that stock.
    for stock in eventDetails:
        stockName = stock[0]
        for position in range(1, len(stock)):
            eventDate = stock[position]
            np_eventmat[stockName][eventDate] = 1.0

    return np_eventmat
def getMarketRel( dData, sRel='$SPX' ):
    '''
    @summary: Calculates market relative data.
    @param dData - Dictionary containing data to be used, requires specific
                   naming: open/high/low/close/volume
    @param sRel - Stock ticker to make the data relative to, $SPX is default.
    @return: Dictionary of market relative values
    '''
    # Everything below is derived from the 'close' frame, so it has to be
    # present in dData for the function to work at all.
    if not (sRel in dData['close'].columns):
        raise KeyError( 'Market relative stock %s not found in getMR()'%sRel )

    # Market relative close: returns minus the market's returns, shifted to a
    # 1-based factor, seeded with 100 in the first row and compounded.
    dfClose = dData['close'].copy()
    dfCloseMark = dfClose.copy()
    tsu.returnize0( dfCloseMark.values )
    dfExcess = dfCloseMark - dfCloseMark[sRel]
    dfCloseMark = dfExcess + 1.
    dfCloseMark.ix[0, :] = 100.
    dfCloseMark = dfCloseMark.cumprod(axis=0)

    dRet = {}
    for sKey in dData.keys():
        if sKey != 'volume':
            # Scale the relative close by this key's ratio to the raw close.
            dfScaled = dfCloseMark * (dData[sKey] / dfClose)

            # Keep the relative symbol's raw data. Drop this assignment to
            # have the sRel column converted as well.
            dfScaled[sRel] = dData[sKey][sRel]
            dRet[sKey] = dfScaled
        else:
            # Volume is copied over without being made market relative.
            dRet['volume'] = dData['volume']

    return dRet
def findEvents(symbols, startday, endday, marketSymbol, verbose=False):
    """
    Write a Buy/Sell order file for every "market and stock both drop" event.

    An event is a day on which the market's daily return is below -3% and the
    stock's return relative to the market is below -5% (so with the market
    down 3% the stock must be down more than 8%). For each event a Buy order
    of 100 shares is written for that day and a Sell order of 100 shares for
    five trading days later.

    @param symbols: symbols to scan
    @param startday: first day handed to du.getNYSEdays
    @param endday: last day handed to du.getNYSEdays
    @param marketSymbol: column used as the market
    @param verbose: print progress messages when true
    @return: None; the orders are written to the file named by args.orders
    """
    # Trading days, stamped at 16:00.
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)

    dataobj = da.DataAccess('Yahoo')
    if verbose:
        print(__name__ + " reading data")

    # Read the prices and fill the gaps forward, then backward.
    close = dataobj.get_data(timestamps, symbols, closefield)
    close = (close.fillna(method='ffill')).fillna(method='backfill')

    # Keep the filled prices before they are turned into returns. The
    # original fetched the data a second time and then overwrote that fetch
    # with the filled `close`; a plain copy yields the same values.
    close_unchanged = close.copy()

    # Convert the price values to daily returns.
    tsu.returnize0(close.values)
    SPYValues = close[marketSymbol]

    # Return of each stock relative to the market: a stock up 5% on a day the
    # market rose 3% has a market-relative return of 2%.
    mktneutDM = close - close[marketSymbol]

    if verbose:
        print(__name__ + " finding events")

    lastDay = len(timestamps) - 1

    with open(args.orders, "w") as outFile:
        writer = csv.writer(outFile)
        for symbol in symbols:
            for i in range(1, len(mktneutDM[symbol])):
                if SPYValues[i] < -0.03 and mktneutDM[symbol][i] < -0.05:
                    # NOTE(review): the result of this call is discarded and
                    # it is handed a single price; kept as in the original,
                    # confirm whether it is still wanted.
                    ft.featAroon(close_unchanged[symbol][i], bDown=False)

                    buyDay = timestamps[i]
                    # Sell five trading days later; events in the final five
                    # days sell on the last available day instead of running
                    # off the end of the timestamp list.
                    sellDay = timestamps[min(i + 5, lastDay)]

                    writer.writerow([buyDay.year, buyDay.month, buyDay.day,
                                     symbol, "Buy", 100])
                    writer.writerow([sellDay.year, sellDay.month, sellDay.day,
                                     symbol, "Sell", 100])
def ks_statistic(fund_ts, window=60):
    '''
    @summary: Two-sample Kolmogorov-Smirnov test between the last `window`
              samples of a fund series and everything before them.
    @param fund_ts: object exposing .values; it is deep-copied, so the
                    caller's series is not modified
    @param window: number of trailing samples forming the second sample
                   (60, the previously hard-coded value, by default)
    @return: tuple (ks, p) from scst.ks_2samp, or (-1, -1) when the series
             does not hold more than `window` samples
    @raise ValueError: if window is smaller than 1
    '''
    if window < 1:
        # A zero window would turn the slices below into "nothing" and
        # "everything" instead of a head/tail split.
        raise ValueError('window must be at least 1')

    fund_ts = deepcopy(fund_ts)
    na_values = fund_ts.values

    if len(na_values) > window:
        seq1 = na_values[0:-window]
        seq2 = na_values[-window:]

        # Each part is converted on its own, as in the original.
        # NOTE(review): presumably this leaves an artificial first return in
        # each part; confirm that is acceptable for the test.
        tsu.returnize0(seq1)
        tsu.returnize0(seq2)

        (ks, p) = scst.ks_2samp(seq1, seq2)
        return ks, p

    # Not enough history to compare.
    return -1, -1
def ks_statistic(fund_ts, window=60):
    '''
    @summary: Two-sample Kolmogorov-Smirnov test between the last `window`
              samples of a fund series and everything before them.
    @param fund_ts: object exposing .values; it is deep-copied, so the
                    caller's series is not modified
    @param window: number of trailing samples forming the second sample
                   (60, the previously hard-coded value, by default)
    @return: tuple (ks, p) from scipy.stats.ks_2samp, or (-1, -1) when the
             series does not hold more than `window` samples
    @raise ValueError: if window is smaller than 1
    '''
    if window < 1:
        # A zero window would turn the slices below into "nothing" and
        # "everything" instead of a head/tail split.
        raise ValueError('window must be at least 1')

    fund_ts = deepcopy(fund_ts)
    na_values = fund_ts.values

    if len(na_values) > window:
        seq1 = na_values[0:-window]
        seq2 = na_values[-window:]

        # Each part is converted on its own, as in the original.
        # NOTE(review): presumably this leaves an artificial first return in
        # each part; confirm that is acceptable for the test.
        tsu.returnize0(seq1)
        tsu.returnize0(seq2)

        (ks, p) = scipy.stats.ks_2samp(seq1, seq2)
        return ks, p

    # Not enough history to compare.
    return -1, -1
def getMarketRel(dData, sRel='$SPX'):
    '''
    @summary: Calculates market relative data.
    @param dData - Dictionary containing data to be used, requires specific
                   naming: open/high/low/close/volume
    @param sRel - Stock ticker to make the data relative to, $SPX is default.
    @return: Dictionary of market relative values
    '''
    # The 'close' frame drives the whole computation and must be in dData.
    lsCloseColumns = dData['close'].columns
    if sRel not in lsCloseColumns:
        raise KeyError('Market relative stock %s not found in getMR()' % sRel)

    dfClose = dData['close'].copy()

    # Build the market relative close once; every other key is derived from
    # it through that key's ratio to the raw close.
    dfRelClose = dfClose.copy()
    tsu.returnize0(dfRelClose.values)
    dfRelClose = (dfRelClose - dfRelClose[sRel]) + 1.
    dfRelClose.ix[0, :] = 100.
    dfRelClose = dfRelClose.cumprod(axis=0)

    dRet = {}
    for sKey in dData.keys():
        # Volume is passed through as is.
        if sKey == 'volume':
            dRet['volume'] = dData['volume']
            continue

        dfRatio = dData[sKey] / dfClose
        dfResult = dfRelClose * dfRatio

        # The relative symbol keeps its raw data; remove this assignment to
        # convert the sRel column as well.
        dfResult[sRel] = dData[sKey][sRel]

        dRet[sKey] = dfResult

    return dRet
def findEvents(symbols, startday, endday, marketSymbol, verbose=False):
    """
    Build an event matrix for days on which the market and a stock both drop.

    An event is a day on which the market's daily return is below -3% and the
    stock's return relative to the market is below -5%: with the market down
    3% the stock must be down more than 8%, with the market down 5% more
    than 10%.

    @param symbols: symbols to scan
    @param startday: first day handed to du.getNYSEdays
    @param endday: last day handed to du.getNYSEdays
    @param marketSymbol: column used as the market
    @param verbose: print progress messages when true
    @return: copy of the market-relative frame in which, for the requested
             symbols, every timestamp is NaN except event days, which are 1.0
    """
    # Trading days, stamped at 16:00.
    closingTime = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, closingTime)

    dataobj = da.DataAccess('Yahoo')
    if verbose:
        print(__name__ + " reading data")

    # Read the prices and fill the gaps forward, then backward.
    close = dataobj.get_data(timestamps, symbols, closefield)
    close = close.fillna(method='ffill')
    close = close.fillna(method='backfill')

    # Convert the price values to daily returns.
    tsu.returnize0(close.values)
    SPYValues = close[marketSymbol]

    # Return of each stock relative to the market.
    mktneutDM = close - close[marketSymbol]

    # Start from a copy of that frame and blank every cell.
    np_eventmat = copy.deepcopy(mktneutDM)
    for sym in symbols:
        for stamp in timestamps:
            np_eventmat[sym][stamp] = np.nan

    if verbose:
        print(__name__ + " finding events")

    for symbol in symbols:
        for i in range(1, len(mktneutDM[symbol])):
            # Skip the day unless both conditions hold.
            if not (SPYValues[i] < -0.03 and mktneutDM[symbol][i] < -0.05):
                continue
            np_eventmat[symbol][i] = 1.0

    return np_eventmat
def findEvents(symbols, startday,endday, marketSymbol,verbose=False):
    """
    Flag the days on which the market falls and a stock falls further.

    A day is an event for a stock when the market's daily return is below
    -3% and the stock's return relative to the market is below -5% (market
    down 3% -> stock down more than 8%; market down 5% -> more than 10%).

    @param symbols: symbols to scan
    @param startday: first day handed to du.getNYSEdays
    @param endday: last day handed to du.getNYSEdays
    @param marketSymbol: column used as the market
    @param verbose: print progress messages when true
    @return: copy of the market-relative frame in which, for the requested
             symbols, every timestamp is NaN except event days, which are 1.0
    """
    # Thresholds of the event definition.
    marketDrop = -0.03
    relativeDrop = -0.05

    # Trading days, stamped at 16:00.
    timestamps = du.getNYSEdays(startday, endday, dt.timedelta(hours=16))

    dataobj = da.DataAccess('Yahoo')
    if verbose:
        print(__name__ + " reading data")

    # Read the prices; fill the gaps forward, then backward.
    rawClose = dataobj.get_data(timestamps, symbols, closefield)
    close = rawClose.fillna(method='ffill').fillna(method='backfill')

    # Convert the price values to daily returns.
    tsu.returnize0(close.values)
    SPYValues = close[marketSymbol]

    # Return of each stock relative to the market.
    mktneutDM = close - close[marketSymbol]

    # The event matrix starts as a fully blanked copy of that frame.
    np_eventmat = copy.deepcopy(mktneutDM)
    for sym in symbols:
        for stamp in timestamps:
            np_eventmat[sym][stamp] = np.nan

    if verbose:
        print(__name__ + " finding events")

    for symbol in symbols:
        dayCount = len(mktneutDM[symbol])
        for i in range(1, dayCount):
            marketFell = SPYValues[i] < marketDrop
            if marketFell and mktneutDM[symbol][i] < relativeDrop:
                # Market down more than 3% and the stock a further 5% below it.
                np_eventmat[symbol][i] = 1.0

    return np_eventmat
def getMarketRel( dData, sRel='$SPX' ):
    '''
    @summary: Calculates market relative data.
    @param dData - Dictionary containing data to be used, requires specific
                   naming: open/high/low/close/volume
    @param sRel - Stock ticker to make the data relative to, $SPX is default.
    @return: Dictionary of market relative values
    '''
    lsCloseColumns = dData['close'].columns
    if sRel not in lsCloseColumns:
        raise KeyError( 'Market relative stock %s not found in getMR()'%sRel )

    dRet = {}

    # Every key is made market relative, with volume as the one exception.
    for sKey in dData.keys():

        # Volume is only copied over.
        if sKey == 'volume':
            dRet['volume'] = dData['volume']
            continue

        dfOriginal = dData[sKey]
        dfWork = dfOriginal.copy()

        # Returns, computed on the copy's value array.
        tsu.returnize0( dfWork.values )

        # Take out the market's returns and go 1-based.
        dfWork = (dfWork - dfWork[sRel]) + 1.

        # All stocks start at 100, then compound row by row.
        dfWork.ix[0, :] = 100.
        dfWork = dfWork.cumprod(axis=0)

        # The market stock itself is left as it was.
        dfWork[sRel] = dfOriginal[sRel]

        dRet[sKey] = dfWork

    return dRet
def featMomentum(dData, lLookback=20, b_human=False):
    '''
    @summary: Momentum indicator: rolling sum of the returns produced by
              tsu.returnize0 over the last lLookback rows.
    @param dData: Dictionary of data to use; only dData['close'] is read
    @param lLookback: Number of days to look in the past
    @param b_human: if true return dataframe to plot instead: the close
                    prices with every column rescaled to start at 1000
    @return: DataFrame array containing values
    '''
    dfClose = dData['close']

    if b_human:
        # Rescale each column by 1000 / (its first value). This returns a new
        # frame: the original overwrote the caller's dData['close'] columns.
        # 1000.0 keeps the division in floating point for integer prices.
        return dfClose * (1000.0 / dfClose.values[0, :])

    dfPrice = dfClose.copy()

    # Calculate Returns
    tsu.returnize0(dfPrice.values)

    # Calculate rolling sum; fall back to the method form on pandas versions
    # that no longer provide the module-level rolling_sum.
    if hasattr(pand, 'rolling_sum'):
        dfRet = pand.rolling_sum(dfPrice, lLookback)
    else:
        dfRet = dfPrice.rolling(lLookback).sum()

    return dfRet
def findEvents(self, symbols, columnIndexes, verbose=False):
    """
    Build the event matrix for the configured study.

    The event dates come from self.getDates. Which of them are marked
    depends on the configuration:
      - self.analyzeLookbackDays set: a date is marked when self.analyzeStock
        accepts it;
      - a complete market or stock change range configured (both Min and Max
        different from 'NA'): the matrix comes from self.getRangeChangeMatrix;
      - a market or stock value change configured: the matrix comes from
        self.getValueChangeMatrix;
      - otherwise every listed date is marked.

    @param symbols: symbols to read
    @param columnIndexes: passed through to self.getDates
    @param verbose: print progress messages when true
    @return: the event matrix
    """
    eventDetails = self.eventDetailsData

    # Trading days, stamped at 16:00.
    closingTime = dt.timedelta(hours=16)
    timestamps = du.getNSEdays(self.startday, self.endday, closingTime)
    stockDates = self.getDates(eventDetails, timestamps, columnIndexes)

    dataobj = da.DataAccess('NSEData')
    if verbose:
        print(__name__ + " reading data")

    # Read the prices and fill the gaps forward, then backward.
    close = dataobj.get_data(timestamps, symbols, self.closefield)
    close = close.fillna(method='ffill')
    close = close.fillna(method='backfill')

    # Convert the price values to daily returns.
    tsu.returnize0(close.values)
    NSEValues = close[self.marketSymbol]

    # Return of each stock relative to the market: a stock up 5% on a day the
    # market rose 3% has a market-relative return of 2%.
    mktneutDM = close - close[self.marketSymbol]

    # Start from a copy of that frame and blank every cell.
    np_eventmat = copy.deepcopy(mktneutDM)
    for sym in symbols:
        for stamp in timestamps:
            np_eventmat[sym][stamp] = np.nan

    if verbose:
        print(__name__ + " finding events")

    # Each entry of stockDates is read as [stock name, date, date, ...].
    if self.analyzeLookbackDays:
        for stock in stockDates:
            stockName = stock[0]
            for position in range(1, len(stock)):
                eventDate = stock[position]
                if self.analyzeStock(stockName, eventDate, mktneutDM):
                    np_eventmat[stockName][eventDate] = 1.0
    elif ((self.marketChangeMin != 'NA' and self.marketChangeMax != 'NA') or
          (self.stockChangeMin != 'NA' and self.stockChangeMax != 'NA')):
        np_eventmat = self.getRangeChangeMatrix(stockDates, NSEValues,
                                                mktneutDM, np_eventmat)
    elif self.marketValueChange != 'NA' or self.stockValueChange != 'NA':
        np_eventmat = self.getValueChangeMatrix(stockDates, NSEValues,
                                                mktneutDM, np_eventmat)
    else:
        for stock in stockDates:
            stockName = stock[0]
            for position in range(1, len(stock)):
                np_eventmat[stockName][stock[position]] = 1.0

    return np_eventmat
def main():
    ''' Main Function'''

    # Read the portfolio file and show it, first as read, then sorted by
    # symbol name.
    na_portfolio = np.loadtxt('tutorial3portfolio.csv', dtype='S5,f4',
                              delimiter=',', comments="#", skiprows=1)
    print(na_portfolio)
    na_portfolio = sorted(na_portfolio, key=lambda x: x[0])
    print(na_portfolio)

    # Split the rows into parallel lists of symbols and allocations.
    ls_port_syms = [port[0] for port in na_portfolio]
    lf_port_alloc = [port[1] for port in na_portfolio]

    # Data access object with Yahoo as the source.
    c_dataobj = da.DataAccess('Yahoo')
    ls_all_syms = c_dataobj.get_all_symbols()

    # Symbols that are in the portfolio but unknown to the data source are
    # reported and dropped together with their allocation.
    ls_bad_syms = list(set(ls_port_syms) - set(ls_all_syms))
    if ls_bad_syms:
        print(" ".join(["Portfolio contains bad symbols : ", str(ls_bad_syms)]))
    for s_sym in ls_bad_syms:
        i_index = ls_port_syms.index(s_sym)
        del ls_port_syms[i_index]
        del lf_port_alloc[i_index]

    # Three years of history ending on 2011-01-01.
    dt_end = dt.datetime(2011, 1, 1)
    dt_start = dt_end - dt.timedelta(days=1095)

    # Closing prices are wanted, hence the 16:00 time stamp.
    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)

    # Read all keys in one go and index the resulting frames by key.
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_port_syms, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Returns are computed on a filled copy of the close prices.
    df_rets = d_data['close'].copy()
    for s_method in ('ffill', 'bfill'):
        df_rets = df_rets.fillna(method=s_method)

    # returnize0 works on the ndarray, not on the dataframe.
    na_rets = df_rets.values
    tsu.returnize0(na_rets)

    # Portfolio returns and the cumulative curves to plot.
    na_portrets = np.sum(na_rets * lf_port_alloc, axis=1)
    na_port_total = np.cumprod(na_portrets + 1)
    na_component_total = np.cumprod(na_rets + 1, axis=0)

    # Plot the components (faded) and the portfolio, then save as pdf.
    plt.clf()
    fig = plt.figure()
    fig.add_subplot(111)
    plt.plot(ldt_timestamps, na_component_total, alpha=0.4)
    plt.plot(ldt_timestamps, na_port_total)
    plt.legend(ls_port_syms + ['Portfolio'])
    plt.ylabel('Cumulative Returns')
    plt.xlabel('Date')
    fig.autofmt_xdate(rotation=45)
    plt.savefig('tutorial3.pdf', format='pdf')
# Benchmark values on the trading timestamps, with the portfolio next to them.
marketdata = pd.DataFrame(marketdata, index=timestamps, columns=[benchmark])
marketdata['portfolio'] = values['portfolio']
names = [benchmark, 'portfolio']

# Work on the raw value array: column 0 is the benchmark, column 1 the fund.
pricedata = marketdata.values
benchmark_start = pricedata[0, 0]
fund_start = pricedata[0, 1]

# Rescale the benchmark so that it starts at the fund's initial value.
pricedata[:, 0] = (pricedata[:, 0] / benchmark_start) * fund_start

# Plot both curves on a cleared figure and save them.
plt.clf()
plt.plot(timestamps, pricedata)
plt.legend(names, loc=3)
plt.ylabel('Fund Value')
plt.xlabel('Date')
plt.savefig('analysis.pdf', format='pdf')

# Daily returns of the portfolio, computed on a copy of its values.
prices = marketdata['portfolio'].values
returns = copy.deepcopy(prices)
tsu.returnize0(returns)

# Summary figures; the Sharpe ratio is scaled by sqrt(250).
sharpe_ratio = np.sqrt(250) * returns.mean() / returns.std()
total_return_pct = 100 * prices[-1] / prices[0] - 100
daily_std_pct = 100 * returns.std()

print("Sharpe Ratio: " + str(sharpe_ratio))
print("Total return(%): " + str(total_return_pct))
print("Std. deviation of daily returns(%): " + str(daily_std_pct))
def main():
    """Main Function"""

    # S&P 100
    ls_symbols = [
        "AAPL", "ABT", "ACN", "AEP", "ALL", "AMGN", "AMZN", "APC", "AXP", "BA",
        "BAC", "BAX", "BHI", "BK", "BMY", "BRK.B", "CAT", "C", "CL", "CMCSA",
        "COF", "COP", "COST", "CPB", "CSCO", "CVS", "CVX", "DD", "DELL", "DIS",
        "DOW", "DVN", "EBAY", "EMC", "EXC", "F", "FCX", "FDX", "GD", "GE",
        "GILD", "GOOG", "GS", "HAL", "HD", "HNZ", "HON", "HPQ", "IBM", "INTC",
        "JNJ", "JPM", "KFT", "KO", "LLY", "LMT", "LOW", "MA", "MCD", "MDT",
        "MET", "MMM", "MO", "MON", "MRK", "MS", "MSFT", "NKE", "NOV", "NSC",
        "NWSA", "NYX", "ORCL", "OXY", "PEP", "PFE", "PG", "PM", "QCOM", "RF",
        "RTN", "SBUX", "SLB", "HSH", "SO", "SPG", "T", "TGT", "TWX", "TXN",
        "UNH", "UPS", "USB", "UTX", "VZ", "WAG", "WFC", "WMB", "WMT", "XOM",
    ]

    # Data access object with Yahoo as the source.
    c_dataobj = da.DataAccess("Yahoo")
    ls_all_syms = c_dataobj.get_all_symbols()

    # Drop the symbols the data source does not know about.
    ls_bad_syms = list(set(ls_symbols) - set(ls_all_syms))
    for s_sym in ls_bad_syms:
        ls_symbols.remove(s_sym)

    # One year up to 2010-01-01 for the frontier, the following year as test.
    dt_end = dt.datetime(2010, 1, 1)
    dt_start = dt_end - dt.timedelta(days=365)
    dt_test = dt_end + dt.timedelta(days=365)

    # Closing prices are wanted, hence the 16:00 time stamp.
    dt_timeofday = dt.timedelta(hours=16)

    # Trading days of both periods.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
    ldt_timestamps_test = du.getNYSEdays(dt_end, dt_test, dt_timeofday)

    # Only the close prices are read.
    df_close = c_dataobj.get_data(ldt_timestamps, ls_symbols, "close")
    df_close_test = c_dataobj.get_data(ldt_timestamps_test, ls_symbols, "close")

    # Fill missing values forward, then backward.
    df_close = df_close.fillna(method="ffill").fillna(method="bfill")
    df_close_test = df_close_test.fillna(method="ffill").fillna(method="bfill")

    # Daily returns, computed on copies of the value arrays.
    na_data = df_close.values.copy()
    na_data_test = df_close_test.values.copy()
    tsu.returnize0(na_data)
    tsu.returnize0(na_data_test)

    # Frontier of each period; only the curve is needed for the test year.
    lf_returns, lf_std, lna_portfolios, na_avgrets, na_std = getFrontier(na_data)
    lf_returns_test, lf_std_test, _, _, _ = getFrontier(na_data_test)

    # Plot both efficient frontiers.
    plt.clf()
    plt.plot(lf_std, lf_returns, "b")
    plt.plot(lf_std_test, lf_returns_test, "r")

    # How the first period's frontier portfolios perform in the test period.
    lna_port_rets = [np.dot(na_data_test, na_portfolio)
                     for na_portfolio in lna_portfolios]
    lf_std_port_test = [np.std(na_port_rets) for na_port_rets in lna_port_rets]
    lf_ret_port_test = [np.average(na_port_rets) for na_port_rets in lna_port_rets]
    plt.plot(lf_std_port_test, lf_ret_port_test, "k")

    # Individual stock risk/return as green +
    for i, f_ret in enumerate(na_avgrets):
        plt.plot(na_std[i], f_ret, "g+")

    # # Plot some arrows showing transition of efficient frontier
    # for i in range(0, 101, 10):
    #     plt.arrow(lf_std[i], lf_returns[i], lf_std_port_test[i] - lf_std[i],
    #               lf_ret_port_test[i] - lf_returns[i], color='k')

    # Labels and Axis
    ls_legend = ["2009 Frontier", "2010 Frontier",
                 "Performance of '09 Frontier in 2010"]
    plt.legend(ls_legend, loc="lower right")
    plt.title("Efficient Frontier For S&P 100 ")
    plt.ylabel("Expected Return")
    plt.xlabel("StDev")
    plt.savefig("tutorial8.pdf", format="pdf")
def study(self, filename, method="mean",
          plotMarketNeutral=True,
          plotErrorBars=False,
          plotEvents=False,
          marketSymbol='SPY'):
    """
    Creates an event study plot and saves it as a pdf.

    The plot shows the mean cumulative return around the events, normalized
    to 1.0 on the event day. The X-axis is the relative time frame from
    -self.lookback_days to self.lookforward_days.

    parameters :
      filename          - output file. Example filename="MyStudy.pdf"
      method            - accepted for compatibility; not read by this method
      plotMarketNeutral - subtract the marketSymbol returns from every stock
                          (beta = 1 is assumed for all stocks). marketSymbol
                          must exist in the data when this is True; its
                          column is removed from self.eventMatrix.
      plotErrorBars     - draw error bars of one standard deviation from the
                          event day onwards
      plotEvents        - draw one faint line per event
      marketSymbol      - column used as the market
    """
    # compute 0 centered daily returns
    self.dailyret = self.close.copy()
    tsu.returnize0(self.dailyret.values)

    # make it market neutral
    if plotMarketNeutral:
        # assuming beta = 1 for all stocks -- unrealistic, but easily fixable.
        self.mktneutDM = self.dailyret - self.dailyret[marketSymbol]
        # remove the market column from consideration
        del(self.mktneutDM[marketSymbol])
        del(self.eventMatrix[marketSymbol])
    else:
        self.mktneutDM = self.dailyret

    # Wipe out events which are on the boundary: they do not have a full
    # window of returns around them. The tail start is computed explicitly
    # because a "-0:" slice would select (and wipe) every row when
    # lookforward_days is 0.
    events = self.eventMatrix.values
    numrows = events.shape[0]
    events[0:self.lookback_days, :] = np.nan
    events[max(numrows - self.lookforward_days, 0):, :] = np.nan

    # prepare to build impact matrix
    rets = self.mktneutDM.values
    # nansum returns a float; an array dimension has to be an integer.
    numevents = int(np.nansum(events))
    numcols = events.shape[1]

    # create a blank impact matrix: one column of returns per event
    impact = np.zeros((self.total_days, numevents))
    currcol = 0

    # step through each column in event matrix
    for col in range(0, numcols):
        if self.verbose and col % 20 == 0:
            print(__name__ + " study: " + str(col) + " of " + str(numcols))
        # search each column for events
        for row in range(0, numrows):
            # when we find an event
            if events[row, col] == 1.0:
                # copy the daily returns in to the impact matrix
                impact[:, currcol] = rets[row - self.lookback_days:
                                          row + self.lookforward_days + 1,
                                          col]
                currcol = currcol + 1

    # now compute cumulative daily returns
    impact = np.cumprod(impact + 1, axis=0)
    impact = impact / impact[0, :]
    # normalize everything to the time of the event
    impact = impact / impact[self.lookback_days, :]

    # prepare data for plot
    studystat = np.mean(impact, axis=1)
    studystd = np.std(impact, axis=1)
    studyrange = list(range(-self.lookback_days, self.lookforward_days + 1))

    # plot baby
    plt.clf()
    if plotEvents:
        # draw a line for each event
        plt.plot(studyrange, impact, alpha=0.1, color='#FF0000')

    # draw a horizontal line at Y = 1.0 across the whole plot. axhline takes
    # xmin/xmax as fractions of the axes width, not data coordinates, so the
    # defaults (0 and 1) are what is wanted here.
    plt.axhline(y=1.0, color='#000000')

    if plotErrorBars == True:
        # draw errorbars if user wants them
        plt.errorbar(studyrange[self.lookback_days:],
                     studystat[self.lookback_days:],
                     yerr=studystd[self.lookback_days:],
                     ecolor='#AAAAFF',
                     alpha=0.1)
    plt.plot(studyrange, studystat, color='#0000FF', linewidth=3,
             label='mean')

    # set the limits of the axes to appropriate ranges
    plt.ylim(min(min(studystat), 0.5), max(max(studystat), 1.2))
    plt.xlim(min(studyrange) - 1, max(studyrange) + 1)

    # draw titles and axes. The two titles were swapped in the original: the
    # "market relative" wording belongs to the market neutral plot.
    if plotMarketNeutral:
        plt.title(('market relative mean of ' +
                   str(int(numevents)) + ' events'))
    else:
        plt.title(('mean of ' + str(int(numevents)) + ' events'))
    plt.xlabel('Days')
    plt.ylabel('Cumulative Abnormal Returns')
    plt.draw()
    plt.savefig(filename, format='pdf')
def main():
    ''' Main Function

    Reads 'example-data.csv' from the current directory, prints the first
    five rows of prices and dates, and writes five PDF charts
    (adjustedclose, normalized, rets, scatterSPXvXOM, scatterSPXvGLD).

    The CSV is read with one header row skipped; the first three columns
    are used as the date parts passed to dt.date (year, month, day) and
    the remaining columns as prices, one column per entry of ls_symbols.
    '''

    # Reading the csv file.
    na_data = np.loadtxt('example-data.csv', delimiter=',', skiprows=1)
    na_price = na_data[:, 3:]  # Default np.loadtxt datatype is float.
    na_dates = np.int_(na_data[:, 0:3])  # Dates should be int
    ls_symbols = ['$SPX', 'XOM', 'GOOG', 'GLD']

    # Column positions are looked up from the symbol list instead of being
    # repeated as magic numbers in every plot call below.
    i_spx = ls_symbols.index('$SPX')
    i_xom = ls_symbols.index('XOM')
    i_gld = ls_symbols.index('GLD')

    # Printing the first 5 rows.  The single-argument call form prints the
    # same text under Python 2 and Python 3.
    print("First 5 rows of Price Data:")
    print(na_price[:5, :])
    print("")
    print("First 5 rows of Dates:")
    print(na_dates[:5, :])

    # Creating the timestamps from dates read
    ldt_timestamps = [dt.date(int(year), int(month), int(day))
                      for year, month, day in na_dates]

    # Plotting the prices with x-axis=timestamps
    plt.clf()
    plt.plot(ldt_timestamps, na_price)
    plt.legend(ls_symbols)
    plt.ylabel('Adjusted Close')
    plt.xlabel('Date')
    plt.savefig('adjustedclose.pdf', format='pdf')

    # Normalizing the prices to start at 1 and see relative returns
    na_normalized_price = na_price / na_price[0, :]

    # Plotting the normalized prices with x-axis=timestamps
    plt.clf()
    plt.plot(ldt_timestamps, na_normalized_price)
    plt.legend(ls_symbols)
    plt.ylabel('Normalized Close')
    plt.xlabel('Date')
    plt.savefig('normalized.pdf', format='pdf')

    # Copy the normalized prices to a new ndarray to find returns.
    na_rets = na_normalized_price.copy()

    # Calculate the daily returns of the prices. (Inplace calculation)
    tsu.returnize0(na_rets)

    # Plotting the daily returns for the first 50 days
    plt.clf()
    plt.plot(ldt_timestamps[0:50], na_rets[0:50, i_spx])  # $SPX 50 days
    plt.plot(ldt_timestamps[0:50], na_rets[0:50, i_xom])  # XOM 50 days
    plt.axhline(y=0, color='r')
    plt.legend(['$SPX', 'XOM'])
    plt.ylabel('Daily Returns')
    plt.xlabel('Date')
    plt.savefig('rets.pdf', format='pdf')

    # Plotting the scatter plot of daily returns between XOM VS $SPX
    plt.clf()
    plt.scatter(na_rets[:, i_spx], na_rets[:, i_xom], c='blue')
    plt.ylabel('XOM')
    plt.xlabel('$SPX')
    plt.savefig('scatterSPXvXOM.pdf', format='pdf')

    # Plotting the scatter plot of daily returns between $SPX VS GLD
    plt.clf()
    plt.scatter(na_rets[:, i_spx], na_rets[:, i_gld], c='blue')  # $SPX v GLD
    plt.ylabel('GLD')
    plt.xlabel('$SPX')
    plt.savefig('scatterSPXvGLD.pdf', format='pdf')
# Excerpt: builds two one-year return samples around dtEnd and plots their
# efficient frontiers.  dtEnd, norgateObj, lsSymbols and getFrontier are
# defined outside this excerpt.

# Training window is the 365 days before dtEnd, test window the 365 days after.
dtStart = dtEnd - dt.timedelta(days=365)
dtTest = dtEnd + dt.timedelta(days=365)
# 16:00 timestamps, i.e. the daily close.
timeofday = dt.timedelta(hours=16)

# Trading days for each window.
ldtTimestamps = du.getNYSEdays(dtStart, dtEnd, timeofday)
ldtTimestampTest = du.getNYSEdays(dtEnd, dtTest, timeofday)

# Close prices for both windows, same symbols.
dmClose = norgateObj.get_data(ldtTimestamps, lsSymbols, "close")
dmTest = norgateObj.get_data(ldtTimestampTest, lsSymbols, "close")

# Work on copies so the in-place helpers below do not touch the source data.
naData = dmClose.values.copy()
naDataTest = dmTest.values.copy()

# NOTE(review): presumably fills gaps forward then backward before the
# prices are converted to daily returns in place -- confirm against tsu.
tsu.fillforward(naData)
tsu.fillbackward(naData)
tsu.returnize0(naData)

tsu.fillforward(naDataTest)
tsu.fillbackward(naDataTest)
tsu.returnize0(naDataTest)

''' Get efficient frontiers '''
(lfReturn, lfStd, lnaPortfolios, naAvgRets, naStd) = getFrontier(naData)
# Only the first two results are kept for the test window; the other three
# are deliberately discarded into the throwaway name `unused`.
(lfReturnTest, lfStdTest, unused, unused, unused) = getFrontier(naDataTest)

plt.clf()
fig = plt.figure()

''' Plot efficient frontiers '''
# Training frontier in blue, test frontier in red (std on x, return on y).
plt.plot(lfStd, lfReturn, 'b')
plt.plot(lfStdTest, lfReturnTest, 'r')

''' Plot where efficient frontier WOULD be the following year '''
# Filled by code that follows this excerpt.
lfRetTest = []
# Plot the normalized closing data # plt.clf() normdat = pricedat / pricedat[0, :] plt.plot(newtimestamps, normdat) plt.legend(symbols) plt.ylabel('Normalized Close') plt.xlabel('Date') savefig('normalized.pdf', format='pdf') # # Plot daily returns # plt.clf() plt.cla() tsu.returnize0(normdat) plt.plot(newtimestamps[0:50], normdat[0:50, 3]) # $SPX 50 days plt.plot(newtimestamps[0:50], normdat[0:50, 4]) # XOM 50 days plt.axhline(y=0, color='r') plt.legend(['$SPX', 'XOM']) plt.ylabel('Daily Returns') plt.xlabel('Date') savefig('rets.pdf', format='pdf') # # Scatter plot # plt.clf() plt.cla() plt.scatter(normdat[:, 3], normdat[:, 4], c='blue') # $SPX v XOM plt.ylabel('XOM')
def study(self, filename, method="mean",
          plotMarketNeutral=True,
          plotErrorBars=False,
          plotEvents=False,
          marketSymbol='$SPX'):
    """
    Create an event study plot and save it to ``filename``.

    For every cell of self.eventMatrix equal to 1.0, the daily returns of
    that column from self.lookback_days before the event to
    self.lookforward_days after it are collected, compounded, and
    normalized to 1.0 on the event day.  The mean across all events is
    plotted against the relative day (-lookback_days .. lookforward_days).

    parameters :
        filename          - output path. Example filename="MyStudy.pdf".
                            The image is always written in 'jpg' format,
                            whatever the extension.
        method            - accepted for compatibility; not used here.
        plotMarketNeutral - if True, subtract the marketSymbol returns from
                            every column (beta assumed to be 1) and drop
                            the market column.  marketSymbol must then
                            exist in both self.close and self.eventMatrix.
        plotErrorBars     - if truthy, draw +/- one standard deviation
                            error bars from the event day onward.
        plotEvents        - if truthy, also draw one faint line per event.
        marketSymbol      - column used as the market.

    side effects : sets self.dailyret and self.mktneutDM, and modifies
    self.eventMatrix in place (boundary rows set to NaN; market column
    deleted when plotMarketNeutral is True).
    NOTE(review): because the market column is deleted from
    self.eventMatrix, a second call with plotMarketNeutral=True presumably
    fails on that delete -- confirm with callers.
    """

    # compute 0 centered daily returns (returnize0 works in place on the
    # array behind the copy)
    self.dailyret = self.close.copy()
    tsu.returnize0(self.dailyret.values)

    # make it market neutral
    if plotMarketNeutral:
        # assuming beta = 1 for all stocks -- this is unrealistic, but
        # easily fixable.
        self.mktneutDM = self.dailyret - self.dailyret[marketSymbol]
        # remove the market column from consideration
        del(self.mktneutDM[marketSymbol])
        del(self.eventMatrix[marketSymbol])
    else:
        self.mktneutDM = self.dailyret

    # Wipe out events which are on the boundary: their window would run
    # off either end of the data.
    self.eventMatrix.values[0:self.lookback_days, :] = NaN
    # A "[-0:]" slice selects every row, so only wipe the tail when there
    # actually is a look-forward window.
    if self.lookforward_days > 0:
        self.eventMatrix.values[-self.lookforward_days:, :] = NaN

    # prepare to build impact matrix
    rets = self.mktneutDM.values
    events = self.eventMatrix.values
    numcols = events.shape[1]

    # Locate every event, column by column and top to bottom within a
    # column, so the impact columns keep the original ordering.
    event_cells = []
    for col in range(numcols):
        if self.verbose and col % 20 == 0:
            print(__name__ + " study: " + str(col) + " of " + str(numcols))
        for row in np.flatnonzero(events[:, col] == 1.0):
            event_cells.append((row, col))

    # Counting the located cells gives an integer (np.zeros rejects a float
    # dimension) that always matches the number of columns filled below.
    numevents = len(event_cells)

    # create a blank impact matrix and copy the daily returns in to it
    # (self.total_days is expected to equal the window length,
    # lookback_days + lookforward_days + 1)
    impact = np.zeros((self.total_days, numevents))
    for currcol, (row, col) in enumerate(event_cells):
        impact[:, currcol] = rets[row - self.lookback_days:
                                  row + self.lookforward_days + 1, col]

    # now compute cumulative daily returns
    impact = cumprod(impact + 1, axis=0)
    # Rebase on the first row; mathematically superseded by the next step
    # but kept so the floating point results are unchanged.
    impact = impact / impact[0, :]
    # normalize everything to the time of the event
    impact = impact / impact[self.lookback_days, :]

    # prepare data for plot
    studystat = mean(impact, axis=1)
    studystd = std(impact, axis=1)
    studyrange = list(range(-self.lookback_days, self.lookforward_days + 1))

    # plot
    plt.clf()
    if plotEvents:
        # draw a line for each event
        plt.plot(studyrange, impact, alpha=0.1, color='#FF0000')
    # draw a horizontal line at Y = 1.0 across the whole axes (xmin/xmax
    # of axhline are axes fractions, so the defaults already span it)
    plt.axhline(y=1.0, color='#000000')
    if plotErrorBars:
        # draw errorbars if user wants them, from the event day onward
        plt.errorbar(studyrange[self.lookback_days:],
                     studystat[self.lookback_days:],
                     yerr=studystd[self.lookback_days:],
                     ecolor='#AAAAFF',
                     alpha=0.1)
    plt.plot(studyrange, studystat, color='#0000FF', linewidth=3,
             label='mean')
    # set the limits of the axes to appropriate ranges
    plt.ylim(min(min(studystat), 0.5), max(max(studystat), 1.2))
    plt.xlim(min(studyrange) - 1, max(studyrange) + 1)
    # draw titles and axes
    if plotMarketNeutral:
        plt.title(('market relative mean of ' +
                   str(numevents) + ' events'))
    else:
        plt.title(('mean of ' +
                   str(numevents) + ' events'))
    plt.xlabel('Days')
    plt.ylabel('Cumulative Abnormal Returns')
    plt.draw()
    # The original code saved with format='pdf'; this version writes jpg.
    savefig(filename, format='jpg')
def main():
    """Chart 2006-2010 close prices of five symbols read from the Yahoo source.

    Writes adjustedclose.pdf, normalized.pdf, rets.pdf, scatterSPXvXOM.pdf
    and scatterSPXvGLD.pdf to the current directory.
    """

    def label_and_save(y_label, x_label, pdf_name):
        # Shared tail of every chart: axis labels, then write the PDF.
        plt.ylabel(y_label)
        plt.xlabel(x_label)
        plt.savefig(pdf_name, format="pdf")

    tickers = ["AAPL", "GLD", "GOOG", "$SPX", "XOM"]

    # Chart period; closing prices are stamped at 16:00.
    first_day = dt.datetime(2006, 1, 1)
    last_day = dt.datetime(2010, 12, 31)
    close_time = dt.timedelta(hours=16)

    # Trading days between the start and the end.
    trading_days = du.getNYSEdays(first_day, last_day, close_time)

    # All fields are read in one go from the Yahoo source, then keyed by
    # field name.
    source = da.DataAccess("Yahoo")
    fields = ["open", "high", "low", "close", "volume", "actual_close"]
    frames = source.get_data(trading_days, tickers, fields)
    by_field = dict(zip(fields, frames))

    # ndarray of close prices, one column per ticker.
    prices = by_field["close"].values

    # Column positions of the symbols singled out in the later charts.
    spx = tickers.index("$SPX")
    xom = tickers.index("XOM")
    gld = tickers.index("GLD")

    # Chart 1: raw prices over time.
    plt.clf()
    plt.plot(trading_days, prices)
    plt.legend(tickers)
    label_and_save("Adjusted Close", "Date", "adjustedclose.pdf")

    # Chart 2: prices rebased so every series starts at 1.
    rebased = prices / prices[0, :]
    plt.clf()
    plt.plot(trading_days, rebased)
    plt.legend(tickers)
    label_and_save("Normalized Close", "Date", "normalized.pdf")

    # Daily returns are computed in place on a copy (returnize0 takes an
    # ndarray, not a dataframe).
    daily = rebased.copy()
    tsu.returnize0(daily)

    # Chart 3: first 50 days of returns for $SPX and XOM.
    head_days = trading_days[0:50]
    plt.clf()
    plt.plot(head_days, daily[0:50, spx])
    plt.plot(head_days, daily[0:50, xom])
    plt.axhline(y=0, color="r")
    plt.legend(["$SPX", "XOM"])
    label_and_save("Daily Returns", "Date", "rets.pdf")

    # Chart 4: scatter of XOM returns against $SPX returns.
    plt.clf()
    plt.scatter(daily[:, spx], daily[:, xom], c="blue")
    label_and_save("XOM", "$SPX", "scatterSPXvXOM.pdf")

    # Chart 5: scatter of GLD returns against $SPX returns.
    plt.clf()
    plt.scatter(daily[:, spx], daily[:, gld], c="blue")
    label_and_save("GLD", "$SPX", "scatterSPXvGLD.pdf")
# Excerpt: quick backtest of a fixed-allocation portfolio.  portsyms,
# portalloc, index and dataobj are defined outside this excerpt.

# NOTE(review): these two pops keep the symbol and allocation lists in
# step; they presumably sit inside an enclosing loop/conditional that is
# not visible here -- confirm the indentation against the full file.
portsyms.pop(index)
portalloc.pop(index)

# Configure the time range and read the data.
# First, set the time boundaries.
endday = dt.datetime(2011, 1, 1)
startday = endday - dt.timedelta(days=1095)  # 3 years back (3 * 365 days)
timeofday = dt.timedelta(hours=16)
timestamps = du.getNYSEdays(startday, endday, timeofday)
close = dataobj.get_data(timestamps, portsyms, "close")

# A quick backtest on a copy of the price array.
rets = close.values.copy()
tsu.fillforward(rets)
# NOTE(review): returnize0 presumably turns the prices into daily returns
# in place (its result is not assigned) -- confirm against tsu.
tsu.returnize0(rets)

# Daily portfolio return = allocation-weighted sum across symbols.
# `sum` is called with axis=, so it must be the pylab/numpy sum rather
# than the builtin.
portrets = sum(rets * portalloc, axis=1)
# Cumulative growth of the portfolio and of each component (pylab cumprod).
porttot = cumprod(portrets + 1)
componenttot = cumprod(rets + 1, axis=0)

# Plot the components and the portfolio on one figure.
plt.clf()
fig = plt.figure()
fig.add_subplot(111)
plt.plot(timestamps, componenttot)
plt.plot(timestamps, porttot)
# names is the same list object as portsyms, not a copy.
names = portsyms
# Excerpt: builds two one-year return samples around dtEnd and plots their
# efficient frontiers.  dtEnd, norgateObj, lsSymbols and getFrontier are
# defined outside this excerpt.

# Training window is the 365 days before dtEnd, test window the 365 days after.
dtStart = dtEnd - dt.timedelta(days=365)
dtTest = dtEnd + dt.timedelta(days=365)
# 16:00 timestamps, i.e. the daily close.
timeofday=dt.timedelta(hours=16)

# Trading days for each window.
ldtTimestamps = du.getNYSEdays( dtStart, dtEnd, timeofday )
ldtTimestampTest = du.getNYSEdays( dtEnd, dtTest, timeofday )

# Close prices for both windows, same symbols.
dmClose = norgateObj.get_data(ldtTimestamps, lsSymbols, "close")
dmTest = norgateObj.get_data(ldtTimestampTest, lsSymbols, "close")

# Work on copies so the in-place helpers below do not touch the source data.
naData = dmClose.values.copy()
naDataTest = dmTest.values.copy()

# NOTE(review): presumably fills gaps forward then backward before the
# prices are converted to daily returns in place -- confirm against tsu.
tsu.fillforward(naData)
tsu.fillbackward(naData)
tsu.returnize0(naData)

tsu.fillforward(naDataTest)
tsu.fillbackward(naDataTest)
tsu.returnize0(naDataTest)

''' Get efficient frontiers '''
(lfReturn, lfStd, lnaPortfolios, naAvgRets, naStd) = getFrontier( naData)
# Only the first two results are kept for the test window; the other three
# are deliberately discarded into the throwaway name `unused`.
(lfReturnTest, lfStdTest, unused, unused, unused) = getFrontier( naDataTest)

plt.clf()
fig = plt.figure()

''' Plot efficient frontiers '''
# Training frontier in blue, test frontier in red (std on x, return on y).
plt.plot(lfStd,lfReturn, 'b')
plt.plot(lfStdTest,lfReturnTest, 'r')
# Plot the normalized closing data # plt.clf() normdat = pricedat/pricedat[0,:] plt.plot(newtimestamps,normdat) plt.legend(symbols) plt.ylabel('Normalized Close') plt.xlabel('Date') savefig('Normalized.jpg',format='jpg') ## ## Plot daily returns ## plt.clf() plt.cla() tsu.returnize0(normdat) plt.plot(newtimestamps[0:50],normdat[0:50,0]) # $NSE 50 days plt.plot(newtimestamps[0:50],normdat[0:50,1]) # TCS 50 days plt.axhline(y=0,color='r') plt.legend(['$NSE','TCS']) plt.ylabel('Daily Returns') plt.xlabel('Date') savefig('Daily_Returns.jpg',format='jpg') ## ## Scatter plot ## plt.clf() plt.cla() plt.scatter(normdat[:,0],normdat[:,1],c='blue') # $NSE v TCS plt.ylabel('TCS')
# Excerpt: quick backtest of a fixed-allocation portfolio.  portsyms,
# portalloc, index and dataobj are defined outside this excerpt.

# NOTE(review): these two pops keep the symbol and allocation lists in
# step; they presumably sit inside an enclosing loop/conditional that is
# not visible here -- confirm the indentation against the full file.
portsyms.pop(index)
portalloc.pop(index)

# Configure the time range and read the data.
# First, set the time boundaries.
endday = dt.datetime(2011,1,1)
startday = endday - dt.timedelta(days=1095) # 3 years back (3 * 365 days)
timeofday = dt.timedelta(hours=16)
timestamps = du.getNYSEdays(startday, endday, timeofday)
close = dataobj.get_data(timestamps, portsyms, "close")

# A quick backtest on a copy of the price array.
rets = close.values.copy()
tsu.fillforward(rets)
# NOTE(review): returnize0 presumably turns the prices into daily returns
# in place (its result is not assigned) -- confirm against tsu.
tsu.returnize0(rets)

# Daily portfolio return = allocation-weighted sum across symbols.
# `sum` is called with axis=, so it must be the pylab/numpy sum rather
# than the builtin.
portrets = sum(rets*portalloc, axis=1)
# Cumulative growth of the portfolio and of each component (pylab cumprod).
porttot = cumprod(portrets+1)
componenttot = cumprod(rets+1,axis=0)

# Plot the components and the portfolio on one figure.
plt.clf()
fig = plt.figure()
fig.add_subplot(111)
plt.plot(timestamps,componenttot)
plt.plot(timestamps,porttot)
# NOTE(review): names is the same list object as portsyms, so the append
# below also adds 'portfolio' to portsyms -- confirm that later code does
# not reuse portsyms as a pure symbol list.
names = portsyms
names.append('portfolio')