def featBeta( dData, lLookback=14, sMarket='$SPX', b_human=False ):
    '''
    @summary: Calculate beta relative to a given stock/index.
    @param dData: Dictionary of data to use (must contain a 'close' DataFrame)
    @param lLookback: Rolling window (days) for the covariance estimate
    @param sMarket: Symbol to calculate beta relative to
    @param b_human: if true return dataframe to plot
    @return: DataFrame array containing feature values
    '''
    # Work on a private copy of the closes, converted in place to gross returns.
    dfRets = dData['close'].copy()
    tsu.returnize1(dfRets.values)

    # beta_i = cov(r_i, r_mkt) / var(r_mkt).  Dividing the rolling covariance
    # column-wise by the market's covariance with itself (its variance)
    # turns every column into a beta series.
    tsMarket = dfRets[sMarket]
    dfRet = pand.rolling_cov(tsMarket, dfRets, lLookback)
    dfRet /= dfRet[sMarket]

    if b_human:
        # Plot mode: rescale every close series (in place) to start at 1000.
        for sym in dData['close']:
            dData['close'][sym] = dData['close'][sym] * (1000 / dData['close'][sym][0])
        return dData['close']
    return dfRet
def featBeta(dData, lLookback=14, sMarket='$SPX', b_human=False):
    '''
    @summary: Calculate beta relative to a given stock/index.
    @param dData: Dictionary of data to use (must contain a 'close' DataFrame)
    @param lLookback: Rolling window (days) for the covariance estimate
    @param sMarket: Symbol to calculate beta relative to
    @param b_human: if true return dataframe to plot
    @return: DataFrame array containing feature values
    '''
    dfPrice = dData['close']
    #''' Calculate returns '''
    # tsu.returnize1 mutates the array in place, so dfRets now holds gross returns.
    dfRets = dfPrice.copy()
    tsu.returnize1(dfRets.values)
    tsMarket = dfRets[sMarket]
    # Rolling covariance of every column against the market column; dividing
    # by the market's covariance with itself (its variance) yields beta.
    dfRet = pand.rolling_cov(tsMarket, dfRets, lLookback)
    dfRet /= dfRet[sMarket]
    if b_human:
        # NOTE(review): mutates dData['close'] in place, rescaling each
        # series to start at 1000 for plotting.
        for sym in dData['close']:
            x = 1000 / dData['close'][sym][0]
            dData['close'][sym] = dData['close'][sym] * x
        return dData['close']
    return dfRet
def featCorrelation(dData, lLookback=20, sRel='$SPX', b_human=False):
    '''
    @summary: Calculate correlation of two stocks.
    @param dData: Dictionary of data to use (must contain a 'close' DataFrame)
    @param lLookback: Number of days in the trailing correlation window
    @param sRel: Symbol every other column is correlated against
    @param b_human: if true return dataframe to plot
    @return: DataFrame array containing feature values
    '''
    dfPrice = dData['close']
    if sRel not in dfPrice.columns:
        raise KeyError("%s not found in data provided to featCorrelation" % sRel)

    # Prices -> gross returns (in place on the copied ndarray).
    naRets = dfPrice.values.copy()
    tsu.returnize1(naRets)
    dfHistReturns = pand.DataFrame(index=dfPrice.index, columns=dfPrice.columns, data=naRets)

    # Output is 1:1 with the price frame; seed it with zeros.
    dfRet = pand.DataFrame(index=dfPrice.index, columns=dfPrice.columns, data=np.zeros(dfPrice.shape))

    # The reference series is loop-invariant, so bind it once.
    tsRelativeReturns = dfHistReturns[sRel]
    for sStock in dfHistReturns.columns:
        tsHist = dfHistReturns[sStock]
        tsOut = dfRet[sStock]
        for i in range(len(tsHist.index)):
            if i < lLookback - 1:
                # Not enough history for a full window yet.
                tsOut[i] = float('nan')
            else:
                lo = i - (lLookback - 1)
                naCorr = np.corrcoef(tsHist[lo:i + 1], tsRelativeReturns[lo:i + 1])
                tsOut[i] = naCorr[0, 1]

    if b_human:
        # Plot mode: rescale every close series (in place) to start at 1000.
        for sym in dData['close']:
            dData['close'][sym] = dData['close'][sym] * (1000 / dData['close'][sym][0])
        return dData['close']
    return dfRet
def featCorrelation( dData, lLookback=20, sRel='$SPX', b_human=False ):
    '''
    @summary: Calculate correlation of two stocks.
    @param dData: Dictionary of data to use (must contain a 'close' DataFrame)
    @param lLookback: Number of days in the trailing correlation window
    @param sRel: Symbol every other column is correlated against
    @param b_human: if true return dataframe to plot
    @return: DataFrame array containing feature values
    '''
    dfPrice = dData['close']
    if sRel not in dfPrice.columns:
        raise KeyError( "%s not found in data provided to featCorrelation"%sRel )
    #''' Calculate returns '''
    # returnize1 converts the copied price array to gross returns in place.
    naRets = dfPrice.values.copy()
    tsu.returnize1(naRets)
    dfHistReturns = pand.DataFrame( index=dfPrice.index, columns=dfPrice.columns, data=naRets )
    #''' Feature DataFrame will be 1:1, we can use the price as a template '''
    dfRet = pand.DataFrame( index=dfPrice.index, columns=dfPrice.columns, data=np.zeros(dfPrice.shape) )
    #''' Loop through stocks '''
    for sStock in dfHistReturns.columns:
        tsHistReturns = dfHistReturns[sStock]
        tsRelativeReturns = dfHistReturns[sRel]
        tsRet = dfRet[sStock]
        #''' Loop over time '''
        for i in range(len(tsHistReturns.index)):
            #''' NaN if not enough data to do lookback '''
            if i < lLookback - 1:
                tsRet[i] = float('nan')
                continue
            # Pearson correlation over the trailing lLookback-day window.
            naCorr = np.corrcoef( tsHistReturns[ i-(lLookback-1):i+1 ], tsRelativeReturns[ i-(lLookback-1):i+1 ] )
            tsRet[i] = naCorr[0,1]
    if b_human:
        # NOTE(review): mutates dData['close'] in place, rescaling each
        # series to start at 1000 for plotting.
        for sym in dData['close']:
            x=1000/dData['close'][sym][0]
            dData['close'][sym]=dData['close'][sym]*x
        return dData['close']
    return dfRet
def testSTD(self):
    """Check featSTDReturn against TA-Lib's STDDEV on random price data."""
    na_prices = np.random.random(10000)
    d_data = {'close': pand.DataFrame(na_prices)}
    # sqrt(0.9) corrects for the degrees-of-freedom difference between the
    # two estimators (window of 10).
    na_feat = price.featSTDReturn(d_data, lLookback = 10, bRel = False).values.ravel() * np.sqrt( 0.9 )
    import QSTK.qstkutil.tsutil as tsu
    na_rets = tsu.returnize1(np.copy(na_prices)).ravel()
    na_expected = ta.STDDEV(real = na_rets, timeperiod = 10)
    np.testing.assert_array_almost_equal(na_feat, na_expected, err_msg = "values not equal", verbose = True)
def simulate(startdate, enddate, symbols, allocations):
    """
    Simulate a buy-and-hold portfolio over the given period.

    @param startdate: First date of the simulation
    @param enddate: Last date of the simulation
    @param symbols: List of ticker symbols held by the fund
    @param allocations: Initial allocation per symbol, aligned with `symbols`
    @return: tuple (std of daily returns, mean daily return, sharpe ratio,
             cumulative return)
    """
    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)
    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(startdate, enddate, dt_timeofday)
    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess('Yahoo')
    # Keys to be read from the data, it is good to read everything in one go.
    ls_keys = ['close']
    # Reading the data; timestamps and symbols are the ones specified above.
    data = c_dataobj.get_data(ldt_timestamps, symbols, ls_keys)[0]
    returns = tsu.returnize1(data)

    # Walk forward: the first day holds the initial allocations, every later
    # day compounds the previous day's value by that day's gross return.
    previous_day = None
    for row in data.iterrows():
        day = row[0]
        series = row[1]
        if previous_day is None:
            # BUG FIX: was `symbols.index(symbol)` inside the loop -- an
            # accidental O(n^2) lookup; enumerate gives the index directly.
            for index, symbol in enumerate(symbols):
                series[symbol] = allocations[index]
        else:
            daily_returns = returns.loc[day]
            for (symbol, price) in series.iteritems():
                series[symbol] = previous_day[symbol] * daily_returns[symbol]
        previous_day = series

    # BUG FIX: the daily total was hard-coded as row[0]+row[1]+row[2]+row[3],
    # which silently breaks for any portfolio that does not have exactly four
    # symbols.  skipna=False preserves the NaN-propagating behaviour of the
    # original '+' chain.
    daily_sum = data.sum(axis=1, skipna=False)
    tsu.returnize0(daily_sum)
    stats = daily_sum.describe()
    std_dr = stats['std']
    mean_dr = stats['mean']
    sharpe = tsu.get_sharpe_ratio(daily_sum.values)

    # Cumulative return is the product of the gross daily returns.
    cumulative_return = 1
    for value in daily_sum:
        cumulative_return *= (value + 1)
    return std_dr, mean_dr, sharpe, cumulative_return
def featSTD( dData, lLookback=20, bRel=True, b_human=False ):
    '''
    @summary: Calculate standard deviation
    @param dData: Dictionary of data to use (must contain a 'close' DataFrame)
    @param lLookback: Number of days to look in the past
    @param bRel: if True, divide the rolling std by the return level
    @param b_human: if true return dataframe to plot
    @return: DataFrame array containing values
    '''
    # Private copy of the closes, converted in place to gross returns.
    dfRets = dData['close'].copy()
    tsu.returnize1(dfRets.values)

    dfFeature = pand.rolling_std(dfRets, lLookback)
    if bRel:
        dfFeature = dfFeature / dfRets

    if b_human:
        # Plot mode: rescale every close series (in place) to start at 1000.
        for sym in dData['close']:
            dData['close'][sym] = dData['close'][sym] * (1000 / dData['close'][sym][0])
        return dData['close']
    return dfFeature
def featSTD(dData, lLookback=20, bRel=True, b_human=False):
    '''
    @summary: Calculate standard deviation
    @param dData: Dictionary of data to use (must contain a 'close' DataFrame)
    @param lLookback: Number of days to look in the past
    @param bRel: if True, divide the rolling std by the return level
    @param b_human: if true return dataframe to plot
    @return: DataFrame array containing values
    '''
    # dfPrice holds RETURNS after this call: returnize1 mutates the copied
    # values in place.
    dfPrice = dData['close'].copy()
    tsu.returnize1(dfPrice.values)
    dfRet = pand.rolling_std(dfPrice, lLookback)
    if bRel:
        dfRet = dfRet / dfPrice
    if b_human:
        # NOTE(review): mutates dData['close'] in place, rescaling each
        # series to start at 1000 for plotting.
        for sym in dData['close']:
            x = 1000 / dData['close'][sym][0]
            dData['close'][sym] = dData['close'][sym] * x
        return dData['close']
    return dfRet
def main(): (symbols, signals, dataAll, index2) = marketSimulator.main() curr_ownership = {} # KEY: TICKER, VALUE: NUMBER OF SHARES SOLD curr_cash = 10000 fundValue = pd.Series(index = index2) for timestamp in index2: for (symbol, rating) in signals[timestamp]: owned = curr_ownership.get(symbol, 0) if rating == 0: if owned == 0: pass elif owned > 0:# if we own that stock, sell all of it curr_price = dataAll[symbol].Close[timestamp] curr_cash += curr_price * owned curr_ownership[symbol] = 0 elif owned < 0:# if we owe that stock to someone else curr_price = dataAll[symbol].Close[timestamp] curr_cash -= curr_price * owned curr_ownership[symbol] = 0 elif (rating > 0) or (rating > 0):# buy signal amt = rating * 100 # print "BOUGHT ", amt ,"", symbol, "SHARES ON ", timestamp curr_price = dataAll[symbol].Close[timestamp] curr_ownership[symbol] = owned + amt curr_cash -= amt * curr_price fundValue[timestamp] = computeStockValue(curr_ownership, dataAll, timestamp, sorted(list(index2))) + curr_cash print fundValue plt.clf() plt.plot(fundValue) plt.savefig('fund.pdf', format='pdf') plt.clf() tsu.returnize1(fundValue.values) plt.plot(fundValue.index, fundValue.values) plt.xlabel("date") plt.ylabel("returns",) plt.savefig("returns.pdf", format = 'pdf') return fundValue
def stratMark( dtStart, dtEnd, dFuncArgs ):
    """
    @summary Markovitz strategy, generates a curve and then chooses a point on it.
    @param dtStart: Start date for portfolio
    @param dtEnd: End date for portfolio
    @param dFuncArgs: Dict of function args passed to the function.
              Required keys: 'dmPrice', 'sPeriod', 'lLookback', 'sMarkPoint';
              optional key: 'bAddAlpha'.
    @return DataFrame corresponding to the portfolio allocations, or None if a
            required argument is missing or sMarkPoint is invalid
    """
    # Validate required arguments; print a message and return None on failure.
    if not dFuncArgs.has_key('dmPrice'):
        print 'Error:', stratMark.__name__, 'requires dmPrice information'
        return
    if not dFuncArgs.has_key('sPeriod'):
        print 'Error:', stratMark.__name__, 'requires rebalancing period'
        return
    if not dFuncArgs.has_key('lLookback'):
        print 'Error:', stratMark.__name__, 'requires lookback'
        return
    if not dFuncArgs.has_key('sMarkPoint'):
        print 'Error:', stratMark.__name__, 'requires markowitz point to choose'
        return

    ''' Optional variables '''
    if not dFuncArgs.has_key('bAddAlpha'):
        bAddAlpha = False
    else:
        bAddAlpha = dFuncArgs['bAddAlpha']

    dmPrice = dFuncArgs['dmPrice']
    sPeriod = dFuncArgs['sPeriod']
    lLookback = dFuncArgs['lLookback']
    sMarkPoint = dFuncArgs['sMarkPoint']

    ''' Select rebalancing dates '''
    # Offset by 16:00 so rebalance timestamps line up with NYSE close prices.
    drNewRange = pand.DateRange(dtStart, dtEnd, timeRule=sPeriod) + pand.DateOffset(hours=16)

    dfAlloc = pand.DataMatrix()

    ''' Go through each rebalance date and calculate an efficient frontier for each '''
    for i, dtDate in enumerate(drNewRange):
        # NOTE(review): the dtStart parameter is deliberately re-bound here to
        # the start of this rebalance date's lookback window.
        dtStart = dtDate - pand.DateOffset(days=lLookback)
        if( dtStart < dmPrice.index[0] ):
            print 'Error, not enough data to rebalance'
            continue

        # Lookback window of prices -> gross returns, NaNs filled both ways.
        naRets = dmPrice.ix[ dtStart:dtDate ].values.copy()
        tsu.returnize1(naRets)
        tsu.fillforward(naRets)
        tsu.fillbackward(naRets)

        ''' Add alpha to returns '''
        if bAddAlpha:
            if i < len(drNewRange) - 1:
                # Peek at the returns between this rebalance and the next one.
                naFutureRets = dmPrice.ix[ dtDate:drNewRange[i+1] ].values.copy()
                tsu.returnize1(naFutureRets)
                tsu.fillforward(naFutureRets)
                tsu.fillbackward(naFutureRets)
                naAvg = np.mean( naFutureRets, axis=0 )
                ''' make a mix of past/future rets '''
                # Blend 5% of the average future return into each past row.
                # NOTE(review): this loop re-uses `i`, shadowing the enumerate
                # index; harmless only because the outer for re-binds `i` on
                # its next iteration.
                for i in range( naRets.shape[0] ):
                    naRets[i,:] = (naRets[i,:] + (naAvg*0.05)) / 1.05

        ''' Generate the efficient frontier '''
        (lfReturn, lfStd, lnaPortfolios) = getFrontier( naRets, fUpper=0.2, fLower=0.01 )
        lInd = 0
        ''' plt.clf() plt.plot( lfStd, lfReturn)'''
        if( sMarkPoint == 'Sharpe'):
            ''' Find portfolio with max sharpe '''
            # (lfReturn[i]-1)/lfStd[i]: excess gross return over volatility.
            fMax = -1E300
            for i in range( len(lfReturn) ):
                fShrp = (lfReturn[i]-1) / (lfStd[i])
                if fShrp > fMax:
                    fMax = fShrp
                    lInd = i
            ''' plt.plot( [lfStd[lInd]], [lfReturn[lInd]], 'ro') plt.draw() time.sleep(2) plt.show()'''
        elif( sMarkPoint == 'MinVar'):
            ''' use portfolio with minimum variance '''
            fMin = 1E300
            for i in range( len(lfReturn) ):
                if lfStd[i] < fMin:
                    fMin = lfStd[i]
                    lInd = i
        elif( sMarkPoint == 'MaxRet'):
            ''' use Portfolio with max returns (not really markovitz) '''
            lInd = len(lfReturn)-1
        elif( sMarkPoint == 'MinRet'):
            ''' use Portfolio with min returns (not really markovitz) '''
            lInd = 0
        else:
            # NOTE(review): the trailing '' is a stray empty string literal;
            # implicit concatenation makes it a no-op.
            print 'Warning: invalid sMarkPoint' ''
            return

        ''' Generate allocation based on selected portfolio '''
        naAlloc = (np.array( lnaPortfolios[lInd] ).reshape(1,-1) )
        dmNew = pand.DataMatrix( index=[dtDate], data=naAlloc, columns=(dmPrice.columns) )
        dfAlloc = dfAlloc.append( dmNew )

    dfAlloc['_CASH'] = 0.0
    return dfAlloc
# Build a one-year in-sample window ending at dtEnd and a one-year
# out-of-sample window starting there (dtEnd defined earlier in the file).
dtStart = dtEnd - dt.timedelta(days=365)
dtTest = dtEnd + dt.timedelta(days=365)
timeofday = dt.timedelta(hours=16)  # NYSE close timestamps
ldtTimestamps = du.getNYSEdays(dtStart, dtEnd, timeofday)
ldtTimestampTest = du.getNYSEdays(dtEnd, dtTest, timeofday)
dmClose = norgateObj.get_data(ldtTimestamps, lsSymbols, "close")
dmTest = norgateObj.get_data(ldtTimestampTest, lsSymbols, "close")
naData = dmClose.values.copy()
naDataTest = dmTest.values.copy()
# Fill NaNs both directions, then convert prices to gross returns in place.
tsu.fillforward(naData)
tsu.fillbackward(naData)
tsu.returnize1(naData)
tsu.fillforward(naDataTest)
tsu.fillbackward(naDataTest)
tsu.returnize1(naDataTest)
lPeriod = 21  # holding period (trading days) passed to getFrontier
''' Get efficient frontiers '''
(lfReturn, lfStd, lnaPortfolios, naAvgRets, naStd) = getFrontier(naData, lPeriod)
(lfReturnTest, lfStdTest, unused, unused, unused) = getFrontier(naDataTest, lPeriod)
plt.clf()
fig = plt.figure()
''' Plot efficient frontiers '''
def stratMark(dtStart, dtEnd, dFuncArgs):
    """
    @summary Markovitz strategy, generates a curve and then chooses a point on it.
    @param dtStart: Start date for portfolio
    @param dtEnd: End date for portfolio
    @param dFuncArgs: Dict of function args passed to the function.
              Required keys: 'dmPrice', 'sPeriod', 'lLookback', 'sMarkPoint';
              optional key: 'bAddAlpha'.
    @return DataFrame corresponding to the portfolio allocations, or None on
            a missing argument or an invalid sMarkPoint
    """
    # Argument validation: print an error and bail out with None.
    if not dFuncArgs.has_key('dmPrice'):
        print 'Error:', stratMark.__name__, 'requires dmPrice information'
        return
    if not dFuncArgs.has_key('sPeriod'):
        print 'Error:', stratMark.__name__, 'requires rebalancing period'
        return
    if not dFuncArgs.has_key('lLookback'):
        print 'Error:', stratMark.__name__, 'requires lookback'
        return
    if not dFuncArgs.has_key('sMarkPoint'):
        print 'Error:', stratMark.__name__, 'requires markowitz point to choose'
        return

    ''' Optional variables '''
    if not dFuncArgs.has_key('bAddAlpha'):
        bAddAlpha = False
    else:
        bAddAlpha = dFuncArgs['bAddAlpha']

    dmPrice = dFuncArgs['dmPrice']
    sPeriod = dFuncArgs['sPeriod']
    lLookback = dFuncArgs['lLookback']
    sMarkPoint = dFuncArgs['sMarkPoint']

    ''' Select rebalancing dates '''
    # 16:00 offset aligns the rebalance timestamps with NYSE close prices.
    drNewRange = pand.DateRange(dtStart, dtEnd, timeRule=sPeriod) + pand.DateOffset(hours=16)

    dfAlloc = pand.DataMatrix()

    ''' Go through each rebalance date and calculate an efficient frontier for each '''
    for i, dtDate in enumerate(drNewRange):
        # NOTE(review): dtStart is intentionally re-bound to the start of the
        # lookback window for this rebalance date.
        dtStart = dtDate - pand.DateOffset(days=lLookback)
        if (dtStart < dmPrice.index[0]):
            print 'Error, not enough data to rebalance'
            continue

        # Window of prices -> gross returns; NaNs filled forward then backward.
        naRets = dmPrice.ix[dtStart:dtDate].values.copy()
        tsu.returnize1(naRets)
        tsu.fillforward(naRets)
        tsu.fillbackward(naRets)

        ''' Add alpha to returns '''
        if bAddAlpha:
            if i < len(drNewRange) - 1:
                # Look-ahead returns between this rebalance and the next.
                naFutureRets = dmPrice.ix[dtDate:drNewRange[i + 1]].values.copy()
                tsu.returnize1(naFutureRets)
                tsu.fillforward(naFutureRets)
                tsu.fillbackward(naFutureRets)
                naAvg = np.mean(naFutureRets, axis=0)
                ''' make a mix of past/future rets '''
                # Mix 5% of the average future return into every past row.
                # NOTE(review): this loop shadows the enumerate index `i`;
                # benign because the outer for re-binds it each iteration.
                for i in range(naRets.shape[0]):
                    naRets[i, :] = (naRets[i, :] + (naAvg * 0.05)) / 1.05

        ''' Generate the efficient frontier '''
        (lfReturn, lfStd, lnaPortfolios) = getFrontier(naRets, fUpper=0.2, fLower=0.01)
        lInd = 0
        ''' plt.clf() plt.plot( lfStd, lfReturn)'''
        if (sMarkPoint == 'Sharpe'):
            ''' Find portfolio with max sharpe '''
            # (lfReturn[i]-1)/lfStd[i]: excess gross return per unit of risk.
            fMax = -1E300
            for i in range(len(lfReturn)):
                fShrp = (lfReturn[i] - 1) / (lfStd[i])
                if fShrp > fMax:
                    fMax = fShrp
                    lInd = i
            ''' plt.plot( [lfStd[lInd]], [lfReturn[lInd]], 'ro') plt.draw() time.sleep(2) plt.show()'''
        elif (sMarkPoint == 'MinVar'):
            ''' use portfolio with minimum variance '''
            fMin = 1E300
            for i in range(len(lfReturn)):
                if lfStd[i] < fMin:
                    fMin = lfStd[i]
                    lInd = i
        elif (sMarkPoint == 'MaxRet'):
            ''' use Portfolio with max returns (not really markovitz) '''
            lInd = len(lfReturn) - 1
        elif (sMarkPoint == 'MinRet'):
            ''' use Portfolio with min returns (not really markovitz) '''
            lInd = 0
        else:
            # NOTE(review): the trailing '' is a stray empty literal that the
            # compiler concatenates onto the message; it has no effect.
            print 'Warning: invalid sMarkPoint' ''
            return

        ''' Generate allocation based on selected portfolio '''
        naAlloc = (np.array(lnaPortfolios[lInd]).reshape(1, -1))
        dmNew = pand.DataMatrix(index=[dtDate], data=naAlloc, columns=(dmPrice.columns))
        dfAlloc = dfAlloc.append(dmNew)

    dfAlloc['_CASH'] = 0.0
    return dfAlloc
# One-year in-sample window ending at dtEnd, one-year out-of-sample window
# starting there (dtEnd defined earlier in the file).
dtStart = dtEnd - dt.timedelta(days=365)
dtTest = dtEnd + dt.timedelta(days=365)
timeofday = dt.timedelta(hours=16)  # NYSE close timestamps
ldtTimestamps = du.getNYSEdays( dtStart, dtEnd, timeofday )
ldtTimestampTest = du.getNYSEdays( dtEnd, dtTest, timeofday )
dmClose = norgateObj.get_data(ldtTimestamps, lsSymbols, "close")
dmTest = norgateObj.get_data(ldtTimestampTest, lsSymbols, "close")
naData = dmClose.values.copy()
naDataTest = dmTest.values.copy()
# Fill NaNs both directions, then convert prices to gross returns in place.
tsu.fillforward(naData)
tsu.fillbackward(naData)
tsu.returnize1(naData)
tsu.fillforward(naDataTest)
tsu.fillbackward(naDataTest)
tsu.returnize1(naDataTest)
lPeriod = 21  # holding period (trading days) passed to getFrontier
''' Get efficient frontiers '''
(lfReturn, lfStd, lnaPortfolios, naAvgRets, naStd) = getFrontier( naData, lPeriod )
(lfReturnTest, lfStdTest, unused, unused, unused) = getFrontier( naDataTest, lPeriod )
plt.clf()
fig = plt.figure()
''' Plot efficient frontiers '''
from QSTK.qstkutil.DataAccess import DataAccess
from QSTK.qstkutil.qsdateutil import getNYSEdays
from QSTK.qstkutil.tsutil import returnize1

# Monte-Carlo forecast configuration: two years of daily history, a 21-day
# forecast horizon, and 1000 simulated paths.
startDate = datetime(2010, 8, 30)
endDate = datetime(2012, 8, 30)
symbols = ['AAPL', 'GLD', 'MCD', '$SPX']
forecastLength = 21
numPaths = 1000
origPrices = getPrices(startDate, endDate, symbols, 'close')

# get log returns
# returnize1 rewrites each column as gross returns in place; the log of a
# gross return is the log return.
origLogRets = deepcopy(origPrices)
for col in origLogRets.columns:
    returnize1(origLogRets[col])
origLogRets = np.log(origLogRets)

print 'first 10 rows of origPrices:\n', origPrices[:10]
# print origLogRets
print '\nCorrelation matrix:\n', origLogRets.corr()
print '\nCholesky decomposition:\n', np.linalg.cholesky(origLogRets.corr())

# get correlated future log returns
# Sampled from a multivariate normal fitted to the historical mean vector
# and covariance matrix of the log returns.
meanVec = origLogRets.mean()
covMat = np.cov(origLogRets.T)
futureLogRets = multivariate_normal(meanVec, covMat, (forecastLength, numPaths))
# forecastLength x numpaths x numStocks
# print futureLogRets
from QSTK.qstkutil.tsutil import returnize1

# Wall-clock start of the run; `time` is presumably imported earlier in the
# file -- TODO confirm.
t0 = time()

# Monte-Carlo forecast configuration: a short September 2012 history window,
# a 21-day forecast horizon, and 1000 simulated paths.
startDate = datetime(2012, 9, 1)
endDate = datetime(2012, 9, 12)
symbols = ['AAPL', 'GLD', 'MCD', 'SPY']
forecastLength = 21
numPaths = 1000
origPrices = getPrices(startDate, endDate, symbols, 'close')

# get log returns
# returnize1 rewrites each column as gross returns in place; log converts
# them to log returns.
origLogRets = deepcopy(origPrices)
for col in origLogRets.columns:
    returnize1(origLogRets[col])
origLogRets = np.log(origLogRets)

print 'first 10 rows of origPrices:\n', origPrices[:10]
# print origLogRets
print '\nCorrelation matrix:\n', origLogRets.corr()
print '\nCholesky decomposition:\n', np.linalg.cholesky(origLogRets.corr())

# get correlated future log returns
# Sampled from a multivariate normal fitted to the historical mean vector
# and covariance matrix of the log returns.
meanVec = origLogRets.mean()
covMat = np.cov(origLogRets.T)
futureLogRets = multivariate_normal(
    meanVec, covMat, (forecastLength, numPaths))
# forecastLength x numpaths x numStocks