def getTrainAndTestData(startDate=datetime.date(2018, 1, 1),
                        endDate=None,
                        xSymbols=None,
                        ySymbol=None,
                        trainingSize=0.7):
    """
    For given period and symbols, generate the training and test data set.

    The 'Close' column of every requested symbol is inner-joined on the
    index, so only rows available for all symbols are kept. The joined rows
    are then split by position (no shuffling): the first
    int(rows * trainingSize) rows are the training set, the rest the test set.

    :param startDate: Start date for the entire data set
    :param endDate: End date for the entire data set. Defaults to today's
        date, resolved when the function is called.
    :param xSymbols: List of independent variable symbol tickers.
        Defaults to ['SPY'].
    :param ySymbol: List holding the dependent variable symbol ticker.
        Defaults to ['AAPL']. Only the last joined column is used as the
        target, so any additional tickers listed before it end up among
        the independent variables.
    :param trainingSize: The proportion of data set for training, in [0, 1]
    :return: (xTrain, yTrain, xTest, yTest). The x arrays are 2-d
        (rows x independent variables); the y arrays are 1-d (one value
        per row).
    :raises ValueError: If trainingSize is outside [0, 1]
    """
    # Resolve defaults per call: a default of datetime.date.today() in the
    # signature would be frozen at import time, and list defaults would be
    # shared between calls.
    if endDate is None:
        endDate = datetime.date.today()
    if xSymbols is None:
        xSymbols = ['SPY']
    if ySymbol is None:
        ySymbol = ['AAPL']
    # A negative or >1 proportion would silently yield a misleading split.
    if not 0 <= trainingSize <= 1:
        raise ValueError(
            'trainingSize must be within [0, 1], got %r' % (trainingSize,))

    symbols = xSymbols + ySymbol
    dataHub = DataHub()
    historicalDataBySymbol = dataHub.downloadDataFromYahoo(
        startDate, endDate, symbols)

    # One 'Close' column per symbol, in request order, so the dependent
    # variable is always the last column.
    df = pd.concat(
        [historicalDataBySymbol[symbol].loc[:, 'Close'] for symbol in symbols],
        axis=1, join='inner')

    splitIdx = int(df.shape[0] * trainingSize)
    trainRows = df.iloc[:splitIdx]
    testRows = df.iloc[splitIdx:]

    xTrain = trainRows.iloc[:, :-1].values
    yTrain = trainRows.iloc[:, -1].values
    xTest = testRows.iloc[:, :-1].values
    yTest = testRows.iloc[:, -1].values
    return xTrain, yTrain, xTest, yTest
def main():
    """
    Main entry point.

    Downloads data for AAPL and SPY from 2018-01-01 through today, takes the
    'Close' series of the first two entries of the returned mapping, and
    logs the value returned by calculateCorrelation for them.
    """
    dataHub = DataHub()
    startDate = datetime.date(2018, 1, 1)
    endDate = datetime.date.today()
    symbols = ['AAPL', 'SPY']
    data = dataHub.downloadDataFromYahoo(startDate, endDate, symbols)

    # Materialize the (key, value) pairs before indexing: on Python 3 the
    # dict views returned by keys()/values()/items() are not subscriptable.
    # items() keeps each key paired with its own value, in the same order
    # keys() and values() would have produced.
    entries = list(data.items())
    firstKey, firstFrame = entries[0]
    secondKey, secondFrame = entries[1]

    # Name each Close series after the key it was stored under.
    ts1 = firstFrame.loc[:, 'Close'].rename(firstKey)
    ts2 = secondFrame.loc[:, 'Close'].rename(secondKey)

    corr = calculateCorrelation(ts1, ts2)
    logging.info('Correlation = %.2f', corr)
class VolatilityPricer():
    """
    Realized vol:
    Same as Black-Scholes, we assume the underlying follows a Geometric
    Brownian Motion. Then its log return follows a Normal distribution,
    with mean as 0.
    We take as input the historical daily underlying prices.
    Annualization factor is 252.
    Degree of Freedom is 0 as we are calculating the exact realized vol
    for the given historical period.

    Implied vol:
    Use Black-Scholes to back out the implied volatility from the given
    market option price.
    """

    def __init__(self):
        """ Create the DataHub and the empty per-symbol result containers """
        self.historicalDataBySymbol = dict()
        self.dataHub = DataHub()
        self.realizedVolBySymbol = dict()

    def _loadHistoricalUnderlyingData(self, startDate, endDate, symbols):
        """
        Download data for the given symbols and period, replacing whatever
        was previously stored in self.historicalDataBySymbol
        """
        self.historicalDataBySymbol = self.dataHub.downloadDataFromYahoo(
            startDate, endDate, symbols)

    def _calculateRealizedVol(self, ts):
        """
        Calculate the realized vol from given time series

        :param ts: Price series; consecutive entries are treated as one
            period apart
        :return: Root mean square of the log returns (mean taken as 0,
            divided by the number of returns), scaled by sqrt(252)
        """
        # The first pct_change entry has no predecessor and is dropped.
        pctChange = ts.pct_change().dropna()
        logReturns = np.log(1 + pctChange)
        vol = np.sqrt(np.sum(np.square(logReturns)) / logReturns.size)
        annualizedVol = vol * np.sqrt(252)
        return annualizedVol

    def getRealizedVol(self, startDate=None, endDate=None, symbols=None):
        """
        Calculate the realized volatility from historical market data

        :param startDate: Start of the historical window. Defaults to 30
            days before today, resolved when the method is called.
        :param endDate: End of the historical window. Defaults to today,
            resolved when the method is called.
        :param symbols: List of symbol tickers. Defaults to ['SPY'].
        :return: self.realizedVolBySymbol, mapping symbol to annualized
            realized vol. The same dict is updated on every call, so
            entries from earlier calls remain in it.
        """
        # Resolve defaults per call: date expressions in the signature would
        # be frozen at import time, and a list default would be shared.
        today = datetime.date.today()
        if startDate is None:
            startDate = today - datetime.timedelta(days=30)
        if endDate is None:
            endDate = today
        if symbols is None:
            symbols = ['SPY']

        self._loadHistoricalUnderlyingData(startDate, endDate, symbols)
        # items() rather than iteritems(): the latter does not exist on
        # Python 3 dicts.
        for symbol, df in self.historicalDataBySymbol.items():
            # Use daily Close to calculate realized vols
            self.realizedVolBySymbol[symbol] = self._calculateRealizedVol(
                df.loc[:, 'Close'])
        return self.realizedVolBySymbol

    def getImpliedVol(self, optionPrice=17.5, callPut='Call', spot=586.08,
                      strike=585.0, tenor=0.109589, rate=0.0002):
        """
        Calculate the implied volatility from option market price

        All arguments are passed unchanged, in this order, to
        blackScholesSolveImpliedVol, whose result is returned.

        :param optionPrice: Market price of the option
        :param callPut: Option type flag, 'Call' by default
        :param spot: Underlying spot price
        :param strike: Option strike
        :param tenor: Time to expiry (presumably in years; confirm against
            blackScholesSolveImpliedVol)
        :param rate: Interest rate (units presumably match tenor; confirm)
        """
        return blackScholesSolveImpliedVol(optionPrice, callPut, spot,
                                           strike, tenor, rate)
class VolatilityPricer():
    """
    Computes realized and implied volatilities.

    Realized vol is the root mean square of the log returns of a 'Close'
    price series (mean taken as 0, divided by the number of returns),
    annualized with a factor of 252. Implied vol is delegated to
    blackScholesSolveImpliedVol.
    """

    def __init__(self):
        """ Create the DataHub and the empty per-symbol result containers """
        self.historicalDataBySymbol = dict()
        self.dataHub = DataHub()
        self.realizedVolBySymbol = dict()

    def _loadHistoricalUnderlyingData(self, startDate, endDate, symbols):
        """
        Download data for the given symbols and period, replacing whatever
        was previously stored in self.historicalDataBySymbol
        """
        self.historicalDataBySymbol = self.dataHub.downloadDataFromYahoo(
            startDate, endDate, symbols)

    def _calculateRealizedVol(self, ts):
        """
        Calculate the realized vol from given time series

        :param ts: Price series; consecutive entries are treated as one
            period apart
        :return: Root mean square of the log returns, scaled by sqrt(252)
        """
        # The first pct_change entry has no predecessor and is dropped.
        pctChange = ts.pct_change().dropna()
        logReturns = np.log(1 + pctChange)
        # Divide by the full count: the mean return is taken as 0, so no
        # degree of freedom is subtracted.
        vol = np.sqrt(np.sum(np.square(logReturns)) / logReturns.size)
        annualizedVol = vol * np.sqrt(252)
        return annualizedVol

    def getRealizedVol(self, startDate=None, endDate=None, symbols=None):
        """
        Calculate the realized volatility from historical market data

        :param startDate: Start of the historical window. Defaults to 30
            days before today, resolved when the method is called.
        :param endDate: End of the historical window. Defaults to today,
            resolved when the method is called.
        :param symbols: List of symbol tickers. Defaults to ['SPY'].
        :return: self.realizedVolBySymbol, mapping symbol to annualized
            realized vol. The same dict is updated on every call, so
            entries from earlier calls remain in it.
        """
        # Resolve defaults per call: date expressions in the signature would
        # be frozen at import time, and a list default would be shared.
        today = datetime.date.today()
        if startDate is None:
            startDate = today - datetime.timedelta(days=30)
        if endDate is None:
            endDate = today
        if symbols is None:
            symbols = ['SPY']

        self._loadHistoricalUnderlyingData(startDate, endDate, symbols)
        # items() rather than iteritems(): the latter does not exist on
        # Python 3 dicts.
        for symbol, df in self.historicalDataBySymbol.items():
            # Use daily Close to calculate realized vols
            self.realizedVolBySymbol[symbol] = self._calculateRealizedVol(
                df.loc[:, 'Close'])
        return self.realizedVolBySymbol

    def getImpliedVol(self, optionPrice=17.5, callPut='Call', spot=586.08,
                      strike=585.0, tenor=0.109589, rate=0.0002):
        """
        Calculate the implied volatility from option market price

        All arguments are passed unchanged, in this order, to
        blackScholesSolveImpliedVol, whose result is returned.

        :param optionPrice: Market price of the option
        :param callPut: Option type flag, 'Call' by default
        :param spot: Underlying spot price
        :param strike: Option strike
        :param tenor: Time to expiry (presumably in years; confirm against
            blackScholesSolveImpliedVol)
        :param rate: Interest rate (units presumably match tenor; confirm)
        """
        return blackScholesSolveImpliedVol(optionPrice, callPut, spot,
                                           strike, tenor, rate)
def __init__(self):
    """ Create a DataHub instance and two empty per-symbol dicts. """
    # Starts empty.
    self.historicalDataBySymbol = {}
    self.dataHub = DataHub()
    # Starts empty.
    self.realizedVolBySymbol = {}