def __init__(self):
    """Initialize the data structure with recent implied-vol data.

    Loads implied-volatility fields for the default SPY universe,
    starting two business days before today.
    """
    self.data = get_equity_implied_volatility(
        tickers=['SPY'],
        fields=self.ivol_fields,
        start_date=utils.workday(num_days=-2))
def initialize_data(self, **kwargs):
    """Load implied-volatility and stock-price data for the strategy.

    kwargs:
        volatility_surface_manager: required; source of implied-vol data
        (plus anything consumed by self.get_universe)

    Populates self.strat_data['vrv_data'] / ['stock_prices'] and
    self.settings['tickers'].

    Raises:
        ValueError: if no volatility surface manager is supplied.
    """
    # Fail fast: validate the required input before doing any work
    # (the original checked this only after fetching the universe).
    self._vsm = kwargs.get('volatility_surface_manager', None)
    if self._vsm is None:
        raise ValueError('Requires volatility surface manager as input!')

    start_date = self.settings['start_date']

    # Set universe
    tickers = self.get_universe(**kwargs)

    # Implied volatility fields: 2m/3m vols plus days-to-maturity for
    # the first three monthly contracts
    fields = ['iv_2m', 'iv_3m',
              'days_to_maturity_1mc',
              'days_to_maturity_2mc',
              'days_to_maturity_3mc']

    # Stock prices: pull two extra trading years of history before
    # start_date (presumably for warm-up of trailing computations)
    stock_price_start_date = utils.workday(
        date=start_date,
        num_days=-constants.trading_days_per_year * 2)
    stock_prices = md.get_equity_prices(
        tickers=tickers, start_date=stock_price_start_date)
    # NOTE(review): unstack-then-stack round trip below drops dates with
    # missing prices per ticker — appears intentional, left unchanged
    stock_prices = stock_prices['adj_close'].unstack(level='ticker')

    # Implied volatility data
    vrv_data = self._vsm.get_data(tickers=tickers,
                                  start_date=start_date,
                                  fields=fields)

    # Add term structure slope (3m minus 2m implied vol)
    stock_prices = stock_prices.stack('ticker')
    vrv_data['ts_slope'] = vrv_data['iv_3m'] - vrv_data['iv_2m']
    vrv_data['stock_prices'] = stock_prices

    # Data storage
    self.strat_data['vrv_data'] = vrv_data
    self.strat_data['stock_prices'] = stock_prices
    self.settings['tickers'] = np.unique(
        vrv_data.index.get_level_values('ticker'))
def __init__(self):
    """Initialize the data structure with recent equity prices.

    Loads prices for the default SPY universe, starting two business
    days before today.
    """
    self.data = get_equity_prices(
        tickers=['SPY'],
        start_date=utils.workday(num_days=-2))
import pandas as pd
import datetime as dt
from pandas.tseries.offsets import BDay
import numpy as np
import logging
import qfl.core.constants as constants
import qfl.core.calcs as calcs
import qfl.utilities.basic_utilities as utils
from qfl.core.database_interface import DatabaseInterface as db
from qfl.core.database_interface import DatabaseUtilities as dbutils
from scipy.interpolate import interp1d

# Default "as-of" date for market data: the prior business day
market_data_date = utils.workday(dt.datetime.today().date(), num_days=-1)
history_default_start_date = dt.datetime(2000, 1, 1)
# Delta grid in percent: 1, 5, 10, ..., 90, 99.
# list(range(...)) is required on Python 3 (range is lazy there) and is
# identical on Python 2.
option_delta_grid = [1] + list(range(5, 95, 5)) + [99]

db.initialize()

"""
-------------------------------------------------------------------------------
UTILITIES
-------------------------------------------------------------------------------
"""


def get_futures_calendar_name(futures_series=None):
    """Return the exchange holiday-calendar name for a futures series.

    Looks up the series' exchange code in the database, then maps it to
    a calendar name via utils.DateUtils.exchange_calendar_map.
    """
    s = db.get_futures_series(futures_series=futures_series)
    exchange_code = s.iloc[0]['exchange']
    calendar_name = utils.DateUtils.exchange_calendar_map[exchange_code]
    # BUG FIX: the computed name was never returned by the original
    return calendar_name
def _create_sec_master(self, trade_dates=None, maturity_dates=None,
                       buys=None, sells=None, sizes=None, sec_cols=None,
                       **kwargs):
    """Build a security master of volatility trades, one row per trade.

    Parameters (inferred from usage — confirm against callers):
        trade_dates: iterable of dates on which trades are opened
        maturity_dates: sorted date collection used to pick the
            on-the-run expiry for each trade date
        buys / sells: DataFrames indexed by date, columns by ticker;
            positive entries mark tickers to buy / sell on that date
        sizes: DataFrame of trade sizes, indexed like buys/sells
        sec_cols: column names for the output security master
    kwargs:
        trade_tenor_month: target trade maturity in months (default 3)

    Returns:
        DataFrame concatenating the per-date security masters, with
        columns including start/maturity dates, underlying, strike
        (set to the prevailing 3m implied vol), quantity (negative for
        sells) and a human-readable 'instrument' identifier.
    """
    # Settings
    trade_tenor_month = kwargs.get('trade_tenor_month', 3)

    # Convenience
    vrv_data = self.strat_data['vrv_data'].unstack('ticker')

    # Use a dictionary and then concatenate it
    sec_dict = dict()

    # Business days to shift forward so the first maturity on/after the
    # shifted date lands in the Nth month
    c = constants.trading_days_per_month * (trade_tenor_month - 1)

    for trade_date in trade_dates:

        # Buy the on-the-run Nth month
        maturity_date = maturity_dates[
            maturity_dates >= utils.workday(trade_date, c)][0]

        # Identify the buys and sells (empty frame when the trade date
        # has no signals, so downstream slicing still works)
        if trade_date in buys.index:
            trade_date_buys = (buys.loc[trade_date][
                buys.loc[trade_date] > 0]).reset_index()
        else:
            trade_date_buys = pd.DataFrame(columns=buys.columns)

        if trade_date in sells.index:
            trade_date_sells = (sells.loc[trade_date][
                sells.loc[trade_date] > 0]).reset_index()
        else:
            trade_date_sells = pd.DataFrame(columns=sells.columns)

        # Create the security master structure
        num_trades = len(trade_date_buys) + len(trade_date_sells)
        trade_ids = np.arange(0, num_trades)
        securities = pd.DataFrame(index=trade_ids, columns=sec_cols)
        securities['start_date'] = trade_date
        securities['maturity_date'] = maturity_date

        # Underlying: buys occupy the first rows, sells the remainder
        buy_ind = range(0, len(trade_date_buys))
        sell_ind = range(len(trade_date_buys), num_trades)
        securities.loc[buy_ind, 'underlying'] = trade_date_buys[
            'ticker'].values
        securities.loc[sell_ind, 'underlying'] = trade_date_sells[
            'ticker'].values

        # Traded strikes: struck at the prevailing 3m implied vol
        # (relies on boolean-mask ordering matching the reset_index
        # ordering above — .values strips the index before assignment)
        trade_date_vols = vrv_data.loc[trade_date, 'iv_3m']
        securities.loc[buy_ind, 'strike'] = trade_date_vols[
            buys.loc[trade_date] > 0].values
        securities.loc[sell_ind, 'strike'] = trade_date_vols[
            sells.loc[trade_date] > 0].values

        # Traded sizes: sells carry negative quantity
        trade_date_sizes = sizes.loc[trade_date]
        securities.loc[buy_ind, 'quantity'] = trade_date_sizes[
            buys.loc[trade_date] > 0].values
        securities.loc[sell_ind, 'quantity'] = -trade_date_sizes[
            sells.loc[trade_date] > 0].values

        # Human-readable instrument id: "<ticker> VOLS <start> <mat> <strike>"
        securities['instrument'] = \
            securities['underlying'] + " VOLS " \
            + securities['start_date'].dt.date.map(str) + " " \
            + securities['maturity_date'].dt.date.map(str) + " " \
            + securities['strike'].map(str)

        sec_dict[trade_date] = securities

    sec_df = pd.concat(sec_dict).reset_index(drop=True)
    return sec_df
def _clean_implied_vol_data_one(stock_prices=None, ivol=None, ref_ivol=None,
                                res_com=5, deg_f=2, buffer_days=3,
                                pct_threshold=0.01,
                                calendar_name='UnitedStates'):
    """Detect and NaN-out likely outliers in one implied-vol series.

    Regresses daily log-changes in implied vol on stock returns (and,
    optionally, a reference vol series), then flags observations where
    both the residual and the subsequent res_com-day residual sum are
    jointly improbable under a Student-t density — i.e. a large
    unexpected vol change that then reverts.

    Parameters:
        stock_prices: price series aligned to ivol's index
        ivol: implied-vol series to clean (not modified; a copy is used)
        ref_ivol: optional reference vol series used as extra regressor
        res_com: lookahead window (days) for the residual-reversion sum
        deg_f: degrees of freedom for the Student-t density
        buffer_days: business-day gap that merges flagged dates into one
            outlier block
        pct_threshold: joint-density cutoff below which a date is flagged
        calendar_name: business-day calendar for the buffer computation

    Returns:
        (cleaned ivol series, joint-probability DataFrame, regression
        result) — note the regression object itself is returned.
    """
    ivol = ivol.copy(deep=True)
    ivol = ivol[np.isfinite(ivol)]
    df = pd.DataFrame(index=ivol.index,
                      columns=['px', 'ivol', 'ret', 'ivol_chg'])
    df['px'] = stock_prices
    df['ivol'] = ivol
    df = df[np.isfinite(df['px'])]
    df['ret'] = df['px'] / df['px'].shift(1) - 1
    df['ivol_chg'] = np.log(df['ivol'] / df['ivol'].shift(1))

    # Idea is that first we should predict ivol change based on stock chg
    # And then we should filter outliers from there
    x = df['ret']
    if ref_ivol is not None:
        ref_ivol = ref_ivol[np.isfinite(ref_ivol)]
        df['ref_ivol_chg'] = np.log(ref_ivol / ref_ivol.shift(1))
        x = df[['ret', 'ref_ivol_chg']]

    # NOTE(review): pd.ols was removed in pandas 0.20+; this requires a
    # legacy pandas (or a statsmodels port). r1 is also returned below,
    # so swapping it out would change the function's return contract.
    r1 = pd.ols(y=df['ivol_chg'], x=x)
    df['ivol_chg_res'] = r1.resid

    # Now... an outlier is a large unexpected change in ivol
    # And it's "post facto" an outlier if it then reverts back
    # So "probability of being an outlier" is some function of a large residual
    # followed by negative residuals

    # (earlier EWM-based variant of the forward residual aggregate,
    # kept for reference)
    # df['ivol_chg_res_fwd_ewm'] = df['ivol_chg_res'] \
    #     .iloc[::-1] \
    #     .ewm(com=res_com) \
    #     .mean() \
    #     .iloc[::-1] \
    #     .shift(-1) \
    #     * res_com

    # Forward-looking sum of the next res_com residuals (reverse,
    # rolling-sum, reverse back, then shift to exclude the current day)
    df['ivol_chg_res_fwd_ewm'] = df['ivol_chg_res'] \
        .iloc[::-1] \
        .rolling(window=res_com) \
        .sum() \
        .iloc[::-1] \
        .shift(-1)
    max_date = np.max(df.index)
    # Last date has no forward window; treat as no reversion.
    # NOTE(review): chained assignment — may not write through on
    # modern pandas (SettingWithCopy)
    df['ivol_chg_res_fwd_ewm'].loc[max_date] = 0

    # Z-score both the residual and its forward sum
    tmp = df[['ivol_chg_res', 'ivol_chg_res_fwd_ewm']]
    tmpz = (tmp - tmp.mean()) / tmp.std()

    # Identify potential outliers: joint Student-t density of the two
    # z-scores (treated as independent)
    from scipy.stats import t
    tmp_pr = t.pdf(tmpz['ivol_chg_res'], deg_f) \
        * t.pdf(tmpz['ivol_chg_res_fwd_ewm'], deg_f)
    tmp_pr = pd.DataFrame(data=tmp_pr,
                          index=tmpz.index,
                          columns=['tmp_pr'])
    tmp_pr_f = tmp_pr[tmp_pr['tmp_pr'] < pct_threshold]
    if len(tmp_pr_f) == 0:
        # Nothing flagged: return the (finite-filtered) series as-is
        return ivol, tmp_pr, r1

    # Separate these into blocks: flagged dates within buffer_days
    # business days of the previous flagged date share a block id
    tmp_pr_f['block'] = 0
    tmp_pr_f.loc[tmp_pr_f.index[0], 'block'] = 1
    dates = tmp_pr_f.index.get_level_values('date')
    # NOTE(review): loop variable t shadows the scipy.stats t imported
    # above (harmless here since the density is already computed)
    for t in range(1, len(tmp_pr_f)):
        if dates[t] <= utils.workday(date=dates[t - 1],
                                     num_days=buffer_days,
                                     calendar_name=calendar_name):
            tmp_pr_f.loc[dates[t], 'block'] = \
                tmp_pr_f.loc[dates[t - 1], 'block']
        else:
            tmp_pr_f.loc[dates[t], 'block'] = \
                tmp_pr_f.loc[dates[t - 1], 'block'] + 1

    # NAN out that stuff
    ivol.loc[tmp_pr_f.index] = np.nan
    return ivol, tmp_pr, r1