    def __init__(self, y, x, window_type='full_sample', window=None,
                 min_periods=None, intercept=True, nw_lags=None,
                 nw_overlap=False):

        for attr in self.ATTRIBUTES:
            setattr(self.__class__, attr, create_ols_attr(attr))

        results = {}

        for entity in y:
            entity_y = y[entity]

            entity_x = {}
            for x_var in x:
                entity_x[x_var] = x[x_var][entity]

            from pandas.stats.interface import ols
            results[entity] = ols(y=entity_y,
                                  x=entity_x,
                                  window_type=window_type,
                                  window=window,
                                  min_periods=min_periods,
                                  intercept=intercept,
                                  nw_lags=nw_lags,
                                  nw_overlap=nw_overlap)

        self.results = results
Example #2
    def __init__(self, y, x, window_type='full_sample', window=None,
                 min_periods=None, intercept=True, nw_lags=None,
                 nw_overlap=False):

        import warnings
        warnings.warn("The pandas.stats.plm module is deprecated and will be "
                      "removed in a future version. We refer to external packages "
                      "like statsmodels, see some examples here: "
                      "http://www.statsmodels.org/stable/mixed_linear.html",
                      FutureWarning, stacklevel=4)

        for attr in self.ATTRIBUTES:
            setattr(self.__class__, attr, create_ols_attr(attr))

        results = {}

        for entity in y:
            entity_y = y[entity]

            entity_x = {}
            for x_var in x:
                entity_x[x_var] = x[x_var][entity]

            from pandas.stats.interface import ols
            results[entity] = ols(y=entity_y,
                                  x=entity_x,
                                  window_type=window_type,
                                  window=window,
                                  min_periods=min_periods,
                                  intercept=intercept,
                                  nw_lags=nw_lags,
                                  nw_overlap=nw_overlap)

        self.results = results
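The FutureWarning above points to statsmodels as the replacement. Below is a minimal sketch of the same per-entity loop written against statsmodels.api.OLS, assuming y maps each entity to a Series and x maps each regressor name to per-entity Series (the same layout this constructor receives); fit_per_entity is a hypothetical helper, not part of pandas:

import pandas as pd
import statsmodels.api as sm

def fit_per_entity(y, x, intercept=True):
    # y: {entity: Series}; x: {x_var: {entity: Series}} -- same layout as above
    results = {}
    for entity in y:
        # assemble one regressor DataFrame per entity
        entity_x = pd.DataFrame({x_var: x[x_var][entity] for x_var in x})
        if intercept:
            entity_x = sm.add_constant(entity_x)
        # align on the index and drop incomplete rows before fitting
        data = pd.concat([y[entity].rename('y'), entity_x], axis=1).dropna()
        results[entity] = sm.OLS(data['y'], data.drop(columns='y')).fit()
    return results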
Example #3
File: plm.py Project: ara818/pandas
    def __init__(self, y, x, window_type=common.FULL_SAMPLE, window=None,
                 min_periods=None, intercept=True, nw_lags=None,
                 nw_overlap=False):

        for attr in self.ATTRIBUTES:
            setattr(self.__class__, attr, create_ols_attr(attr))

        results = {}

        for entity in y:
            entity_y = y[entity]

            entity_x = {}
            for x_var in x:
                entity_x[x_var] = x[x_var][entity]

            from pandas.stats.interface import ols
            results[entity] = ols(y=entity_y,
                                  x=entity_x,
                                  window_type=window_type,
                                  window=window,
                                  min_periods=min_periods,
                                  intercept=intercept,
                                  nw_lags=nw_lags,
                                  nw_overlap=nw_overlap)

        self.results = results
Example #4
File: plm.py Project: adneu/pandas
    def __init__(self, y, x, window_type='full_sample', window=None,
                 min_periods=None, intercept=True, nw_lags=None,
                 nw_overlap=False):

        import warnings
        warnings.warn("The pandas.stats.plm module is deprecated and will be "
                      "removed in a future version. We refer to external packages "
                      "like statsmodels, see some examples here: "
                      "http://www.statsmodels.org/stable/mixed_linear.html",
                      FutureWarning, stacklevel=4)

        for attr in self.ATTRIBUTES:
            setattr(self.__class__, attr, create_ols_attr(attr))

        results = {}

        for entity in y:
            entity_y = y[entity]

            entity_x = {}
            for x_var in x:
                entity_x[x_var] = x[x_var][entity]

            from pandas.stats.interface import ols
            results[entity] = ols(y=entity_y,
                                  x=entity_x,
                                  window_type=window_type,
                                  window=window,
                                  min_periods=min_periods,
                                  intercept=intercept,
                                  nw_lags=nw_lags,
                                  nw_overlap=nw_overlap)

        self.results = results
Example #5
# Imports assumed from the surrounding module (not shown in this snippet):
# numpy/scipy/pandas plus the QuantLib (pyql) Black-formula helper used below.
import numpy as np
from scipy.interpolate import interp1d
from scipy.stats import norm
from pandas import DataFrame
from pandas.stats.interface import ols
from quantlib.pricingengines.blackformula import blackFormulaImpliedStdDev


def Compute_IV(optionDataFrame,
               tMin=0,
               nMin=0,
               QDMin=0,
               QDMax=1,
               keepOTMData=True):
    """
    Pre-processing of a standard European option quote file.
    - Calculation of implied risk-free rate and dividend yield
    - Calculation of implied volatility
    - Estimate ATM volatility for each expiry
    - Compute implied volatility and Quick Delta for each quote
    
    Options for filtering the input data set: 
    - maturities with less than nMin strikes are ignored
    - maturities shorter than tMin (ACT/365 daycount) are ignored
    - strikes with Quick Delta < QDMin or > QDMax are ignored
    """

    grouped = optionDataFrame.groupby('dtExpiry')

    isFirst = True
    for spec, group in grouped:
        print('processing group %s' % spec)

        # implied vol for this type/expiry group

        indx = group.index

        dtTrade = group['dtTrade'][indx[0]]
        dtExpiry = group['dtExpiry'][indx[0]]
        spot = group['Spot'][indx[0]]
        daysToExpiry = (dtExpiry - dtTrade).days
        timeToMaturity = daysToExpiry / 365.0

        # exclude groups with too short time to maturity

        if timeToMaturity < tMin:
            continue

        # exclude groups with too few data points

        df_call = group[group['Type'] == 'C']
        df_put = group[group['Type'] == 'P']

        if (len(df_call) < nMin) | (len(df_put) < nMin):
            continue

        # calculate forward, implied interest rate and implied div. yield

        df_C = DataFrame((df_call['PBid'] + df_call['PAsk']) / 2,
                         columns=['PremiumC'])
        df_C.index = df_call['Strike']
        df_P = DataFrame((df_put['PBid'] + df_put['PAsk']) / 2,
                         columns=['PremiumP'])
        df_P.index = df_put['Strike']

        # use 'inner' join because some strikes are not quoted for C and P
        df_all = df_C.join(df_P, how='inner')
        df_all['Strike'] = df_all.index
        df_all['C-P'] = df_all['PremiumC'] - df_all['PremiumP']

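        # Put-call parity: C - P = S*exp(-d*T) - K*exp(-r*T), so regressing C - P on
        # the strike K gives slope b[0] = -exp(-r*T) and intercept b[1] = S*exp(-d*T);
        # the lines below invert these for the implied rate and dividend yield.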
        model = ols(y=df_all['C-P'], x=df_all['Strike'])
        b = model.beta

        # intercept is last coef
        iRate = -np.log(-b[0]) / timeToMaturity
        dRate = np.log(spot / b[1]) / timeToMaturity

        discountFactor = np.exp(-iRate * timeToMaturity)
        Fwd = spot * np.exp((iRate - dRate) * timeToMaturity)

        print('Fwd: %f int rate: %f div yield: %f' % (Fwd, iRate, dRate))

        # mid-market ATM volatility

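        # blackFormulaImpliedStdDev solves for the total std dev (vol * sqrt(T));
        # dividing by sqrt(T) below converts it to an annualized volatility.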
        def impvol(cp, strike, premium):
            try:
                vol = blackFormulaImpliedStdDev(cp,
                                                strike,
                                                forward=Fwd,
                                                blackPrice=premium,
                                                discount=discountFactor,
                                                TTM=timeToMaturity)
            except Exception:  # the solver can fail when no implied vol matches the price
                vol = np.nan
            return vol / np.sqrt(timeToMaturity)

        # implied bid/ask vol for all options

        df_call['IVBid'] = [
            impvol('C', strike, price)
            for strike, price in zip(df_call['Strike'], df_call['PBid'])
        ]
        df_call['IVAsk'] = [
            impvol('C', strike, price)
            for strike, price in zip(df_call['Strike'], df_call['PAsk'])
        ]

        df_call['IVMid'] = (df_call['IVBid'] + df_call['IVAsk']) / 2

        df_put['IVBid'] = [
            impvol('P', strike, price)
            for strike, price in zip(df_put['Strike'], df_put['PBid'])
        ]
        df_put['IVAsk'] = [
            impvol('P', strike, price)
            for strike, price in zip(df_put['Strike'], df_put['PAsk'])
        ]

        df_put['IVMid'] = (df_put['IVBid'] + df_put['IVAsk']) / 2

        f_call = interp1d(df_call['Strike'].values, df_call['IVMid'].values)
        f_put = interp1d(df_put['Strike'].values, df_put['IVMid'].values)

        atmVol = (f_call(Fwd) + f_put(Fwd)) / 2
        print('ATM vol: %f' % atmVol)

        # Quick Delta, computed with ATM vol
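        # Quick Delta = N(ln(F/K) / (atmVol * sqrt(T))): a moneyness measure in [0, 1]
        # computed with a single ATM vol for every strike.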
        rv = norm()
        df_call['QuickDelta'] = [rv.cdf(np.log(Fwd / strike) / (atmVol * np.sqrt(timeToMaturity)))
                                 for strike in df_call['Strike']]
        df_put['QuickDelta'] = [rv.cdf(np.log(Fwd / strike) / (atmVol * np.sqrt(timeToMaturity)))
                                for strike in df_put['Strike']]

        # keep data within QD range

        df_call = df_call[(df_call['QuickDelta'] >= QDMin) & \
                          (df_call['QuickDelta'] <= QDMax) ]

        df_put = df_put[  (df_put['QuickDelta'] >= QDMin) & \
                          (df_put['QuickDelta'] <= QDMax) ]

        # final assembly...

        df_cp = df_call.append(df_put, ignore_index=True)
        df_cp['iRate'] = iRate
        df_cp['iDiv'] = dRate
        df_cp['ATMVol'] = atmVol
        df_cp['Fwd'] = Fwd
        df_cp['TTM'] = timeToMaturity
        df_cp['CP'] = [1 if t == 'C' else -1 for t in df_cp['Type']]

        # keep only OTM data ?
        if keepOTMData:
            df_cp = df_cp[((df_cp['Strike'] >= Fwd) & (df_cp['Type'] == 'C')) |
                          ((df_cp['Strike'] < Fwd) & (df_cp['Type'] == 'P'))]

        if isFirst:
            df_final = df_cp
            isFirst = False
        else:
            df_final = df_final.append(df_cp, ignore_index=True)

    return df_final
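A hypothetical usage sketch for Compute_IV: the CSV name and the filter values below are assumptions, while the column names (dtTrade, dtExpiry, Spot, Strike, Type, PBid, PAsk) are the ones the function reads above.

import pandas as pd

quotes = pd.read_csv('spx_quotes.csv', parse_dates=['dtTrade', 'dtExpiry'])
iv_surface = Compute_IV(quotes, tMin=1.0 / 12, nMin=6, QDMin=0.2, QDMax=0.8)
print(iv_surface[['dtExpiry', 'Strike', 'CP', 'IVMid', 'QuickDelta']].head())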
Example #6
def ATM_Vol(premium, discountFactor, forward, strike):
    """
    Approximate std dev, for calls close to the money
    """
    vol = (premium/discountFactor - .5*(forward-strike))*5.0/(forward+strike) 

    return vol

    # get spot and option data frame
    
    (spot, optionDataFrame) = read_SPX_file(option_data_file)

    grouped = optionDataFrame.groupby('dtExpiry') 

    isFirst = True
    for spec, group in grouped:
        print('processing group %s' % spec)

        # implied vol for this type/expiry group

        indx = group.index
        
        dtTrade = group['dtTrade'][indx[0]]
        dtExpiry = group['dtExpiry'][indx[0]]
        daysToExpiry = (dtExpiry-dtTrade).days
        timeToMaturity = daysToExpiry/365.0

        # exclude groups with too few data points 
        # or too short maturity

        if timeToMaturity < tMin:
            continue
            
        # valid call and put quotes
        df_call = group[(group['Type'] == 'C') & (group['Bid']>0) \
                    & (group['Ask']>0)]
        df_put = group[(group['Type'] == 'P') &  (group['Bid']>0) \
                    & (group['Ask']>0)]
        if (len(df_call) == 0) | (len(df_put) == 0):
            continue

        # calculate forward, implied interest rate and implied div. yield
            
        df_call['Mid'] = (df_call['Bid']+df_call['Ask'])/2
        df_put['Mid'] = (df_put['Bid']+df_put['Ask'])/2
    
        df_C = DataFrame.filter(df_call, items=['Strike', 'Mid'])
        df_C.columns = ['Strike', 'PremiumC']
        to_join = DataFrame(df_put['Mid'], index=df_put['Strike'],
            columns=['PremiumP']) 

        # use 'inner' join because some strikes are not quoted for C and P
        df_all = df_C.join(to_join, on='Strike', how='inner')
    
        df_all['C-P'] = df_all['PremiumC'] - df_all['PremiumP']
    
        model = ols(y=df_all['C-P'], x=df_all.ix[:,'Strike'])
        b = model.beta 
    
        # intercept is last coef
        iRate = -np.log(-b[0])/timeToMaturity
        dRate = np.log(spot/b[1])/timeToMaturity
        discountFactor = np.exp(-iRate*timeToMaturity)
        Fwd = spot * np.exp((iRate-dRate)*timeToMaturity)

        print('Fwd: %f int rate: %f div yield: %f' % (Fwd, iRate, dRate))

        # interpolate ATM premium and vol: used to compute Quick Delta
        f_call = interp1d(df_all['Strike'].values, df_all['PremiumC'].values)
        f_put = interp1d(df_all['Strike'].values, df_all['PremiumP'].values)

        atmPremium = (f_call(Fwd)+f_put(Fwd))/2
        atmVol = blackFormulaImpliedStdDev('C', strike=Fwd,
                 forward=Fwd, blackPrice=atmPremium,
                 discount=discountFactor,
                 TTM=timeToMaturity)/np.sqrt(timeToMaturity)
                    
        print('ATM vol: %f' % atmVol)

        # Quick Delta, computed with ATM vol
        rv = norm()
        df_call['QuickDelta'] = [rv.cdf(np.log(Fwd / strike) / (atmVol * np.sqrt(timeToMaturity)))
                                 for strike in df_call['Strike']]
        df_put['QuickDelta'] = [rv.cdf(np.log(Fwd / strike) / (atmVol * np.sqrt(timeToMaturity)))
                                for strike in df_put['Strike']]

        # implied bid/ask vol for all options
    
        def impvol(strike, premium):
            # cp ('C' or 'P') is picked up from the enclosing scope below
            try:
                vol = blackFormulaImpliedStdDev(cp, strike,
                    forward=Fwd, blackPrice=premium, discount=discountFactor,
                    TTM=timeToMaturity)
            except Exception:
                vol = np.nan
            return vol / np.sqrt(timeToMaturity)
        
        cp = 'C'
        df_call['IVBid'] = [impvol(strike, price) for strike, price in zip(df_call['Strike'], df_call['Bid'])]
        df_call['IVAsk'] = [impvol(strike, price) for strike, price
                            in zip(df_call['Strike'], df_call['Ask'])]

        cp = 'P'
        df_put['IVBid'] = [impvol(strike, price) for strike, price
                           in zip(df_put['Strike'], df_put['Bid'])]
        df_put['IVAsk'] = [impvol(strike, price) for strike, price
                           in zip(df_put['Strike'], df_put['Ask'])]

        # keep OTM data for options within QD range

        df_call = df_call[(df_call['Strike'] >= Fwd) &
                          (df_call['QuickDelta'] >= QDMin) &
                          (df_call['QuickDelta'] <= QDMax)]

        df_put = df_put[(df_put['Strike'] < Fwd) &
                        (df_put['QuickDelta'] >= QDMin) &
                        (df_put['QuickDelta'] <= QDMax)]

        # final assembly...

        df_cp = df_call.append(df_put, ignore_index=True)
        df_cp['R'] = iRate
        df_cp['D'] = dRate
        df_cp['ATMVol'] = atmVol
        df_cp['F'] = Fwd
        df_cp['T'] = timeToMaturity
        df_cp = df_cp.rename(columns={'IVBid': 'VB',
                                      'IVAsk': 'VA',
                                      'Strike': 'K'})
        df_cp['CP'] = [1 if t == 'C' else -1 for t in df_cp['Type']]

        if isFirst:
            df_final = df_cp
            isFirst = False
        else:
            df_final = df_final.append(df_cp, ignore_index=True)
Example #7
def Compute_IV(optionDataFrame, tMin=0, nMin=0, QDMin=0, QDMax=1, keepOTMData=True):
    
    """
    Pre-processing of a standard European option quote file.
    - Calculation of implied risk-free rate and dividend yield
    - Calculation of implied volatility
    - Estimate ATM volatility for each expiry
    - Compute implied volatility and Quick Delta for each quote
    
    Options for filtering the input data set: 
    - maturities with less than nMin strikes are ignored
    - maturities shorter than tMin (ACT/365 daycount) are ignored
    - strikes with Quick Delta < QDMin or > QDMax are ignored
    """
    
    grouped = optionDataFrame.groupby('dtExpiry') 

    isFirst = True
    for spec, group in grouped:
        print('processing group %s' % spec)

        # implied vol for this type/expiry group

        indx = group.index
        
        dtTrade = group['dtTrade'][indx[0]]
        dtExpiry = group['dtExpiry'][indx[0]]
        spot = group['Spot'][indx[0]]
        daysToExpiry = (dtExpiry-dtTrade).days
        timeToMaturity = daysToExpiry/365.0

        # exclude groups with too short time to maturity
        
        if timeToMaturity < tMin:
            continue
            
        # exclude groups with too few data points
        
        df_call = group[group['Type'] == 'C']
        df_put = group[group['Type'] == 'P']
        
        if (len(df_call) < nMin) | (len(df_put) < nMin):
            continue

        # calculate forward, implied interest rate and implied div. yield
            
        df_C = DataFrame((df_call['PBid']+df_call['PAsk'])/2,
                         columns=['PremiumC'])
        df_C.index = df_call['Strike']
        df_P = DataFrame((df_put['PBid']+df_put['PAsk'])/2,
                         columns=['PremiumP'])
        df_P.index = df_put['Strike']
        
        # use 'inner' join because some strikes are not quoted for C and P
        df_all = df_C.join(df_P, how='inner')
        df_all['Strike'] = df_all.index
        df_all['C-P'] = df_all['PremiumC'] - df_all['PremiumP']
    
        model = ols(y=df_all['C-P'], x=df_all['Strike'])
        b = model.beta 
    
        # intercept is last coef
        iRate = -np.log(-b[0])/timeToMaturity
        dRate = np.log(spot/b[1])/timeToMaturity
        
        discountFactor = np.exp(-iRate*timeToMaturity)
        Fwd = spot * np.exp((iRate-dRate)*timeToMaturity)

        print('Fwd: %f int rate: %f div yield: %f' % (Fwd, iRate, dRate))

        # mid-market ATM volatility
        
        def impvol(cp, strike, premium):
            try:
                vol = blackFormulaImpliedStdDev(cp, strike,
                    forward=Fwd, blackPrice=premium, discount=discountFactor,
                    TTM=timeToMaturity)
            except Exception:  # the solver can fail when no implied vol matches the price
                vol = np.nan
            return vol/np.sqrt(timeToMaturity)
        
        # implied bid/ask vol for all options
        
        df_call['IVBid'] = [impvol('C', strike, price) for strike, price
                            in zip(df_call['Strike'], df_call['PBid'])]
        df_call['IVAsk'] = [impvol('C', strike, price) for strike, price
                            in zip(df_call['Strike'], df_call['PAsk'])]
        
        df_call['IVMid'] = (df_call['IVBid'] + df_call['IVAsk'])/2
        
        df_put['IVBid'] = [impvol('P', strike, price) for strike, price
                           in zip(df_put['Strike'], df_put['PBid'])]
        df_put['IVAsk'] = [impvol('P', strike, price) for strike, price
                           in zip(df_put['Strike'], df_put['PAsk'])]
        
        df_put['IVMid'] = (df_put['IVBid'] + df_put['IVAsk'])/2
        
        f_call = interp1d(df_call['Strike'].values, df_call['IVMid'].values)
        f_put = interp1d(df_put['Strike'].values, df_put['IVMid'].values)

        atmVol = (f_call(Fwd)+f_put(Fwd))/2
        print('ATM vol: %f' % atmVol)

        # Quick Delta, computed with ATM vol
        rv = norm()
        df_call['QuickDelta'] = [rv.cdf(np.log(Fwd / strike) / (atmVol * np.sqrt(timeToMaturity)))
                                 for strike in df_call['Strike']]
        df_put['QuickDelta'] = [rv.cdf(np.log(Fwd / strike) / (atmVol * np.sqrt(timeToMaturity)))
                                for strike in df_put['Strike']]

        # keep data within QD range
    
        df_call = df_call[(df_call['QuickDelta'] >= QDMin) & \
                          (df_call['QuickDelta'] <= QDMax) ]
                        
        df_put = df_put[  (df_put['QuickDelta'] >= QDMin) & \
                          (df_put['QuickDelta'] <= QDMax) ]

        # final assembly...

        df_cp = df_call.append(df_put,  ignore_index=True)
        df_cp['iRate'] = iRate 
        df_cp['iDiv'] = dRate 
        df_cp['ATMVol'] = atmVol 
        df_cp['Fwd'] = Fwd
        df_cp['TTM'] = timeToMaturity
        df_cp['CP'] = [1 if t == 'C' else -1 for t in df_cp['Type']]

        # keep only OTM data ?
        if keepOTMData:
            df_cp = df_cp[((df_cp['Strike']>=Fwd) & (df_cp['Type'] == 'C')) |
                          ((df_cp['Strike']<Fwd) & (df_cp['Type'] == 'P'))]
                         
        if isFirst:
            df_final = df_cp
            isFirst = False 
        else:
            df_final = df_final.append(df_cp, ignore_index=True)

    return df_final