Example #1
    def test_filter_bytestring(self, name):
        # GH13101
        df = DataFrame({b'a': [1, 2], b'b': [3, 4]})
        expected = DataFrame({b'a': [1, 2]})

        assert_frame_equal(df.filter(like=name), expected)
        assert_frame_equal(df.filter(regex=name), expected)
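For reference, a minimal standalone sketch (made-up data) of the matching rules these tests exercise: filter(like=...) keeps labels containing the substring, filter(regex=...) keeps labels that re.search matches, and filter(items=...) keeps an explicit label list, silently dropping labels that are absent.

import pandas as pd

df = pd.DataFrame({'alpha': [1, 2], 'beta': [3, 4], 'gamma': [5, 6]})
print(df.filter(like='al'))                # 'alpha' only (substring match)
print(df.filter(regex='^b'))               # 'beta' only (re.search on the label)
print(df.filter(items=['alpha', 'zeta']))  # 'zeta' is absent and silently dropped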
Example #2
def process_recarray_pandas(data, endog_idx=0, exog_idx=None, dtype=None,
                            index_idx=None):

    data = DataFrame(data, dtype=dtype)
    names = data.columns

    if isinstance(endog_idx, int):
        endog_name = names[endog_idx]
        endog = data[endog_name]
        if exog_idx is None:
            exog = data.drop([endog_name], axis=1)
        else:
            exog = data.filter(names[exog_idx])
    else:
        endog = data.loc[:, endog_idx]
        endog_name = list(endog.columns)
        if exog_idx is None:
            exog = data.drop(endog_name, axis=1)
        elif isinstance(exog_idx, int):
            exog = data.filter([names[exog_idx]])
        else:
            exog = data.filter(names[exog_idx])

    if index_idx is not None:  # NOTE: will have to be improved for dates
        endog.index = Index(data.iloc[:, index_idx])
        exog.index = Index(data.iloc[:, index_idx])
        data = data.set_index(names[index_idx])

    exog_name = list(exog.columns)
    dataset = Dataset(data=data, names=list(names), endog=endog, exog=exog,
                      endog_name=endog_name, exog_name=exog_name)
    return dataset
Example #3
def process_recarray_pandas(data, endog_idx=0, exog_idx=None, dtype=None):
    from pandas import DataFrame

    data = DataFrame(data, dtype=dtype)
    names = data.columns

    if isinstance(endog_idx, int):
        endog_name = names[endog_idx]
        endog = data[endog_name]
        if exog_idx is None:
            exog = data.drop([endog_name], axis=1)
        else:
            exog = data.filter(names[exog_idx])
    else:
        endog = data.loc[:, endog_idx]
        endog_name = list(endog.columns)
        if exog_idx is None:
            exog = data.drop(endog_name, axis=1)
        elif isinstance(exog_idx, int):
            exog = data.filter([names[exog_idx]])
        else:
            exog = data.filter(names[exog_idx])

    exog_name = list(exog.columns)
    dataset = Dataset(data=data, names=list(names), endog=endog, exog=exog,
                      endog_name=endog_name, exog_name=exog_name)
    return dataset
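The two variants above are different vintages of the same helper; the core pattern, sketched below with made-up column names, is splitting one frame into a target column and a feature block:

import pandas as pd

df = pd.DataFrame({'y': [0, 1], 'x1': [2, 3], 'x2': [4, 5]})
endog = df['y']                   # target column
exog = df.drop(['y'], axis=1)     # every other column ...
exog_subset = df.filter(['x1'])   # ... or an explicit subset by label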
    def test_filter_corner(self):
        empty = DataFrame()

        result = empty.filter([])
        assert_frame_equal(result, empty)

        result = empty.filter(like='foo')
        assert_frame_equal(result, empty)
    def test_filter(self):
        # items
        filtered = self.frame.filter(['A', 'B', 'E'])
        self.assertEqual(len(filtered.columns), 2)
        self.assertNotIn('E', filtered)

        filtered = self.frame.filter(['A', 'B', 'E'], axis='columns')
        self.assertEqual(len(filtered.columns), 2)
        self.assertNotIn('E', filtered)

        # other axis
        idx = self.frame.index[0:4]
        filtered = self.frame.filter(idx, axis='index')
        expected = self.frame.reindex(index=idx)
        assert_frame_equal(filtered, expected)

        # like
        fcopy = self.frame.copy()
        fcopy['AA'] = 1

        filtered = fcopy.filter(like='A')
        self.assertEqual(len(filtered.columns), 2)
        self.assertIn('AA', filtered)

        # like with ints in column names
        df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '_A', '_B'])
        filtered = df.filter(like='_')
        self.assertEqual(len(filtered.columns), 2)

        # regex with ints in column names
        # from PR #10384
        df = DataFrame(0., index=[0, 1, 2], columns=['A1', 1, 'B', 2, 'C'])
        expected = DataFrame(
            0., index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object))
        filtered = df.filter(regex='^[0-9]+$')
        assert_frame_equal(filtered, expected)

        expected = DataFrame(0., index=[0, 1, 2], columns=[0, '0', 1, '1'])
        # shouldn't remove anything
        filtered = expected.filter(regex='^[0-9]+$')
        assert_frame_equal(filtered, expected)

        # pass in None
        with assertRaisesRegexp(TypeError, 'Must pass'):
            self.frame.filter(items=None)

        # objects
        filtered = self.mixed_frame.filter(like='foo')
        self.assertIn('foo', filtered)

        # unicode columns, won't ascii-encode
        df = self.frame.rename(columns={'B': u('\u2202')})
        filtered = df.filter(like='C')
        self.assertTrue('C' in filtered)
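filter operates on column labels by default; passing axis='index' applies the same items/like/regex matching to row labels, as the test above checks. A minimal sketch:

import pandas as pd

df = pd.DataFrame({'A': [1, 2, 3]}, index=['r1', 'r2', 'x3'])
print(df.filter(like='r', axis='index'))   # rows 'r1' and 'r2'
print(df.filter(['A'], axis='columns'))    # columns (the DataFrame default)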
Example #7
def make_helpers(df_option):
    """ build array of helpers and rate curves
    """

    # extract rates and div yields from the data set
    df_tmp = df_option.filter(items=['dtExpiry', 'iRate', 'iDiv'])
    grouped = df_tmp.groupby('dtExpiry')

    def aggregate(serie):
        return serie[serie.index[0]]

    df_rates = grouped.agg(aggregate)

    # Get first index:
    first_index = 0

    dtTrade = df_option['dtTrade'][first_index]
    # back out the spot from any forward
    iRate = df_option['iRate'][first_index]
    iDiv = df_option['iDiv'][first_index]
    TTM = df_option['TTM'][first_index]
    Fwd = df_option['Fwd'][first_index]
    spot = SimpleQuote(Fwd * np.exp(-(iRate - iDiv) * TTM))
    print('Spot: %f risk-free rate: %f div. yield: %f' % (spot.value,
                                                          iRate, iDiv))

    # build array of option helpers
    hh = heston_helpers(spot, df_option, dtTrade, df_rates)

    risk_free_ts = dfToZeroCurve(df_rates['iRate'], dtTrade)
    dividend_ts = dfToZeroCurve(df_rates['iDiv'], dtTrade)

    return {'options': hh['options'], 'spot': spot,
            'risk_free_rate': risk_free_ts,
            'dividend_rate': dividend_ts}
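The grouped.agg(aggregate) step above just takes the first row of each expiry group. In current pandas the same thing can be written more directly with groupby(...).first(), which is equivalent here as long as the rate columns contain no missing values (first() skips NaNs). A sketch with made-up rates:

import pandas as pd

df_tmp = pd.DataFrame({'dtExpiry': ['2024-01', '2024-01', '2024-02'],
                       'iRate': [0.05, 0.05, 0.06],
                       'iDiv': [0.02, 0.02, 0.03]})
df_rates = df_tmp.groupby('dtExpiry').first()   # one row of rates per expiry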
def predict(test_set: DataFrame, model: LogisticRegression, reg, filename):
    test_df = test_set.filter(regex=reg)
    test_np = test_df.to_numpy()
    predictions = model.predict(test_np)
    result = DataFrame({'PassengerId': test_set['PassengerId'].to_numpy(),
                        'Survived': predictions.astype(np.int32)})
    result.to_csv(filename, index=False)
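This predict helper (and the tree_modeling variants below) selects feature columns with a single regex, which keeps the train and test column sets in sync. A minimal sketch with hypothetical column names:

import pandas as pd

test_set = pd.DataFrame({'PassengerId': [1, 2],
                         'Age_scaled': [0.1, 0.2],
                         'Fare_scaled': [0.3, 0.4]})
features = test_set.filter(regex='Age_.*|Fare_.*')   # feature columns only
test_np = features.to_numpy()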
Example #9
def merge_df(df_option, options, model_name):
    df_output = df_option.filter(
        items=[
            "dtTrade",
            "dtExpiry",
            "Type",
            "Strike",
            "Mid",
            "QuickDelta",
            "IVBid",
            "IVAsk",
            "iRate",
            "iDiv",
            "ATMVol",
            "Fwd",
            "TTM",
        ],
    )

    model_value = np.zeros(len(df_option))
    model_iv = np.zeros(len(df_option))
    for i, j in zip(range(len(df_option)), range(0, len(options), 2)):
        model_value[i] = options[j].model_value()
        model_iv[i] = options[j].impliedVolatility(
            model_value[i], accuracy=1.0e-5, maxEvaluations=5000, minVol=0.01, maxVol=10.0
        )

    df_output[model_name + "-Value"] = model_value
    df_output[model_name + "-IV"] = model_iv

    return df_output
Example #10
def calibration_plot(df_calibration, model_name):

    dtTrade = df_calibration['dtTrade'][0]
    title = '%s Model (%s)' % (model_name, dtTrade)

    df_calibration = df_calibration.filter(
        items=['dtExpiry', 'Strike', 'IVBid', 'IVAsk',
               'TTM', model_name + '-IV'])

    # group by maturity
    grouped = df_calibration.groupby('dtExpiry')

    all_groups = [(dt, g) for dt, g in grouped]

    xy = [(0, 0), (0, 1), (1, 0), (1, 1)]

    for k in range(0, len(all_groups), 4):
        if (k + 4) >= len(all_groups):
            break
        fig, axs = plt.subplots(2, 2, sharex=True, sharey=True)
        axs[0, 0].set_title(title)

        for i in range(4):
            x, y = xy[i]
            calibration_subplot(axs[x, y], all_groups[i + k][1], i,
                                model_name)
        plt.show(block=False)
def tree_modeling(train_set: DataFrame, reg):
    train_df = train_set.filter(regex=reg)
    train_np = train_df.to_numpy()
    x = train_np[:, 1:]
    y = train_np[:, 0]
    clf = LogisticRegression(penalty='l1')
    clf.fit(x, y)
    scores = np.array(cross_validation.cross_val_score(clf, x, y, cv=5))
    print('The accuracy on train set is', scores.mean())
    return clf, train_df
Example #12
def tree_modeling(train_set: DataFrame, reg):
    train_df = train_set.filter(regex=reg)
    train_np = train_df.to_numpy()
    x = train_np[:, 1:]
    y = train_np[:, 0]
    clf = DecisionTreeClassifier(criterion='gini')
    clf.fit(x, y)
    scores = np.array(cross_validation.cross_val_score(clf, x, y, cv=5))
    print('The accuracy on train set is', scores.mean())
    return clf, train_df
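The modeling examples import cross_val_score from sklearn's old cross_validation module, which was removed in scikit-learn 0.20; the current import path is sklearn.model_selection. A self-contained sketch on toy data:

import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

x = np.array([[0], [1], [2], [3], [4], [5]])
y = np.array([0, 0, 0, 1, 1, 1])
scores = cross_val_score(DecisionTreeClassifier(criterion='gini'), x, y, cv=2)
print('The accuracy on train set is', scores.mean())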
Example #13
def heston_calibration(df_option, ival=None):
    """
    calibrate heston model
    """

    # extract rates and div yields from the data set
    df_tmp = DataFrame.filter(df_option, items=["dtExpiry", "iRate", "iDiv"])
    grouped = df_tmp.groupby("dtExpiry")
    df_rates = grouped.agg(lambda x: x.iloc[0])

    dtTrade = df_option["dtTrade"][0]
    # back out the spot from any forward
    iRate = df_option["iRate"][0]
    iDiv = df_option["iDiv"][0]
    TTM = df_option["TTM"][0]
    Fwd = df_option["Fwd"][0]
    spot = SimpleQuote(Fwd * np.exp(-(iRate - iDiv) * TTM))
    print("Spot: %f risk-free rate: %f div. yield: %f" % (spot.value, iRate, iDiv))

    # build array of option helpers
    hh = heston_helpers(spot, df_option, dtTrade, df_rates)
    options = hh["options"]
    spot = hh["spot"]

    risk_free_ts = dfToZeroCurve(df_rates["iRate"], dtTrade)
    dividend_ts = dfToZeroCurve(df_rates["iDiv"], dtTrade)

    # initial values for parameters
    if ival is None:
        ival = {"v0": 0.1, "kappa": 1.0, "theta": 0.1, "sigma": 0.5, "rho": -0.5}

    process = HestonProcess(
        risk_free_ts, dividend_ts, spot, ival["v0"], ival["kappa"], ival["theta"], ival["sigma"], ival["rho"]
    )

    model = HestonModel(process)
    engine = AnalyticHestonEngine(model, 64)

    for option in options:
        option.set_pricing_engine(engine)

    om = LevenbergMarquardt(1e-8, 1e-8, 1e-8)
    model.calibrate(options, om, EndCriteria(400, 40, 1.0e-8, 1.0e-8, 1.0e-8))

    print("model calibration results:")
    print("v0: %f kappa: %f theta: %f sigma: %f rho: %f" % (model.v0, model.kappa, model.theta, model.sigma, model.rho))

    calib_error = (1.0 / len(options)) * sum([pow(o.calibration_error() * 100.0, 2) for o in options])

    print("SSE: %f" % calib_error)

    # merge the fitted volatility and the input data set
    return merge_df(df_option, options, "Heston")
Example #14
File: hjm.py Project: alpmdog/CQF
def calculate_pca(forwards, no_factors=3):
    fwddiff = forwards.diff()
    fwddiff = fwddiff.dropna()
    covmat = fwddiff.cov()
    covmat = covmat * 252 / 10000
    eigenvecs, eigenmat = jacobi(covmat.values)
    eigvecs = Series(eigenvecs, index=covmat.columns)
    sorted_eigvecs = eigvecs.sort_values(ascending=False)
    top3 = sorted_eigvecs[:no_factors].index
    eigenmat_df = DataFrame(eigenmat, index=covmat.columns,
                            columns=covmat.columns)
    filtered_eigenmat = eigenmat_df.filter(top3)
    return sorted_eigvecs, filtered_eigenmat
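calculate_pca depends on a custom jacobi routine; with plain numpy the same top-factor extraction can be sketched with eigh, which returns the eigenvalues of a symmetric matrix in ascending order. The column labeling below mirrors the convention used above, where eigenvector columns are keyed by the covariance matrix's own columns:

import numpy as np
import pandas as pd

def top_factors(covmat, no_factors=3):
    vals, vecs = np.linalg.eigh(covmat.values)   # ascending eigenvalues
    eigvals = pd.Series(vals, index=covmat.columns).sort_values(ascending=False)
    eigenmat_df = pd.DataFrame(vecs, index=covmat.columns, columns=covmat.columns)
    return eigvals, eigenmat_df.filter(eigvals.index[:no_factors])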
Example #15
def tree_modeling(train_set: DataFrame, reg):
    train_df = train_set.filter(regex=reg)
    train_np = train_df.to_numpy()
    x = train_np[:, 1:]
    y = train_np[:, 0]
    clf = RandomForestClassifier(n_estimators=270,
                                 max_depth=8,
                                 min_samples_leaf=3,
                                 random_state=50)
    clf.fit(x, y)
    scores = np.array(cross_validation.cross_val_score(clf, x, y, cv=5))
    print('The accuracy on train set is', scores.mean())
    return clf, train_df
Example #16
def boost_modeling(train_set: DataFrame, reg):
    train_df = train_set.filter(regex=reg)
    train_np = train_df.to_numpy()
    x = train_np[:, 1:]
    y = train_np[:, 0]
    train_x, train_y, valid_x, valid_y = leave_out(x, y)
    dtrain = xgb.DMatrix(data=train_x, label=train_y)
    dvalid = xgb.DMatrix(data=valid_x, label=valid_y)
    watchlist = [(dtrain, 'train')]
    param = {'max_depth': 6, 'eta': 0.05, 'silent': 1,
             'objective': 'binary:logistic', 'subsample': 0.9}
    bst = xgb.train(param, dtrain, num_boost_round=17, evals=watchlist)
    return bst
    def test_filter_regex_search(self):
        fcopy = self.frame.copy()
        fcopy['AA'] = 1

        # regex
        filtered = fcopy.filter(regex='[A]+')
        self.assertEqual(len(filtered.columns), 2)
        self.assertIn('AA', filtered)

        # doesn't have to be at beginning
        df = DataFrame({'aBBa': [1, 2],
                        'BBaBB': [1, 2],
                        'aCCa': [1, 2],
                        'aCCaBB': [1, 2]})

        result = df.filter(regex='BB')
        exp = df[[x for x in df.columns if 'BB' in x]]
        assert_frame_equal(result, exp)
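As the comment notes, filter(regex=...) uses re.search, so the pattern may match anywhere in the label; anchor it explicitly for prefix or full-label matches:

import pandas as pd

df = pd.DataFrame({'aBBa': [1], 'BBaa': [2]})
print(df.filter(regex='BB'))    # both columns (search, not match)
print(df.filter(regex='^BB'))   # only 'BBaa'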
Example #19
def merge_df(df_option, options, model_name):
    df_output = df_option.filter(
        items=['dtTrade', 'dtExpiry',
               'Type', 'K', 'Mid',
               'QuickDelta', 'VB', 'VA',
               'R', 'D', 'ATMVol', 'F', 'T'])

    model_value = np.zeros(len(df_option))
    model_iv = np.zeros(len(df_option))
    for i, j in zip(range(len(df_option)), range(0, len(options), 2)):
        model_value[i] = options[j].model_value()
        model_iv[i] = options[j].impliedVolatility(model_value[i],
            accuracy=1.e-5, maxEvaluations=5000,
            minVol=.01, maxVol=10.0)

    df_output[model_name + '-Value'] = model_value
    df_output[model_name + '-IV'] = model_iv

    return df_output
Example #20
def calibration_plot(title, df_calibration, model_name):
    df_calibration = df_calibration.filter(
        items=["dtExpiry", "Strike", "IVBid", "IVAsk", "TTM", model_name + "-IV"]
    )

    # group by maturity
    grouped = df_calibration.groupby("dtExpiry")

    all_groups = [(dt, g) for dt, g in grouped]

    xy = [(0, 0), (0, 1), (1, 0), (1, 1)]

    for k in range(0, len(all_groups), 4):
        if (k + 4) >= len(all_groups):
            break
        fig, axs = plt.subplots(2, 2, sharex=True, sharey=True)
        fig.suptitle(title)

        for i in range(4):
            x, y = xy[i]
            calibration_subplot(axs[x, y], all_groups[i + k][1], i, model_name)
        plt.show()
	def main(self):
		
		sampleMap   = self.maps
		snpsDict    = self.snps
		MainDF      = DataFrame()
		snpsList    = self.snpsPos 
		samplesAll  = self.samA
		samplesUsed = self.samM

		## code for multiprocessing
		
		pool  = mp.Pool(processes=4)
		for res in pool.imap_unordered(functools.partial(mp_coverage_process,sampleMap=sampleMap,snpsDict=snpsDict,sortedSnps=snpsList),samplesAll,chunksize=50):
			sampleName, (coverages, snpIndices) = res
			DF_temp = DataFrame({sampleName:coverages},index = snpIndices)
			MainDF  = pd.concat([MainDF,DF_temp],axis = 1)
			
		"""
		for sampleName in samplesUsed:
			
			print sampleName
			sys.stdout.flush()	
			covrFile = sampleMap[sampleName]
			coverages, snpIndices = self.coverage_process(covrFile,snpsDict)
			DF_temp = DataFrame({sampleName:coverages},index = snpIndices)
			MainDF  = pd.concat([MainDF,DF_temp],axis = 1)
		
		"""		
		
		subDF   = MainDF.filter(items=samplesUsed)
		medians = subDF.median(axis=1)
		medFile = os.path.join(self.odir,'median_coverages_for_chrm_'+self.chrm+'_part'+self.part+'.csv')
		medians.to_csv(medFile)

		outf = os.path.join(self.odir,'coverages_for_chrm_'+self.chrm+'_part'+self.part+'.csv')
		MainDF.to_csv(outf)

		return 'Done'
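Both this main and the similar coverage script below grow MainDF by concatenating inside the loop, which re-copies the accumulated frame on every iteration; collecting the pieces in a list and concatenating once is the usual linear-time alternative. A sketch with made-up coverage data:

import pandas as pd

samples = {'s1': ([10, 12], [100, 200]), 's2': ([7, 9], [100, 300])}  # hypothetical
parts = [pd.DataFrame({name: cov}, index=idx)
         for name, (cov, idx) in samples.items()]
MainDF = pd.concat(parts, axis=1)   # one concat instead of one per sample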
Example #22
def calibration_plot(title, df_calibration, model_name):
    df_calibration = df_calibration.filter(
        items=['dtExpiry',
               'K', 'VB', 'VA',
               'T', model_name + '-IV'])

    # group by maturity
    grouped = df_calibration.groupby('dtExpiry')

    all_groups = [(dt, g) for dt, g in grouped]
    
    xy = [(0, 0), (0, 1), (1, 0), (1, 1)]

    for k in range(0, len(all_groups), 4):
        if (k + 4) >= len(all_groups):
            break
        fig, axs = plt.subplots(2, 2, sharex=True, sharey=True)

        for i in range(4):
            x, y = xy[i]
            calibration_subplot(axs[x, y], all_groups[i + k][1], i, model_name)
        fig.suptitle(title, fontsize=12, fontweight='bold')
        fig.show()
Example #23
def heston_helpers(df_option, dtTrade=None, df_rates=None, ival=None):
    """
    Create array of heston options helpers
    """

    if dtTrade is None:
        dtTrade = df_option['dtTrade'][0]
    DtSettlement = datetoQLDate(dtTrade)
    
    settings = Settings()
    settings.evaluation_date = DtSettlement

    calendar = TARGET()

    if df_rates is None:
        df_tmp = df_option.filter(items=['dtExpiry', 'R', 'D'])
        grouped = df_tmp.groupby('dtExpiry')
        df_rates = grouped.agg(lambda x: x.iloc[0])

    # convert data frame (date/value) into zero curve
    # expect the index to be a date, and 1 column of values

    risk_free_ts = dfToZeroCurve(df_rates['R'], dtTrade)
    dividend_ts = dfToZeroCurve(df_rates['D'], dtTrade)

    # back out the spot from any forward
    iRate = df_option['R'][0]
    iDiv = df_option['D'][0]
    TTM = df_option['T'][0]
    Fwd = df_option['F'][0]
    spot = SimpleQuote(Fwd*np.exp(-(iRate-iDiv)*TTM))
    print('Spot: %f risk-free rate: %f div. yield: %f' % (spot.value, iRate, iDiv))

    # loop through rows in option data frame, construct
    # helpers for bid/ask

    oneDay = datetime.timedelta(days=1)
    dtExpiry = [dtTrade + int(t*365)*oneDay for t in df_option['T']]
    df_option['dtExpiry'] = dtExpiry

    options = []
    for index, row in df_option.iterrows():

        strike = row['K']
        if (strike/spot.value > 1.3) | (strike/spot.value < .7):
            continue

        days = int(365*row['T'])
        maturity = Period(days, Days)

        options.append(
                HestonModelHelper(
                    maturity, calendar, spot.value,
                    strike, SimpleQuote(row['VB']),
                    risk_free_ts, dividend_ts,
                    ImpliedVolError))
        
        options.append(
                HestonModelHelper(
                    maturity, calendar, spot.value,
                    strike, SimpleQuote(row['VA']),
                    risk_free_ts, dividend_ts,
                    ImpliedVolError))

    return {'options':options, 'spot': spot}
MainDF = DataFrame()
for sample in samplesAll:

    subDirectory = os.path.join(datad, sample)
    covrFileName = sample + "_" + chrm + "_part_" + str(part) + ".pickle"
    covrFilePath = os.path.join(subDirectory, covrFileName)

    try:
        with open(covrFilePath, "rb") as inp:
            data = cPickle.load(inp)
    except IOError as e:
        print(e)

    coverages = data["coverages"]
    varIndices = data["indices"]

    DF_temp = DataFrame({sample: coverages}, index=varIndices)
    MainDF = pd.concat([MainDF, DF_temp], axis=1)


subDF = MainDF.filter(items=samplesMed)
medians = subDF.median(axis=1)
medFile = os.path.join(outd, "Medians", "median_coverages_for_chrm_" + chrm + "_part" + part + ".csv")
medians.to_csv(medFile)

outf1 = os.path.join(outd, "Whites", "coverages_for_chrm_" + chrm + "_part" + part + "_whites_only.csv")
subDF.to_csv(outf1)
outf2 = os.path.join(outd, "All", "coverages_for_chrm_" + chrm + "_part" + part + "_all_samples.csv")
MainDF.to_csv(outf2)
Example #25
def heston_calibration(df_option, ival=None):
    """
    calibrate heston model
    """

    # extract rates and div yields from the data set    
    df_tmp = df_option.filter(items=['dtExpiry', 'iRate', 'iDiv'])
    grouped = df_tmp.groupby('dtExpiry')

    def aggregate(serie):
        return serie[serie.index[0]]

    df_rates = grouped.agg(aggregate)

    # Get first index:
    first_index = 0

    dtTrade = df_option['dtTrade'][first_index]
    # back out the spot from any forward
    iRate = df_option['iRate'][first_index]
    iDiv = df_option['iDiv'][first_index]
    TTM = df_option['TTM'][first_index]
    Fwd = df_option['Fwd'][first_index]
    spot = SimpleQuote(Fwd*np.exp(-(iRate-iDiv)*TTM))
    print('Spot: %f risk-free rate: %f div. yield: %f' % (spot.value, iRate, iDiv))

    # build array of option helpers
    hh = heston_helpers(spot, df_option, dtTrade, df_rates)
    options = hh['options']
    spot = hh['spot']

    risk_free_ts = dfToZeroCurve(df_rates['iRate'], dtTrade)
    dividend_ts = dfToZeroCurve(df_rates['iDiv'], dtTrade)

    # initial values for parameters
    if ival is None:
        ival = {'v0': 0.1, 'kappa': 1.0, 'theta': 0.1,
                'sigma': 0.5, 'rho': -0.5}

    process = HestonProcess(
        risk_free_ts, dividend_ts, spot, ival['v0'], ival['kappa'],
        ival['theta'], ival['sigma'], ival['rho'])

    model = HestonModel(process)
    engine = AnalyticHestonEngine(model, 64)

    for option in options:
        option.set_pricing_engine(engine)

    om = LevenbergMarquardt(1e-8, 1e-8, 1e-8)
    model.calibrate(
        options, om, EndCriteria(400, 40, 1.0e-8, 1.0e-8, 1.0e-8)
    )

    print('model calibration results:')
    print('v0: %f kappa: %f theta: %f sigma: %f rho: %f' %
          (model.v0, model.kappa, model.theta, model.sigma,
           model.rho))

    calib_error = (1.0 / len(options)) * sum(
        [pow(o.calibration_error() * 100.0, 2) for o in options])

    print('SSE: %f' % calib_error)

    # merge the fitted volatility and the input data set
    return merge_df(df_option, options, 'Heston')
Example #26
def ATM_Vol(premium, discountFactor, forward, strike):
    """
    Aproximate std dev, for calls close to the money
    """
    vol = (premium/discountFactor - .5*(forward-strike))*5.0/(forward+strike) 

    return vol

# get spot and option data frame

(spot, optionDataFrame) = read_SPX_file(option_data_file)

grouped = optionDataFrame.groupby('dtExpiry')

isFirst = True
for spec, group in grouped:
    print('processing group %s' % spec)

    # implied vol for this type/expiry group

    indx = group.index

    dtTrade = group['dtTrade'][indx[0]]
    dtExpiry = group['dtExpiry'][indx[0]]
    daysToExpiry = (dtExpiry - dtTrade).days
    timeToMaturity = daysToExpiry / 365.0

    # exclude groups with too few data points
    # or too short maturity

    if timeToMaturity < tMin:
        continue

    # valid call and put quotes
    df_call = group[(group['Type'] == 'C') & (group['Bid'] > 0)
                    & (group['Ask'] > 0)].copy()
    df_put = group[(group['Type'] == 'P') & (group['Bid'] > 0)
                   & (group['Ask'] > 0)].copy()
    if (len(df_call) == 0) | (len(df_put) == 0):
        continue

    # calculate forward, implied interest rate and implied div. yield

    df_call['Mid'] = (df_call['Bid'] + df_call['Ask']) / 2
    df_put['Mid'] = (df_put['Bid'] + df_put['Ask']) / 2

    df_C = df_call.filter(items=['Strike', 'Mid'])
    df_C.columns = ['Strike', 'PremiumC']
    to_join = DataFrame(df_put['Mid'], index=df_put['Strike'],
                        columns=['PremiumP'])

    # use 'inner' join because some strikes are not quoted for C and P
    df_all = df_C.join(to_join, on='Strike', how='inner')

    df_all['C-P'] = df_all['PremiumC'] - df_all['PremiumP']

    model = ols(y=df_all['C-P'], x=df_all.loc[:, 'Strike'])
    b = model.beta

    # intercept is last coef
    iRate = -np.log(-b[0]) / timeToMaturity
    dRate = np.log(spot / b[1]) / timeToMaturity
    discountFactor = np.exp(-iRate * timeToMaturity)
    Fwd = spot * np.exp((iRate - dRate) * timeToMaturity)

    print('Fwd: %f int rate: %f div yield: %f' % (Fwd, iRate, dRate))

    # interpolate ATM premium and vol: used to compute Quick Delta
    f_call = interp1d(df_all['Strike'].values, df_all['PremiumC'].values)
    f_put = interp1d(df_all['Strike'].values, df_all['PremiumP'].values)

    atmPremium = (f_call(Fwd) + f_put(Fwd)) / 2
    atmVol = blackFormulaImpliedStdDev('C', strike=Fwd,
                                       forward=Fwd, blackPrice=atmPremium,
                                       discount=discountFactor,
                                       TTM=timeToMaturity) / np.sqrt(timeToMaturity)

    print('ATM vol: %f' % atmVol)

    # Quick Delta, computed with ATM vol
    rv = norm()
    df_call['QuickDelta'] = [rv.cdf(np.log(Fwd / strike) / (atmVol * np.sqrt(timeToMaturity)))
                             for strike in df_call['Strike']]
    df_put['QuickDelta'] = [rv.cdf(np.log(Fwd / strike) / (atmVol * np.sqrt(timeToMaturity)))
                            for strike in df_put['Strike']]

    # implied bid/ask vol for all options

    def impvol(strike, premium):
        try:
            vol = blackFormulaImpliedStdDev(cp, strike,
                forward=Fwd, blackPrice=premium, discount=discountFactor,
                TTM=timeToMaturity)
        except Exception:
            vol = np.nan
        return vol / np.sqrt(timeToMaturity)

    cp = 'C'
    df_call['IVBid'] = [impvol(strike, price) for strike, price in zip(df_call['Strike'], df_call['Bid'])]
    df_call['IVAsk'] = [impvol(strike, price) for strike, price in zip(df_call['Strike'], df_call['Ask'])]

    cp = 'P'
    df_put['IVBid'] = [impvol(strike, price) for strike, price in zip(df_put['Strike'], df_put['Bid'])]
    df_put['IVAsk'] = [impvol(strike, price) for strike, price in zip(df_put['Strike'], df_put['Ask'])]

    # keep OTM data for options within QD range

    df_call = df_call[(df_call['Strike'] >= Fwd) &
                      (df_call['QuickDelta'] >= QDMin) &
                      (df_call['QuickDelta'] <= QDMax)]

    df_put = df_put[(df_put['Strike'] < Fwd) &
                    (df_put['QuickDelta'] >= QDMin) &
                    (df_put['QuickDelta'] <= QDMax)]

    # final assembly...

    df_cp = pd.concat([df_call, df_put], ignore_index=True)
    df_cp['R'] = iRate
    df_cp['D'] = dRate
    df_cp['ATMVol'] = atmVol
    df_cp['F'] = Fwd
    df_cp['T'] = timeToMaturity
    df_cp = df_cp.rename(columns={'IVBid': 'VB',
                                  'IVAsk': 'VA',
                                  'Strike': 'K'})
    df_cp['CP'] = [1 if t == 'C' else -1 for t in df_cp['Type']]

    if isFirst:
        df_final = df_cp
        isFirst = False
    else:
        df_final = pd.concat([df_final, df_cp], ignore_index=True)
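The regression step above rests on put-call parity: C - P = e^(-rT) * (F - K), which is linear in the strike K. Fitting C - P against K therefore gives slope = -e^(-rT) and intercept = e^(-rT) * F = S * e^(-dT). Assuming the fitted coefficients come back slope-first, as the code's "intercept is last coef" comment indicates, this yields r = -ln(-b[0]) / T and d = ln(S / b[1]) / T, exactly the iRate and dRate lines above; the forward then follows as F = S * e^((r - d) * T).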
Example #27
    def test_filter_unicode(self, name, expected):
        # GH13101
        df = DataFrame({u'a': [1, 2], u'あ': [3, 4]})

        assert_frame_equal(df.filter(like=name), expected)
        assert_frame_equal(df.filter(regex=name), expected)
Example #28
    df_cp = pd.concat([df_call, df_put], ignore_index=True)
    df_cp['R'] = iRate
    df_cp['D'] = dRate
    df_cp['ATMVol'] = atmVol
    df_cp['F'] = Fwd
    df_cp['T'] = timeToMaturity
    df_cp = df_cp.rename(columns={'IVBid': 'VB',
                                  'IVAsk': 'VA',
                                  'Strike': 'K'})
    df_cp['CP'] = [1 if t == 'C' else -1 for t in df_cp['Type']]

    if isFirst:
        df_final = df_cp
        isFirst = False
    else:
        df_final = pd.concat([df_final, df_cp], ignore_index=True)
        
df_final.to_csv(calibration_data_file, index=False)

df_final.to_pickle('data/df_final.pkl')

# save term structure of dividends and rate: first item in each expiry group   
df_tmp = df_final.filter(items=['dtExpiry', 'R', 'D'])
grouped = df_tmp.groupby('dtExpiry')
df_rates = grouped.agg(lambda x: x.iloc[0])
   
df_rates.to_csv(rate_div_file)
df_rates.to_pickle('data/df_rates.pkl')
Example #29
    def test_filter(self):
        # Items
        filtered = self.frame.filter(['A', 'B', 'E'])
        assert len(filtered.columns) == 2
        assert 'E' not in filtered

        filtered = self.frame.filter(['A', 'B', 'E'], axis='columns')
        assert len(filtered.columns) == 2
        assert 'E' not in filtered

        # Other axis
        idx = self.frame.index[0:4]
        filtered = self.frame.filter(idx, axis='index')
        expected = self.frame.reindex(index=idx)
        tm.assert_frame_equal(filtered, expected)

        # like
        fcopy = self.frame.copy()
        fcopy['AA'] = 1

        filtered = fcopy.filter(like='A')
        assert len(filtered.columns) == 2
        assert 'AA' in filtered

        # like with ints in column names
        df = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '_A', '_B'])
        filtered = df.filter(like='_')
        assert len(filtered.columns) == 2

        # regex with ints in column names
        # from PR #10384
        df = DataFrame(0., index=[0, 1, 2], columns=['A1', 1, 'B', 2, 'C'])
        expected = DataFrame(
            0., index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object))
        filtered = df.filter(regex='^[0-9]+$')
        tm.assert_frame_equal(filtered, expected)

        expected = DataFrame(0., index=[0, 1, 2], columns=[0, '0', 1, '1'])
        # shouldn't remove anything
        filtered = expected.filter(regex='^[0-9]+$')
        tm.assert_frame_equal(filtered, expected)

        # pass in None
        with pytest.raises(TypeError, match='Must pass'):
            self.frame.filter()
        with pytest.raises(TypeError, match='Must pass'):
            self.frame.filter(items=None)
        with pytest.raises(TypeError, match='Must pass'):
            self.frame.filter(axis=1)

        # test mutually exclusive arguments
        with pytest.raises(TypeError, match='mutually exclusive'):
            self.frame.filter(items=['one', 'three'], regex='e$', like='bbi')
        with pytest.raises(TypeError, match='mutually exclusive'):
            self.frame.filter(items=['one', 'three'], regex='e$', axis=1)
        with pytest.raises(TypeError, match='mutually exclusive'):
            self.frame.filter(items=['one', 'three'], regex='e$')
        with pytest.raises(TypeError, match='mutually exclusive'):
            self.frame.filter(items=['one', 'three'], like='bbi', axis=0)
        with pytest.raises(TypeError, match='mutually exclusive'):
            self.frame.filter(items=['one', 'three'], like='bbi')

        # objects
        filtered = self.mixed_frame.filter(like='foo')
        assert 'foo' in filtered

        # unicode columns, won't ascii-encode
        df = self.frame.rename(columns={'B': u('\u2202')})
        filtered = df.filter(like='C')
        assert 'C' in filtered