df_single = pd.DataFrame(columns=cols)
for root_sym in generic_futures_hist_prices_dict.keys():
    try:
        hist_series = generic_futures_hist_prices_dict[root_sym][root_sym +
                                                                 '1'].copy()
        hist_series.dropna(inplace=True)
        hist_series = hist_series[(hist_series.index[-1] +
                                   timedelta(days=-365 * 5)):]  # last 5 years

        meta_data = futures_contracts_meta_df[futures_contracts_meta_df['Root']
                                              == root_sym]
        meta_data.sort_values('Last_Trade_Date', inplace=True)

        row_dict = {}
        row_dict['Name'] = futures_meta_df.loc[root_sym, "NAME"][:-6]
        row_dict['Contract'] = get_futures_chain(
            meta_data, hist_series.index[-1]).index[0]
        row_dict['Price'] = round(hist_series.iloc[-1], 4)
        row_dict['Chg'] = round(
            (hist_series.iloc[-1] / hist_series.iloc[-2] - 1.0) * 100.0, 4)
        try:
            row_dict['1MChg'] = round(
                (hist_series.iloc[-1] / hist_series.iloc[-22] - 1.0) * 100.0,
                4)
        except:
            row_dict['1MChg'] = None
        row_dict['High'] = round(np.max(hist_series.dropna().to_numpy()), 4)
        row_dict['Low'] = round(np.min(hist_series.dropna().to_numpy()), 4)
        row_dict['Avg'] = round(np.average(hist_series.dropna().to_numpy()), 4)
        row_dict['SD'] = round(np.std(hist_series.dropna().to_numpy()), 4)
        hist_return = hist_series / hist_series.shift(1) - 1
        row_dict['EWMA'] = round(
Esempio n. 2
0
def _fetch_cboe_vx_settle(contract, last_trade_date):
    """
    Download the settlement history of one VX contract directly from CBOE.

    :param contract: contract ticker, used as the name of the returned series
    :param last_trade_date: the contract's Last_Trade_Date; its %Y-%m-%d form
        is the last component of the CBOE csv url
    :return: the 'Settle' column as a series indexed by ascending trade date
    """
    # https://markets.cboe.com/us/futures/market_statistics/historical_data/
    url = fr'https://markets.cboe.com/us/futures/market_statistics/historical_data/products/csv/VX/{last_trade_date.strftime("%Y-%m-%d")}/'
    # a timeout keeps one stalled connection from hanging the whole run
    r = requests.get(url, timeout=30)
    # fail on the HTTP status instead of trying to parse an error page as csv
    r.raise_for_status()
    df = pd.read_csv(io.StringIO(r.content.decode('utf8')))
    df.set_index('Trade Date', inplace=True)
    settle = df['Settle'].rename(contract)
    settle.index = pd.to_datetime(settle.index)
    return settle.sort_index(ascending=True)


def _fetch_quandl_settle(quandl_ticker, contract, quandl_multiplier, start_date, end_date):
    """
    Download the settlement history of one contract from Quandl.

    :param quandl_ticker: Quandl code of the root; its last five characters
        are replaced by the last five characters of ``contract``
    :param contract: contract ticker, used as the name of the returned series
    :param quandl_multiplier: scale factor applied to the prices unless NaN
    :param start_date: first date requested
    :param end_date: last date requested
    :return: 'Settle' (or 'Last' when 'Settle' is absent) series named ``contract``
    """
    quandl_contract = quandl_ticker[:-5] + contract[-5:]
    df = quandl.get(quandl_contract, start_date=start_date, end_date=end_date,
                    qopts={'columns': ['Settle']}, authtoken=global_settings.quandl_auth)
    try:
        settle = df['Settle']
    except KeyError:
        settle = df['Last']
    settle = settle.rename(contract)
    if not np.isnan(quandl_multiplier):         # consistent with Bloomberg
        settle = settle * quandl_multiplier
    return settle


def download_futures_hist_prices_from_quandl() -> None:
    """
    Refresh data/futures_historical_prices.h5 with the last 75 days of settles.

    For every root in data/config/futures_meta.csv that has a Quandl ticker
    and a non-zero, non-NaN QuandlMultiplier, each contract returned by
    get_futures_chain is downloaded (root 'UX' directly from CBOE, everything
    else from Quandl), merged column-wise, combined with the data already
    stored under the root's key and written back.

    Failures are logged with their traceback and skipped, per contract and per
    root; KeyboardInterrupt / SystemExit are no longer swallowed.

    TODO: should use ZC, ZS, ZW
    :return:
    """
    # only a trailing window is requested; older history comes from the
    # existing HDF store via combine_first below
    end_date = datetime.today()
    start_date = end_date + timedelta(days=-75)

    hdf_path = os.path.join(global_settings.root_path, 'data/futures_historical_prices.h5')

    df_futures_meta = pd.read_csv(os.path.join(global_settings.root_path, 'data/config/futures_meta.csv'), index_col=0)
    df_futures_meta = df_futures_meta[~np.isnan(df_futures_meta['QuandlMultiplier'])]
    df_futures_contracts_meta = pd.read_csv(os.path.join(global_settings.root_path, 'data/config/futures_contract_meta.csv'), index_col=0, keep_default_na=False)
    df_futures_contracts_meta['Last_Trade_Date'] = pd.to_datetime(df_futures_contracts_meta['Last_Trade_Date'])

    # roots that already have a dataset in the store
    existing_keys = set()
    if os.path.isfile(hdf_path):
        with h5py.File(hdf_path, 'r') as f:
            existing_keys.update(f.keys())

    for row_idx, row in df_futures_meta.iterrows():
        quandl_ticker = row['Quandl']
        quandl_multiplier = row['QuandlMultiplier']
        if not isinstance(quandl_ticker, str):             # empty is type(np.nan) == float
            continue
        if quandl_multiplier == 0:
            continue

        # download new dataset, combine with old dataset
        df_hist_prices = pd.DataFrame()
        try:
            # find all eligible contracts
            df_futures_contract_meta = df_futures_contracts_meta[df_futures_contracts_meta['Root'] == row_idx].copy()
            df_futures_contract_meta.sort_values('Last_Trade_Date', inplace=True)
            df_futures_contract_meta = get_futures_chain(df_futures_contract_meta, start_date)

            for row_idx2, row2 in df_futures_contract_meta.iterrows():
                try:
                    if row_idx == 'UX':      # directly from CBOE
                        settle = _fetch_cboe_vx_settle(row_idx2, row2['Last_Trade_Date'])
                    else:
                        settle = _fetch_quandl_settle(quandl_ticker, row_idx2, quandl_multiplier,
                                                      start_date, end_date)
                    df_hist_prices = pd.concat([df_hist_prices, settle], axis=1, join='outer', sort=True)

                    logging.debug('Contract {} is downloaded'.format(row_idx2))
                except Exception:
                    # best effort: one bad contract must not abort the root
                    logging.exception('Contract {} is missing'.format(row_idx2))

                # pause between consecutive requests
                time.sleep(3)

            # update existing dataset
            if row_idx in existing_keys:
                df_old = pd.read_hdf(hdf_path, key=row_idx)
                df_hist_prices = df_hist_prices.combine_first(df_old)

            df_hist_prices.sort_index(inplace=True)
            df_hist_prices.to_hdf(hdf_path, key=row_idx)
            logging.debug('{} is download'.format(row_idx))
        except Exception:
            logging.exception('{} failed to download'.format(row_idx))
Esempio n. 3
0
def _spread_tenor_combos(n_tenors, max_front=24, max_gap=12):
    """
    Enumerate calendar-spread tenor pairs (i, j).

    Pairs satisfy i <= max_front, i < j <= n_tenors and j - i <= max_gap,
    ordered by i and then by j.
    """
    return [(front, back)
            for front in range(1, min(n_tenors, max_front) + 1)
            for back in range(front + 1, min(front + max_gap, n_tenors) + 1)]


def _fly_tenor_combos(n_tenors, max_front=24, max_gap=12):
    """
    Enumerate equally spaced butterfly tenor triples (i, j, k).

    Triples satisfy i <= max_front, j - i == k - j <= max_gap and
    k <= n_tenors, ordered by i and then by the gap.
    """
    return [(front, front + gap, front + 2 * gap)
            for front in range(1, min(n_tenors, max_front) + 1)
            for gap in range(1, max_gap + 1)
            if front + 2 * gap <= n_tenors]


def _spread_level(legs_df, first_col):
    """Leg1 - Leg2 over the two columns starting at position first_col."""
    return legs_df.iloc[:, first_col] - legs_df.iloc[:, first_col + 1]


def _fly_level(legs_df, first_col):
    """Leg1 - 2 * Leg2 + Leg3 over the three columns starting at position first_col."""
    return (legs_df.iloc[:, first_col] - 2.0 * legs_df.iloc[:, first_col + 1]
            + legs_df.iloc[:, first_col + 2])


def _percentile_and_zscore(levels, current_level):
    """Percentile rank (kind='mean') and z-score of current_level within levels."""
    percentile = stats.percentileofscore(levels, current_level, kind='mean')
    z_score = (current_level - np.average(levels)) / np.std(levels)
    return percentile, z_score


def _score_combos(sym_root, generic_data, meta_data, combos, n_legs, level_fn, label):
    """
    Build the statistics table for every tenor combo of one root symbol.

    :param sym_root: root symbol; generic columns are named sym_root + tenor
    :param generic_data: frame holding the generic (constant tenor) series
    :param meta_data: contract chain passed to get_futures_actual_ticker
    :param combos: tenor tuples, each of length n_legs
    :param n_legs: 2 for spreads, 3 for flies
    :param level_fn: callable(frame, first_col) returning the structure level
    :param label: 'Spread' or 'Fly'; used in column names and log messages
    :return: one row per successfully scored combo, indexed by the combo
    """
    leg_numbers = range(1, n_legs + 1)
    columns = (['Name']
               + [f'Leg{k}' for k in leg_numbers]
               + [f'Leg{k} Actual' for k in leg_numbers]
               + [label, f'{label} Prcnt', 'RD Prcnt', f'{label} Z-Score', 'RD Z-Score'])
    tag = label.lower()

    # DataFrame.append was removed in pandas 2.0; rows are collected and
    # concatenated once. The empty frame stays first so the concat inputs are
    # the same ones the chained append used.
    frames = [pd.DataFrame(columns=columns)]
    for combo in combos:
        # extract individual CM time series tickers for this combo
        try:
            legs = [generic_data[f'{sym_root}{tenor}'] for tenor in combo]
        except KeyError:
            logging.error(f'{sym_root} {combo} skipped')
            continue

        # roll-down legs are the same structure one tenor shorter
        try:
            rd_legs = [generic_data[f'{sym_root}{tenor - 1}'] for tenor in combo]
        except KeyError:  # front month has no roll down
            rd_legs = []

        try:
            # rows with a gap in any leg (roll-down legs included) are dropped
            merged = pd.concat(legs + rd_legs, axis=1).dropna(axis=0, how='any')

            levels = level_fn(merged, 0)
            current_level = levels.iloc[-1]
            percentile, z_score = _percentile_and_zscore(levels, current_level)
            if rd_legs:
                # the current level is ranked against the rolled-down history
                rd_levels = level_fn(merged, n_legs)
                percentile_rd, z_rd = _percentile_and_zscore(rd_levels, current_level)
            else:
                percentile_rd = np.nan
                z_rd = np.nan

            row_dict = {'Name': sym_root}
            for k, tenor in enumerate(combo, start=1):
                row_dict[f'Leg{k}'] = tenor
            for k, leg in enumerate(legs, start=1):
                row_dict[f'Leg{k} Actual'] = get_futures_actual_ticker(meta_data, leg.name)
            row_dict[label] = round(current_level, 4)
            row_dict[f'{label} Prcnt'] = round(percentile, 4)
            row_dict['RD Prcnt'] = round(percentile_rd, 4)
            row_dict[f'{label} Z-Score'] = round(z_score, 4)
            row_dict['RD Z-Score'] = round(z_rd, 4)

            frames.append(pd.DataFrame(row_dict, index=[combo]))
            logging.info(f'{tag} {sym_root} {combo} finished')
        except Exception:
            # best effort per combo, but keep the traceback in the log
            logging.exception(f'{tag} {sym_root} {combo} failed')

    return pd.concat(frames)


def construct_curve_spread_fly():
    """
    Score calendar spreads and butterflies along every generic futures curve.

    For each root symbol (outright futures and inter-commodity spreads, the
    latter recognised by ':' in the symbol) every eligible tenor pair and
    equally spaced tenor triple is scored: current level, its percentile and
    z-score in its own history, and the percentile and z-score of the current
    level in the history of the structure one tenor shorter (roll down).

    Results are written per root symbol to data/spread_scores.h5 and
    data/fly_scores.h5 under global_settings.root_path. Symbols whose
    historical data has no rows are skipped.
    """
    futures_meta_df, futures_contracts_meta_df, inter_comdty_spread_meta_df, inter_comdty_spread_contracts_meta_df = data_loader.load_futures_meta_data()
    futures_hist_prices_dict = data_loader.load_futures_hist_prices()
    generic_futures_hist_prices_dict = data_loader.load_comdty_generic_hist_prices()
    inter_comdty_spread_hist_data_dict = data_loader.load_inter_comdty_spread_hist_prices()
    generic_inter_comdty_hist_prices_dict = data_loader.load_inter_comdty_generic_hist_prices()

    combined_root_syms = list(generic_futures_hist_prices_dict.keys())
    combined_root_syms.extend(generic_inter_comdty_hist_prices_dict.keys())

    # get spread/fly for outright and inter-comdty-spread
    for sym_root in combined_root_syms:
        if ':' in sym_root:
            hist_data = inter_comdty_spread_hist_data_dict[sym_root]
            contracts_meta_df = inter_comdty_spread_contracts_meta_df
            generic_data = generic_inter_comdty_hist_prices_dict[sym_root]
        else:
            hist_data = futures_hist_prices_dict[sym_root]
            contracts_meta_df = futures_contracts_meta_df
            generic_data = generic_futures_hist_prices_dict[sym_root]

        # sort a new frame instead of sorting the filtered slice in place
        meta_data = contracts_meta_df[
            contracts_meta_df['Root'] == sym_root].sort_values('Last_Trade_Date')

        try:
            asofdate = hist_data.index[-1]
        except (AttributeError, IndexError):  # probably no data
            continue

        meta_data = get_futures_chain(meta_data, asofdate)

        n_tenors = generic_data.shape[1]

        df_spread_stats = _score_combos(sym_root, generic_data, meta_data,
                                        _spread_tenor_combos(n_tenors), 2,
                                        _spread_level, 'Spread')
        df_spread_stats.to_hdf(os.path.join(global_settings.root_path,
                                            'data/spread_scores.h5'),
                               key=sym_root)

        df_fly_stats = _score_combos(sym_root, generic_data, meta_data,
                                     _fly_tenor_combos(n_tenors), 3,
                                     _fly_level, 'Fly')
        df_fly_stats.to_hdf(os.path.join(global_settings.root_path,
                                         'data/fly_scores.h5'),
                            key=sym_root)