def get_summary(**kwargs):
    """Summarise the pair relationship between two stock symbols.

    Keyword arguments:
        symbol1, symbol2: tickers passed to ``gsp.get_stock_price_preloaded``
            (``data_source='iex'``).
        report_date: date used as ``settle_date_to`` for both price loads and
            converted with ``cu.convert_doubledate_2datetime``.
        get_diagnosticQ: optional, defaults to ``False``.  When truthy only
            the backtest diagnostics are returned.

    The two close-price series are inner-joined on ``settle_datetime``.  The
    first 40% of the joined rows form the "training" slice, the remainder the
    "test" slice.  Suffix ``_1`` in the result refers to the test slice and
    suffix ``_2`` to the training slice.

    Returns:
        dict.  If the training slice would have fewer than 200 rows, or the
        last joined row is not dated ``report_date``, every summary key maps
        to NaN.  In diagnostic mode the dict holds ``backtest_output`` (test
        slice) and ``cagr2`` (training slice).  Otherwise it holds prices,
        cointegration p-values, regression betas, ``corr`` and the backtest
        statistics.
    """
    first_symbol = kwargs['symbol1']
    second_symbol = kwargs['symbol2']
    report_date = kwargs['report_date']
    diagnostics_wanted = kwargs.get('get_diagnosticQ', False)

    report_datetime = cu.convert_doubledate_2datetime(report_date)

    first_prices = gsp.get_stock_price_preloaded(
        ticker=first_symbol, data_source='iex', settle_date_to=report_date)
    second_prices = gsp.get_stock_price_preloaded(
        ticker=second_symbol, data_source='iex', settle_date_to=report_date)

    # After the merge the closes are named close_x (symbol1) and
    # close_y (symbol2).
    merged_data = pd.merge(
        first_prices[['close', 'settle_datetime']],
        second_prices[['close', 'settle_datetime']],
        how='inner',
        on='settle_datetime')

    split = int(len(merged_data) * .4)

    # The length check comes first so an empty merge never reaches .iloc[-1].
    if split < 200 or report_datetime != merged_data['settle_datetime'].iloc[-1]:
        return dict.fromkeys(
            ['price1', 'price2', 'p_value_2', 'p_value_1', 'beta_1', 'beta_2',
             'corr', 'cagr1', 'cagr2', 'kalpha', 'kbeta', 'meanSpread',
             'stdSpread', 'zScore'],
            np.nan)

    in_sample = merged_data[:split]
    out_of_sample = merged_data[split:]

    coint_in_sample = sm.tsa.stattools.coint(
        in_sample['close_x'], in_sample['close_y'])
    coint_out_of_sample = sm.tsa.stattools.coint(
        out_of_sample['close_x'], out_of_sample['close_y'])

    level_fit_out_of_sample = ss.get_regression_results({
        'y': out_of_sample['close_y'].values,
        'x': out_of_sample['close_x'].values})
    level_fit_in_sample = ss.get_regression_results({
        'y': in_sample['close_y'].values,
        'x': in_sample['close_x'].values})
    # Regression on first differences; its r-squared feeds 'corr'.
    change_fit_out_of_sample = ss.get_regression_results({
        'y': out_of_sample['close_y'].diff().values,
        'x': out_of_sample['close_x'].diff().values})

    # backtest() receives frames indexed by settle_datetime.
    merged_data.set_index('settle_datetime', drop=True, inplace=True)
    backtest_out_of_sample = backtest(merged_data[split:], 'close_x', 'close_y')
    backtest_in_sample = backtest(merged_data[:split], 'close_x', 'close_y')

    if diagnostics_wanted:
        return {'backtest_output': backtest_out_of_sample,
                'cagr2': backtest_in_sample['cagr']}

    summary = {
        'price1': merged_data['close_x'].iloc[-1],
        'price2': merged_data['close_y'].iloc[-1],
        'p_value_2': coint_in_sample[1],
        'p_value_1': coint_out_of_sample[1],
        'beta_1': level_fit_out_of_sample['beta'],
        'beta_2': level_fit_in_sample['beta'],
        'corr': np.sqrt(change_fit_out_of_sample['rsquared'] / 100),
        'cagr1': backtest_out_of_sample['cagr'],
        'cagr2': backtest_in_sample['cagr'],
    }
    for stat_name in ('kalpha', 'kbeta', 'meanSpread', 'stdSpread', 'zScore'):
        summary[stat_name] = backtest_out_of_sample[stat_name]
    return summary
def time_series_regression(**kwargs):
    """Regress the last ``num_obs`` values of one column against 0..num_obs-1.

    Keyword arguments:
        data_frame_input: frame supporting ``.iloc`` and column lookup.
        num_obs: number of trailing rows to use; also forwarded as
            ``clean_num_obs``.
        y_var_name: name of the column used as the dependent variable.

    Returns:
        Whatever ``stats.get_regression_results`` returns for that input.
    """
    frame = kwargs['data_frame_input']
    num_obs = kwargs['num_obs']

    recent_rows = frame.iloc[-num_obs:]
    regression_input = {
        'x': range(num_obs),
        'y': recent_rows[kwargs['y_var_name']],
        'clean_num_obs': num_obs,
    }
    return stats.get_regression_results(regression_input)
def get_time_series_based_return_forecast(**kwargs):
    """Add rolling linear-trend return forecasts to a price frame.

    For every row that has a full ``regression_window`` of history, the
    ``close`` values of that window are regressed on ``0..regression_window-1``
    via ``stats.get_regression_results``.  The fitted line is then evaluated
    at ``regression_window + forecast_window`` and expressed as a percentage
    change against the row's own close:

        tsf = 100 * (alpha + beta * (regression_window + horizon)) / close - 100

    Keyword arguments:
        data_frame_input: DataFrame with a ``close`` column.
        regression_window: number of rows in each rolling regression.
        forecast_window1, forecast_window2: the two forecast horizons.

    Returns:
        A new DataFrame equal to the input plus the columns
        ``tsf_<regression_window>_<forecast_window1>`` and
        ``tsf_<regression_window>_<forecast_window2>``.  Rows without a full
        window hold NaN.

    Side effects:
        As before, ``data_frame_input`` itself is modified in place: it gains
        the two ``tsf_*`` columns and the helper columns ``alpha`` and
        ``beta``.  Only the returned frame has the helpers dropped.
    """
    data_frame_input = kwargs['data_frame_input']
    regression_window = kwargs['regression_window']
    forecast_window1 = kwargs['forecast_window1']
    forecast_window2 = kwargs['forecast_window2']

    num_rows = len(data_frame_input.index)
    first_full_row = regression_window - 1
    close = data_frame_input['close']

    regress_output_list = [
        stats.get_regression_results({
            'x': range(regression_window),
            'y': close.iloc[(i - regression_window + 1):(i + 1)].values,
            'clean_num_obs': regression_window,
        })
        for i in range(first_full_row, num_rows)
    ]

    # Build complete columns and assign them in one step.  The previous
    # ``frame['alpha'].iloc[...] = ...`` form is chained assignment, which
    # pandas does not guarantee to write through to the frame.
    nan_padding = [np.nan] * max(min(first_full_row, num_rows), 0)
    data_frame_input['alpha'] = nan_padding + [
        x['alpha'] for x in regress_output_list]
    data_frame_input['beta'] = nan_padding + [
        x['beta'] for x in regress_output_list]

    for horizon in (forecast_window1, forecast_window2):
        column_name = 'tsf_' + str(regression_window) + '_' + str(horizon)
        data_frame_input[column_name] = (
            100 * (data_frame_input['alpha']
                   + data_frame_input['beta'] * (regression_window + horizon))
            / data_frame_input['close']) - 100

    # ``axis`` is passed by keyword: newer pandas no longer accepts it
    # positionally.
    return data_frame_input.drop(['alpha', 'beta'], axis=1)
def get_intraday_spread_signals(**kwargs):
    """Compute daily and intraday signals for a futures spread.

    Keyword arguments:
        ticker_list: contract tickers of the spread legs; ``None`` entries
            are dropped.  The result reports the weight of the second leg,
            so at least two legs are needed.
        date_to: report date, forwarded to the ``exp``/``cu``/``opUtil``
            helpers.
        tr_dte_list: optional; defaults to ``tr_dte`` from
            ``exp.get_days2_expiration`` for each ticker.
        aggregation_method, contracts_back: optional, used only when BOTH are
            given; otherwise the max aggregation method and min contracts
            back over all legs are used.
        futures_data_dictionary: optional; defaults to preloaded prices for
            each distinct ticker head.
        use_last_as_current: optional, defaults to ``True``.
        datetime5_years_ago: optional; defaults to ``date_to`` shifted by
            ``5 * 365`` via ``cu.doubledate_shift``.
        num_days_back_4intraday: optional, defaults to 10.  The 5-day
            statistics index ``date_list[-5]``, so it must be at least 5.

    Spread weights come from ``sutil.get_spread_weights_4contract_list`` when
    the list of ticker heads is in the module-level
    ``fixed_weight_future_spread_list``.  Otherwise the second-leg weight is
    minus the beta of regressing the last 60 ``change_1`` values of leg 1 on
    leg 2.

    Returns:
        dict with keys ``downside``, ``upside``, ``intraday_data``,
        ``trading_data``, ``spread_weight``, ``portfolio_weight``, ``z``,
        ``recent_trend``, ``intraday_mean{10,5,2,1}``,
        ``intraday_std{10,5,2,1}``, ``aligned_output``, ``spread_settle``,
        ``data_last5_years``, ``ma_spread_lowL``, ``ma_spread_highL``,
        ``ma_spread_low``, ``ma_spread_high`` and ``intraday_sharp``.
        When no intraday data is available every intraday statistic is NaN
        and ``trading_data`` is the empty intraday frame.
    """
    ticker_list = [x for x in kwargs['ticker_list'] if x is not None]
    date_to = kwargs['date_to']

    ticker_head_list = [
        cmi.get_contract_specs(x)['ticker_head'] for x in ticker_list]
    ticker_class_list = [cmi.ticker_class[x] for x in ticker_head_list]

    if 'tr_dte_list' in kwargs:
        tr_dte_list = kwargs['tr_dte_list']
    else:
        tr_dte_list = [
            exp.get_days2_expiration(
                ticker=x, date_to=date_to, instrument='futures')['tr_dte']
            for x in ticker_list]

    if 'aggregation_method' in kwargs and 'contracts_back' in kwargs:
        aggregation_method = kwargs['aggregation_method']
        contracts_back = kwargs['contracts_back']
    else:
        amcb_output = [
            opUtil.get_aggregation_method_contracts_back(
                cmi.get_contract_specs(x))
            for x in ticker_list]
        aggregation_method = max(
            x['aggregation_method'] for x in amcb_output)
        contracts_back = min(x['contracts_back'] for x in amcb_output)

    if 'futures_data_dictionary' in kwargs:
        futures_data_dictionary = kwargs['futures_data_dictionary']
    else:
        futures_data_dictionary = {
            x: gfp.get_futures_price_preloaded(ticker_head=x)
            for x in set(ticker_head_list)}

    use_last_as_current = kwargs.get('use_last_as_current', True)

    if 'datetime5_years_ago' in kwargs:
        datetime5_years_ago = kwargs['datetime5_years_ago']
    else:
        date5_years_ago = cu.doubledate_shift(date_to, 5 * 365)
        datetime5_years_ago = cu.convert_doubledate_2datetime(date5_years_ago)

    num_days_back_4intraday = kwargs.get('num_days_back_4intraday', 10)

    contract_multiplier_list = [
        cmi.contract_multiplier[x] for x in ticker_head_list]

    aligned_output = opUtil.get_aligned_futures_data(
        contract_list=ticker_list,
        tr_dte_list=tr_dte_list,
        aggregation_method=aggregation_method,
        contracts_back=contracts_back,
        date_to=date_to,
        futures_data_dictionary=futures_data_dictionary,
        use_last_as_current=use_last_as_current)

    aligned_data = aligned_output['aligned_data']
    current_data = aligned_output['current_data']

    if ticker_head_list in fixed_weight_future_spread_list:
        weights_output = sutil.get_spread_weights_4contract_list(
            ticker_head_list=ticker_head_list)
        spread_weights = weights_output['spread_weights']
        portfolio_weights = weights_output['portfolio_weights']
    else:
        regress_output = stats.get_regression_results({
            'x': aligned_data['c2']['change_1'][-60:],
            'y': aligned_data['c1']['change_1'][-60:]})
        spread_weights = [1, -regress_output['beta']]
        portfolio_weights = [
            1,
            -regress_output['beta'] * contract_multiplier_list[0]
            / contract_multiplier_list[1]]

    aligned_data['spread'] = 0
    aligned_data['spread_pnl_1'] = 0
    aligned_data['spread_pnl1'] = 0
    spread_settle = 0

    last5_years_indx = aligned_data['settle_date'] >= datetime5_years_ago

    num_contracts = len(ticker_list)
    # Legs are stored under the column groups 'c1', 'c2', ...
    leg_names = ['c' + str(i + 1) for i in range(num_contracts)]

    for i, leg in enumerate(leg_names):
        aligned_data['spread'] = aligned_data['spread'] + \
            aligned_data[leg]['close_price'] * spread_weights[i]
        spread_settle = spread_settle + \
            current_data[leg]['close_price'] * spread_weights[i]
        aligned_data['spread_pnl_1'] = aligned_data['spread_pnl_1'] + \
            aligned_data[leg]['change_1'] * portfolio_weights[i] * \
            contract_multiplier_list[i]
        aligned_data['spread_pnl1'] = aligned_data['spread_pnl1'] + \
            aligned_data[leg]['change1_instant'] * portfolio_weights[i] * \
            contract_multiplier_list[i]

    aligned_data['spread_normalized'] = \
        aligned_data['spread'] / aligned_data['c1']['close_price']

    data_last5_years = aligned_data[last5_years_indx]

    percentile_vector = stats.get_number_from_quantile(
        y=data_last5_years['spread_pnl_1'].values,
        quantile_list=[1, 15, 85, 99],
        clean_num_obs=max(100, round(3 * len(data_last5_years.index) / 4)))

    # Downside/upside are the averages of the two lower / upper percentiles.
    downside = (percentile_vector[0] + percentile_vector[1]) / 2
    upside = (percentile_vector[2] + percentile_vector[3]) / 2

    # Business days from oldest to newest, ending with date_to itself.
    date_list = [
        exp.doubledate_shift_bus_days(double_date=date_to, shift_in_days=x)
        for x in reversed(range(1, num_days_back_4intraday))]
    date_list.append(date_to)

    intraday_data = opUtil.get_aligned_futures_data_intraday(
        contract_list=ticker_list, date_list=date_list)

    # Entries that do not depend on the intraday data.
    result = {
        'downside': downside,
        'upside': upside,
        'spread_weight': spread_weights[1],
        'portfolio_weight': portfolio_weights[1],
        'aligned_output': aligned_output,
        'spread_settle': spread_settle,
        'data_last5_years': data_last5_years,
    }

    if len(intraday_data.index) == 0:
        result['intraday_data'] = intraday_data
        result['trading_data'] = intraday_data
        result.update(dict.fromkeys(
            ['z', 'recent_trend',
             'intraday_mean10', 'intraday_std10',
             'intraday_mean5', 'intraday_std5',
             'intraday_mean2', 'intraday_std2',
             'intraday_mean1', 'intraday_std1',
             'ma_spread_lowL', 'ma_spread_highL',
             'ma_spread_low', 'ma_spread_high',
             'intraday_sharp'],
            np.nan))
        return result

    # Timestamp.to_datetime() no longer exists in current pandas;
    # to_pydatetime() is the equivalent conversion.
    time_stamps = [x.to_pydatetime() for x in intraday_data.index]
    intraday_data['time_stamp'] = time_stamps
    intraday_data['settle_date'] = intraday_data['time_stamp'].apply(
        lambda x: x.date())

    end_hour = min(cmi.last_trade_hour_minute[x] for x in ticker_head_list)
    start_hour = max(cmi.first_trade_hour_minute[x] for x in ticker_head_list)

    trade_start_hour = dt.time(9, 30, 0, 0)

    # Computed once instead of calling .iloc[x].time() per row and condition.
    time_of_day = [x.time() for x in time_stamps]

    if 'Ag' in ticker_class_list:
        # 'Ag' spreads additionally keep a fixed early window.
        start_hour1 = dt.time(0, 45, 0, 0)
        end_hour1 = dt.time(7, 45, 0, 0)
        selection_indx = [
            pos for pos, t in enumerate(time_of_day)
            if (start_hour1 <= t < end_hour1) or (start_hour <= t < end_hour)]
    else:
        selection_indx = [
            pos for pos, t in enumerate(time_of_day)
            if start_hour <= t < end_hour]

    # Explicit copy: columns are added to the selection below.
    intraday_data = intraday_data.iloc[selection_indx].copy()

    intraday_data['spread'] = 0

    for i, leg in enumerate(leg_names):
        intraday_data[leg, 'mid_p'] = (
            intraday_data[leg]['best_bid_p']
            + intraday_data[leg]['best_ask_p']) / 2
        intraday_data['spread'] = intraday_data['spread'] + \
            intraday_data[leg]['mid_p'] * spread_weights[i]

    # spread1 holds the next session's spread, filled only when two
    # consecutive sessions have the same number of observations.
    unique_settle_dates = intraday_data['settle_date'].unique()
    intraday_data['spread1'] = np.nan

    for this_date, next_date in zip(unique_settle_dates[:-1],
                                    unique_settle_dates[1:]):
        this_mask = intraday_data['settle_date'] == this_date
        next_mask = intraday_data['settle_date'] == next_date
        if this_mask.sum() == next_mask.sum():
            intraday_data.loc[this_mask, 'spread1'] = \
                intraday_data['spread'][next_mask].values

    intraday_data = intraday_data[
        intraday_data['settle_date'].notnull()].copy()

    intraday_mean10 = intraday_data['spread'].mean()
    intraday_std10 = intraday_data['spread'].std()

    last5_start = cu.convert_doubledate_2datetime(date_list[-5]).date()
    last2_start = cu.convert_doubledate_2datetime(date_list[-2]).date()
    # date_list[-1] is date_to itself.
    last_date = cu.convert_doubledate_2datetime(date_list[-1]).date()

    intraday_data_last5days = intraday_data[
        intraday_data['settle_date'] >= last5_start]
    intraday_data_last2days = intraday_data[
        intraday_data['settle_date'] >= last2_start]
    intraday_data_yesterday = intraday_data[
        intraday_data['settle_date'] == last_date]

    intraday_mean5 = intraday_data_last5days['spread'].mean()
    intraday_std5 = intraday_data_last5days['spread'].std()

    intraday_mean2 = intraday_data_last2days['spread'].mean()
    intraday_std2 = intraday_data_last2days['spread'].std()

    intraday_mean1 = intraday_data_yesterday['spread'].mean()
    intraday_std1 = intraday_data_yesterday['spread'].std()

    intraday_z = (spread_settle - intraday_mean5) / intraday_std5

    # recent_trend: balance of above-mean vs below-mean observations in the
    # most recent half of the intraday sample, in percent.
    num_obs_intraday = len(intraday_data.index)
    num_obs_intraday_half = round(num_obs_intraday / 2)
    intraday_tail = intraday_data.tail(num_obs_intraday_half)

    num_positives = sum(intraday_tail['spread'] > intraday_mean10)
    num_negatives = sum(intraday_tail['spread'] < intraday_mean10)

    if num_positives + num_negatives != 0:
        recent_trend = 100 * (num_positives - num_negatives) / \
            (num_positives + num_negatives)
    else:
        recent_trend = np.nan

    # Spread 60 rows ahead within the same settle date.
    intraday_data_shifted = intraday_data.groupby('settle_date').shift(-60)
    intraday_data['spread_shifted'] = intraday_data_shifted['spread']
    intraday_data['delta60'] = \
        intraday_data['spread_shifted'] - intraday_data['spread']

    # pd.ewma / pd.rolling_mean were removed from pandas; the .ewm() and
    # .rolling() accessors compute the same statistics.
    intraday_data['ewma10'] = intraday_data['spread'].ewm(span=10).mean()
    intraday_data['ewma50'] = intraday_data['spread'].ewm(span=50).mean()
    intraday_data['ewma200'] = intraday_data['spread'].ewm(span=200).mean()

    intraday_data['ma40'] = intraday_data['spread'].rolling(40).mean()

    intraday_data['ewma50_spread'] = \
        intraday_data['spread'] - intraday_data['ewma50']
    intraday_data['ma40_spread'] = \
        intraday_data['spread'] - intraday_data['ma40']

    selection_indx = [
        pos for pos, stamp in enumerate(intraday_data['time_stamp'])
        if stamp.time() > trade_start_hour]
    selected_data = intraday_data.iloc[selection_indx].copy()
    selected_data['delta60Net'] = (
        contract_multiplier_list[0] * selected_data['delta60']
        / spread_weights[0])

    selected_data.reset_index(drop=True, inplace=True)
    # Float zeros so the float pnl values assigned below need no dtype change.
    selected_data['proxy_pnl'] = 0.0

    t_cost = cmi.t_cost[ticker_head_list[0]]

    ma_spread_low = np.nan
    ma_spread_high = np.nan
    ma_spread_lowL = np.nan
    ma_spread_highL = np.nan
    intraday_sharp = np.nan

    if sum(selected_data['ma40_spread'].notnull()) > 30:
        quantile_list = selected_data['ma40_spread'].quantile([0.1, 0.9])

        down_indx = selected_data['ma40_spread'] < quantile_list[0.1]
        up_indx = selected_data['ma40_spread'] > quantile_list[0.9]

        up_data = selected_data[up_indx]
        down_data = selected_data[down_indx]

        ma_spread_lowL = quantile_list[0.1]
        ma_spread_highL = quantile_list[0.9]

        # Proxy pnl: the sign of delta60Net is flipped for rows above the
        # upper decile and kept for rows below the lower decile; both are
        # net of 2 * num_contracts * t_cost.
        selected_data.loc[up_indx, 'proxy_pnl'] = (
            -up_data['delta60Net'] - 2 * num_contracts * t_cost).values
        selected_data.loc[down_indx, 'proxy_pnl'] = (
            down_data['delta60Net'] - 2 * num_contracts * t_cost).values

        short_term_data = selected_data[
            selected_data['settle_date'] >= last5_start]
        if sum(short_term_data['ma40_spread'].notnull()) > 30:
            quantile_list = short_term_data['ma40_spread'].quantile(
                [0.1, 0.9])
            ma_spread_low = quantile_list[0.1]
            ma_spread_high = quantile_list[0.9]

        if selected_data['proxy_pnl'].std() != 0:
            intraday_sharp = selected_data['proxy_pnl'].mean() / \
                selected_data['proxy_pnl'].std()

    result.update({
        'intraday_data': intraday_data,
        'trading_data': selected_data,
        'z': intraday_z,
        'recent_trend': recent_trend,
        'intraday_mean10': intraday_mean10,
        'intraday_std10': intraday_std10,
        'intraday_mean5': intraday_mean5,
        'intraday_std5': intraday_std5,
        'intraday_mean2': intraday_mean2,
        'intraday_std2': intraday_std2,
        'intraday_mean1': intraday_mean1,
        'intraday_std1': intraday_std1,
        'ma_spread_lowL': ma_spread_lowL,
        'ma_spread_highL': ma_spread_highL,
        'ma_spread_low': ma_spread_low,
        'ma_spread_high': ma_spread_high,
        'intraday_sharp': intraday_sharp,
    })
    return result
def get_futures_butterfly_signals(**kwargs):
    """Compute signals for a three-leg futures butterfly (c1, c2, c3).

    Keyword arguments:
        ticker_list: the three contract tickers, in leg order.
        date_to: report date, forwarded to the ``exp``/``cu``/``opUtil``
            helpers.
        tr_dte_list: optional; defaults to
            ``exp.get_futures_days2_expiration`` for each ticker.
        aggregation_method, contracts_back: optional, used only when BOTH are
            given; otherwise derived from the first ticker's contract specs.
        use_last_as_current: optional, defaults to ``False``.
        futures_data_dictionary: optional; defaults to preloaded prices for
            the first ticker's ticker head.
        contract_multiplier: optional; defaults to the multiplier of the
            first ticker's ticker head.
        datetime5_years_ago: optional; defaults to ``date_to`` shifted by
            ``5 * 365`` via ``cu.doubledate_shift``.
        datetime2_months_ago: optional; defaults to ``date_to`` shifted by 60.

    Returns:
        ``{'success': False}`` when the aligned data could not be built.
        Otherwise a dict with ``success=True`` and the signal values:
        quantiles ``q``/``qf``, leg weights, z-scores, regression statistics,
        theoretical pnl figures, downside/upside, price limits, noise and
        volatility ratios, mean-reversion diagnostics and the inputs they
        were derived from.
    """
    ticker_list = kwargs['ticker_list']
    date_to = kwargs['date_to']

    if 'tr_dte_list' in kwargs:
        tr_dte_list = kwargs['tr_dte_list']
    else:
        tr_dte_list = [
            exp.get_futures_days2_expiration({'ticker': x, 'date_to': date_to})
            for x in ticker_list]

    if 'aggregation_method' in kwargs and 'contracts_back' in kwargs:
        aggregation_method = kwargs['aggregation_method']
        contracts_back = kwargs['contracts_back']
    else:
        amcb_output = opUtil.get_aggregation_method_contracts_back(
            cmi.get_contract_specs(ticker_list[0]))
        aggregation_method = amcb_output['aggregation_method']
        contracts_back = amcb_output['contracts_back']

    use_last_as_current = kwargs.get('use_last_as_current', False)

    if 'futures_data_dictionary' in kwargs:
        futures_data_dictionary = kwargs['futures_data_dictionary']
    else:
        futures_data_dictionary = {
            x: gfp.get_futures_price_preloaded(ticker_head=x)
            for x in [cmi.get_contract_specs(ticker_list[0])['ticker_head']]}

    if 'contract_multiplier' in kwargs:
        contract_multiplier = kwargs['contract_multiplier']
    else:
        contract_multiplier = cmi.contract_multiplier[
            cmi.get_contract_specs(ticker_list[0])['ticker_head']]

    if 'datetime5_years_ago' in kwargs:
        datetime5_years_ago = kwargs['datetime5_years_ago']
    else:
        date5_years_ago = cu.doubledate_shift(date_to, 5 * 365)
        datetime5_years_ago = cu.convert_doubledate_2datetime(date5_years_ago)

    if 'datetime2_months_ago' in kwargs:
        datetime2_months_ago = kwargs['datetime2_months_ago']
    else:
        date2_months_ago = cu.doubledate_shift(date_to, 60)
        datetime2_months_ago = cu.convert_doubledate_2datetime(
            date2_months_ago)

    aligned_output = opUtil.get_aligned_futures_data(
        contract_list=ticker_list,
        tr_dte_list=tr_dte_list,
        aggregation_method=aggregation_method,
        contracts_back=contracts_back,
        date_to=date_to,
        futures_data_dictionary=futures_data_dictionary,
        use_last_as_current=use_last_as_current)

    if not aligned_output['success']:
        return {'success': False}

    current_data = aligned_output['current_data']
    aligned_data = aligned_output['aligned_data']

    # Distance in months between consecutive legs.
    month_diff_1 = 12 * (current_data['c1']['ticker_year']
                         - current_data['c2']['ticker_year']) + \
        (current_data['c1']['ticker_month']
         - current_data['c2']['ticker_month'])
    month_diff_2 = 12 * (current_data['c2']['ticker_year']
                         - current_data['c3']['ticker_year']) + \
        (current_data['c2']['ticker_month']
         - current_data['c3']['ticker_month'])

    # Linear-interpolation weights for the wings.  The denominator is the
    # total c1-to-c3 distance so the wing weights sum to 2 and offset the -2
    # on the body.  It used to be month_diff_1 + month_diff_1, which is only
    # correct for equally spaced legs.
    total_month_diff = month_diff_1 + month_diff_2
    weight_11 = 2 * month_diff_2 / total_month_diff
    weight_12 = -2
    weight_13 = 2 * month_diff_1 / total_month_diff

    price_1 = current_data['c1']['close_price']
    price_2 = current_data['c2']['close_price']
    price_3 = current_data['c3']['close_price']

    linear_interp_price2 = (
        weight_11 * aligned_data['c1']['close_price']
        + weight_13 * aligned_data['c3']['close_price']) / 2

    butterfly_price = aligned_data['c1']['close_price'] \
        - 2 * aligned_data['c2']['close_price'] \
        + aligned_data['c3']['close_price']

    price_ratio = linear_interp_price2 / aligned_data['c2']['close_price']

    linear_interp_price2_current = (
        weight_11 * price_1 + weight_13 * price_3) / 2

    price_ratio_current = linear_interp_price2_current / price_2

    # q: quantile of the current ratio in the full history;
    # qf: the same within the last 40 observations.
    q = stats.get_quantile_from_number({
        'x': price_ratio_current,
        'y': price_ratio.values,
        'clean_num_obs': max(100, round(3 * len(price_ratio.values) / 4))})
    qf = stats.get_quantile_from_number({
        'x': price_ratio_current,
        'y': price_ratio.values[-40:],
        'clean_num_obs': 30})

    recent_quantile_list = [
        stats.get_quantile_from_number({
            'x': x,
            'y': price_ratio.values[-40:],
            'clean_num_obs': 30})
        for x in price_ratio.values[-40:]]

    weight1 = weight_11
    weight2 = weight_12
    weight3 = weight_13

    last5_years_indx = aligned_data['settle_date'] >= datetime5_years_ago
    last2_months_indx = aligned_data['settle_date'] >= datetime2_months_ago
    data_last5_years = aligned_data[last5_years_indx]

    # Percentage differences between neighbouring legs.
    yield1 = 100 * (aligned_data['c1']['close_price']
                    - aligned_data['c2']['close_price']) \
        / aligned_data['c2']['close_price']
    yield2 = 100 * (aligned_data['c2']['close_price']
                    - aligned_data['c3']['close_price']) \
        / aligned_data['c3']['close_price']

    yield1_last5_years = yield1[last5_years_indx]
    yield2_last5_years = yield2[last5_years_indx]

    yield1_current = 100 * (current_data['c1']['close_price']
                            - current_data['c2']['close_price']) \
        / current_data['c2']['close_price']
    yield2_current = 100 * (current_data['c2']['close_price']
                            - current_data['c3']['close_price']) \
        / current_data['c3']['close_price']

    butterfly_price_current = current_data['c1']['close_price'] \
        - 2 * current_data['c2']['close_price'] \
        + current_data['c3']['close_price']

    yield_regress_output = stats.get_regression_results({
        'x': yield2,
        'y': yield1,
        'x_current': yield2_current,
        'y_current': yield1_current,
        'clean_num_obs': max(100, round(3 * len(yield1.values) / 4))})

    yield_regress_output_last5_years = stats.get_regression_results({
        'x': yield2_last5_years,
        'y': yield1_last5_years,
        'x_current': yield2_current,
        'y_current': yield1_current,
        'clean_num_obs': max(
            100, round(3 * len(yield1_last5_years.values) / 4))})

    bf_qz_frame_short = pd.DataFrame()
    bf_qz_frame_long = pd.DataFrame()

    if len(yield1) >= 40 and len(yield2) >= 40:
        # Regression z-scores of the last 40 observations.  Taken
        # positionally from .values so the result does not depend on the
        # index type of the aligned frame.
        recent_zscore_list = list(
            (yield1.values[-40:]
             - yield_regress_output['alpha']
             - yield_regress_output['beta'] * yield2.values[-40:])
            / yield_regress_output['residualstd'])

        bf_qz_frame = pd.DataFrame.from_dict({
            'bf_price': butterfly_price.values[-40:],
            'q': recent_quantile_list,
            'zscore': recent_zscore_list})

        bf_qz_frame = np.round(bf_qz_frame, 8)
        bf_qz_frame.drop_duplicates(['bf_price'], keep='last', inplace=True)

        bf_qz_frame_short = bf_qz_frame[
            (bf_qz_frame['zscore'] >= 0.6) & (bf_qz_frame['q'] >= 85)]
        bf_qz_frame_long = bf_qz_frame[
            (bf_qz_frame['zscore'] <= -0.6) & (bf_qz_frame['q'] <= 12)]

    # np.nan rather than the np.NAN alias, which newer NumPy no longer has.
    if bf_qz_frame_short.empty:
        short_price_limit = np.nan
    else:
        short_price_limit = bf_qz_frame_short['bf_price'].min()

    if bf_qz_frame_long.empty:
        long_price_limit = np.nan
    else:
        long_price_limit = bf_qz_frame_long['bf_price'].max()

    zscore1 = yield_regress_output['zscore']
    rsquared1 = yield_regress_output['rsquared']

    zscore2 = yield_regress_output_last5_years['zscore']
    rsquared2 = yield_regress_output_last5_years['rsquared']

    second_spread_weight_1 = yield_regress_output['beta']
    second_spread_weight_2 = yield_regress_output_last5_years['beta']

    butterfly_5_change = data_last5_years['c1']['change_5'] \
        - (1 + second_spread_weight_1) * data_last5_years['c2']['change_5'] \
        + second_spread_weight_1 * data_last5_years['c3']['change_5']

    butterfly_5_change_current = current_data['c1']['change_5'] \
        - (1 + second_spread_weight_1) * current_data['c2']['change_5'] \
        + second_spread_weight_1 * current_data['c3']['change_5']

    butterfly_1_change = data_last5_years['c1']['change_1'] \
        - (1 + second_spread_weight_1) * data_last5_years['c2']['change_1'] \
        + second_spread_weight_1 * data_last5_years['c3']['change_1']

    percentile_vector = stats.get_number_from_quantile(
        y=butterfly_5_change.values,
        quantile_list=[1, 15, 85, 99],
        clean_num_obs=max(100, round(3 * len(butterfly_5_change.values) / 4)))

    downside = contract_multiplier * \
        (percentile_vector[0] + percentile_vector[1]) / 2
    upside = contract_multiplier * \
        (percentile_vector[2] + percentile_vector[3]) / 2
    recent_5day_pnl = contract_multiplier * butterfly_5_change_current

    residuals = yield1 - yield_regress_output['alpha'] \
        - yield_regress_output['beta'] * yield2

    # Shift of the recent (5-year) mean residual versus the full history,
    # in units of the residual standard deviation.
    regime_change_ind = (residuals[last5_years_indx].mean()
                         - residuals.mean()) / residuals.std()

    # Same comparison restricted to observations whose first leg shares the
    # current first leg's ticker month.
    seasonal_residuals = residuals[
        aligned_data['c1']['ticker_month']
        == current_data['c1']['ticker_month']]
    seasonal_clean_residuals = seasonal_residuals[
        np.isfinite(seasonal_residuals)]
    clean_residuals = residuals[np.isfinite(residuals)]

    contract_seasonality_ind = (
        seasonal_clean_residuals.mean() - clean_residuals.mean()) \
        / clean_residuals.std()

    yield1_quantile_list = stats.get_number_from_quantile(
        y=yield1, quantile_list=[10, 90])
    yield2_quantile_list = stats.get_number_from_quantile(
        y=yield2, quantile_list=[10, 90])

    noise_ratio = (yield1_quantile_list[1] - yield1_quantile_list[0]) / \
        (yield2_quantile_list[1] - yield2_quantile_list[0])

    daily_noise_recent = stats.get_stdev(
        x=butterfly_1_change.values[-20:], clean_num_obs=15)
    daily_noise_past = stats.get_stdev(
        x=butterfly_1_change.values,
        clean_num_obs=max(100, round(3 * len(butterfly_1_change.values) / 4)))

    recent_vol_ratio = daily_noise_recent / daily_noise_past

    alpha1 = yield_regress_output['alpha']

    residuals_last5_years = residuals[last5_years_indx]
    residuals_last2_months = residuals[last2_months_indx]

    residual_current = yield1_current - alpha1 \
        - second_spread_weight_1 * yield2_current

    # Both z-scores are scaled by the full-history residual deviation.
    z3 = (residual_current - residuals_last5_years.mean()) / residuals.std()
    z4 = (residual_current - residuals_last2_months.mean()) / residuals.std()

    yield_change = (alpha1 + second_spread_weight_1 * yield2_current
                    - yield1_current) / (1 + second_spread_weight_1)

    new_yield1 = yield1_current + yield_change
    new_yield2 = yield2_current - yield_change

    price_change1 = 100 * (
        (price_2 * (new_yield1 + 100) / 100) - price_1) / (200 + new_yield1)
    price_change2 = 100 * (
        (price_3 * (new_yield2 + 100) / 100) - price_2) / (200 + new_yield2)

    theo_pnl = contract_multiplier * (
        2 * price_change1 - 2 * second_spread_weight_1 * price_change2)

    aligned_data['residuals'] = residuals
    aligned_output['aligned_data'] = aligned_data

    # Residual change 5 rows ahead within the same cont_indx group.
    grouped = aligned_data.groupby(aligned_data['c1']['cont_indx'])
    aligned_data['shifted_residuals'] = grouped['residuals'].shift(-5)
    aligned_data['residual_change'] = \
        aligned_data['shifted_residuals'] - aligned_data['residuals']

    mean_reversion = stats.get_regression_results({
        'x': aligned_data['residuals'].values,
        'y': aligned_data['residual_change'].values,
        'clean_num_obs': max(100, round(3 * len(yield1.values) / 4))})

    theo_spread_move_output = \
        su.calc_theo_spread_move_from_ratio_normalization(
            ratio_time_series=price_ratio.values[-40:],
            starting_quantile=qf,
            num_price=linear_interp_price2_current,
            den_price=current_data['c2']['close_price'],
            favorable_quantile_move_list=[5, 10, 15, 20, 25])

    theo_pnl_list = [
        x * contract_multiplier * 2
        for x in theo_spread_move_output['theo_spread_move_list']]

    return {
        'success': True,
        'aligned_output': aligned_output,
        'q': q,
        'qf': qf,
        'theo_pnl_list': theo_pnl_list,
        'ratio_target_list': theo_spread_move_output['ratio_target_list'],
        'weight1': weight1,
        'weight2': weight2,
        'weight3': weight3,
        'zscore1': zscore1,
        'rsquared1': rsquared1,
        'zscore2': zscore2,
        'rsquared2': rsquared2,
        'zscore3': z3,
        'zscore4': z4,
        'zscore5': zscore1 - regime_change_ind,
        'zscore6': zscore1 - contract_seasonality_ind,
        'zscore7': zscore1 - regime_change_ind - contract_seasonality_ind,
        'theo_pnl': theo_pnl,
        'regime_change_ind': regime_change_ind,
        'contract_seasonality_ind': contract_seasonality_ind,
        'second_spread_weight_1': second_spread_weight_1,
        'second_spread_weight_2': second_spread_weight_2,
        'downside': downside,
        'upside': upside,
        'yield1': yield1,
        'yield2': yield2,
        'yield1_current': yield1_current,
        'yield2_current': yield2_current,
        'bf_price': butterfly_price_current,
        'short_price_limit': short_price_limit,
        'long_price_limit': long_price_limit,
        'noise_ratio': noise_ratio,
        'alpha1': alpha1,
        'alpha2': yield_regress_output_last5_years['alpha'],
        'residual_std1': yield_regress_output['residualstd'],
        'residual_std2': yield_regress_output_last5_years['residualstd'],
        'recent_vol_ratio': recent_vol_ratio,
        'recent_5day_pnl': recent_5day_pnl,
        'price_1': price_1,
        'price_2': price_2,
        'price_3': price_3,
        'last5_years_indx': last5_years_indx,
        'price_ratio': price_ratio,
        'mean_reversion_rsquared': mean_reversion['rsquared'],
        'mean_reversion_signif': (mean_reversion['conf_int'][1, :] < 0).all(),
    }
def get_futures_butterfly_signals(**kwargs):
    """Compute signal statistics for a three-contract futures butterfly.

    Required keyword arguments:
        ticker_list: the three contract tickers; they are exposed by the
            aligned data as legs 'c1', 'c2' and 'c3'.
        date_to: report date, passed to the date/expiration helpers and to
            the data alignment call.

    Optional keyword arguments (each is derived when absent):
        tr_dte_list: per-ticker values from
            exp.get_futures_days2_expiration.
        aggregation_method, contracts_back: only honoured when BOTH are
            supplied; otherwise both come from
            opUtil.get_aggregation_method_contracts_back.
        use_last_as_current: forwarded to the alignment call
            (default False).
        futures_data_dictionary: preloaded price data keyed by ticker head.
        contract_multiplier: looked up in cmi.contract_multiplier by
            ticker head.
        datetime5_years_ago: date_to shifted by 5*365 via
            cu.doubledate_shift, converted to a datetime.
        datetime2_months_ago: date_to shifted by 60 via
            cu.doubledate_shift, converted to a datetime.

    Returns:
        dict with the aligned data (augmented with 'residuals',
        'shifted_residuals' and 'residual_change' columns), the price-ratio
        quantiles ('q', 'qf'), butterfly weights, yield-regression z-scores
        and r-squared values, theoretical pnl figures, downside/upside
        estimates, price limits, noise/volatility ratios and the
        mean-reversion regression summary.

    Side effects:
        aligned_output['aligned_data'] is modified in place (new columns).
    """
    def min_clean_obs(values):
        # Observation threshold handed to the stats helpers as
        # 'clean_num_obs': three quarters of the sample, never below 100.
        return max(100, round(3*len(values)/4))

    ticker_list = kwargs['ticker_list']
    date_to = kwargs['date_to']

    if 'tr_dte_list' in kwargs:
        tr_dte_list = kwargs['tr_dte_list']
    else:
        tr_dte_list = [exp.get_futures_days2_expiration({'ticker': x, 'date_to': date_to})
                       for x in ticker_list]

    # Both settings must be supplied together; a lone one is ignored.
    if 'aggregation_method' in kwargs and 'contracts_back' in kwargs:
        aggregation_method = kwargs['aggregation_method']
        contracts_back = kwargs['contracts_back']
    else:
        amcb_output = opUtil.get_aggregation_method_contracts_back(
            cmi.get_contract_specs(ticker_list[0]))
        aggregation_method = amcb_output['aggregation_method']
        contracts_back = amcb_output['contracts_back']

    use_last_as_current = kwargs.get('use_last_as_current', False)

    if 'futures_data_dictionary' in kwargs:
        futures_data_dictionary = kwargs['futures_data_dictionary']
    else:
        ticker_head = cmi.get_contract_specs(ticker_list[0])['ticker_head']
        futures_data_dictionary = {
            ticker_head: gfp.get_futures_price_preloaded(ticker_head=ticker_head)}

    if 'contract_multiplier' in kwargs:
        contract_multiplier = kwargs['contract_multiplier']
    else:
        contract_multiplier = cmi.contract_multiplier[
            cmi.get_contract_specs(ticker_list[0])['ticker_head']]

    if 'datetime5_years_ago' in kwargs:
        datetime5_years_ago = kwargs['datetime5_years_ago']
    else:
        date5_years_ago = cu.doubledate_shift(date_to, 5*365)
        datetime5_years_ago = cu.convert_doubledate_2datetime(date5_years_ago)

    if 'datetime2_months_ago' in kwargs:
        datetime2_months_ago = kwargs['datetime2_months_ago']
    else:
        date2_months_ago = cu.doubledate_shift(date_to, 60)
        datetime2_months_ago = cu.convert_doubledate_2datetime(date2_months_ago)

    aligned_output = opUtil.get_aligned_futures_data(contract_list=ticker_list,
                                                     tr_dte_list=tr_dte_list,
                                                     aggregation_method=aggregation_method,
                                                     contracts_back=contracts_back,
                                                     date_to=date_to,
                                                     futures_data_dictionary=futures_data_dictionary,
                                                     use_last_as_current=use_last_as_current)

    current_data = aligned_output['current_data']
    aligned_data = aligned_output['aligned_data']

    # Calendar distance, in months, between adjacent legs.
    month_diff_1 = 12*(current_data['c1']['ticker_year']-current_data['c2']['ticker_year']) \
        + (current_data['c1']['ticker_month']-current_data['c2']['ticker_month'])
    month_diff_2 = 12*(current_data['c2']['ticker_year']-current_data['c3']['ticker_year']) \
        + (current_data['c2']['ticker_month']-current_data['c3']['ticker_month'])

    # Linear-interpolation weights for the middle leg. They must sum to 2 so
    # that, together with weight_12 = -2, the butterfly weights net to zero.
    # The denominator is the total c1-to-c3 span (month_diff_1 + month_diff_2);
    # the previous code used month_diff_1 + month_diff_1, which is only
    # correct when the two wings are equally spaced.
    total_month_diff = month_diff_1+month_diff_2
    weight_11 = 2*month_diff_2/total_month_diff
    weight_12 = -2
    weight_13 = 2*month_diff_1/total_month_diff

    price_1 = current_data['c1']['close_price']
    price_2 = current_data['c2']['close_price']
    price_3 = current_data['c3']['close_price']

    linear_interp_price2 = (weight_11*aligned_data['c1']['close_price']
                            + weight_13*aligned_data['c3']['close_price'])/2

    butterfly_price = aligned_data['c1']['close_price'] \
        - 2*aligned_data['c2']['close_price'] \
        + aligned_data['c3']['close_price']

    # Ratio of the interpolated middle-leg price to the actual middle-leg price.
    price_ratio = linear_interp_price2/aligned_data['c2']['close_price']

    linear_interp_price2_current = (weight_11*price_1+weight_13*price_3)/2
    price_ratio_current = linear_interp_price2_current/price_2

    recent_price_ratio = price_ratio.values[-40:]

    # q uses the full history, qf only the last 40 observations.
    q = stats.get_quantile_from_number({'x': price_ratio_current,
                                        'y': price_ratio.values,
                                        'clean_num_obs': min_clean_obs(price_ratio.values)})
    qf = stats.get_quantile_from_number({'x': price_ratio_current,
                                         'y': recent_price_ratio,
                                         'clean_num_obs': 30})

    recent_quantile_list = [stats.get_quantile_from_number({'x': x,
                                                            'y': recent_price_ratio,
                                                            'clean_num_obs': 30})
                            for x in recent_price_ratio]

    weight1 = weight_11
    weight2 = weight_12
    weight3 = weight_13

    last5_years_indx = aligned_data['settle_date'] >= datetime5_years_ago
    last2_months_indx = aligned_data['settle_date'] >= datetime2_months_ago
    data_last5_years = aligned_data[last5_years_indx]

    # Percentage spread of each leg over the next one.
    yield1 = 100*(aligned_data['c1']['close_price']-aligned_data['c2']['close_price']) \
        / aligned_data['c2']['close_price']
    yield2 = 100*(aligned_data['c2']['close_price']-aligned_data['c3']['close_price']) \
        / aligned_data['c3']['close_price']

    yield1_last5_years = yield1[last5_years_indx]
    yield2_last5_years = yield2[last5_years_indx]

    yield1_current = 100*(current_data['c1']['close_price']-current_data['c2']['close_price']) \
        / current_data['c2']['close_price']
    yield2_current = 100*(current_data['c2']['close_price']-current_data['c3']['close_price']) \
        / current_data['c3']['close_price']

    butterfly_price_current = current_data['c1']['close_price'] \
        - 2*current_data['c2']['close_price'] \
        + current_data['c3']['close_price']

    # Regress yield1 on yield2: once on the full history, once on the
    # last-five-years subset.
    yield_regress_output = stats.get_regression_results({'x': yield2, 'y': yield1,
                                                         'x_current': yield2_current,
                                                         'y_current': yield1_current,
                                                         'clean_num_obs': min_clean_obs(yield1.values)})

    yield_regress_output_last5_years = stats.get_regression_results({'x': yield2_last5_years,
                                                                     'y': yield1_last5_years,
                                                                     'x_current': yield2_current,
                                                                     'y_current': yield1_current,
                                                                     'clean_num_obs': min_clean_obs(yield1_last5_years.values)})

    # Price limits stay NaN unless at least 40 observations exist and some
    # recent observation satisfies the z-score/quantile thresholds below.
    short_price_limit = np.nan
    long_price_limit = np.nan

    if len(yield1) >= 40 and len(yield2) >= 40:

        # Positional access via .values: the intent is "the last 40 rows",
        # independent of whatever index the series carries.
        recent_residuals = yield1.values[-40:] \
            - yield_regress_output['alpha'] \
            - yield_regress_output['beta']*yield2.values[-40:]
        recent_zscore_list = list(recent_residuals/yield_regress_output['residualstd'])

        bf_qz_frame = pd.DataFrame({'bf_price': butterfly_price.values[-40:],
                                    'q': recent_quantile_list,
                                    'zscore': recent_zscore_list},
                                   columns=['bf_price', 'q', 'zscore'])

        bf_qz_frame = np.round(bf_qz_frame, 8)
        # For repeated butterfly prices keep only the last occurrence.
        bf_qz_frame = bf_qz_frame.drop_duplicates(subset=['bf_price'], keep='last')

        bf_qz_frame_short = bf_qz_frame[(bf_qz_frame['zscore'] >= 0.6) & (bf_qz_frame['q'] >= 85)]
        bf_qz_frame_long = bf_qz_frame[(bf_qz_frame['zscore'] <= -0.6) & (bf_qz_frame['q'] <= 12)]

        if not bf_qz_frame_short.empty:
            short_price_limit = bf_qz_frame_short['bf_price'].min()

        if not bf_qz_frame_long.empty:
            long_price_limit = bf_qz_frame_long['bf_price'].max()

    zscore1 = yield_regress_output['zscore']
    rsquared1 = yield_regress_output['rsquared']

    zscore2 = yield_regress_output_last5_years['zscore']
    rsquared2 = yield_regress_output_last5_years['rsquared']

    # The regression betas are used as the weight of the second spread.
    second_spread_weight_1 = yield_regress_output['beta']
    second_spread_weight_2 = yield_regress_output_last5_years['beta']

    butterfly_5_change = data_last5_years['c1']['change_5'] \
        - (1+second_spread_weight_1)*data_last5_years['c2']['change_5'] \
        + second_spread_weight_1*data_last5_years['c3']['change_5']

    butterfly_5_change_current = current_data['c1']['change_5'] \
        - (1+second_spread_weight_1)*current_data['c2']['change_5'] \
        + second_spread_weight_1*current_data['c3']['change_5']

    butterfly_1_change = data_last5_years['c1']['change_1'] \
        - (1+second_spread_weight_1)*data_last5_years['c2']['change_1'] \
        + second_spread_weight_1*data_last5_years['c3']['change_1']

    percentile_vector = stats.get_number_from_quantile(y=butterfly_5_change.values,
                                                       quantile_list=[1, 15, 85, 99],
                                                       clean_num_obs=min_clean_obs(butterfly_5_change.values))

    # Downside/upside: average of the two lower / two upper requested
    # quantiles of the weighted 'change_5' series, scaled to currency.
    downside = contract_multiplier*(percentile_vector[0]+percentile_vector[1])/2
    upside = contract_multiplier*(percentile_vector[2]+percentile_vector[3])/2
    recent_5day_pnl = contract_multiplier*butterfly_5_change_current

    residuals = yield1-yield_regress_output['alpha']-yield_regress_output['beta']*yield2

    # Both indicators are mean residuals of a subset, expressed in units of
    # the full-sample residual standard deviation.
    regime_change_ind = (residuals[last5_years_indx].mean()-residuals.mean())/residuals.std()
    same_month_indx = aligned_data['c1']['ticker_month'] == current_data['c1']['ticker_month']
    contract_seasonality_ind = (residuals[same_month_indx].mean()-residuals.mean())/residuals.std()

    yield1_quantile_list = stats.get_number_from_quantile(y=yield1, quantile_list=[10, 90])
    yield2_quantile_list = stats.get_number_from_quantile(y=yield2, quantile_list=[10, 90])

    # Ratio of the 10-90 quantile ranges of the two yields.
    noise_ratio = (yield1_quantile_list[1]-yield1_quantile_list[0]) \
        / (yield2_quantile_list[1]-yield2_quantile_list[0])

    daily_noise_recent = stats.get_stdev(x=butterfly_1_change.values[-20:], clean_num_obs=15)
    daily_noise_past = stats.get_stdev(x=butterfly_1_change.values,
                                       clean_num_obs=min_clean_obs(butterfly_1_change.values))

    recent_vol_ratio = daily_noise_recent/daily_noise_past

    alpha1 = yield_regress_output['alpha']

    residuals_last5_years = residuals[last5_years_indx]
    residuals_last2_months = residuals[last2_months_indx]

    residual_current = yield1_current-alpha1-second_spread_weight_1*yield2_current

    z3 = (residual_current-residuals_last5_years.mean())/residuals.std()
    z4 = (residual_current-residuals_last2_months.mean())/residuals.std()

    # Yield shift that puts the current point back on the regression line
    # when yield1 moves by +yield_change and yield2 by -yield_change.
    yield_change = (alpha1+second_spread_weight_1*yield2_current-yield1_current) \
        / (1+second_spread_weight_1)

    new_yield1 = yield1_current+yield_change
    new_yield2 = yield2_current-yield_change

    price_change1 = 100*((price_2*(new_yield1+100)/100)-price_1)/(200+new_yield1)
    price_change2 = 100*((price_3*(new_yield2+100)/100)-price_2)/(200+new_yield2)

    theo_pnl = contract_multiplier*(2*price_change1-2*second_spread_weight_1*price_change2)

    # Attach the residuals to the aligned data (in place) and regress the
    # 5-row-ahead residual change, shifted within each 'cont_indx' group,
    # on the residual level.
    aligned_data['residuals'] = residuals
    aligned_output['aligned_data'] = aligned_data

    grouped = aligned_data.groupby(aligned_data['c1']['cont_indx'])
    aligned_data['shifted_residuals'] = grouped['residuals'].shift(-5)
    aligned_data['residual_change'] = aligned_data['shifted_residuals']-aligned_data['residuals']

    mean_reversion = stats.get_regression_results({'x': aligned_data['residuals'].values,
                                                   'y': aligned_data['residual_change'].values,
                                                   'clean_num_obs': min_clean_obs(yield1.values)})

    theo_spread_move_output = su.calc_theo_spread_move_from_ratio_normalization(
        ratio_time_series=recent_price_ratio,
        starting_quantile=qf,
        num_price=linear_interp_price2_current,
        den_price=current_data['c2']['close_price'],
        favorable_quantile_move_list=[5, 10, 15, 20, 25])

    theo_pnl_list = [x*contract_multiplier*2 for x in theo_spread_move_output['theo_spread_move_list']]

    return {'aligned_output': aligned_output,
            'q': q,
            'qf': qf,
            'theo_pnl_list': theo_pnl_list,
            'ratio_target_list': theo_spread_move_output['ratio_target_list'],
            'weight1': weight1,
            'weight2': weight2,
            'weight3': weight3,
            'zscore1': zscore1,
            'rsquared1': rsquared1,
            'zscore2': zscore2,
            'rsquared2': rsquared2,
            'zscore3': z3,
            'zscore4': z4,
            'zscore5': zscore1-regime_change_ind,
            'zscore6': zscore1-contract_seasonality_ind,
            'zscore7': zscore1-regime_change_ind-contract_seasonality_ind,
            'theo_pnl': theo_pnl,
            'regime_change_ind': regime_change_ind,
            'contract_seasonality_ind': contract_seasonality_ind,
            'second_spread_weight_1': second_spread_weight_1,
            'second_spread_weight_2': second_spread_weight_2,
            'downside': downside,
            'upside': upside,
            'yield1': yield1,
            'yield2': yield2,
            'yield1_current': yield1_current,
            'yield2_current': yield2_current,
            'bf_price': butterfly_price_current,
            'short_price_limit': short_price_limit,
            'long_price_limit': long_price_limit,
            'noise_ratio': noise_ratio,
            'alpha1': alpha1,
            'alpha2': yield_regress_output_last5_years['alpha'],
            'residual_std1': yield_regress_output['residualstd'],
            'residual_std2': yield_regress_output_last5_years['residualstd'],
            'recent_vol_ratio': recent_vol_ratio,
            'recent_5day_pnl': recent_5day_pnl,
            'price_1': price_1,
            'price_2': price_2,
            'price_3': price_3,
            'last5_years_indx': last5_years_indx,
            'price_ratio': price_ratio,
            'mean_reversion_rsquared': mean_reversion['rsquared'],
            'mean_reversion_signif': (mean_reversion['conf_int'][1, :] < 0).all()}