def pair_trading(self, util, date, k=0):
    """Regress one contract's mid price on the next contract's mid price.

    The contracts are taken from the order in which tickers first appear in
    the frame returned by ``BuildData.cal_remain_maturity``: position ``k``
    is the dependent variable and position ``k + 1`` the regressor. No
    constant column is added to the regressor before fitting.

    Args:
        util: Helper object forwarded to ``BuildData.cal_remain_maturity``.
        date: Trading date forwarded to ``BuildData.cal_remain_maturity``.
        k: Position of the first contract in the ticker list. Values above 2
            are silently reset to 0.

    Returns:
        The fitted result of ``sm.OLS(close, far).fit()``.
    """
    # Out-of-range offsets fall back to the front pair rather than raising.
    if k > 2:
        k = 0

    builder = BuildData()
    quotes = builder.cal_remain_maturity(util, date)
    tickers = quotes['ticker'].unique()
    near_ticker, far_ticker = tickers[k], tickers[k + 1]

    near_quotes = quotes[quotes['ticker'] == near_ticker]
    far_quotes = quotes[quotes['ticker'] == far_ticker]

    near_mid = builder.get_mid_price(near_quotes)
    far_mid = builder.get_mid_price(far_quotes)
    near_mid.index = near_quotes['updatetime']
    far_mid.index = far_quotes['updatetime']
    # Only the regressor is de-duplicated: it must have a unique index so it
    # can be aligned onto the (possibly repeating) index of the near series.
    far_mid = far_mid[~far_mid.index.duplicated()]

    # Columns are assigned one at a time so the frame keeps the near
    # contract's index and the far contract is aligned onto it.
    paired = pd.DataFrame()
    paired['close'] = near_mid
    paired['far'] = far_mid
    paired = paired.dropna()

    return sm.OLS(paired['close'], paired['far']).fit()
def get_big_data(self, util):
    """Stack the per-day maturity frames for the configured date range.

    Dates come from ``util.generate_date_lst(self.start_date, self.end_date)``
    and each day's frame from ``BuildData.cal_remain_maturity``.

    The frames are collected in a list and concatenated once.
    ``DataFrame.append`` was removed in pandas 2.0, and calling it inside a
    loop copies the accumulated data on every iteration.

    Args:
        util: Helper object providing ``generate_date_lst``; also forwarded to
            ``BuildData.cal_remain_maturity``.

    Returns:
        A single DataFrame with every day's rows in date order, or an empty
        DataFrame when no day produced data.
    """
    builder = BuildData()
    frames = []
    for date in util.generate_date_lst(self.start_date, self.end_date):
        daily = builder.cal_remain_maturity(util, date)
        # total_volume() treats a None result as "no data for this day",
        # so skip such days here as well instead of failing.
        if daily is not None:
            frames.append(daily)

    # pd.concat raises on an empty list; keep returning an empty frame.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
def total_volume(self, util, date):
    """Collect the last ``remain_date`` and ``ts`` value of every ticker.

    Args:
        util: Helper object forwarded to ``BuildData.cal_remain_maturity``.
        date: Trading date forwarded to ``BuildData.cal_remain_maturity``.

    Returns:
        A tuple ``(remain_dates, ts_values, tickers)``. The two lists are
        ordered like ``tickers``, which is the array of unique tickers in
        order of first appearance. When no data is available for ``date``
        the placeholder ``([0], [0], pd.Series([0]))`` is returned.
    """
    builder = BuildData()
    quotes = builder.cal_remain_maturity(util, date)
    if quotes is None:
        return [0], [0], pd.Series([0])

    tickers = quotes['ticker'].unique()
    last_ts = []
    remaining = []
    for ticker in tickers:
        rows = quotes[quotes['ticker'] == ticker]
        # NOTE(review): 'ts' is presumably a cumulative volume figure given
        # the method name - confirm against BuildData.
        last_ts.append(rows['ts'].iloc[-1])
        remaining.append(rows['remain_date'].iloc[-1])

    return remaining, last_ts, tickers
def save_ln(util, start_date, end_date):
    """Write one CSV per date joining spot prices with every futures contract.

    For each date the output frame is indexed like the spot price frame and
    holds, in this column order: ``datetime``, ``spot``, then per ticker the
    futures ``cp`` plus three remaining-time columns, then one ``_ratio``
    column per adjacent ticker pair, and finally ``ln_spot``.

    Args:
        util: Helper object providing ``generate_date_lst`` and
            ``get_stock_price``; also forwarded to
            ``BuildData.cal_remain_maturity``.
        start_date: First date passed to ``util.generate_date_lst``.
        end_date: Last date passed to ``util.generate_date_lst``.

    Returns:
        None. The result is written to disk only.
    """
    secs_two_week = 14 * 24 * 60 * 60

    def snap_to_half(stamp):
        # Replace the last three characters with '000' or '500' depending on
        # the first of them. Presumably this rounds milliseconds down to a
        # half-second grid - confirm the timestamp format.
        return stamp[:-3] + '000' if stamp[-3] < "5" else stamp[:-3] + '500'

    for date in util.generate_date_lst(start_date, end_date):
        stock_price = util.get_stock_price(date).rename(columns={'close': "cp"})
        futures = BuildData().cal_remain_maturity(util, date)
        futures['datetime'] = futures['datetime'].apply(snap_to_half)

        spot_index = stock_price.index
        ticker_lst = futures['ticker'].unique()

        ln_df = pd.DataFrame()
        ln_df['datetime'] = stock_price.index
        ln_df.index = ln_df['datetime']
        ln_df['spot'] = stock_price['cp']

        for ticker in ticker_lst:
            futures_df = get_futures(futures, ticker, spot_index)
            ln_df[ticker] = futures_df['cp']
            remain_secs = futures_df['remain_secs']
            ln_df[ticker + '_remain_secs'] = remain_secs
            ln_df[ticker + '_remain_mius'] = remain_secs / 60
            # Two weeks expressed as a multiple of the remaining seconds.
            ln_df[ticker + '_T-t'] = remain_secs.apply(
                lambda secs: secs_two_week / float(secs))

        # Ratio of each contract against the one that follows it.
        for ticker_close, ticker_far in zip(ticker_lst[:-1], ticker_lst[1:]):
            pair_ratio = get_division_ratio(ln_df[ticker_close], ln_df[ticker_far])
            ln_df[ticker_close + '_ratio'] = pair_ratio['ratio']

        # NOTE(review): unlike the loop above, the whole return value is
        # stored here rather than its 'ratio' column - confirm this is
        # intended.
        ln_df['ln_spot'] = get_division_ratio(ln_df['spot'], ln_df[ticker_lst[0]])

        ln_df.to_csv('/home/lky/volume_speculate/output/pair_trading/ln_futures/'
                     + date + '.csv')
def mature_futures(self, util, date, k):
    """Return the futures-minus-spot gap on the maturity date of a contract.

    The contract is the ``k``-th ticker reported by ``get_main_ticker`` for
    ``date``; its maturity date is looked up through
    ``util.get_ticker_maturity`` and handed to ``spot_futures``.

    Args:
        util: Helper object providing ``get_ticker_maturity``; also forwarded
            to ``get_main_ticker`` and ``spot_futures``.
        date: Trading date used to pick the contract.
        k: Position of the contract in that day's ticker list.

    Returns:
        Whatever ``spot_futures`` returns for the maturity date.
    """
    # The original created a BuildData instance here that was never used.
    ticker = self.get_main_ticker(util, date, k)
    mature_date = util.get_ticker_maturity(ticker)
    return self.spot_futures(util, mature_date)
def spot_futures(self, util, date):
    """Return the front contract's ``cp`` minus the spot ``close`` per minute.

    Futures timestamps are truncated to the minute. The spot frame is
    re-indexed onto those minute labels and forward-filled before the
    subtraction. Only the first row of each minute is kept in the result.

    The original re-read ``main_data['cp']`` for the subtraction and relied on
    an earlier in-place index assignment to an alias of that column. That only
    lines up while pandas hands back the same cached Series object, so the
    re-indexed series is now used explicitly.

    Args:
        util: Helper object providing ``get_stock_price``; also forwarded to
            ``BuildData.cal_remain_maturity``.
        date: Trading date to evaluate.

    Returns:
        A Series indexed by ``"%Y-%m-%d %H:%M:%S"`` minute strings.
    """
    stock_price = util.get_stock_price(date)
    data = BuildData().cal_remain_maturity(util, date)
    main_contract = data['ticker'].unique()[0]
    main_data = data[data['ticker'] == main_contract]

    def to_minute(stamp):
        # Parse the full timestamp and emit it with the seconds zeroed.
        parsed = datetime.datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S.%f")
        return parsed.strftime("%Y-%m-%d %H:%M:00")

    minute_idx = main_data['datetime'].apply(to_minute)

    # Re-index a copy so the source frame's column is left untouched.
    futures_cp = main_data['cp'].copy()
    futures_cp.index = minute_idx

    # assumes the spot frame is indexed by the same minute-string format -
    # TODO confirm against util.get_stock_price.
    stock_price = stock_price.reindex(index=minute_idx).ffill()

    gap = futures_cp - stock_price['close']
    return gap[~gap.index.duplicated()]
def get_data(self, util, date, k):
    """Return the rows of the ``k``-th ticker from the session open onwards.

    Args:
        util: Helper object providing ``get_open_time``; also forwarded to
            ``BuildData.cal_remain_maturity``.
        date: Trading date to load.
        k: Position of the wanted ticker in order of first appearance.

    Returns:
        The rows of that ticker whose ``sod`` is at least
        ``util.get_open_time(date, self.morning)``. The ``datetime`` column
        has its last three characters replaced by ``'000'`` or ``'500'``.
    """
    quotes = BuildData().cal_remain_maturity(util, date)

    def snap_to_half(stamp):
        # Presumably rounds milliseconds down to a half-second grid -
        # confirm the timestamp format.
        if stamp[-3] < "5":
            return stamp[:-3] + '000'
        return stamp[:-3] + '500'

    quotes['datetime'] = quotes['datetime'].apply(snap_to_half)

    wanted = quotes['ticker'].unique()[k]
    session_open = util.get_open_time(date, self.morning)

    selected = quotes[quotes['ticker'] == wanted]
    return selected[selected['sod'] >= session_open]
def pair_spread(self, util, date, k):
    """Return the ``cp`` difference between two consecutive contracts.

    Both contracts are indexed by their ``updatetime`` column, and the
    subtraction aligns them on that index.

    Args:
        util: Helper object forwarded to ``BuildData.cal_remain_maturity``.
        date: Trading date to load.
        k: Position of the first contract in the ticker list. Values above 2
            are silently reset to 0.

    Returns:
        A Series of ``cp`` at position ``k`` minus ``cp`` at position
        ``k + 1``.
    """
    # Out-of-range offsets fall back to the front pair rather than raising.
    if k > 2:
        k = 0

    quotes = BuildData().cal_remain_maturity(util, date)
    tickers = quotes['ticker'].unique()
    near_ticker = tickers[k]
    far_ticker = tickers[k + 1]

    near_rows = quotes[quotes['ticker'] == near_ticker]
    far_rows = quotes[quotes['ticker'] == far_ticker]
    near_rows.index = near_rows['updatetime']
    far_rows.index = far_rows['updatetime']
    # Only the second contract is de-duplicated before the subtraction.
    far_rows = far_rows[~far_rows.index.duplicated()]

    return near_rows['cp'] - far_rows['cp']
def mean_spot(self, util, date, k):
    """Pair the ``k``-th contract's ``cp`` with the spot ``close`` per tick.

    Futures timestamps are truncated to the minute. The spot frame is
    re-indexed onto those minute labels and forward-filled, so every futures
    row gets the spot price of its minute or the last one known before it.

    Compared with the original, ``fillna(method='ffill')`` (deprecated in
    recent pandas) is replaced by ``ffill()``. The futures column is also
    copied before re-indexing, so the source frame is not modified through an
    alias.

    Args:
        util: Helper object providing ``get_stock_price``; also forwarded to
            ``BuildData.cal_remain_maturity``.
        date: Trading date to evaluate.
        k: Position of the contract in order of first appearance.

    Returns:
        A DataFrame with columns ``futures`` and ``stock`` indexed by
        ``"%Y-%m-%d %H:%M:%S"`` minute strings. Labels repeat when a minute
        holds several futures rows.
    """
    stock_price = util.get_stock_price(date)
    data = BuildData().cal_remain_maturity(util, date)
    main_contract = data['ticker'].unique()[k]
    main_data = data[data['ticker'] == main_contract]

    def to_minute(stamp):
        # Parse the full timestamp and emit it with the seconds zeroed.
        parsed = datetime.datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S.%f")
        return parsed.strftime("%Y-%m-%d %H:%M:00")

    minute_idx = main_data['datetime'].apply(to_minute)

    futures_cp = main_data['cp'].copy()
    futures_cp.index = minute_idx

    # assumes the spot frame is indexed by the same minute-string format -
    # TODO confirm against util.get_stock_price.
    stock_price = stock_price.reindex(index=minute_idx).ffill()

    # Columns are assigned one at a time so the frame takes the futures index.
    df = pd.DataFrame()
    df['futures'] = futures_cp
    df['stock'] = stock_price['close']
    return df
def cal_corr(self, util, date, k):
    """Correlate futures ``cp`` on ``date`` with spot ``close`` on a shifted day.

    Args:
        util: Helper object providing ``get_week_date`` and
            ``get_stock_price``; also forwarded to
            ``BuildData.cal_remain_maturity``.
        date: Trading date of the futures data.
        k: Shift passed to ``util.get_week_date``. With ``k == 0`` the spot
            prices of ``date`` itself are used.

    Returns:
        The correlation between the ``cp`` and ``stock`` columns, read from
        the matrix produced by ``DataFrame.corr``.
    """
    quotes = BuildData().cal_remain_maturity(util, date)

    # Drop the last six characters and append '00'. Presumably this truncates
    # the timestamp to the minute - confirm the timestamp format.
    quotes['datetime'] = quotes['datetime'].apply(lambda stamp: stamp[:-6] + '00')
    quotes = quotes.set_index(['datetime'])
    # NOTE(review): rows are not filtered by ticker first, so the row kept
    # for a label is simply the first one in the frame - confirm intended.
    quotes = quotes[~quotes.index.duplicated(keep='first')]
    quotes.index = quotes['sod']

    back_date = date if k == 0 else util.get_week_date(date, k)
    stock_price = util.get_stock_price(back_date)

    # The 'sod' column must be read before set_index moves it into the index.
    quotes = quotes.reindex(index=stock_price['sod'])
    stock_price = stock_price.set_index(['sod'])

    paired = pd.DataFrame()
    paired['cp'] = quotes['cp']
    paired['stock'] = stock_price['close']

    return paired.corr()['stock'].loc['cp']
def mature_corr(self, util, date, k):
    """Return the correlation matrix of every contract's ``cp`` and the spot.

    Each contract contributes one column keyed by its ticker and indexed by
    ``sod``, keeping the first row per ``sod``. The spot ``close`` is added
    as ``stock``. The frame is then restricted to the ``sod`` values that
    have a spot price and forward-filled before correlating.

    Compared with the original, ``fillna(method='ffill')`` (deprecated in
    recent pandas) is replaced by ``ffill()``, and the per-ticker index is
    set with ``set_index`` instead of assigning to a filtered slice.

    Args:
        util: Helper object providing ``get_stock_price``; also forwarded to
            ``BuildData.cal_remain_maturity``.
        date: Trading date to evaluate.
        k: Unused by this method.

    Returns:
        The correlation DataFrame with an extra ``remain_date`` column.
    """
    bd = BuildData()
    stock_price = util.get_stock_price(date)
    data = bd.cal_remain_maturity(util, date)
    ticker_lst = data['ticker'].unique()

    corr_df = pd.DataFrame()
    for ticker in ticker_lst:
        per_ticker = data[data['ticker'] == ticker].set_index('sod', drop=False)
        per_ticker = per_ticker[~per_ticker.index.duplicated(keep='first')]
        corr_df[ticker] = per_ticker['cp']

    # The frame returned by util is re-indexed in place, as in the original.
    stock_price.index = stock_price['sod']
    corr_df['stock'] = stock_price['close']

    stock = corr_df['stock'].dropna()
    corr_df = corr_df.reindex(index=stock.index).ffill()
    corr = corr_df.corr()

    # NOTE(review): this assumes one distinct remain_date per ticker, in
    # ticker order. The trailing 0 belongs to the 'stock' row. The assignment
    # fails on a length mismatch if two tickers share a value - confirm.
    corr['remain_date'] = data['remain_date'].unique().tolist() + [0]
    return corr
def trigger(start_date, end_date, name, k, n=5):
    """Fit a lagged linear model per day and save the coefficients as CSV.

    For every date the ``k``-th ticker's rows are selected. The chosen
    feature function is applied to them and the result is lagged with
    ``data_lag(..., name, n)``. That lagged frame is regressed against
    ``get_price_change`` of the same rows through ``get_linear_model``. The
    fitted ``params`` of each day become one column of the output file.

    Args:
        start_date: First date passed to ``generate_date_lst``; also part of
            the output file name.
        end_date: Last date passed to ``generate_date_lst``; also part of the
            output file name.
        name: Feature to use: ``"eff_price"`` selects
            ``get_effective_price`` and ``"dp"`` selects ``cal_dp``.
        k: Position of the ticker in order of first appearance.
        n: Forwarded to ``data_lag``.

    Raises:
        ValueError: If ``name`` is not one of the supported features. The
            original left the feature function unbound in that case, so it
            failed later with ``UnboundLocalError``, or wrote an empty file
            when the date range was empty.
    """
    if name == "eff_price":
        func = get_effective_price
    elif name == "dp":
        func = cal_dp
    else:
        raise ValueError(
            "unknown feature name {!r}; expected 'eff_price' or 'dp'".format(name))

    util = Utility(config)
    params_df = pd.DataFrame()
    for date in util.generate_date_lst(start_date, end_date):
        data = BuildData().cal_remain_maturity(util, date)
        ticker = data['ticker'].unique()[k]
        futures = data[data['ticker'] == ticker]

        price_change = get_price_change(futures)
        effective_price = func(futures)
        independent_df = data_lag(effective_price, name, n)
        params_df[date] = get_linear_model(independent_df, price_change).params

    params_df.to_csv(
        '/home/lky/volume_speculate/output/pair_trading/linear/coefs/{name}_{start_date}_{end_date}.csv'
        .format(name=name, start_date=start_date, end_date=end_date))
def get_main_ticker(self, util, date, k=0):
    """Return the ``k``-th distinct ticker present on ``date``.

    Args:
        util: Helper object forwarded to ``BuildData.cal_remain_maturity``.
        date: Trading date to load.
        k: Position in the list of unique tickers, in order of first
            appearance. Defaults to the first one.

    Returns:
        The ticker at position ``k``.
    """
    quotes = BuildData().cal_remain_maturity(util, date)
    return quotes['ticker'].unique()[k]
def get_remain_date(self, util, date):
    """Return the distinct ``remain_date`` values present on ``date``.

    Args:
        util: Helper object forwarded to ``BuildData.cal_remain_maturity``.
        date: Trading date to load.

    Returns:
        The unique values of the ``remain_date`` column, in order of first
        appearance.
    """
    quotes = BuildData().cal_remain_maturity(util, date)
    return quotes['remain_date'].unique()