def gradient_boosting_result(_ticker='SP500'):
    """Diagnostic sweep of mean-reversion signals for one equity index.

    For every signal series (price-measure x filter x transform x horizon) this
    loads the signal restricted to exchange-open days, then for several rolling
    lookbacks fits two rolling LAD regressions of next-day return on the lagged
    signal (raw, and scaled by the volatility forecast) and prints the median
    absolute residual of each fit.  Output is print-only; nothing is stored.

    Parameters
    ----------
    _ticker : str
        Column name of the equity index in the combined data files.
    """
    # Mask of days on which the exchange was open for this ticker.
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')[_ticker].values
    z = list(np.where(univ_ib_gd.astype('int') == 1)[0])
    # Signal-file name components: price measure, transform, filter, horizon.
    _prc_names = ['CLS', 'A2D', 'A3D', 'A4D', 'A2W', 'A3W', 'A4W', 'A2M', 'A3M', 'A4M']
    _trn_names = ['LVL', 'CH0', 'CH1', 'CH2']
    _fil_names = ['LRB', 'QRB', 'QRG']
    _hoz_names = []
    # Horizons 005..300 in steps of 5, zero-padded to three characters.
    for j in range(5, 305, 5):
        if j < 10:
            _hoz_names.append('00' + str(j))
        elif j < 100:
            _hoz_names.append('0' + str(j))
        else:
            _hoz_names.append(str(j))
    # get volatility forecast
    univ_ib_vl = cr_vol_all_adj.retrieve(univ_ib_eqidx_ext + 'vol_pb_120')[_ticker].values[z]
    # get return
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')[_ticker].values[z]
    univ_ib_cl = filt.ret(univ_ib_cl)
    # Column-bind every signal series into one wide frame, one column per combo.
    univ_ib_sig_all = None
    for k in _trn_names:
        for i in _prc_names:
            for j in _fil_names:
                for mi, m in enumerate(_hoz_names):
                    fn = univ_ib_eqidx_ext+i+'_'+j+'_'+k+'_'+m
                    univ_ib_sig = cr_sig_mr_sg.retrieve(fn)[_ticker].values[z]
                    if univ_ib_sig_all is None:
                        univ_ib_sig_all = DataFrame({i+'_'+j+'_'+k+'_'+m: univ_ib_sig})
                    else:
                        univ_ib_sig_all.col_bind(DataFrame({i+'_'+j+'_'+k+'_'+m: univ_ib_sig}))
    # just for storage - incase something happens
    # univ_ib_gb = DataFrame({'Close': univ_ib_cl, 'Volatility': univ_ib_vl})
    # univ_ib_gb.col_bind(univ_ib_sig_all)
    # cr_cret.store(univ_ib_eqidx_ext + 'GBM', univ_ib_gb)
    reg_lookback = [120, 240, 360, 480]
    new_col_names = list(univ_ib_sig_all.columns)
    for i in new_col_names:
        # i = new_col_names[0]
        test_ = np.empty(0)
        for j in reg_lookback:
            # j = reg_lookback[0]
            uic = univ_ib_cl
            uiv = univ_ib_vl
            # Lag the signal one day so it only uses information known beforehand.
            uis = filt.lag(univ_ib_sig_all[i].values, 1)
            uic, uis, uiv = reduce_nonnan(uic, uis, uiv)
            # b1: rolling LAD beta of return on signal; b2: same with both sides
            # scaled by the volatility forecast (vol-normalised regression).
            b1 = qreg.roll_e_ladreg_1d(uic, uis, j)
            b2 = qreg.roll_e_ladreg_1d(uic/uiv, uis/uiv, j)
            # Residuals use the lagged beta (out-of-sample prediction).
            resid1 = uic - filt.lag(b1)*uis
            resid2 = uic - filt.lag(b2)*uis
            resid1, resid2 = reduce_nonnan(resid1, resid2)
            test_ = np.hstack((test_, np.array([np.median(abs(resid1)), np.median(abs(resid2))])))
        print(i+' : '+np_to_str(test_))
def _conv_more_hist_to_df():
    """Convert every extended-history CSV in the source directory to the
    DataFrame store, keyed by the file's base name (progress is printed)."""
    for fname in cr_sfx_morehs_csv.listdir():
        print(fname)
        csv_path = opj(cr_sfx_morehs_csv.name, fname) + '.csv'
        cr_sfx_morehs_df.store(fname, DataFrame.from_csv(csv_path))
    return None
def _get_curr_to_usd_data(x, close_only=True):
    """Return the history of currency *x* quoted against USD.

    Looks the ticker up in `_curr_to_usd_ticks` (direct quote) or
    `_curr_to_usd_ticks_inv` (inverse quote, prices are inverted here).  For a
    fixed set of majors the series is extended further back by merging in the
    auxiliary 'USD<x>' history, preferring the primary source where both exist.
    A known bad SEK print on 2016-07-12 is divided by 10.

    Parameters
    ----------
    x : str
        Currency code, e.g. 'EUR'.
    close_only : bool
        If True return only ['Date', 'Close'], else ['Date', OHLC].

    Raises
    ------
    NotImplementedError
        If *x* is in neither ticker mapping.
    """
    if x in _curr_to_usd_ticks.keys():
        t1 = cr_sfx_hs.retrieve(_curr_to_usd_ticks[x])
    elif x in _curr_to_usd_ticks_inv.keys():
        t1 = cr_sfx_hs.retrieve(_curr_to_usd_ticks_inv[x])
        # Inverse quote: flip prices so the series is x-per-USD consistent
        # with the direct-quote branch.
        for i in ['Open', 'High', 'Low', 'Close']:
            t1[i] = 1/t1[i]
    else:
        raise NotImplementedError('Currency file information not added to curr_to_usd_ticks')
    if close_only:
        t1 = t1[['Date', 'Close']]
    # for certain currencies extend the history
    if x in ['EUR', 'GBP', 'JPY', 'CHF', 'HKD', 'AUD', 'NZD', 'SEK', 'SGD', 'ZAR', 'CAD']:
        t1h = cr_sfx_morehs_df.retrieve('USD'+x)
        if close_only:
            t1h = t1h[['Date', 'Close']]
            # Merge primary (suffixed _x) with extended history (_y); where the
            # primary Close is missing, fall back to the extended series.
            # NOTE(review): t1m[tidx, 'Close'] is the custom DataFrame's
            # (row-index, column) assignment — confirm its copy semantics.
            t1m = DataFrame.merge(t1, t1h, on='Date')
            t1m['Date'] = t1m['Date'].astype('int64')
            t1m.sort('Date')
            t1m['Close'] = np.nan
            tidx = np.where(~np.isnan(t1m['Close_x'].values))[0]
            t1m[tidx, 'Close'] = t1m[tidx, 'Close_x']
            tidx = np.where(np.isnan(t1m['Close_x'].values))[0]
            t1m[tidx, 'Close'] = t1m[tidx, 'Close_y']
            t1 = t1m[['Date', 'Close']]
        else:
            # Same merge, but patch all four OHLC columns; the primary-vs-
            # fallback decision is still driven by Close_x availability.
            t1m = DataFrame.merge(t1, t1h, on='Date')
            t1m['Date'] = t1m['Date'].astype('int64')
            t1m.sort('Date')
            for i in ['Open', 'High', 'Low', 'Close']:
                t1m[i] = np.nan
            tidx = np.where(~np.isnan(t1m['Close_x'].values))[0]
            for i in ['Open', 'High', 'Low', 'Close']:
                t1m[tidx, i] = t1m[tidx, i+'_x']
            tidx = np.where(np.isnan(t1m['Close_x'].values))[0]
            for i in ['Open', 'High', 'Low', 'Close']:
                t1m[tidx, i] = t1m[tidx, i+'_y']
            t1 = t1m[['Date', 'Open', 'High', 'Low', 'Close']]
    # fix_errors 20160712 for SEK
    if x == 'SEK':
        _bad_pos = np.where(t1['Date'].values == 20160712)[0]
        if close_only:
            t1[_bad_pos, 'Close'] = t1[_bad_pos, 'Close']/10
        else:
            for i in ['Open', 'High', 'Low', 'Close']:
                t1[_bad_pos, i] = t1[_bad_pos, i]/10
    return t1
def get_vol_combo():
    """For every (volatility measure, lookback) pair, assemble one cross-market
    panel — one column per equity index, merged on 'Date' — gap-fill each tick
    column with filt.fill, and store it under the combined-file prefix."""
    _eqidx = get_all_equity_index_names()
    for vname in vol_names:
        for lb in vol_lookbacks:
            for mkt in _eqidx:
                # Single-market series for this measure/lookback, renamed to
                # the market so columns are distinct after merging.
                series = mkt_retrieve(mkt, 'Stats', 'Volatility')[['Date', vname + str(lb)]]
                series.set_columns(['Date', mkt])
                if mkt == _eqidx[0]:
                    combined = series
                else:
                    combined = DataFrame.merge(combined, series, on='Date')
            combined['Date'] = combined['Date'].values.astype('int64')
            combined.sort(['Date'])
            for tick in combined.tick_cols():
                combined[tick] = filt.fill(combined[tick].values)
            cr_cret.store(univ_ib_eqidx_ext + vname + str(lb), combined)
    return None
'D07S14_005_QRG', 'D03S20_349_LRB', 'D10S08_236_QRG', 'D02S08_009_QRG', 'D02S26_027_QRB', 'D06S29_005_QRG', 'D04S26_005_QRG', 'D08S17_021_QRG', 'D07S23_015_QRG', 'D09S29_129_QRG', 'D08S26_236_LRB', 'D10S11_005_QRG', 'D09S29_521_QRG'] for i in _eq_idx: # lag the volatility # i = _eq_idx[0] testv = filt.lag(mkt_retrieve(i, 'Stats', 'Volatility')['vol_gk240'].values) # lag the signal test2 = mkt_retrieve(i, 'MovReg', 'Signals')[['Date']+_sig_set_95_lvl] test2.set_columns(['Date']+[k + '_LVL' for k in _sig_set_95_lvl]) test2_ = mkt_retrieve(i, 'MovReg', 'Changes1')[['Date']+_sig_set_95_ch1] test2_.set_columns(['Date']+[k + '_CH1' for k in _sig_set_95_ch1]) test2 = DataFrame.merge(test2, test2_, on='Date') test2_ = mkt_retrieve(i, 'MovReg', 'Changes3')[['Date']+_sig_set_95_ch3] test2_.set_columns(['Date']+[k + '_CH3' for k in _sig_set_95_ch3]) test2 = DataFrame.merge(test2, test2_, on='Date') test2_ = mkt_retrieve(i, 'MovReg', 'Changes5')[['Date']+_sig_set_95_ch5] test2_.set_columns(['Date']+[k + '_CH5' for k in _sig_set_95_ch5]) test2 = DataFrame.merge(test2, test2_, on='Date') test2_ = mkt_retrieve(i, 'MovReg', 'Changes7')[['Date']+_sig_set_95_ch7] test2_.set_columns(['Date']+[k + '_CH7' for k in _sig_set_95_ch7]) test2 = DataFrame.merge(test2, test2_, on='Date') test2_ = mkt_retrieve(i, 'MovReg', 'Changes9')[['Date']+_sig_set_95_ch9] test2_.set_columns(['Date']+[k + '_CH9' for k in _sig_set_95_ch9])
def test_measures2():
    """Print a Spearman-correlation diagnostic for every signal family.

    Builds per-ticker returns on exchange-open days, then for each transform a
    30-column table (price-measure x filter) by horizon is filled with
    100 * Spearman rank correlation between the twice-lagged signal and the
    return, pooled across a fixed list of equity indices (each truncated to a
    hand-tuned start offset).  Results are only displayed via show_all().
    """
    pd.set_option('display.max_columns', 30)
    pd.set_option('display.max_rows', 100)
    univ_ib_gd = cr_cret.retrieve(univ_ib_eqidx_ext + 'ExchOpen')
    # Signal-file name components: price measure, transform, filter, horizon.
    _prc_names = ['CLS', 'A2D', 'A3D', 'A4D', 'A2W', 'A3W', 'A4W', 'A2M', 'A3M', 'A4M']
    _trn_names = ['LVL', 'CH0', 'CH1', 'CH2']
    _fil_names = ['LRB', 'QRB', 'QRG']
    _hoz_names = []
    # Horizons 005..300 in steps of 5, zero-padded to three characters.
    for j in range(5, 305, 5):
        if j < 10:
            _hoz_names.append('00'+str(j))
        elif j < 100:
            _hoz_names.append('0'+str(j))
        else:
            _hoz_names.append(str(j))
    univ_ib_cl = cr_cret.retrieve(univ_ib_eqidx_ext + 'Close')
    tick_cols = univ_ib_cl.tick_cols()
    # Replace each Close column by returns computed on open days only, with
    # closed days set to 0 via fill1.
    for n in tick_cols:
        z = list(np.where(univ_ib_gd[n].values.astype('int') == 1)[0])
        univ_ib_cl_ = filt.ret(univ_ib_cl[n].values[z])
        univ_ib_cl[n] = np.nan
        univ_ib_cl[z, n] = univ_ib_cl_
        univ_ib_cl[n] = filt.fill1(univ_ib_cl[n].values, 0)
    # tick_cols_ = tick_cols[0:1]
    # Hard-coded universe and per-market start offsets (rows before siz_[ni]
    # are discarded); the first market was evidently already processed.
    tick_cols = ['SP500', 'SP400Mid', 'Nikkei225', 'R2000', 'FTSE100', 'DAX', 'SMI', 'CAC40',
                 'AEX', 'MIBFTSE', 'IBEX35', 'OMXS30']
    siz_ = [5330, 7115, 6757, 7349, 5324, 6785, 6794, 6199, 7232, 7936, 7180, 7295]
    # siz = [2784, 999, 1357, 765, 2790, 1329, 1320, 1915, 882, 178, 934, 819]
    tick_cols = tick_cols[1:]
    siz_ = siz_[1:]
    for k in _trn_names:
        # One output column per (price measure, filter); rows are horizons.
        dummy_cols = []
        for i in _prc_names:
            for j in _fil_names:
                dummy_cols.append(i+'_'+j)
        x_ = np.random.random(len(_hoz_names))*np.nan
        y_ = np.random.random(len(_hoz_names))*np.nan
        dummy_df = DataFrame({dummy_cols[0]: x_, dummy_cols[1]: y_})
        for i in range(2, 30):
            dummy_df[dummy_cols[i]] = x_
        _col = 0
        for i in _prc_names:
            # j = _fil_names[0]
            for j in _fil_names:
                # k = _trn_names[0]
                for mi, m in enumerate(_hoz_names):
                    # m = _hoz_names[0]
                    fn = univ_ib_eqidx_ext+i+'_'+j+'_'+k+'_'+m
                    univ_ib_sig = cr_sig_mr_sg.retrieve(fn)
                    # Pool lagged signal / return pairs across all markets.
                    ret_, sig_ = np.empty(0), np.empty(0)
                    for ni, n in enumerate(tick_cols):
                        # ni = 0
                        # n = tick_cols[ni]
                        z = list(np.where(univ_ib_gd[n].values.astype('int') == 1)[0])
                        z = [z_ for z_ in z if z_ > siz_[ni]]  # 5 is just a buffer
                        sig__ = univ_ib_sig[n].values[z]
                        ret__ = univ_ib_cl[n].values[z]
                        # Two-day lag so the signal strictly precedes the return.
                        sig__ = filt.lag(sig__, 2)
                        ret_ = np.hstack((ret_, ret__[2:]))
                        sig_ = np.hstack((sig_, sig__[2:]))
                    gidx = np.where(~np.isnan(ret_) & ~np.isnan(sig_))[0]
                    dummy_df[mi, dummy_cols[_col]] = 100*spearmanr(ret_[gidx], sig_[gidx]).correlation
                _col += 1
        dummy_df.show_all()
def get_equity_index_combo_returns():
    """Combine per-market equity-index return files into cross-market panels.

    For every market in EquityIndexFuturesSet, the 'Stats'/'Returns' table is
    split into single-field frames (Open/High/Low/Close/TradedValue/ExchOpen,
    each renamed to the market), merged across markets on 'Date', sorted,
    gap-filled (Close via cuff.fill; Open/High/Low via cuff.fill2 falling back
    to Close; traded value and exchange-open flag zero-filled via cuff.fill1),
    and stored as six 'EquityIndex_IBUniv_*' files.

    Returns
    -------
    None
    """
    # split up o/h/l/c, trd val into separate files
    # also store the multiplier
    tick_sym_all, tick_name_all = [], []
    for fut in EquityIndexFuturesSet:
        tick_sym_all.append(fut.cstick)
        tick_name_all.append(fut.crname)

    def _split_fields(_data, mkt):
        # Build six single-series frames (Date + market-named column) from one
        # market's returns table; TradedValue = AggVolume * Close.
        f_op = _data[['Date', 'Open']]
        f_hi = _data[['Date', 'High']]
        f_lo = _data[['Date', 'Low']]
        f_cl = _data[['Date', 'Close']]
        f_tv = _data[['Date', 'AggVolume', 'Close']]
        f_tv['TradedValue'] = f_tv['AggVolume']*f_tv['Close']
        f_tv = f_tv[['Date', 'TradedValue']]
        f_ex = _data[['Date']]
        f_ex['ExchOpen'] = 1
        f_op.rename({'Open': mkt})
        f_hi.rename({'High': mkt})
        f_lo.rename({'Low': mkt})
        f_cl.rename({'Close': mkt})
        f_tv.rename({'TradedValue': mkt})
        f_ex.rename({'ExchOpen': mkt})
        return f_op, f_hi, f_lo, f_cl, f_tv, f_ex

    for i in tick_name_all:
        # i = tick_name_all[0]
        _data = mkt_retrieve(i, 'Stats', 'Returns')
        # _data = cr_ret.retrieve(i)  # change this
        print(i)
        if i == tick_name_all[0]:
            (comb_data_op, comb_data_hi, comb_data_lo,
             comb_data_cl, comb_data_tv, comb_data_ex) = _split_fields(_data, i)
        else:
            new_op, new_hi, new_lo, new_cl, new_tv, new_ex = _split_fields(_data, i)
            # merge them
            comb_data_op = DataFrame.merge(comb_data_op, new_op, on='Date')
            comb_data_hi = DataFrame.merge(comb_data_hi, new_hi, on='Date')
            comb_data_lo = DataFrame.merge(comb_data_lo, new_lo, on='Date')
            comb_data_cl = DataFrame.merge(comb_data_cl, new_cl, on='Date')
            comb_data_tv = DataFrame.merge(comb_data_tv, new_tv, on='Date')
            comb_data_ex = DataFrame.merge(comb_data_ex, new_ex, on='Date')
    comb_data_op.sort('Date')
    comb_data_hi.sort('Date')
    comb_data_lo.sort('Date')
    comb_data_cl.sort('Date')
    comb_data_tv.sort('Date')
    comb_data_ex.sort('Date')
    tick_cols = comb_data_cl.tick_cols()
    for i in tick_cols:
        comb_data_cl[i] = cuff.fill(comb_data_cl[i].values)
        comb_data_op[i] = cuff.fill2(comb_data_op[i].values, comb_data_cl[i].values)
        comb_data_hi[i] = cuff.fill2(comb_data_hi[i].values, comb_data_cl[i].values)
        # BUG FIX: the second argument was comb_data_lo itself, so missing Low
        # values were never backfilled from Close the way Open/High are.
        comb_data_lo[i] = cuff.fill2(comb_data_lo[i].values, comb_data_cl[i].values)
        comb_data_tv[i] = cuff.fill1(comb_data_tv[i].values, 0)
        comb_data_ex[i] = cuff.fill1(comb_data_ex[i].values, 0)
    # save the data
    for _sfx, _frame in (('Open', comb_data_op), ('High', comb_data_hi),
                         ('Low', comb_data_lo), ('Close', comb_data_cl),
                         ('TrdVal', comb_data_tv), ('ExchOpen', comb_data_ex)):
        _frame['Date'] = _frame['Date'].astype('int64')
        cr_cret.store('EquityIndex_IBUniv_' + _sfx, _frame)
    return None
def get_history(self, cur_adj=True, trans_deriv=True):
    """Build the full continuous-futures history for this instrument.

    Steps (as implemented below):
      1. Build the continuous series for the current ticker (self.cstick).
      2. If an old ticker exists (self.cstickold), build its series too, find
         the first date on which the new contract's traded value exceeds the
         old one's, switch at the next rollover of the new series, ratio-adjust
         the old series's prices/volumes, and splice the two.
      3. If cur_adj and the instrument is not USD-denominated, merge in the
         currency series and convert — either with the daily-cash-settlement
         compounding adjustment (trans_deriv=True) or a plain spot conversion.

    Parameters
    ----------
    cur_adj : bool
        Convert to USD when self.curr != 'USD'.
    trans_deriv : bool
        Use the derivative (daily-settled) currency adjustment rather than a
        straight spot multiplication.

    Returns
    -------
    DataFrame with columns Contract, Date, OHLC, Volume, OpenInterest,
    AggVolume, AggOpenInterest, Front, LastTrdDate, ContractAtClose.
    """
    # NOTE(review): this condition reduces to (stdate == 0) or (cstickold != '');
    # i.e. hist_st_date is only passed when there is a start date but no old
    # ticker — presumably the old ticker covers the early period. Confirm.
    if (self.stdate == 0) or (self.stdate != 0 and self.cstickold != ''):
        x1 = get_cont_series_roll_basic(self.cstick, self.ndays, matur=self.fmatur)
    else:
        x1 = get_cont_series_roll_basic(self.cstick, self.ndays, matur=self.fmatur, hist_st_date=self.stdate)
    if self.cstickold != '':
        if self.stdate == 0:
            x2 = get_cont_series_roll_basic(self.cstickold, self.ndays, matur=self.fmatur)
        else:
            x2 = get_cont_series_roll_basic(self.cstickold, self.ndays, matur=self.fmatur, hist_st_date=self.stdate)
        # merge x1 and x2 based on traded volume
        x1['TradedValue'] = x1['Close'] * x1['Volume']
        # multold rescales the old ticker's notional to be comparable.
        x2['TradedValue'] = x2['Close'] * x2['Volume'] * self.multold
        # find the first maturity when the x1 contract is larger than the x2 contract
        x1t = x1[['Date', 'TradedValue', 'ContractAtClose']]
        x2t = x2[['Date', 'TradedValue', 'ContractAtClose']]
        # After the merge, _x columns come from x2 (old), _y from x1 (new).
        xt = DataFrame.merge(x2t, x1t, on='Date')
        switch_idx = [i for i, j in enumerate(zip(xt['TradedValue_x', list], xt['TradedValue_y', list])) if j[0] < j[1]][0]
        # switch_idx = switch_idx[0]
        xt = xt[switch_idx:, :]
        # now find the first rollover date of x1
        xtf = [i for i, j in enumerate(xt['ContractAtClose_y', list]) if j > 0][0]
        xtd_switch = xt[xtf, 'Date']
        # switch over at the end of this date
        x1 = x1[x1['Date'] >= xtd_switch, :]
        x2 = x2[x2['Date'] <= xtd_switch, :]
        # transform
        # Ratio-adjust the old series so it joins the new series continuously.
        xr = x1[0, 'Close']/x2[x2.shape[0]-1, 'Close']
        for i in ['Open', 'High', 'Low', 'Close']:
            x2[i] = x2[i]*xr
        for i in ['Volume', 'OpenInterest', 'AggVolume', 'AggOpenInterest']:
            x2[i] = x2[i]/xr
        # relabel the rollover date
        # Avoid a duplicate roll marker on the splice date.
        xrc = int(x1[0, 'ContractAtClose'])
        xro = [int(i) for i in x2['ContractAtClose', list] if i!=0]
        if xrc in xro:
            # remove xrc from xro
            xroi = [i for i, j in enumerate(x2['ContractAtClose', list]) if j==xrc][0]
            x2[xroi, 'ContractAtClose'] = int(0)
        # now merge x2 with x1 from the second row of 1
        x2.row_bind(x1[1:, :])
        del x1['TradedValue']
        x1 = x2.copy()
    # convert to USD if not converted
    if cur_adj and self.curr != 'USD':
        xc = _get_curr_to_usd_data(self.curr)
        xc.set_columns(['Date', 'Curr'])
        x2 = DataFrame.merge(x1, xc, on='Date')
        x2 = x2[~np.isnan(x2['Contract'].values), :]
        for i in ['Contract', 'Date', 'Front', 'LastTrdDate', 'ContractAtClose']:
            x2[i] = x2[i].astype('int64')
        # invert the currency
        x2['Curr'] = 1/x2['Curr']
        # replace currency by previous value
        x2['Curr'] = x2['Curr'].fillna(method='ffill')
        if trans_deriv:
            # may be some bug here (?)
            # assumes daily cash settlement
            cl_val = x2['Close'].values
            fx_val = x2['Curr'].values
            n = cl_val.shape[0]
            # Build a per-day multiplier, working backwards from the last day.
            fx_mult = np.empty(n)*np.nan
            fx_mult[n-1] = 1
            fx_mult[0:(n-1)] = fx_val[1:n]/fx_val[0:(n-1)]
            # fx_mult[0:(n-1)] = 1/(fx_val[0:(n-1)]+(1-fx_val[0:(n-1)])*cl_val[0:(n-1)]/cl_val[1:n]) --- bug
            fx_mult[0:(n - 1)] = 1 / (fx_mult[0:(n - 1)] + (1 - fx_mult[0:(n - 1)]) * cl_val[0:(n - 1)] / cl_val[1:n])
            # Compound the daily multipliers from the end of the series back.
            fx_mult = np.cumprod(fx_mult[::-1])[::-1]
            # get new values
            for i in ['Open', 'High', 'Low', 'Close']:
                x2[i+'_'] = x2[i].values*fx_mult
            # Volumes are rescaled to preserve USD traded value under the new prices.
            for i in ['Volume', 'OpenInterest', 'AggVolume', 'AggOpenInterest']:
                x2[i+'_'] = ((x2['Close'].values*x2[i].values)*fx_val)/x2['Close_'].values
            x2 = x2[['Contract', 'Date', 'Open_', 'High_', 'Low_', 'Close_', 'Volume_', 'OpenInterest_', 'AggVolume_', 'AggOpenInterest_', 'Front', 'LastTrdDate', 'ContractAtClose']]
            x2.set_columns(['Contract', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'OpenInterest', 'AggVolume', 'AggOpenInterest', 'Front', 'LastTrdDate', 'ContractAtClose'])
        else:
            # trans_spot
            # cl_val = x2['Close'].values
            # Plain spot conversion: multiply prices by the FX rate directly.
            fx_val = x2['Curr'].values
            for i in ['Open', 'High', 'Low', 'Close']:
                x2[i + '_'] = x2[i].values * fx_val
            for i in ['Volume', 'OpenInterest', 'AggVolume', 'AggOpenInterest']:
                x2[i + '_'] = ((x2['Close'].values * x2[i].values) * fx_val) / x2['Close_'].values
            x2 = x2[['Contract', 'Date', 'Open_', 'High_', 'Low_', 'Close_', 'Volume_', 'OpenInterest_', 'AggVolume_', 'AggOpenInterest_', 'Front', 'LastTrdDate', 'ContractAtClose']]
            x2.set_columns(['Contract', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'OpenInterest', 'AggVolume', 'AggOpenInterest', 'Front', 'LastTrdDate', 'ContractAtClose'])
        x1 = x2.copy()
    if 'TradedValue' in x1.columns:
        del x1['TradedValue']
    return x1
def get_cont_series_roll_basic(tick, n_days, n_months = 0, matur=1, eom=None, hist_st_date=None):
    """Build a back-adjusted continuous futures series for *tick*.

    Contracts are rolled *n_days* calendar days before the last trading date
    (optionally shifted back *n_months*, or to a month-end when *eom* is set).
    On each roll, the earlier history is multiplied by the price ratio at the
    splice so the series is continuous in price (volumes divided by the same
    ratio).  'Front' carries the contract rank, 'ContractAtClose' the incoming
    contract on roll days (0 otherwise).

    Parameters
    ----------
    tick : str           futures root retrieved from cr_ft_hs / cr_ft_ldt.
    n_days : int         days before last trade date to roll.
    n_months : int       extra months to roll early (0 = days only).
    matur : int          contract rank to track (1 = front contract).
    eom : any            if not None, roll n_days before the month-end that is
                         n_months before expiry.
    hist_st_date : int   optional yyyymmdd cutoff; earlier rows dropped.
    """
    # eom = uses n_days after doing a n_months month end, else it is n_months + n_days
    x = cr_ft_hs.retrieve(tick)
    # drop data when the AggregateVolume was less than 50 contracts
    if hist_st_date is not None:
        x = x[x['Date']>=hist_st_date, :]
    x['Contract'] = x['Contract'].astype('int64')
    x['Date'] = x['Date'].astype('int64')
    x1 = cr_ft_ldt.retrieve(tick)
    # for each date, retain only the two nearest contracts
    x.sort(['Date', 'Contract'])
    # _rank assigns each contract its nearness rank within every date group.
    y = x.apply(['Contract'], _rank, group_col=['Date'])
    x['TempCol'] = y.values
    # print(matur)
    # Keep the tracked maturity and the one after it (needed to roll into).
    x = x[x['TempCol']<=(matur+1), :]
    # merge with te last trading day
    x2 = DataFrame.merge(x, x1, on='Contract')
    x2 = x2[~np.isnan(x2['Close'].values), :]
    x2 = x2[[i for i, j in enumerate(x2['TempCol', list]) if ~np.isnan(j)], :]
    x2['Contract'] = x2['Contract'].astype('int64')
    x2['Date'] = x2['Date'].astype('int64')
    x2['LastTrdDate'] = x2['LastTrdDate'].values.astype('int64')
    # get the roll dates
    y = str_to_pd_dt([str(int(i)) for i in x2['LastTrdDate', list]])
    if n_months == 0:
        y = pd_dt_to_str([j-dt.timedelta(days=n_days) for j in y])
    else:
        if eom is None:
            y = pd_dt_to_str([j-dt.timedelta(days=n_days)-dt2.relativedelta(months=n_months) for j in y])
        else:
            y = pd_dt_to_str([last_day_of_month(j-dt2.relativedelta(months=n_months))-dt.timedelta(days=n_days) for j in y])
    y = [int(k) for k in y]
    # fix actual roll date to be a date that is among the list of dates
    # or, use calendar to get roll dates in the future
    yun = list(sorted(set(y)))
    yact = list(sorted(set(x2['Date', list])))
    yact_max = max(yact)
    yun_alt = []
    for i in yun:
        if (i <= yact_max) and (i not in yact):
            # Snap a historical roll date back to the nearest earlier trade date.
            yun_alt.append(max([j for j in yact if j<i]))
        else:
            if i>yact_max:
                # Future roll date: move weekend dates to the preceding Friday.
                idt = dt.datetime(i // 10000, (i % 10000) // 100, i % 100)
                if idt.isoweekday() in range(1,6):
                    yun_alt.append(i)
                else:
                    if idt.isoweekday() == 6:
                        idt = idt - dt.timedelta(days=1)
                    else:
                        # idt.isoweekday() == 7:
                        idt = idt - dt.timedelta(days=2)
                    yun_alt.append(idt.year*10000+idt.month*100+idt.day)
            else:
                yun_alt.append(i)
    # DataFrame(data={'yun': yun, 'yun_alt': yun_alt})
    # Rewrite every adjusted roll date back into the per-row list.
    for i, j in enumerate(yun):
        if j != yun_alt[i]:
            # print(j, yun_alt[i])
            y = [(k if k!= j else yun_alt[i]) for k in y]
    x2['RollDate'] = y
    # carry out the rollover
    x2.sort(['Date', 'Contract'])
    # for each date, retain only the earlier rollover date
    y2 = x2.apply(['RollDate'], min, group_col=['Date'])
    x2['RollDate2'] = y2.values
    if matur > 1:
        x2 = x2[x2['TempCol'] >= matur, :]
    # make a list of all roll dates
    y3 = list(sorted(set(x2['RollDate2', list])))
    # check if there are atleast two rows on each roll dates
    # otherwise reduce the data set
    y3n = []
    for i in range(0, len(y3)):
        xt = x2[x2['Date'] == y3[i], :]
        y3n.append(xt.shape[0])
    if y3n[0] == 2:
        y3nt = 0
    else:
        # Start from just before the first roll date that has both contracts.
        y3nt = [i for i, j in enumerate(y3n) if j==2]
        y3nt = y3nt[0]-1
    y3u = y3[y3nt:len(y3)]
    y3uc = x2['RollDate2', list]
    y3uc = [i for i, j in enumerate(y3uc) if j in y3u]
    x2 = x2[y3uc, :]
    # now on the roll dates, retain two contracts
    # and between the roll dates, retain one contract
    # cont_f/cont_s: outgoing (first) and incoming (second) contract per roll date.
    cont_f = []
    cont_s = []
    for i in y3u:
        # i = y3u[76]
        x2t = x2[x2['Date'] == i, :]
        if x2t.shape[0] == 0:
            cont_f.append(0)
            cont_s.append(0)
        elif (x2t.shape[0] == 1) and (i != y3u[len(y3u)-1]):
            # newly added condition
            x2 = x2[x2['Date'] >= i, :]
            cont_f.append(0)
            cont_s.append(x2t['Contract', list][0])
        elif (x2t.shape[0] == 1) and (i == y3u[len(y3u)-1]):
            # newly added statement (to handle dead futures)
            cont_f.append(x2t['Contract', list][0])
            cont_s.append(0)
        else:
            x2ti = x2t['Contract', list]
            cont_f.append(x2ti[0])
            cont_s.append(x2ti[1])
    cont_def = DataFrame(data={'ARollover': y3u, 'First': cont_f, 'Second': cont_s})
    # drop if both 'First' and 'Second' are zero
    while (cont_def[0, 'First'] == 0) and (cont_def[0, 'Second'] == 0):
        cont_def = cont_def[1:,:]
    if cont_def[0, 'Second'] != cont_def[1, 'First']:
        # this happens if a new contract started midway at the beg
        cont_def[0, 'Second'] = cont_def[1, 'First']
    if cont_def[cont_def.shape[0]-1,'First'] == 0:
        # this happens if today is not a rollover date
        cont_def = cont_def[0:(cont_def.shape[0]-2), :]
    if cont_def[0, 'First'] == 0:
        cont_def = cont_def[1:(cont_def.shape[0]-1), :]
    # now built out the concatenated time-series
    # on the rollover date, sell the first, and buy the second
    x3 = x2[(x2['Contract'] == cont_def[0,'First']).values & (x2['Date'] <= cont_def[0, 'ARollover']), :]
    x3['Contract2'] = 0
    x3['Contract2'] = x3['Contract2'].astype('int64')
    for i in range(0, cont_def.shape[0]-1):
        # i = cont_def.shape[0]-2
        x3t = x2[(x2['Contract'] == cont_def[i, 'Second']).values & (x2['Date'] >= cont_def[i, 'ARollover']).values & (x2['Date'] <= cont_def[i+1, 'ARollover']).values, :]
        # merge x3 and x3t
        # Back-adjust everything accumulated so far by the splice price ratio.
        x3r = x3t.head(1)['Close', list][0]/x3.tail(1)['Close', list][0]
        for j in ['Open', 'High', 'Low', 'Close']:
            x3[j] = x3[j]*x3r
        for j in ['Volume', 'OpenInterest', 'AggVolume', 'AggOpenInterest']:
            x3[j] = x3[j]/x3r
        x3t['Contract2'] = 0
        x3t['Contract2'] = x3t['Contract2'].astype('int64')
        # Mark the incoming contract on the roll row of the accumulated series.
        x3[x3.shape[0]-1, 'Contract2'] = x3t[0, 'Contract']
        # drop first row of x3t
        x3t = x3t[1:, :]
        # append the two
        x3.row_bind(x3t)
    # add the last contract
    if cont_def[cont_def.shape[0]-1, 'Second'] != 0:
        # live contract
        nc = cont_def.shape[0]-1
        x3t = x2[(x2['Contract'] == cont_def[nc, 'Second']).values & (x2['Date'] >= cont_def[nc, 'ARollover']).values, :]
        x3r = x3t.head(1)['Close', list][0] / x3.tail(1)['Close', list][0]
        for j in ['Open', 'High', 'Low', 'Close']:
            x3[j] = x3[j] * x3r
        for j in ['Volume', 'OpenInterest', 'AggVolume', 'AggOpenInterest']:
            x3[j] = x3[j] / x3r
        x3t['Contract2'] = 0
        x3t['Contract2'] = x3t['Contract2'].astype('int64')
        x3[x3.shape[0] - 1, 'Contract2'] = x3t[0, 'Contract']
        # drop first row of x3t
        x3t = x3t[1:, :]
        # append the two
        x3.row_bind(x3t)
    x3['Contract2'] = x3['Contract2'].astype('int64')
    # x3[[i for i, j in enumerate(zip(x3['RollDate', list], x3['RollDate2', list])) if j[0]!=j[1]], :]
    del x3['RollDate']
    del x3['RollDate2']
    names = [(i if i != 'TempCol' else 'Front') for i in list(x3.columns)]
    x3.set_columns(names)
    names = [(i if i != 'Contract2' else 'ContractAtClose') for i in list(x3.columns)]
    x3.set_columns(names)
    return x3