def save_dataview():
    """Build the research DataView and write it to ``dataview_folder``.

    A ``RemoteDataService`` is initialised from ``data_config`` and used to
    prepare a ``DataView`` over the ``000300.SH`` universe with
    ``start_date`` 20150101 and ``end_date`` 20171001.  Three helper fields
    are added before saving:

    * ``suspended`` -- True where ``trade_status`` equals '停牌'.
    * ``not_index_member`` -- the formula ``!index_member``.
    * ``limit_reached`` -- absolute change of ``open`` relative to
      ``Delay(close, 1)`` greater than 0.095.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    view = DataView()
    view_config = dict(
        start_date=20150101,
        end_date=20171001,
        universe='000300.SH',
        fields='volume,turnover,float_mv,pb,total_mv',
        freq=1,
    )
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    # Suspension flag, derived directly from the trade_status time series.
    is_suspended = view.get_ts('trade_status') == '停牌'
    view.append_df(is_suspended, 'suspended', is_quarterly=False)

    # Formula-based fields, registered in the same order as before.
    formula_fields = (
        ('not_index_member', '!index_member'),
        ('limit_reached',
         'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095'),
    )
    for field_name, expression in formula_fields:
        view.add_formula(field_name, expression, is_quarterly=False)

    view.save_dataview(dataview_folder)
def save_dataview():
    """Prepare a DataView with trading-mask fields and save it.

    The view covers the ``000300.SH`` universe with ``start_date`` 20160101
    and ``end_date`` 20171001, requesting ``volume`` and ``turnover``.  The
    fields added before saving to ``dataview_folder`` are:

    * ``limit_reached`` -- absolute change of ``open`` relative to
      ``Delay(close, 1)`` greater than 0.095.
    * ``mask_limit_reached`` -- ``limit_reached > 0``.
    * ``mask_index_member`` -- ``!(index_member > 0)``.
    * ``mask_sus`` -- True where ``trade_status`` equals u'停牌'.
    """
    remote = RemoteDataService()
    remote.init_from_config(data_config)

    view = DataView()
    settings = dict(
        start_date=20160101,
        end_date=20171001,
        universe='000300.SH',
        fields='volume,turnover',
        freq=1,
    )
    view.init_from_config(settings, remote)
    view.prepare_data()

    # limit_reached is kept as its own field so limit hits are easy to
    # inspect; the mask_* fields are derived from it and from index_member.
    derived_fields = [
        ('limit_reached',
         'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095'),
        ('mask_limit_reached', 'limit_reached > 0'),
        ('mask_index_member', '!(index_member > 0)'),
    ]
    for field_name, expression in derived_fields:
        view.add_formula(field_name, expression, is_quarterly=False)

    # Suspension mask taken straight from the trade_status time series.
    view.append_df(view.get_ts('trade_status') == u'停牌', 'mask_sus',
                   is_quarterly=False)

    # NOTE: a 'size' formula was stubbed out here with an empty expression
    # and was never enabled.
    view.save_dataview(dataview_folder)
def save_dataview():
    """Create the masked DataView for ``000300.SH`` and persist it.

    Data is requested through a ``RemoteDataService`` configured from
    ``data_config`` (``start_date`` 20160101, ``end_date`` 20171001, fields
    ``volume,turnover``).  ``limit_reached``, ``mask_limit_reached``,
    ``mask_index_member`` and ``mask_sus`` are added, then the view is saved
    to ``dataview_folder``.
    """
    service = RemoteDataService()
    service.init_from_config(data_config)

    dataview = DataView()
    dataview.init_from_config(
        {
            'start_date': 20160101,
            'end_date': 20171001,
            'universe': '000300.SH',
            'fields': 'volume,turnover',
            'freq': 1,
        },
        service,
    )
    dataview.prepare_data()

    def _add_daily_formula(field_name, expression):
        # All formula fields in this view are non-quarterly.
        dataview.add_formula(field_name, expression, is_quarterly=False)

    # Kept as a separate field to make limit hits easy to inspect.
    _add_daily_formula(
        'limit_reached',
        'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095')
    _add_daily_formula('mask_limit_reached', 'limit_reached > 0')
    _add_daily_formula('mask_index_member', '!(index_member > 0)')

    # True wherever the trade status reads u'停牌' (the 'mask_sus' field).
    status = dataview.get_ts('trade_status')
    suspended_mask = status == u'停牌'
    dataview.append_df(suspended_mask, 'mask_sus', is_quarterly=False)

    # NOTE: a 'size' formula was stubbed out here with an empty expression
    # and was never enabled.
    dataview.save_dataview(dataview_folder)
def store_ic_weight():
    """
    Calculate IC weight and save it to file.

    Steps, in order:

    1. Load the DataView stored under ``dataview_dir_path``.
    2. For every date in ``dv.dates``, take the cross-sectional snapshot,
       drop rows with a missing value in any of the listed factors and
       orthogonalise the remainder with ``Schmidt``.
    3. Append each orthogonalised factor to the DataView as
       ``<factor>_adj`` and save the DataView back to ``dataview_dir_path``.
    4. Write the list of ``*_adj`` field names to ``custom_data_path``.
    5. Compute the IC weight with ``get_ic_weight`` and store it under the
       key ``'ic_weight'`` in the HDF5 file at ``ic_weight_hd5_path``.
    """
    dv = DataView()
    dv.load_dataview(folder_path=dataview_dir_path)

    factor_names = ['TO', 'BP', 'REVS20', 'float_mv_factor']
    adj_names = [name + '_adj' for name in factor_names]

    # factor name -> {trade_date: Series of orthogonalised values}
    orth_by_factor = {name: {} for name in factor_names}

    # add the orthogonalized factor to dataview
    for trade_date in dv.dates:
        snapshot = dv.get_snapshot(trade_date)
        factor_panel = snapshot[factor_names].dropna()
        if len(factor_panel) == 0:
            # Nothing to orthogonalise on this date.
            continue

        orth_panel = Schmidt(factor_panel)
        orth_panel.columns = adj_names
        # Align to the full snapshot index so symbols dropped by dropna()
        # come back as NaN (same result as a left merge on the index), but
        # without column-suffix clashes when the loaded DataView already
        # contains ``*_adj`` fields from an earlier run.
        orth_panel = orth_panel.reindex(snapshot.index)

        for name, adj_name in zip(factor_names, adj_names):
            # Store the orthogonalised column.  The previous code stored the
            # raw ``snapshot[name]`` here, so the ``*_adj`` fields were just
            # copies of the un-orthogonalised factors.
            orth_by_factor[name][trade_date] = orth_panel[adj_name]

    for name, adj_name in zip(factor_names, adj_names):
        dv.append_df(pd.DataFrame(orth_by_factor[name]).T,
                     field_name=adj_name, is_quarterly=False)
    dv.save_dataview(dataview_dir_path)

    jutil.save_json(adj_names, custom_data_path)

    w = get_ic_weight(dv)

    # The context manager closes the store even if the write raises.
    with pd.HDFStore(ic_weight_hd5_path) as store:
        store['ic_weight'] = w
def save_dataview():
    """Prepare the ``000300.SH`` DataView and save it to ``dataview_folder``.

    Requested fields are ``volume,turnover,float_mv,pb,total_mv`` with
    ``start_date`` 20150101 and ``end_date`` 20171001.  On top of the
    prepared data the function adds ``suspended`` (``trade_status`` equal to
    '停牌'), ``not_index_member`` (``!index_member``) and ``limit_reached``
    (absolute change of ``open`` versus ``Delay(close, 1)`` above 0.095).
    """
    source = RemoteDataService()
    source.init_from_config(data_config)

    dataview = DataView()
    dataview.init_from_config(
        {
            'start_date': 20150101,
            'end_date': 20171001,
            'universe': '000300.SH',
            'fields': 'volume,turnover,float_mv,pb,total_mv',
            'freq': 1,
        },
        source,
    )
    dataview.prepare_data()

    # Boolean frame marking the entries whose trade status reads '停牌'.
    status = dataview.get_ts('trade_status')
    dataview.append_df(status == '停牌', 'suspended', is_quarterly=False)

    daily = {'is_quarterly': False}
    dataview.add_formula('not_index_member', '!index_member', **daily)
    dataview.add_formula(
        'limit_reached',
        'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
        **daily)

    dataview.save_dataview(dataview_folder)
def test_DIY_signal():
    """Exercise the ways of adding user-defined signals to a DataView.

    Loads the DataView stored in ``dataview_folder`` and then:

    * adds a ``momentum`` field through ``add_formula``;
    * adds a ``slope`` field built with talib through ``append_df``;
    * builds 5- and 10-period moving averages on suspension-masked prices
      and defines a ``Cross`` event from them.
    """
    # --------------------------------------------------------------------------------
    # Step.1 load dataview
    dv = DataView()
    dv.load_dataview(dataview_folder)

    # Method 1: add_formula -- define a factor with an expression over
    # fields that already exist in the dataview.
    dv.add_formula("momentum", "Return(close_adj, 20)", is_quarterly=False, add_data=True)

    # Method 2: append_df -- build a factor table (pandas.DataFrame) and add
    # it to the dataview directly.
    import pandas as pd
    import talib as ta

    close = dv.get_ts("close_adj").dropna(how='all', axis=1)
    # DataFrame.items() replaces iteritems(), which was removed in pandas 2.0.
    slope_df = pd.DataFrame(
        {
            sec_symbol: -ta.LINEARREG_SLOPE(value.values, 10)
            for sec_symbol, value in close.items()
        },
        index=close.index)
    dv.append_df(slope_df, 'slope')
    # Read the field back to check that it can be retrieved after appending.
    dv.get_ts("slope")

    # Define the event
    from jaqs_fxdayu.research.signaldigger import process

    Open = dv.get_ts("open_adj")
    High = dv.get_ts("high_adj")
    Low = dv.get_ts("low_adj")
    Close = dv.get_ts("close_adj")
    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status != 1

    # Mask out data from suspension periods before computing the indicators.
    open_masked = process._mask_df(Open, mask=mask_sus)
    high_masked = process._mask_df(High, mask=mask_sus)
    low_masked = process._mask_df(Low, mask=mask_sus)
    close_masked = process._mask_df(Close, mask=mask_sus)

    from jaqs_fxdayu.data import signal_function_mod as sfm

    # MA5 must use a 5-period window.  It was previously computed with
    # timeperiod=10, which made it identical to MA10 so that the Cross
    # expression below could never be true.
    MA5 = sfm.ta(ta_method='MA',
                 ta_column=0,
                 Open=open_masked,
                 High=high_masked,
                 Low=low_masked,
                 Close=close_masked,
                 Volume=None,
                 timeperiod=5)
    MA10 = sfm.ta('MA', Close=close_masked, timeperiod=10)
    dv.append_df(MA5, 'MA5')
    dv.append_df(MA10, 'MA10')

    # Cross: MA5 is at or above MA10 now and the delayed (by 1) value of
    # ``MA5 < MA10`` was true.
    dv.add_formula("Cross", "(MA5>=MA10)&&(Delay(MA5<MA10, 1))", is_quarterly=False, add_data=True)
if X.isnull().sum() != X.shape[0]: X = sm.add_constant(X) model = OLS(Y, X, missing='drop') results = model.fit() res = results.resid.iloc[-1] new_dd.iloc[-1] = res return new_dd else: return T return close.apply(reg2, axis=0) # In[4]: dv.append_df(GetResidual(), 'R') dv.append_df(alpha32_.run_formula(dv), 'alpha32_') dv.append_df(alpha42_.run_formula(dv), 'alpha42_') dv.append_df(alpha56_.run_formula(dv), 'alpha56_') dv.append_df(alpha62_.run_formula(dv), 'alpha62_') dv.append_df(alpha64_.run_formula(dv), 'alpha64_') dv.append_df(alpha194.run_formula(dv), 'alpha194') dv.append_df(alpha195.run_formula(dv), 'alpha195') dv.append_df(alpha197.run_formula(dv), 'alpha197') dv.append_df(Beta3.run_formula(dv), 'Beta3') # In[6]: id_zz500 = dp.daily_index_cons(api, "000300.SH", start, end) id_hs300 = dp.daily_index_cons(api, "000905.SH", start, end)
# Factor construction

# gainvariance120
def cal_positive(df):
    """Return ``df[df > 0]``, i.e. *df* masked by its strictly positive entries."""
    return df[df > 0]


# Period-over-period change of 'close', restricted to positive values.
pct_return = cal_positive(dv.get_ts('close').pct_change())

# Per column: drop the missing observations, take the squared rolling standard
# deviation over 120 remaining observations, then realign to the original
# index and forward-fill the gaps.
# ``items()`` / ``ffill()`` replace ``iteritems()`` (removed in pandas 2.0)
# and ``fillna(method='ffill')`` (deprecated); the results are the same.
temp = pd.DataFrame(
    {
        name: value.dropna().rolling(120).std() ** 2
        for name, value in pct_return.items()
    },
    index=pct_return.index).ffill()
gainvariance120 = dv.append_df(temp, 'gainvariance120')


# alpha46
def mean(df, day):
    """Return the rolling mean of *df* over a trailing window of *day* rows."""
    return df.rolling(window=day, center=False).mean()


# ``mean`` is passed via register_funcs so the formula string can call it.
alpha46 = dv.add_formula(
    'alpha46',
    "(mean(close,3)+mean(close,6)+mean(close,12)+mean(close,24))/(4*close)",
    is_quarterly=False,
    add_data=True,
    register_funcs={"mean": mean})
# Map the codes in the 'sw1' field to their Chinese industry names.
sw1 = dv.get_ts('sw1')
dict_classify = {
    '480000': '银行',
    '430000': '房地产',
    '460000': '休闲服务',
    '640000': '机械设备',
    '240000': '有色金属',
    '510000': '综合',
    '410000': '公用事业',
    '450000': '商业贸易',
    '730000': '通信',
    '330000': '家用电器',
    '720000': '传媒',
    '630000': '电气设备',
    '270000': '电子',
    '490000': '非银金融',
    '370000': '医药生物',
    '710000': '计算机',
    '280000': '汽车',
    '340000': '食品饮料',
    '220000': '化工',
    '210000': '采掘',
    '230000': '钢铁',
    '650000': '国防军工',
    '110000': '农林牧渔',
    '420000': '交通运输',
    '620000': '建筑装饰',
    '350000': '纺织服装',
    '610000': '建筑材料',
    '360000': '轻工制造',
}
sw1_name = sw1.replace(dict_classify)


# In[76]:

# Formula-based factors.
pm = dv.add_formula('pm', 'tot_profit/float_mv', is_quarterly=False, add_data=True)
ETOP = dv.add_formula('ETOP', 'tot_profit/total_mv', is_quarterly=False, add_data=True)
roa = dv.add_formula('roa', 'roa', is_quarterly=True, add_data=True)
roe = dv.add_formula('roe', 'roe', is_quarterly=True, add_data=True)

import alpha32_
import alpha194
import alpha195
import alpha42_
import alpha62_
import alpha64_
import alpha197
import alpha211
import alpha56_

# Module-based factors.  alpha194/195/197/211 are imported without a trailing
# underscore, and factor_lis below expects the same field names; the calls
# previously referenced undefined ``alpha194_``-style names and registered
# ``alpha194_``-style fields.
dv.append_df(alpha32_.run_formula(dv), 'alpha32_')
dv.append_df(alpha194.run_formula(dv), 'alpha194')
dv.append_df(alpha195.run_formula(dv), 'alpha195')
dv.append_df(alpha42_.run_formula(dv), 'alpha42_')
dv.append_df(alpha62_.run_formula(dv), 'alpha62_')
dv.append_df(alpha64_.run_formula(dv), 'alpha64_')
dv.append_df(alpha197.run_formula(dv), 'alpha197')
dv.append_df(alpha211.run_formula(dv), 'alpha211')
dv.append_df(alpha56_.run_formula(dv), 'alpha56_')

factor_lis = ['alpha32_', 'alpha42_', 'alpha56_', 'alpha62_', 'alpha64_',
              'alpha194', 'alpha195', 'alpha197', 'alpha211',
              'pb', 'pe', 'roa', 'roe', 'pm', 'ETOP']

# Every factor must be present in the dataview before it is collected.
for each in factor_lis:
    assert each in dv.fields, each

factors = {name: dv.get_ts(name) for name in factor_lis}