def test_save_dataview():
    """Prepare a DataView for universe '000300.SH' and save it.

    After the base fields are prepared, three ``GroupQuantile`` formula
    fields (``rank_mv``, ``rank_pb``, ``rank_pe``) are added and the view is
    written to ``dataview_dir_path``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config()

    view = DataView()
    config = {
        'start_date': 20170101,
        'end_date': 20171001,
        'universe': '000300.SH',
        'fields': 'float_mv,pb,pe_ttm,sw2',
        'freq': 1,
    }
    view.init_from_config(config, data_service)
    view.prepare_data()

    # (field name, formula) pairs, registered in this order.
    ranked_factors = (
        ('rank_mv', 'GroupQuantile(-float_mv, sw2, 10)'),
        ('rank_pb', 'GroupQuantile(If(pb >= 0.2, pb, 100), sw2, 10)'),
        ('rank_pe', 'GroupQuantile(If(pe_ttm >= 3, pe_ttm, 9999.0), sw2, 10)'),
    )
    for field_name, formula_text in ranked_factors:
        view.add_formula(field_name, formula_text, is_quarterly=False)

    view.save_dataview(folder_path=dataview_dir_path)
def test_save_dataview():
    """Prepare a DataView for universe '000905.SH' and save it.

    Six boolean condition fields and one rank field are added via
    ``add_formula`` before the view is written to ``dataview_dir_path``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config()

    view = DataView()
    config = {
        'start_date': 20150101,
        'end_date': 20170930,
        'universe': '000905.SH',
        'fields': ('tot_cur_assets,tot_cur_liab,inventories,pre_pay,deferred_exp,'
                   'eps_basic,ebit,pe,pb,float_mv,sw1'),
        'freq': 1,
    }
    view.init_from_config(config, data_service)
    view.prepare_data()

    # (field name, formula, is_quarterly) triples, registered in this order.
    conditions = (
        ('pe_condition', 'pe < 30', False),
        ('pb_condition', 'pb < 3', False),
        ('eps_condition', 'Return(eps_basic, 4) > 0', True),
        ('ebit_condition', 'Return(ebit, 4) > 0', True),
        ('current_condition', 'tot_cur_assets/tot_cur_liab > 2', True),
        ('quick_condition',
         '(tot_cur_assets - inventories - pre_pay - deferred_exp)/tot_cur_liab > 1',
         True),
        ('mv_rank', 'Rank(float_mv)', False),
    )
    for field_name, formula_text, quarterly in conditions:
        view.add_formula(field_name, formula_text, is_quarterly=quarterly)

    view.save_dataview(folder_path=dataview_dir_path)
def save_dataview():
    """Prepare a DataView for universe '000300.SH' and save it.

    Adds the formula fields ``limit_reached``, ``random`` and ``momentum``
    and writes the view to ``dataview_folder``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config()

    view = DataView()
    config = {
        'start_date': 20140101,
        'end_date': 20171001,
        'universe': '000300.SH',
        'fields': 'volume,turnover,float_mv,pb,total_mv',
        'freq': 1,
    }
    view.init_from_config(config, data_service)
    view.prepare_data()

    extra_fields = (
        # for convenience to check limit reachers
        ('limit_reached',
         'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095'),
        ('random', 'StdDev(volume, 20)'),
        ('momentum', 'Return(close_adj, 20)'),
        # A 'size' field was sketched here with an empty formula and is
        # not registered.
    )
    for field_name, formula_text in extra_fields:
        view.add_formula(field_name, formula_text, is_quarterly=False)

    view.save_dataview(dataview_folder)
def test_remote_data_service_components():
    """Check index-component queries for '000300.SH'.

    ``get_index_comp_df`` is expected to return a frame of shape (814, 430)
    and ``get_index_comp`` a sequence of length 430 for the same date range.
    """
    service = RemoteDataService()
    # Both queries share exactly the same arguments.
    query = dict(index='000300.SH', start_date=20140101, end_date=20170505)

    comp_frame = service.get_index_comp_df(**query)
    assert comp_frame.shape == (814, 430)

    comp_symbols = service.get_index_comp(**query)
    assert len(comp_symbols) == 430
def test_write():
    """Prepare a three-symbol DataView, check its shape and save it.

    The prepared ``data_d`` frame is expected to have shape (281, 48) and
    ``dates`` shape (281,); the view is then written to ``daily_path``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config()

    view = DataView()
    symbols = '600030.SH,000063.SZ,000001.SZ'
    config = {
        'start_date': 20160601,
        'end_date': 20170601,
        'symbol': symbols,
        'fields': 'open,close,high,low,volume,pb,net_assets,pcf_ncf',
        'freq': 1,
    }
    view.init_from_config(config, data_api=data_service)
    view.prepare_data()

    assert view.data_d.shape == (281, 48)
    assert view.dates.shape == (281, )

    # TODO: warning text recorded here (presumably emitted by the save
    # below -- confirm):
    #   PerformanceWarning: your performance may suffer as PyTables will
    #   pickle object types that it cannot map directly to c-types
    #   [inferred_type->mixed,key->block1_values]
    #   [items->[('000001.SZ', 'int_income'),
    #            ('000001.SZ', 'less_handling_chrg_comm_exp'),
    #            ('000001.SZ', 'net_int_income'),
    #            ('000001.SZ', 'oper_exp'),
    #            ('000001.SZ', 'symbol'),
    #            ('000063.SZ', 'int_income'),
    #            ('000063.SZ', 'less_handling_chrg_comm_exp'),
    #            ('000063.SZ', 'net_int_income'),
    #            ('000063.SZ', 'oper_exp'),
    #            ('000063.SZ', 'symbol'),
    #            ('600030.SH', 'int_income'),
    #            ('600030.SH', 'less_handling_chrg_comm_exp'),
    #            ('600030.SH', 'net_int_income'),
    #            ('600030.SH', 'oper_exp'),
    #            ('600030.SH', 'symbol')]]
    view.save_dataview(folder_path=daily_path)
def save_dataview():
    """Prepare a DataView for universe '000905.SH' and save it.

    Five formula fields (``TO``, ``BP``, ``REVS20``, ``float_mv_factor``,
    ``NextRet``) are added before writing to ``dataview_dir_path``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config()

    view = DataView()
    config = {
        'start_date': 20150101,
        'end_date': 20170930,
        'universe': '000905.SH',
        'fields': 'turnover,float_mv,close_adj,pe,pb',
        'freq': 1,
    }
    view.init_from_config(config, data_service)
    view.prepare_data()

    # (field name, formula) pairs, registered in this order.
    factor_table = (
        ('TO', 'Cutoff(Standardize(turnover / 10000 / float_mv), 2)'),
        ('BP', 'Cutoff(Standardize(1/pb), 2)'),
        ('REVS20', 'Cutoff(Standardize(Return(close_adj, 20)), 2)'),
        ('float_mv_factor', 'Cutoff(Standardize(Log(float_mv)), 2)'),
        ('NextRet', 'Delay(Return(close_adj, 1), -1)'),
    )
    for field_name, formula_text in factor_table:
        view.add_formula(field_name, formula_text, is_quarterly=False)

    view.save_dataview(folder_path=dataview_dir_path)
def test_remote_data_service_inst_info():
    """Check instrument info returned for '000001.SZ'.

    Row 0 of the result is expected to have multiplier 1, a pricetick
    within 1e-2 of 0.01 and a buylot of 100.
    """
    service = RemoteDataService()
    wanted_fields = 'status,selllot,buylot,pricetick,multiplier,product'
    info, _msg = service.query_inst_info('000001.SZ', fields=wanted_fields)

    assert info.loc[0, 'multiplier'] == 1
    tick_error = abs(info.loc[0, 'pricetick'] - 0.01)
    assert tick_error < 1e-2
    assert info.loc[0, 'buylot'] == 100
def test_remote_data_service_industry():
    """Exercise ``align`` on raw industry data of '000300.SH' components.

    The raw industry records are grouped per symbol, turned into an
    announcement-date frame and a value frame (one column per symbol) and
    aligned against the trade dates; ``align`` is then run per symbol for the
    first ten groups and the results are concatenated column-wise.
    """
    import pandas as pd
    from jaqs.data.align import align
    from jaqs.data.dataview import DataView

    ds = RemoteDataService()
    arr = ds.get_index_comp(index='000300.SH', start_date=20130101, end_date=20170505)
    df = ds.get_industry_raw(symbol=','.join(arr), type_='ZZ')
    df = df.astype(dtype={'in_date': int})

    # ``items()`` instead of the Python-2-only ``viewitems()``; the loop
    # variable gets its own name so it no longer shadows ``df``.
    dic_sec = DataView._group_df_to_dict(df, by='symbol')
    dic_sec = {sec: sec_df.reset_index() for sec, sec_df in dic_sec.items()}

    df_ann = pd.concat([sec_df.loc[:, 'in_date'].rename(sec)
                        for sec, sec_df in dic_sec.items()], axis=1)
    df_value = pd.concat([sec_df.loc[:, 'industry1_code'].rename(sec)
                          for sec, sec_df in dic_sec.items()], axis=1)

    dates_arr = ds.get_trade_date(20140101, 20170505)
    # Result is discarded; the call only checks that the whole-frame
    # alignment runs.
    align(df_value, df_ann, dates_arr)

    def align_single_df(df_one_sec):
        """Align one symbol's industry code on its own in_date column."""
        one_value = df_one_sec.loc[:, ['industry1_code']]
        one_ann = df_one_sec.loc[:, ['in_date']]
        return align(one_value, one_ann, dates_arr)

    # A dict view cannot be sliced on Python 3, so materialise it first.
    first_ten = list(dic_sec.values())[:10]
    res_list = [align_single_df(sec_df) for sec_df in first_ten]
    res = pd.concat(res_list, axis=1)
def save_dataview():
    """Prepare a DataView for universe '000300.SH' and save it.

    Adds the daily ``new_high`` formula and the quarterly
    ``total_profit_growth`` formula, then writes to ``dataview_dir_path``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config()

    # 'pb,net_assets,' is deliberately left out of the requested fields.
    requested_fields = ('open,high,low,close,vwap,volume,turnover,sw1,'
                        + 'eps_basic,total_mv,tot_profit,int_income')

    view = DataView()
    config = {
        'start_date': 20170101,
        'end_date': 20171030,
        'universe': '000300.SH',
        'fields': requested_fields,
        'freq': 1,
    }
    view.init_from_config(config, data_service)
    view.prepare_data()

    # 20 days new high
    view.add_formula('new_high',
                     'close >= Delay(Ts_Max(close, 20), 1)',
                     is_quarterly=False)
    view.add_formula('total_profit_growth',
                     formula='Return(tot_profit, 4)',
                     is_quarterly=True)

    view.save_dataview(folder_path=dataview_dir_path)
def __init__(self, inst_type="", symbol="", data_api=None):
    """Set up the data API, an empty instrument map, and load instruments.

    Args:
        inst_type: forwarded to ``load_instruments``.
        symbol: forwarded to ``load_instruments``.
        data_api: data service to use; when ``None`` a new
            ``RemoteDataService`` is created.
    """
    # Fall back to a fresh RemoteDataService when the caller supplies none.
    self.data_api = RemoteDataService() if data_api is None else data_api
    self.inst_map = {}
    self.load_instruments(inst_type=inst_type, symbol=symbol)
def test_remote_data_service_fin_indicator():
    """Run a ``lb.finIndicator`` query for '000008.SZ'.

    No assertion is made on the result; the test only checks that the query
    call completes.
    """
    service = RemoteDataService()
    target = '000008.SZ'
    symbol_filter = service._dic2url({'symbol': target})
    df_raw, msg = service.query("lb.finIndicator",
                                fields="",
                                filter=symbol_filter,
                                orderby="symbol")
def test_remote_data_service_daily_quited():
    """Check the daily query for '600832.SH'.

    The call is expected to return the message '0,' and a frame of shape
    (175, 13).
    """
    service = RemoteDataService()

    # test daily
    request = dict(fields="",
                   start_date=20140828,
                   end_date=20170831,
                   adjust_mode=None)
    quotes, status = service.daily('600832.SH', **request)

    assert status == '0,'
    assert quotes.shape == (175, 13)
def analyze():
    """Run an ``EventAnalyzer`` over the results in ``result_dir_path``."""
    data_service = RemoteDataService()
    data_service.init_from_config()

    analyzer = ana.EventAnalyzer()
    analyzer.initialize(data_server_=data_service, file_folder=result_dir_path)
    # An empty selection list is passed for ``selected_sec``.
    analyzer.do_analyze(result_dir=result_dir_path, selected_sec=[])
def test_remote_data_service_adj_factor():
    """Check daily adjust factors for the components of '000300.SH'.

    Two known values of '300024.SZ' are compared within 1e-3 and the result
    is required to contain no nulls.
    """
    service = RemoteDataService()
    components = service.get_index_comp(index='000300.SH',
                                        start_date=20130101,
                                        end_date=20170505)
    factors = service.get_adj_factor_daily(','.join(components),
                                           start_date=20130101,
                                           end_date=20170101,
                                           div=False)

    # (trade date, expected factor) for '300024.SZ'.
    expected_points = ((20160408, 10.735), (20160412, 23.658))
    for trade_date, expected in expected_points:
        assert abs(factors.loc[trade_date, '300024.SZ'] - expected) < 1e-3

    null_count = factors.isnull().sum().sum()
    assert null_count == 0
def test_add_field():
    """Check that adding 'total_share' adds one column per security.

    The view is loaded from ``daily_path``; after ``add_field`` the column
    count of ``data_d`` must grow by the number of first-level column labels.
    """
    view = DataView()
    view.load_dataview(folder_path=daily_path)

    rows_before, cols_before = view.data_d.shape
    security_count = len(view.data_d.columns.levels[0])
    expected_shape = (rows_before, cols_before + security_count)

    data_service = RemoteDataService()
    data_service.init_from_config()
    view.add_field('total_share', data_service)

    assert view.data_d.shape == expected_shape
def test_remote_data_service_inst_info():
    """Check instrument info of '000001.SZ', looked up by symbol label.

    Expects multiplier 1, a pricetick within 1e-2 of 0.01 and buylot 100.
    """
    service = RemoteDataService()
    service.init_from_config()

    sec = '000001.SZ'
    wanted_fields = 'status,selllot,buylot,pricetick,multiplier,product'
    info = service.query_inst_info(sec, fields=wanted_fields)

    assert info.at[sec, 'multiplier'] == 1
    tick_error = abs(info.at[sec, 'pricetick'] - 0.01)
    assert tick_error < 1e-2
    assert info.at[sec, 'buylot'] == 100
def test_align():
    """Evaluate a parsed formula on quarterly data aligned to trade dates.

    Income-statement ``oper_rev`` of '600000.SH' is pivoted into value and
    announcement-date frames, divided by a registered custom function of a
    constant close frame, and three resulting values are checked.
    """
    # ---------------------------------------------------------------------
    # input and pre-process demo data
    service = RemoteDataService()
    raw, msg = service.query_lb_fin_stat('income', '600000.SH',
                                         20151225, 20170501, 'oper_rev')
    assert msg == '0,'

    index_cols = ['report_date', 'symbol']
    indexed = raw.set_index(index_cols)
    indexed.sort_index(axis=0, level=index_cols, inplace=True)

    every_row = pd.IndexSlice[:, :]
    df_ann = indexed.loc[every_row, 'ann_date'].unstack(level=1)
    df_value = indexed.loc[every_row, 'oper_rev'].unstack(level=1)

    date_arr = service.get_trade_date(20160101, 20170501)
    df_close = pd.DataFrame(index=date_arr, columns=df_value.columns, data=1e3)

    # ---------------------------------------------------------------------
    # demo usage of parser
    parser = Parser()
    # simultaneously test register function and align
    parser.register_function('Myfunc', lambda x: x * 0 + 1)
    expr_formula = 'revenue / Myfunc(close)'
    parser.parse(expr_formula)

    variables = {'revenue': df_value, 'close': df_close}
    for _ in range(100):
        df_res = parser.evaluate(variables, df_ann, date_arr)

    # ---------------------------------------------------------------------
    sec = '600000.SH'
    # Disabled debug output kept for reference (print to validate results):
    #   print "\n======Expression Formula:\n{:s}".format(expr_formula)
    #   print "\n======Report date, ann_date and evaluation value:"
    #   tmp = pd.concat([df_ann.loc[:, sec], df_value.loc[:, sec]], axis=1)
    #   tmp.columns = ['df_ann', 'df_value']
    #   print tmp
    #   print "\n======Selection of result of expansion:"
    #   print "20161028  {:.4f}".format(df_res.loc[20161028, sec])
    #   print "20161031  {:.4f}".format(df_res.loc[20161031, sec])
    #   print "20170427  {:.4f}".format(df_res.loc[20170427, sec])

    # (trade date, expected value) pairs, each checked to within 1.
    checkpoints = (
        (20161028, 82172000000),
        (20161031, 120928000000),
        (20170427, 42360000000),
    )
    for trade_date, expected in checkpoints:
        assert abs(df_res.loc[trade_date, sec] - expected) < 1
def test_remote_data_service_daily_ind_performance():
    """Query ``lb.secDailyIndicator`` for all '000300.SH' components.

    Only the returned message is checked; it is expected to be '0,'.
    """
    service = RemoteDataService()
    components = service.get_index_comp('000300.SH', 20140101, 20170101)
    joined_symbols = ','.join(components)

    query_filter = ("symbol=" + joined_symbols
                    + "&start_date=20160907&end_date=20170907")
    res, msg = service.query("lb.secDailyIndicator",
                             fields="pb,pe,share_float_free,net_assets,limit_status",
                             filter=query_filter,
                             orderby="trade_date")
    assert msg == '0,'
def test_bar():
    """Fetch minute bars for two symbols and build ``Bar`` objects 100 times.

    No assertion is made; the repeated ``Bar.create_from_df`` call only has
    to complete.
    """
    from jaqs.data.dataservice import RemoteDataService
    from jaqs.trade.common import QUOTE_TYPE

    service = RemoteDataService()
    df_quotes, _msg = service.bar(symbol='rb1710.SHF,hc1710.SHF',
                                  start_time=200000,
                                  end_time=160000,
                                  trade_date=20170704,
                                  freq=QUOTE_TYPE.MIN)

    repeat = 100
    for _ in range(repeat):
        Bar.create_from_df(df_quotes)
def connect(self, db_config):
    """Create and configure a ``RemoteDataService`` from ``db_config``.

    The settings built from ``db_config['addr']``, ``db_config['user']`` and
    ``db_config['password']`` (plus a ``"timeout"`` of 600) are stored on
    ``self.ds_props``; on success the configured service is stored on
    ``self.conn``.

    Raises:
        KeyError: if ``db_config`` lacks one of the keys read above.
        ValueError: if creating or configuring the service fails.
    """
    self.ds_props = {
        'remote.data.address': db_config['addr'],
        'remote.data.username': db_config['user'],
        'remote.data.password': db_config['password'],
        "timeout": 600,
    }
    try:
        ds = RemoteDataService()
        ds.init_from_config(self.ds_props)
    except Exception:
        # Only ordinary errors are translated. A bare ``except`` would also
        # turn KeyboardInterrupt / SystemExit into a ValueError.
        # Message (Chinese): "database connection failed, please check that
        # the configuration is correct".
        raise ValueError('数据库连接失败,请检查配置信息是否正确')
    else:
        self.conn = ds
def test_q_add_field():
    """Check that adding 'net_inc_other_ops' adds one column per security.

    The view is loaded from ``quarterly_path``; the shape of ``data_q`` is
    compared before and after ``add_field``.
    """
    view = DataView()
    view.load_dataview(folder_path=quarterly_path)

    rows_before, cols_before = view.data_q.shape
    # NOTE(review): the security count is taken from data_d, not data_q --
    # presumably both share the same symbols; confirm.
    security_count = len(view.data_d.columns.levels[0])
    expected_shape = (rows_before, cols_before + security_count)

    data_service = RemoteDataService()
    data_service.init_from_config()
    view.add_field('net_inc_other_ops', data_service)
    # Further fields that are currently not added:
    #   dv.add_field('oper_rev', ds)
    #   dv.add_field('turnover', ds)

    assert view.data_q.shape == expected_shape
def my_globals(request):
    """Inject ``parser``, ``dfx`` and ``dfy`` into the requesting function's globals.

    Daily quotes for three symbols are fetched, indexed by
    (trade_date, symbol) and unstacked into a ``close`` frame (``dfx``) and
    an ``open`` frame (``dfy``). Together with a fresh ``Parser`` they are
    written into the global namespace of ``request.function``.

    Args:
        request: object exposing the target function as ``request.function``.
    """
    ds = RemoteDataService()

    df, msg = ds.daily("000001.SH, 600030.SH, 000300.SH",
                       start_date=20170801, end_date=20170820,
                       fields="open,high,low,close,vwap,preclose")

    multi_index_names = ['trade_date', 'symbol']
    df_multi = df.set_index(multi_index_names, drop=False)
    df_multi.sort_index(axis=0, level=multi_index_names, inplace=True)

    dfx = df_multi.loc[pd.IndexSlice[:, :], pd.IndexSlice['close']].unstack()
    dfy = df_multi.loc[pd.IndexSlice[:, :], pd.IndexSlice['open']].unstack()

    parser = Parser()
    # ``__globals__`` is available on Python 2.6+ and Python 3, whereas
    # ``func_globals`` exists only on Python 2.
    request.function.__globals__.update({'parser': parser, 'dfx': dfx, 'dfy': dfy})
def test_save_dataview():
    """Prepare a DataView for universe '000300.SH' and save it.

    A single ``GroupQuantile`` formula field, ``gq30``, is added before the
    view is written to ``dataview_dir_path``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config()

    view = DataView()
    config = {
        'start_date': 20170201,
        'end_date': 20171001,
        'universe': '000300.SH',
        'fields': 'float_mv,sw2,sw1',
        'freq': 1,
    }
    view.init_from_config(config, data_service)
    view.prepare_data()

    view.add_formula('gq30',
                     'GroupQuantile(float_mv, sw1, 10)',
                     is_quarterly=False)

    view.save_dataview(folder_path=dataview_dir_path)
def test_remote_data_service_bar():
    """Check the bar query for 'rb1710.SHF' and '600662.SH' on 20170831.

    Verifies the returned message, the column set, the per-symbol row
    counts and two individual values of the 'rb1710.SHF' rows.
    """
    service = RemoteDataService()

    # test bar
    bars, status = service.bar('rb1710.SHF,600662.SH',
                               start_time=200000,
                               end_time=160000,
                               trade_date=20170831,
                               fields="")
    assert status == '0,'

    def rows_of(symbol):
        """Return the rows of ``bars`` whose 'symbol' column equals ``symbol``."""
        return bars.loc[bars.loc[:, 'symbol'] == symbol, :]

    future_bars = rows_of('rb1710.SHF')
    stock_bars = rows_of('600662.SH')

    expected_columns = {u'close', u'code', u'date', u'freq', u'high', u'low',
                        u'oi', u'open', u'settle', u'symbol', u'time',
                        u'trade_date', u'turnover', u'volume', u'vwap'}
    assert set(future_bars.columns) == expected_columns
    assert abs(future_bars.loc[:, 'settle'].values[0] - 0.0) < 1e-3
    assert future_bars.shape == (345, 15)
    assert stock_bars.shape == (240, 15)
    assert future_bars.loc[:, 'volume'].values[344] == 3366
def test_remote_data_service_daily():
    """Check the daily query for 'rb1710.SHF' and '600662.SH'.

    Verifies the returned message, the column set and shape of the
    'rb1710.SHF' rows, and the first volume value of each symbol.
    """
    service = RemoteDataService()

    # test daily
    quotes, status = service.daily('rb1710.SHF,600662.SH',
                                   fields="",
                                   start_date=20170828,
                                   end_date=20170831,
                                   adjust_mode=None)
    assert status == '0,'

    def rows_of(symbol):
        """Return the rows of ``quotes`` whose 'symbol' column equals ``symbol``."""
        return quotes.loc[quotes.loc[:, 'symbol'] == symbol, :]

    future_rows = rows_of('rb1710.SHF')
    stock_rows = rows_of('600662.SH')

    expected_columns = {'close', 'code', 'high', 'low', 'oi', 'open',
                        'settle', 'symbol', 'trade_date', 'trade_status',
                        'turnover', 'volume', 'vwap'}
    assert set(future_rows.columns) == expected_columns
    assert future_rows.shape == (4, 13)
    assert future_rows.loc[:, 'volume'].values[0] == 189616
    assert stock_rows.loc[:, 'volume'].values[0] == 7174813
def save_dataview(sub_folder='test_dataview'):
    """Prepare a DataView for universe '000300.SH' and save it.

    Adds the ``new_high`` formula and writes the view below
    '../output/prepared' (resolved through ``fileio.join_relative_path``).

    Args:
        sub_folder: forwarded to ``DataView.save_dataview``.
    """
    data_service = RemoteDataService()

    # 'pb,net_assets,' is deliberately left out of the requested fields.
    requested_fields = ('open,high,low,close,vwap,volume,turnover,'
                        + 's_fa_eps_basic,oper_exp,tot_profit,int_income')

    view = DataView()
    config = {
        'start_date': 20141114,
        'end_date': 20160327,
        'universe': '000300.SH',
        'fields': requested_fields,
        'freq': 1,
    }
    view.init_from_config(config, data_service)
    view.prepare_data()

    # 20 days new high
    view.add_formula('new_high', 'close > Ts_Max(close, 20)', is_quarterly=False)

    target_folder = fileio.join_relative_path('../output/prepared')
    view.save_dataview(folder_path=target_folder, sub_folder=sub_folder)
def test_double_ma():
    """Run an event backtest of ``DoubleMaStrategy`` and generate its report.

    Properties are read from 'etc/backtest.json' (resolved through
    ``fileio.join_relative_path``); the 'bar_type' entry is converted with
    ``common.QUOTE_TYPE.to_enum`` before the backtest is initialised.
    """
    prop_file_path = fileio.join_relative_path("etc/backtest.json")
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(prop_file_path)

    # The context manager closes the file even if JSON parsing fails.
    with open(prop_file_path, 'r') as prop_file:
        props = json.load(prop_file)

    # Properties stored as plain values that must be converted to enums.
    enum_props = {'bar_type': common.QUOTE_TYPE}
    for key, enum_cls in enum_props.items():
        props[key] = enum_cls.to_enum(props[key])

    strategy = DoubleMaStrategy()
    gateway = BarSimulatorGateway()
    data_service = RemoteDataService()

    context = model.Context()
    context.register_data_api(data_service)
    context.register_gateway(gateway)
    context.register_trade_api(gateway)

    backtest = EventBacktestInstance()
    backtest.init_from_config(props, strategy, context=context)

    backtest.run()
    backtest.generate_report(output_format="")
def save_dataview():
    """Prepare a DataView for ``index`` and save it to ``dataview_dir_path``.

    The date range comes from elements 2 and 3 of
    ``get_index_basic_information()``; the fields come from ``fields``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    view = DataView()

    # The helper is called once per bound, as before.
    first_day = get_index_basic_information()[2]
    last_day = get_index_basic_information()[3]

    config = dict(universe=index,
                  start_date=first_day,
                  end_date=last_day,
                  fields=fields,
                  freq=1)
    view.init_from_config(config, data_api=data_service)
    view.prepare_data()

    view.save_dataview(folder_path=dataview_dir_path)
def test_remote_data_service_industry_df():
    """Cross-check daily industry data against the raw records of '000008.SZ'.

    For every raw record, the daily frame must hold the record's
    ``industry1_code`` at ``in_date`` -- or, when ``in_date`` is not in the
    daily index, at the date given by ``Calendar.get_next_trade_date``.
    """
    from jaqs.data.dataservice import Calendar

    calendar = Calendar()
    service = RemoteDataService()
    service.init_from_config()

    components = service.get_index_comp(index='000300.SH',
                                        start_date=20130101,
                                        end_date=20170505)
    all_symbols = ','.join(components)

    sec = '000008.SZ'
    type_ = 'ZZ'

    raw_records = service.get_industry_raw(symbol=sec, type_=type_)
    daily = service.get_industry_daily(symbol=all_symbols,
                                       start_date=raw_records['in_date'].min(),
                                       end_date=20170505,
                                       type_=type_,
                                       level=1)

    for _, record in raw_records.iterrows():
        lookup_date = record['in_date']
        expected_code = record['industry1_code']
        if lookup_date not in daily.index:
            # in_date is missing from the daily index: use the next trade date.
            lookup_date = calendar.get_next_trade_date(lookup_date)
        assert daily.loc[lookup_date, sec] == expected_code
def test_add_formula_directly():
    """Add a formula to a freshly prepared DataView and check its shape.

    After adding ``myfactor`` ('close / open') the ``data_d`` frame is
    expected to have shape (281, 39).
    """
    data_service = RemoteDataService()
    data_service.init_from_config()

    view = DataView()
    symbols = '600030.SH,000063.SZ,000001.SZ'
    config = {
        'start_date': 20160601,
        'end_date': 20170601,
        'symbol': symbols,
        'fields': 'open,close',
        'freq': 1,
    }
    view.init_from_config(config, data_api=data_service)
    view.prepare_data()

    view.add_formula("myfactor", 'close / open', is_quarterly=False)

    expected_shape = (281, 39)
    assert view.data_d.shape == expected_shape