def test_remote_data_service_industry():
    """Smoke-test ``align`` on raw industry classification data.

    Fetches the components of index ``000300.SH`` and their raw industry
    records (``type_='ZZ'``) through ``RemoteDataService``, then calls
    ``align`` in two ways:

    1. once on wide frames that hold one column per symbol;
    2. once per symbol, for ten of the symbols, concatenating the
       per-symbol results column-wise.

    No assertion is made on the aligned values; the test only verifies that
    the whole pipeline runs without raising.
    """
    from jaqs.data.align import align
    from jaqs.data.dataview import DataView
    import pandas as pd

    ds = RemoteDataService()

    arr = ds.get_index_comp(index='000300.SH', start_date=20130101, end_date=20170505)
    df = ds.get_industry_raw(symbol=','.join(arr), type_='ZZ')
    # 'in_date' is used below as the announcement-date input of align, so
    # make sure it is integer typed.
    df = df.astype(dtype={'in_date': int})

    # One DataFrame per symbol, each re-indexed from 0 so that the per-symbol
    # columns can be concatenated side by side.
    # ``items()`` (rather than the Python 2 only ``viewitems()``) keeps this
    # runnable on both Python 2 and Python 3.
    dic_sec = DataView._group_df_to_dict(df, by='symbol')
    dic_sec = {sec: df_sec.reset_index() for sec, df_sec in dic_sec.items()}

    df_ann = pd.concat(
        [df_sec.loc[:, 'in_date'].rename(sec) for sec, df_sec in dic_sec.items()],
        axis=1)
    df_value = pd.concat(
        [df_sec.loc[:, 'industry1_code'].rename(sec) for sec, df_sec in dic_sec.items()],
        axis=1)

    dates_arr = ds.get_trade_date(20140101, 20170505)
    # Align all symbols at once.
    res = align(df_value, df_ann, dates_arr)

    def align_single_df(df_one_sec):
        """Run ``align`` on the industry code / in_date columns of one symbol."""
        df_value = df_one_sec.loc[:, ['industry1_code']]
        df_ann = df_one_sec.loc[:, ['in_date']]
        return align(df_value, df_ann, dates_arr)

    # Only ten symbols are aligned individually.
    # A dict view cannot be sliced on Python 3, hence the explicit list().
    first_frames = list(dic_sec.values())[:10]
    res_list = [align_single_df(df_sec) for df_sec in first_frames]
    res = pd.concat(res_list, axis=1)
def test_align():
    """Check ``Parser.evaluate`` with a registered function plus alignment.

    Queries the ``oper_rev`` income item of ``600000.SH`` through
    ``RemoteDataService``, reshapes announcement dates and values into
    frames indexed by report date with one column per symbol, evaluates
    ``'revenue / Myfunc(close)'`` against a constant close frame, and
    compares the result on three dates with known values.
    """
    # -------------------------------------------------------------------------------------
    # input and pre-process demo data
    data_service = RemoteDataService()
    raw, msg = data_service.query_lb_fin_stat('income', '600000.SH', 20151225, 20170501, 'oper_rev')
    assert msg == '0,'

    index_cols = ['report_date', 'symbol']
    indexed = raw.set_index(index_cols).sort_index(axis=0, level=index_cols)

    every_row = pd.IndexSlice[:, :]
    # unstack(level=1) moves the second index level ('symbol') to the columns
    df_ann = indexed.loc[every_row, 'ann_date'].unstack(level=1)
    df_value = indexed.loc[every_row, 'oper_rev'].unstack(level=1)

    trade_dates = data_service.get_trade_date(20160101, 20170501)
    # constant close price of 1e3 for every trade date and symbol
    df_close = pd.DataFrame(data=1e3, index=trade_dates, columns=df_value.columns)

    # -------------------------------------------------------------------------------------
    # demo usage of parser
    parser = Parser()
    # simultaneously test register function and align;
    # Myfunc maps any input to ones, so dividing by it leaves revenue unchanged
    parser.register_function('Myfunc', lambda x: x * 0 + 1)

    expr_formula = 'revenue / Myfunc(close)'
    # The parsed expression object itself is not used below.
    # NOTE(review): presumably parse() stores state on the parser that
    # evaluate() relies on - confirm against Parser.
    parser.parse(expr_formula)

    # The evaluation is repeated 100 times; only the last result is checked.
    for _ in range(100):
        df_res = parser.evaluate({'revenue': df_value, 'close': df_close},
                                 df_ann, trade_dates)

    # -------------------------------------------------------------------------------------
    sec = '600000.SH'
    """
    # print to validate results
    print "\n======Expression Formula:\n{:s}".format(expr_formula)

    print "\n======Report date, ann_date and evaluation value:"
    tmp = pd.concat([df_ann.loc[:, sec], df_value.loc[:, sec]], axis=1)
    tmp.columns = ['df_ann', 'df_value']
    print tmp

    print "\n======Selection of result of expansion:"
    print "20161028  {:.4f}".format(df_res.loc[20161028, sec])
    print "20161031  {:.4f}".format(df_res.loc[20161031, sec])
    print "20170427  {:.4f}".format(df_res.loc[20170427, sec])
    """

    expected_by_date = (
        (20161028, 82172000000),
        (20161031, 120928000000),
        (20170427, 42360000000),
    )
    for trade_date, expected in expected_by_date:
        assert abs(df_res.loc[trade_date, sec] - expected) < 1