def test_load():
    """Load a prepared dataview from disk and sanity-check its query methods.

    Covers three things: the metadata restored by ``load_dataview``, the
    cross-section returned by ``get_snapshot`` and the time series returned
    by ``get_ts``.
    """
    view = DataView()
    view.load_dataview(folder='../output/prepared/20160601_20170601_freq=1D')

    # Metadata restored from the saved dataview.
    universe = '000001.SZ,600030.SH,000063.SZ'.split(',')
    assert view.start_date == 20160601
    assert set(view.symbol) == set(universe)

    # Both queries below are restricted to the same pair of symbols.
    pair = '600030.SH,000063.SZ'
    pair_set = set(pair.split(','))

    # get_snapshot: one row per requested symbol, one column per field.
    snapshot = view.get_snapshot(20170504, symbol=pair, fields='close,pb')
    assert snapshot.shape == (2, 2)
    assert set(snapshot.columns.values) == {'close', 'pb'}
    assert set(snapshot.index.values) == pair_set

    # get_ts: one row per date in the window, one column per symbol.
    series = view.get_ts('close', symbol=pair,
                         start_date=20170101, end_date=20170302)
    assert series.shape == (38, 2)
    assert set(series.columns.values) == pair_set
    assert series.index.values[-1] == 20170302
def store_ic_weight():
    """
    Calculate IC weight and save it to file.

    Steps performed:

    1. Load the dataview stored under ``dataview_dir_path``.
    2. For every date in ``dv.dates``, take the cross-section of the raw
       factors, drop symbols with a missing value in any factor, and pass
       the remainder through ``Schmidt``.
    3. Append each resulting column to the dataview as ``<factor>_adj`` and
       save the dataview back to ``dataview_dir_path``.
    4. Save the list of ``*_adj`` field names as JSON to ``custom_data_path``.
    5. Compute ``get_ic_weight(dv)`` and store it under the key
       ``'ic_weight'`` in the HDF5 file at ``ic_weight_hd5_path``.
    """
    dv = DataView()
    dv.load_dataview(folder_path=dataview_dir_path)

    factor_names = ['TO', 'BP', 'REVS20', 'float_mv_factor']
    adj_names = [name + '_adj' for name in factor_names]

    # adjusted field name -> {trade_date: Series indexed like the snapshot}
    orth_by_field = {adj: {} for adj in adj_names}

    # add the orthogonalized factor to dataview
    for trade_date in dv.dates:
        snapshot = dv.get_snapshot(trade_date)
        factor_panel = snapshot[factor_names].dropna()
        if factor_panel.empty:
            # No symbol has all factors on this date; nothing to record.
            continue

        # NOTE(review): Schmidt is presumed to return a DataFrame indexed
        # like its input, with one column per input factor -- confirm.
        orth_panel = Schmidt(factor_panel)
        orth_panel.columns = adj_names

        for adj in adj_names:
            # Store the orthogonalized column, not the raw factor. Reindexing
            # to the snapshot keeps one entry per symbol (NaN where the symbol
            # was dropped) and, unlike a merge, cannot collide with '*_adj'
            # columns already present in the dataview from an earlier run.
            orth_by_field[adj][trade_date] = orth_panel[adj].reindex(snapshot.index)

    for adj in adj_names:
        # Transpose so that rows are trade dates and columns are symbols.
        dv.append_df(pd.DataFrame(orth_by_field[adj]).T,
                     field_name=adj, is_quarterly=False)
    dv.save_dataview(dataview_dir_path)

    fileio.save_json(adj_names, custom_data_path)

    w = get_ic_weight(dv)

    # The context manager closes the store even if the write fails.
    with pd.HDFStore(ic_weight_hd5_path) as store:
        store['ic_weight'] = w
def calculate_pe_pb_of_index_single_day(date, dv=None):
    """Compute median and equal-weighted PE / PB of the universe for one day.

    Args:
        date: Trade date passed through to ``dv.get_snapshot``.
        dv: Optional, already loaded dataview exposing ``get_snapshot``.
            When omitted, a ``DataView`` is created and loaded from
            ``dataview_dir_path`` (the original behaviour). Passing one in
            lets callers that loop over many dates avoid reloading from disk.

    Returns:
        Tuple ``(date, pe_median, pe_equal, pb_median, pb_equal)``.

    Raises:
        ValueError: If the snapshot has no rows, or if the share of rows
            with both ``pe_ttm`` and ``pb`` present is not above 98%.
    """
    if dv is None:
        dv = DataView()
        dv.load_dataview(folder_path=dataview_dir_path)

    # Cross-section used for the index-level median and equal-weighted values.
    snapshot = dv.get_snapshot(date, symbol='', fields='pe_ttm,pb')

    total = len(snapshot)
    if total == 0:
        # Guard the ratio below against division by zero.
        raise ValueError('No pe_ttm/pb data available for date {}.'.format(date))

    # Data-quality check: refuse to continue unless more than 98% of the rows
    # are complete. float() keeps this a true division under Python 2 as well.
    # dropna() returns a new frame, so the dataview's snapshot is not mutated.
    data = snapshot.dropna(how='any')
    if len(data) / float(total) <= 0.98:
        raise ValueError('Nan of Data is too much.')

    # Number of constituents with complete data.
    N = len(data)

    inv_pe = 1 / data['pe_ttm']
    inv_pb = 1 / data['pb']

    # Median taken on the reciprocals and inverted back; per the original
    # author, ranking by the reciprocal removes the influence of negative
    # values.
    pe_median = 1 / inv_pe.median()
    pb_median = 1 / inv_pb.quantile(0.5)

    # Equal-weighted value, i.e. the harmonic mean.
    pe_equal = N / inv_pe.sum()
    pb_equal = N / inv_pb.sum()

    print(data)
    print(date, pe_median, pe_equal, pb_median, pb_equal)
    return (date, pe_median, pe_equal, pb_median, pb_equal)