def store_ic_weight():
    """
    Orthogonalize the factors, persist them, then calculate and save the IC weight.

    Steps:
      1. Load the dataview stored under ``dataview_dir_path``.
      2. For every trade date, take the cross-section of the raw factors,
         drop symbols that miss any of them and pass the rest to ``Schmidt``.
      3. Append each result to the dataview as ``<factor>_adj`` and save the
         dataview back to ``dataview_dir_path``.
      4. Write the list of ``_adj`` field names to ``custom_data_path`` as JSON.
      5. Compute the weights with ``get_ic_weight`` and store them under the
         key ``'ic_weight'`` in the HDF5 file at ``ic_weight_hd5_path``.

    Takes no arguments and returns nothing; all results are written to disk.
    """
    dv = DataView()
    dv.load_dataview(folder_path=dataview_dir_path)

    factor_names = ['TO', 'BP', 'REVS20', 'float_mv_factor']
    adj_names = [name + '_adj' for name in factor_names]

    # adjusted field name -> {trade_date: cross-section Series}
    orth_by_field = {adj_name: {} for adj_name in adj_names}

    # add the orthogonalized factor to dataview
    for trade_date in dv.dates:
        snapshot = dv.get_snapshot(trade_date)
        panel = snapshot[factor_names].dropna()
        if panel.empty:
            # Nothing to orthogonalize on this date; it gets no row at all.
            continue

        # Assumes Schmidt returns a DataFrame indexed like its input with one
        # column per factor, in the same order -- TODO confirm.
        orth_panel = Schmidt(panel)
        orth_panel.columns = adj_names

        for adj_name in adj_names:
            # Store the orthogonalized column (the previous code stored the
            # raw factor here, so the '_adj' fields were copies of the
            # originals).  Reindexing to the snapshot's symbols keeps NaN for
            # the rows dropped above, as a left merge would.
            orth_by_field[adj_name][trade_date] = (
                orth_panel[adj_name].reindex(snapshot.index)
            )

    for adj_name in adj_names:
        # The dict-of-Series frame has dates as columns; transpose so that
        # each row is a trade date and each column a symbol.
        dv.append_df(pd.DataFrame(orth_by_field[adj_name]).T,
                     field_name=adj_name, is_quarterly=False)
    dv.save_dataview(dataview_dir_path)

    jutil.save_json(adj_names, custom_data_path)

    w = get_ic_weight(dv)

    # The context manager closes the store even if the write raises.
    with pd.HDFStore(ic_weight_hd5_path) as store:
        store['ic_weight'] = w
def test_load():
    """Load the dataview saved at ``daily_path`` and check its metadata,
    ``get_snapshot`` and ``get_ts``."""
    view = DataView()
    view.load_dataview(folder_path=daily_path)

    # Metadata restored from disk.
    assert view.start_date == 20160601
    assert set(view.symbol) == set('000001.SZ,600030.SH,000063.SZ'.split(','))

    # test get_snapshot: two symbols x two fields on a single date
    snapshot = view.get_snapshot(
        20170504,
        symbol='600030.SH,000063.SZ',
        fields='close,pb',
    )
    assert snapshot.shape == (2, 2)
    assert set(snapshot.columns.values) == {'close', 'pb'}
    assert set(snapshot.index.values) == {'600030.SH', '000063.SZ'}

    # test get_ts: one field, two symbols, over a date range
    close_ts = view.get_ts(
        'close',
        symbol='600030.SH,000063.SZ',
        start_date=20170101,
        end_date=20170302,
    )
    assert close_ts.shape == (38, 2)
    assert set(close_ts.columns.values) == {'600030.SH', '000063.SZ'}
    assert close_ts.index.values[-1] == 20170302
def test_load():
    """Round-trip check: a dataview loaded from ``daily_path`` exposes the
    expected start date and symbols, and answers ``get_snapshot`` / ``get_ts``
    queries with the expected shape and labels."""
    queried_symbols = {'600030.SH', '000063.SZ'}

    dv = DataView()
    dv.load_dataview(folder_path=daily_path)

    start_ok = dv.start_date == 20160601
    assert start_ok and set(dv.symbol) == set(
        '000001.SZ,600030.SH,000063.SZ'.split(','))

    # test get_snapshot
    snap = dv.get_snapshot(20170504, symbol='600030.SH,000063.SZ',
                           fields='close,pb')
    snap_fields = set(snap.columns.values)
    snap_symbols = set(snap.index.values)
    assert snap.shape == (2, 2)
    assert snap_fields == {'close', 'pb'}
    assert snap_symbols == queried_symbols

    # test get_ts
    ts = dv.get_ts('close', symbol='600030.SH,000063.SZ',
                   start_date=20170101, end_date=20170302)
    last_date = ts.index.values[-1]
    assert ts.shape == (38, 2)
    assert set(ts.columns.values) == queried_symbols
    assert last_date == 20170302
#1_读取本地数据 import jaqs_fxdayu jaqs_fxdayu.patch_all() #新加 from jaqs.data import DataView from jaqs.data import RemoteDataService import os import numpy as np import warnings warnings.filterwarnings("ignore") dv = DataView() dataview_folder = 'G:/data/hs300' #文件地址 dv.load_dataview(dataview_folder) #读取dataview_folder #2_读取索引为股票代号的数据 get_snapshot print(dv.get_snapshot(20170504, symbol='600036.SH,600050.SH', fields='')) print( dv.get_snapshot(20170504, symbol='600036.SH,600050.SH', fields='close_adj')) #20170504表示时间,symbol='600036.SH,600050.SH'表示股票(可以添加),fields=''表示因子,若不输则会返回全部的因子 #3_读取时间序列数据 get_ts data1 = dv.get_ts('pb') #返回的是一个DataFrame格式的数据(包含沪深300全部),pb表示平均市净率 print(dv.get_ts('pb').head()) #4_添加自定义算法数据 add_formul roe_pb = dv.add_formula('roe_pb', 'roe/pb', is_quarterly=False, add_data=True) #'roe_pb'表示算法的新名称,'roe/pb'为公式,is_quarterly=False代表是否为季度数据 print(dv.get_ts('roe_pb').head()) #这里用get_ts的方法输入新的名称即可 #5_从数据服务添加新数据至本地
# Settings handed to DataView.init_from_config.
dataview_props = {
    # Back-test window: first and last date
    'start_date': start_data,
    'end_date': cur_data,
    # Stocks to invest in, and the benchmark to compare against
    'universe': UNIVERSE,
    'benchmark': '000300.SH',
    # Price fields required by the formulas below
    'fields': 'open,close,high,low',
    # freq = 1 selects daily data. Please leave this unchanged.
    'freq': 1,
}

# Data service configured from data_config; the dataview pulls its data
# through it.
ds = RemoteDataService()
ds.init_from_config(data_config)

dv = DataView()
dv.init_from_config(dataview_props, ds)
dv.prepare_data()

# Disabled: a 'tingpai' formula that is true when trade_status differs from
# the Chinese label for "trading suspended".

# is_yang: close above open, with the open-to-close move being at least 0.4
# of the high-low range.
dv.add_formula(
    'is_yang',
    '(close > open) && ((close - open) / (high -low) >= 0.4)',
    is_quarterly=False,
)
# three_yang: is_yang holds now and at Delay 1 and Delay 2.
dv.add_formula(
    'three_yang',
    'is_yang && Delay(is_yang, 1) && Delay(is_yang, 2)',
    is_quarterly=False,
)

# Cross-section on cur_data; print only the rows where three_yang is set.
df = dv.get_snapshot(cur_data)
print(df.loc[df['three_yang'] == 1.0])