def get_(folder):
    """Load a saved DataView and collect the inputs for a "pb" signal study.

    :param folder: location of the saved dataview, handed to
        ``DataView.load_dataview``.
    :return: tuple ``(signal, price, mask, group, can_enter, can_exit,
        benchmark)`` where

        * signal    -- the "pb" field
        * price     -- the "close_adj" field
        * mask      -- True where "index_member" equals 0
        * group     -- the "sw1" field
        * can_enter -- "up_limit" formula not triggered and trade_status == 1
        * can_exit  -- "down_limit" formula not triggered and trade_status == 1
        * benchmark -- ``dv.data_benchmark``
    """
    view = DataView()
    view.load_dataview(folder)

    # Plain fields read straight from the dataview.
    pb_signal = view.get_ts("pb")
    adj_close = view.get_ts("close_adj")
    sw1_group = view.get_ts("sw1")

    # Signal filter: flag entries whose index_member value is 0.
    not_in_index = view.get_ts('index_member') == 0

    # Tradability flag: trade_status equal to 1.
    tradable = view.get_ts('trade_status') == 1

    # Limit flags compare the open against the previous close (+/- 9.5%).
    limit_up_expr = '(open - Delay(close, 1)) / Delay(close, 1) > 0.095'
    limit_down_expr = '(open - Delay(close, 1)) / Delay(close, 1) < -0.095'
    hit_up = view.add_formula('up_limit', limit_up_expr, is_quarterly=False)
    hit_down = view.add_formula('down_limit', limit_down_expr,
                                is_quarterly=False)

    # "< 1" keeps the entries where the limit flag is not set.
    enter_ok = np.logical_and(hit_up < 1, tradable)
    exit_ok = np.logical_and(hit_down < 1, tradable)

    return (pb_signal, adj_close, not_in_index, sw1_group, enter_ok, exit_ok,
            view.data_benchmark)
def test_q_add_formula():
    """Check that formulas can be added to a dataview loaded from ``quarterly_path``.

    Two formulas are added with ``is_quarterly=False`` and ``add_data=True``;
    the second one refers to the first ('myvar1'), so it also checks that a
    freshly added field can be used inside a later formula. Both resulting
    time series must be non-empty.

    ``quarterly_path`` is a module-level name defined outside this function.
    """
    dv = DataView()
    dv.load_dataview(folder_path=quarterly_path)

    # Sanity check on the loaded daily panel (previously computed but unused).
    nrows, ncols = dv.data_d.shape
    n_securities = len(dv.data_d.columns.levels[0])
    assert nrows > 0 and ncols > 0 and n_securities > 0

    formula = 'total_oper_rev / close'
    dv.add_formula('myvar1', formula, is_quarterly=False, add_data=True)
    df1 = dv.get_ts('myvar1')
    assert not df1.empty

    # Depends on 'myvar1' having been stored by the previous add_formula call.
    formula2 = 'Delta(oper_exp * myvar1 - open, 3)'
    dv.add_formula('myvar2', formula2, is_quarterly=False, add_data=True)
    df2 = dv.get_ts('myvar2')
    assert not df2.empty
def test_load():
    """Load the dataview stored at ``daily_path`` and verify its basic accessors.

    Checks the start date and symbol universe, then the shape and labels
    returned by ``get_snapshot`` and ``get_ts``. ``daily_path`` is a
    module-level name defined outside this function.
    """
    view = DataView()
    view.load_dataview(folder_path=daily_path)

    expected_universe = set('000001.SZ,600030.SH,000063.SZ'.split(','))
    assert view.start_date == 20160601 and set(view.symbol) == expected_universe

    # get_snapshot: one date, two symbols, two fields.
    snapshot = view.get_snapshot(20170504,
                                 symbol='600030.SH,000063.SZ',
                                 fields='close,pb')
    assert snapshot.shape == (2, 2)
    assert set(snapshot.columns.values) == {'close', 'pb'}
    assert set(snapshot.index.values) == {'600030.SH', '000063.SZ'}

    # get_ts: one field, two symbols, bounded date range.
    close_ts = view.get_ts('close',
                           symbol='600030.SH,000063.SZ',
                           start_date=20170101,
                           end_date=20170302)
    assert close_ts.shape == (38, 2)
    assert set(close_ts.columns.values) == {'600030.SH', '000063.SZ'}
    assert close_ts.index.values[-1] == 20170302
'freq': 1 } dv.init_from_config(props, ds) dv.prepare_data() dv.save_dataview(dataview_folder) # 保存数据文件到指定路径,方便下次直接加载 save_dataview() # 加载数据 dv = DataView() dv.load_dataview(dataview_folder) print(dv.fields) #查看dv中取得的数据 print(dv.get_ts("pb").head()) #查看dv中取得的市净率 #3_因子分析 #step1:因子收集及处理 import pandas as pd from datetime import datetime factor = dv.get_ts("pb") factor.index = pd.Index( map(lambda x: datetime.strptime(str(x), "%Y%m%d"), factor.index)) # 改时间索引: 将整数类型的日期转成datetime的格式的日期datetime.strptime (注意pd.Index,I要大写) factor = factor.stack() #修改成Mutiindex格式(Alphalen因子分析必要) 得到一窜Series #这里先定义一个函数,方便下次的转换 def change_index(df):
#1_初始化 from jaqs_fxdayu.data import DataView import warnings warnings.filterwarnings("ignore") dataview_folder = 'G:/data/hs300_2' dv = DataView() dv.load_dataview(dataview_folder) dv.add_formula("momentum", "Return(close_adj, 20)", is_quarterly=False, add_data=True) #直接使用内置的函数,添加新因子,可能之前已经添加过了 dv.get_ts("momentum").head() print(dv.fields) #查看dv中取得的数据 import numpy as np #定义过滤条件 def mask_index_member(): df_index_member = dv.get_ts('index_member') mask_index_member = ~(df_index_member > 0) #定义信号过滤条件-非指数成分 return mask_index_member def limit_up_down(): # 定义可买卖条件——未停牌、未涨跌停 trade_status = dv.get_ts('trade_status') mask_sus = trade_status == u'停牌' # 涨停 dv.add_formula('up_limit',
trade_status = dv.get_ts('trade_status') mask_sus = trade_status != 1 # 涨停 dv.add_formula('up_limit', '(close - Delay(close, 1)) / Delay(close, 1) > 0.095', is_quarterly=False, add_data=True) # 跌停 dv.add_formula('down_limit', '(close - Delay(close, 1)) / Delay(close, 1) < -0.095', is_quarterly=False, add_data=True) can_enter = np.logical_and(dv.get_ts('up_limit') < 1, ~mask_sus) # 未涨停未停牌 can_exit = np.logical_and(dv.get_ts('down_limit') < 1, ~mask_sus) # 未跌停未停牌 return can_enter,can_exit mask = mask_index_member() can_enter,can_exit = limit_up_down() print(mask.shape) print(can_enter.shape) print(dv.get_ts('close').shape) from jaqs_fxdayu.research.signaldigger import multi_factor ic = dict() factors_dict = {signal:dv.get_ts(signal) for signal in FactorList} Period=[20] for period in Period: ic[period]=multi_factor.get_factors_ic_df(factors_dict, price=dv.get_ts("close_adj"), high=dv.get_ts("high_adj"), # 可为空 low=dv.get_ts("low_adj"),# 可为空 n_quantiles=5,# quantile分类数 mask=mask,# 过滤条件 can_enter=can_enter,# 是否能进场 can_exit=can_exit,# 是否能出场
FactorList = set({}) for i in range(df.loc[:, :, 'name'].T.values.shape[0]): FactorList |= set(df.loc[:, :, 'name'].T.values[i]) FactorList = list(FactorList) i = 0 for name in FactorList: dv.add_field(name) i = i + 1 print('导入进度%s' % (i / len(FactorList))) print('总体进度%s' % (i / (3 * len(FactorList)))) alpha_signal = list(set(dv.fields) & set(FactorList)) dv.add_field('sw1') sw1 = dv.get_ts('sw1') dict_classify = { '480000': '银行', '430000': '房地产', '460000': '休闲服务', '640000': '机械设备', '240000': '有色金属', '510000': '综合', '410000': '公用事业', '450000': '商业贸易', '730000': '通信', '330000': '家用电器', '720000': '传媒', '630000': '电气设备', '270000': '电子', '490000': '非银金融',
# Script fragment: load a saved dataview and pull the series used for the
# factor-performance analysis of the 'roe_pb' factor.
# NOTE(review): `jaqs_fxdayu` itself must already be imported before this point.
jaqs_fxdayu.patch_all()
from jaqs_fxdayu.data import DataView
from jaqs_fxdayu.data import RemoteDataService
import os
import numpy as np
import warnings

warnings.filterwarnings("ignore")
dv = DataView()
dataview_folder = 'G:/data/hs300'
dv.load_dataview(dataview_folder)

# 2. Factor performance -- the example uses 'roe_pb' as the factor.
# 'roe_pb' is a custom roe/pb factor; per the original note it must have been
# created and saved beforehand (the earlier "roe_pb" section).
factor = dv.get_ts('roe_pb')
print(factor.tail())

# Read the stored helper series.
mask = dv.get_ts('mask_index_member')  # index-membership filter (polarity not visible here -- TODO confirm)
can_enter = dv.get_ts('can_enter')  # whether a position can be opened
can_exit = dv.get_ts('can_exit')  # whether a position can be closed
price = dv.get_ts('close_adj')  # price series
group = dv.get_ts('group')  # classification info
print(can_enter.shape)
print(group.shape)

# Imports for the function definitions that follow this fragment.
import matplotlib.pyplot as plt
from jaqs.research import SignalDigger  # the SignalDigger shipped with jaqs.research
# Script fragment: build a DataView from a local data service, map the 'sw1'
# industry codes to their Chinese names, and add extra fields.
# Relies on names defined before this fragment: LocalDataService,
# dataview_folder, start, end, stock_symbol, check_factor.
dv = DataView()
ds = LocalDataService(fp=dataview_folder)
dv_props = {
    'start_date': start,
    'end_date': end,
    'symbol': ','.join(stock_symbol),
    'fields': check_factor,
    'freq': 1,
    "prepare_fields": True,
}
# Configure and prepare once; the original repeated this pair back to back,
# which only redid the same data preparation.
dv.init_from_config(dv_props, data_api=ds)
dv.prepare_data()

dv.add_field('sw1')
sw1 = dv.get_ts('sw1')
# 'sw1' industry code -> industry name.
dict_classify = {
    '480000': '银行',
    '430000': '房地产',
    '460000': '休闲服务',
    '640000': '机械设备',
    '240000': '有色金属',
    '510000': '综合',
    '410000': '公用事业',
    '450000': '商业贸易',
    '730000': '通信',
    '330000': '家用电器',
    '720000': '传媒',
    '630000': '电气设备',
    '270000': '电子',
    '490000': '非银金融',
    '370000': '医药生物',
    '710000': '计算机',
    '280000': '汽车',
    '340000': '食品饮料',
    '220000': '化工',
    '210000': '采掘',
    '230000': '钢铁',
    '650000': '国防军工',
    '110000': '农林牧渔',
    '420000': '交通运输',
    '620000': '建筑装饰',
    '350000': '纺织服装',
    '610000': '建筑材料',
    '360000': '轻工制造',
}
sw1_name = sw1.replace(dict_classify)
# Notebook-style preview; the result is discarded when run as a script.
sw1_name.tail()

# Extra fields fetched through the same data service, in the original order.
for _extra_field in ('close', 'high', 'low', 'turnover', 'turnover_ratio',
                     'price_div_dps', 'oper_rev', 'roa', 'total_share',
                     'pe_ttm'):
    dv.add_field(_extra_field, ds)
# Script fragment: load a saved dataview, blank out the rows selected by the
# trade_status mask, compute the CCI indicator and append it as field 'CCI'.
from jaqs_fxdayu.data import DataView
import warnings

warnings.filterwarnings("ignore")
dataview_folder = '../Factor'
dv = DataView()
dv.load_dataview(dataview_folder)

from jaqs_fxdayu.research.signaldigger import process

# Adjusted OHLC series.
Open = dv.get_ts("open_adj")
High = dv.get_ts("high_adj")
Low = dv.get_ts("low_adj")
Close = dv.get_ts("close_adj")

trade_status = dv.get_ts('trade_status')
# True where trade_status equals 0 (treated as "suspended" by the original note).
mask_sus = trade_status == 0

# Remove the suspension-period data before computing the indicator.
open_masked = process._mask_df(Open, mask=mask_sus)
high_masked = process._mask_df(High, mask=mask_sus)
low_masked = process._mask_df(Low, mask=mask_sus)
close_masked = process._mask_df(Close, mask=mask_sus)

from jaqs_fxdayu.data import signal_function_mod as sfm

# CCI from the masked series; no volume series is supplied.
cci = sfm.ta(ta_method='CCI',
             ta_column=0,
             Open=open_masked,
             High=high_masked,
             Low=low_masked,
             Close=close_masked,
             Volume=None)
dv.append_df(cci, 'CCI')
'freq': 1, 'timeout': 180 } dv.init_from_config(props, ds) dv.prepare_data() dv.save_dataview(dataview_folder) # 保存数据文件到指定路径,方便下次直接加载 save_dataview() # 加载数据 dv = DataView() dv.load_dataview(dataview_folder) factor = dv.get_ts("ps") factor.index = pd.Index( map(lambda x: datetime.strptime(str(x), "%Y%m%d"), factor.index)) #索引调整为datetime日期格式 factor = factor.stack() #处理成MultiIndex格式(alphalens分析因子必须的格式) print(factor.head()) def change_index(df): df.index = pd.Index( map(lambda x: datetime.strptime(str(x), "%Y%m%d"), df.index)) #索引调整为datetime日期格式 return df
'timeout': 180 } dv.init_from_config(props, ds) dv.prepare_data() dv.save_dataview(dataview_folder) # 保存数据文件到指定路径,方便下次直接加载 save_dataview() # 加载数据 dv = DataView() dv.load_dataview(dataview_folder) print(dv.get_ts("pe").head()) import numpy as np #定义信号过滤条件-非指数成分 def mask_index_member(): df_index_member = dv.get_ts('index_member') mask_index_member = df_index_member == 0 return mask_index_member # 定义可买卖条件——未停牌、未涨跌停 def limit_up_down(): trade_status = dv.get_ts('trade_status') mask_sus = trade_status == 0
import numpy as np import warnings warnings.filterwarnings("ignore") dataview_folder = 'G:/data/hs300_2' #档案地址 dv = DataView() dv.load_dataview(dataview_folder) #加载档案地址,结果出现Dataview loaded successfully则成功 print(dv.fields) #查看dv中取得的数据 #2_过滤停牌涨跌停(可买可卖) mask_index_member为要过滤的为True,can_enter与can_exit皆为可交易为True from jaqs_fxdayu.util import dp from jaqs.data.dataapi import DataApi A = dv.get_ts('index_member') #得到一张表,1表示在指数成分里,0表示不在指数成分里 B = dv.get_ts('trade_status') #得到一张表,从中可以得出股票能否交易(或停牌) def mask_index(): df_index_member = dv.get_ts('index_member') mask_index_member = df_index_member == 0 #定义信号过滤条件-非指数成分,若df_index_member==0则mask_index_member=true return mask_index_member def limit_up_down(): # 定义可买卖条件——未停牌、未涨跌停 trade_status = dv.get_ts('trade_status') mask_sus = trade_status == u'停牌' # 涨停: dv.remove_field('up_limit')
:param high: dataFrame (N) 最高价 用于计算上行收益空间 :param low: dataFrame (N) 最低价 用于计算下行收益空间 :param benchmark_price: dataFrame (N) 基准价格 若不为空收益计算模式为相对benchmark的收益 :param period: int (5) 选股持有期 :param n_quantiles: int (5) :param mask: 过滤条件 dataFrame (N) :param can_enter: dataFrame (N) 是否能进场 :param can_exit: dataFrame (N) 是否能出场 :param forward: bool(True) 是否forward return :param commission: float(0.0008) 手续费率 :param is_event: bool(False) 是否是事件(0/1因子) :param is_quarterly: bool(False) 是否是季度因子 ''' #step1:因子参数优化 price = dv.get_ts('close_adj') high = dv.get_ts('high_adj') low = dv.get_ts('low_adj') price_bench = dv.data_benchmark #指数收盘价 optimizer = Optimizer(dataview=dv, formula='- Correlation(vwap_adj, volume, LEN)', #formula:这里是需要优化的因子, #这里的定义是成交量加权的平均价格与成交量的相关关系的负值;LEN是参数,需要优化,表示时间长度, params={"LEN":range(2,15,1)}, #设置时间长度,这里认为这些时间长度都有可能,我们就是要找到哪些参数是优秀的 name='divert', #因子的名称 price=price, high=high, low=low, benchmark_price=None, #=None求绝对收益 #=price_bench求相对收益 period=30, n_quantiles=5, mask=mask,
dv.init_from_config(zz800_props, ds) dv.load_dataview(dataview_folder) import talib as ta import numpy as np import pandas as pd from datetime import datetime def change_time(df): df.index = pd.Index( map(lambda x: datetime.strptime(str(x), "%Y%m%d"), df.index)) return df EMA = dv.get_ts('close').ewm(span=5, adjust=False).mean() EMA2 = EMA.ewm(span=5, adjust=False).mean() EMA3 = EMA2.ewm(span=5, adjust=False).mean() dv.append_df(EMA3, 'EMA3') dv.add_formula('a', 'close/Delay(close,1)-1', is_quarterly=False, add_data=True) dv.add_formula('b', '(close/Delay(EMA3,19))^(1/20)-1', is_quarterly=False, add_data=True) Factor1 = dv.add_formula( 'Factor1', '-Log((Ts_Sum(If(a>b,1,0),20)-1)*Ts_Sum(If(a<b,(a-b)^2,0),20)/(Ts_Sum(If(a<b,1,0),20))*Ts_Sum(If(a>b,(a-b)^2,0),20))', is_quarterly=False,
# Script fragment: load a saved dataview and define the "Divert" factor as the
# 20-period correlation between volume and adjusted close.
from jaqs_fxdayu.data import DataView
import warnings

warnings.filterwarnings("ignore")
dataview_folder = '../Factor'
dv = DataView()
dv.load_dataview(dataview_folder)

# Preview only: the formula result is not stored (no add_data) and the
# .head() value is discarded unless this runs in an interactive cell.
dv.add_formula("Divert", "Corr(volume,close_adj,20)", is_quarterly=False).head()

# Store the result in the dataview so it can be read back repeatedly.
dv.add_formula("Divert",
               "Corr(volume,close_adj,20)",
               is_quarterly=False,
               add_data=True)
dv.get_ts("Divert").head()
print(group2_code.tail() ) #value="industry1_code"表示以industry2_name为分类标准,不过返回的code类型(类别代号) group3 = dp.daily_sec_industry(api, symbols, start, end, source='zz', value="industry1_name") print(group3.tail()) #source='zz'表示以中证为分类标准,只是分类标准不同而已 group3_code = dp.daily_sec_industry(api, symbols, start, end, source='zz', value="industry1_code") print(group3_code.tail() ) #source='zz'表示以中证为分类标准,只是分类标准不同而已,industry1_code返回的是类别代号(code类型) #3_添加数据保存 dv = DataView() dataview_folder = 'G:/data/hs300' #档案地址 dv.load_dataview(dataview_folder) #加载档案地址 dv.append_df(group, 'group') #将group列加入dv中,后面的'group'为列名 dv.save_dataview('G:/data/hs300') #保存 #获取数据 print(dv.get_ts('group').tail()) A = dv.get_ts('group') #这样看的更清楚点
'fields': "volume,pb,pe,ps,float_mv,sw1", 'freq': 1 } dv.init_from_config(props, ds) dv.prepare_data() dv.save_dataview(dataview_folder) # 保存数据文件到指定路径,方便下次直接加载 save_dataview() # 加载数据 dv = DataView() dv.load_dataview(dataview_folder) #加载数据 print(dv.fields) #查看dv中取得的数据 print(dv.get_ts("pb").head()) #查看dv中取得的市净率 import numpy as np #定义信号过滤条件-非指数成分 def mask_index_member(): df_index_member = dv.get_ts( 'index_member') #A=dv.get_ts('index_member'),1表示在沪深300中,0表示不在沪深300中 mask_index_member = df_index_member == 0 #过滤 return mask_index_member # 定义可买卖条件——未停牌、未涨跌停 def limit_up_down(): trade_status = dv.get_ts('trade_status')