def analyze_event():
    """Run a binary-event study on index inclusion (weight turning positive)."""
    # Load the prepared dataview from disk.
    dv = DataView()
    dv.load_dataview(dataview_folder)

    # Combine the pre-computed masks of ill data points:
    # limit-up/down days, non-index members, and suspended stocks.
    limit_mask = dv.get_ts('mask_limit_reached')
    member_mask = dv.get_ts('mask_index_member')
    suspension_mask = dv.get_ts('mask_sus')
    combined_mask = np.logical_or(suspension_mask,
                                  np.logical_or(member_mask, limit_mask))

    # Event signal: index weight was 0 yesterday and is positive today.
    dv.add_formula('in_',
                   '(Delay(index_weight, 1) == 0) && (index_weight > 0)',
                   is_quarterly=False)
    # Shift one day so the signal only uses information known at the close.
    event_signal = dv.get_ts('in_').shift(1, axis=0)
    close_price = dv.get_ts('close_adj')
    benchmark = dv.data_benchmark

    # Emit the PDF event report over several holding periods.
    digger = SignalDigger(output_folder='../../output', output_format='pdf')
    digger.create_binary_event_report(event_signal, close_price, combined_mask,
                                      benchmark,
                                      periods=[20, 60, 121, 242],
                                      group_by=None)
def test_optimizer():
    """Grid-search the window length of a price/volume divergence factor."""
    from jaqs_fxdayu.research import Optimizer

    dv = DataView()
    dv.load_dataview(dataview_folder)

    member_mask = mask_index_member(dv)
    enter_ok, exit_ok = limit_up_down(dv)

    px_close = dv.get_ts('close_adj')
    px_high = dv.get_ts('high_adj')
    px_low = dv.get_ts('low_adj')
    bench = dv.data_benchmark

    opt = Optimizer(dataview=dv,
                    formula='- Correlation(vwap_adj, volume, LEN)',
                    params={"LEN": range(2, 4, 1)},
                    name='divert',
                    price=px_close,
                    high=px_high,
                    low=px_low,
                    benchmark_price=bench,  # None -> absolute return; benchmark -> relative return
                    period=30,
                    n_quantiles=5,
                    mask=member_mask,
                    can_enter=enter_ok,
                    can_exit=exit_ok,
                    commission=0.0008,      # commission rate, default 0.0008
                    is_event=False,         # is this a binary (0/1) event factor?
                    is_quarterly=False)     # is this a quarterly factor? default False

    # Enumerate every parameter combination and rank by annualized IR.
    ret_best = opt.enumerate_optimizer(
        target_type="top_quantile_ret",        # objective type
        target="Ann. IR",                      # objective metric
        in_sample_range=[20140101, 20160101],  # in-sample window; None = full sample
        ascending=False)                       # sort ascending (small to large)?
def analyze_event():
    """Event study: stocks whose index weight goes from zero to positive."""
    # Step 1: load the prepared dataview.
    dv = DataView()
    dv.load_dataview(dataview_folder)

    # Step 2: merge the stored masks (suspension / membership / limit moves)
    # into a single boolean filter of unusable data points.
    bad_points = np.logical_or(
        dv.get_ts('mask_sus'),
        np.logical_or(dv.get_ts('mask_index_member'),
                      dv.get_ts('mask_limit_reached')))

    # Step 3: prices, benchmark and the inclusion-event signal.
    px = dv.get_ts('close_adj')
    bench_px = dv.data_benchmark
    dv.add_formula('in_',
                   '(Delay(index_weight, 1) == 0) && (index_weight > 0)',
                   is_quarterly=False)
    sig = dv.get_ts('in_').shift(1, axis=0)  # lag one day to avoid look-ahead bias

    # Step 4: run the report.
    digger = SignalDigger(output_folder='../../output', output_format='pdf')
    digger.create_binary_event_report(sig, px, bad_points, bench_px,
                                      periods=[20, 60, 121, 242],
                                      group_by=None)
def load_data(symbol, start_date=20080101, end_date=20171231):
    """Load an OHLC frame for one security from the prepared dataview.

    Parameters
    ----------
    symbol : str
        Security code, e.g. ``'600030.SH'``.
    start_date, end_date : int, optional
        Date range as YYYYMMDD integers; the defaults preserve the range
        the function previously hard-coded.

    Returns
    -------
    pd.DataFrame
        Columns ``close``, ``open``, ``high``, ``low`` with all-NaN rows dropped.
    """
    dv = DataView()
    dv.load_dataview(folder_path=dataview_store_folder)

    df = pd.DataFrame()
    # One get_ts call per price field — same symbol and date range for each,
    # so loop instead of repeating the call four times.
    for field in ('close', 'open', 'high', 'low'):
        df[field] = dv.get_ts(field, symbol=symbol,
                              start_date=start_date,
                              end_date=end_date)[symbol]
    return df.dropna()
def analyze_event():
    """Single-stock signal study: overnight gap of Kweichow Moutai (600519.SH)."""
    # Step 1: load the prepared dataview.
    dv = DataView()
    dv.load_dataview(dataview_folder)

    # Step 3: price and signal for one target symbol.
    stock = '600519.SH'
    px = dv.get_ts('close_adj', symbol=stock)
    # Signal: today's open relative to yesterday's close (overnight gap).
    dv.add_formula('in_', 'open_adj / Delay(close_adj, 1)', is_quarterly=False)
    sig = dv.get_ts('in_', symbol=stock).shift(1, axis=0)  # lag to avoid look-ahead bias

    # Entry rules keyed on the signal's quantile bucket.
    entry_rules = {
        'cond1': {'column': 'quantile', 'filter': lambda x: x > 3, 'hold': 5},
        'cond2': {'column': 'quantile', 'filter': lambda x: x > 5, 'hold': 5},
        'cond3': {'column': 'quantile', 'filter': lambda x: x > 5, 'hold': 9},
    }

    # Step 4: run the single-signal report.
    digger = SignalDigger(output_folder='../../output', output_format='pdf')
    digger.create_single_signal_report(sig, px, [1, 5, 9, 21], 6,
                                       mask=None,
                                       buy_condition=entry_rules)
def analyze_signal():
    """Full report for a price/volume divergence factor on index members."""
    # Step 1: load the prepared dataview.
    dv = DataView()
    dv.load_dataview(dataview_folder)

    # Step 2: build the mask of unusable data points.
    # NOTE(review): comparing a trade_status frame against UTF-8 *bytes* only
    # matches if the stored values are bytes (Python-2 era data) — confirm.
    status = dv.get_ts('trade_status')
    suspended = status == u'停牌'.encode('utf-8')
    in_index = dv.get_ts('index_member')
    not_member = ~(in_index > 0)
    # Flag days whose open gapped ~10% from the previous close (limit move).
    dv.add_formula('limit_reached',
                   'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
                   is_quarterly=False)
    hit_limit = dv.get_ts('limit_reached') > 0
    bad_points = np.logical_or(suspended,
                               np.logical_or(not_member, hit_limit))

    # Step 3: the factor — negative rolling correlation of VWAP and volume.
    dv.add_formula('divert', '- Correlation(vwap_adj, volume, 10)',
                   is_quarterly=False)
    sig = dv.get_ts('divert').shift(1, axis=0)  # lag to avoid look-ahead bias
    px = dv.get_ts('close_adj')
    bench_px = dv.data_benchmark

    # Step 4: quantile analysis over a 5-day holding period.
    holding = 5
    digger = SignalDigger(output_folder='../../output/test_signal',
                          output_format='pdf')
    digger.process_signal_before_analysis(sig,
                                          price=px,
                                          mask=bad_points,
                                          n_quantiles=5,
                                          period=holding,
                                          benchmark_price=bench_px,
                                          )
    res = digger.create_full_report()
def test_q_add_formula():
    """add_formula should create derived fields that later formulas can reuse.

    Fix: the original assigned a local ``folder_path`` that was never used
    (the dataview is actually loaded from ``quarterly_path``) — dead and
    misleading, so it is removed.  The shape/security counts were computed
    but never checked; they are now sanity assertions.
    """
    dv = DataView()
    dv.load_dataview(folder_path=quarterly_path)

    # Sanity-check the loaded daily data before exercising formulas.
    nrows, ncols = dv.data_d.shape
    assert nrows > 0 and ncols > 0
    n_securities = len(dv.data_d.columns.levels[0])
    assert n_securities > 0

    # A formula mixing a fundamental field with a daily price field.
    formula = 'total_oper_rev / close'
    dv.add_formula('myvar1', formula, is_quarterly=False)
    df1 = dv.get_ts('myvar1')
    assert not df1.empty

    # A formula that references the field added above.
    formula2 = 'Delta(oper_exp * myvar1 - open, 3)'
    dv.add_formula('myvar2', formula2, is_quarterly=False)
    df2 = dv.get_ts('myvar2')
    assert not df2.empty
def test_q_add_formula():
    """add_formula should create derived fields that later formulas can reuse.

    Fix: the original assigned a local ``folder_path`` that was never used
    (the dataview is actually loaded from ``quarterly_path``) — dead and
    misleading, so it is removed.  The shape/security counts were computed
    but never checked; they are now sanity assertions.
    """
    dv = DataView()
    dv.load_dataview(folder_path=quarterly_path)

    # Sanity-check the loaded daily data before exercising formulas.
    nrows, ncols = dv.data_d.shape
    assert nrows > 0 and ncols > 0
    n_securities = len(dv.data_d.columns.levels[0])
    assert n_securities > 0

    # A formula mixing a fundamental field with a daily price field.
    formula = 'total_oper_rev / close'
    dv.add_formula('myvar1', formula, is_quarterly=False)
    df1 = dv.get_ts('myvar1')
    assert not df1.empty

    # A formula that references the field added above.
    formula2 = 'Delta(oper_exp * myvar1 - open, 3)'
    dv.add_formula('myvar2', formula2, is_quarterly=False)
    df2 = dv.get_ts('myvar2')
    assert not df2.empty
def save_dataview():
    """Download 2015-2017 CSI 300 data and persist a dataview with helper fields."""
    ds = RemoteDataService()
    ds.init_from_config(data_config)

    dv = DataView()
    dv.init_from_config({'start_date': 20150101,
                         'end_date': 20171001,
                         'universe': '000300.SH',
                         'fields': 'volume,turnover,float_mv,pb,total_mv',
                         'freq': 1},
                        ds)
    dv.prepare_data()

    # Pre-compute helper fields used downstream.
    suspended = dv.get_ts('trade_status') == '停牌'
    dv.append_df(suspended, 'suspended', is_quarterly=False)
    dv.add_formula('not_index_member', '!index_member', is_quarterly=False)
    # Open gapped ~10% from previous close -> limit-up/limit-down day.
    dv.add_formula('limit_reached',
                   'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
                   is_quarterly=False)

    dv.save_dataview(dataview_folder)
def save_dataview():
    """Download 2016-2017 CSI 300 data and persist a dataview with mask fields."""
    ds = RemoteDataService()
    ds.init_from_config(data_config)

    dv = DataView()
    settings = {'start_date': 20160101,
                'end_date': 20171001,
                'universe': '000300.SH',
                'fields': 'volume,turnover',
                'freq': 1}
    dv.init_from_config(settings, ds)
    dv.prepare_data()

    # Convenience fields for spotting limit-up/limit-down days.
    dv.add_formula('limit_reached',
                   'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
                   is_quarterly=False)
    dv.add_formula('mask_limit_reached', 'limit_reached > 0',
                   is_quarterly=False)
    # True where the stock is NOT an index member.
    dv.add_formula('mask_index_member', '!(index_member > 0)',
                   is_quarterly=False)

    # True where the stock is suspended.
    suspended = dv.get_ts('trade_status') == u'停牌'
    dv.append_df(suspended, 'mask_sus', is_quarterly=False)

    dv.save_dataview(dataview_folder)
def save_dataview():
    """Fetch CSI 300 daily data (2016-2017) and store a dataview plus masks."""
    ds = RemoteDataService()
    ds.init_from_config(data_config)

    dv = DataView()
    dv.init_from_config({'start_date': 20160101,
                         'end_date': 20171001,
                         'universe': '000300.SH',
                         'fields': 'volume,turnover',
                         'freq': 1},
                        ds)
    dv.prepare_data()

    # Helper masks: limit moves, non-members, suspensions.
    dv.add_formula('limit_reached',
                   'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
                   is_quarterly=False)
    dv.add_formula('mask_limit_reached', 'limit_reached > 0',
                   is_quarterly=False)
    dv.add_formula('mask_index_member', '!(index_member > 0)',
                   is_quarterly=False)
    status = dv.get_ts('trade_status')
    dv.append_df(status == u'停牌', 'mask_sus', is_quarterly=False)

    dv.save_dataview(dataview_folder)
def analyze_event():
    """Event study: first 300-day new high after a quiet period (PDF report)."""
    # --------------------------------------------------------------------
    # Step.1 load dataview
    dv = DataView()
    dv.load_dataview(dataview_folder)
    # --------------------------------------------------------------------
    # Step.2 calculate mask (to mask those ill data points)
    trade_status = dv.get_ts('trade_status')
    # NOTE(review): comparing against UTF-8 *bytes* only matches if the
    # stored trade_status values are bytes (Python-2 era data) — confirm.
    mask_sus = trade_status == u'停牌'.encode('utf-8')
    df_index_member = dv.get_ts('index_member')
    mask_index_member = ~(df_index_member > 0)
    # Open gapped ~10% from previous close -> limit-up/limit-down day.
    dv.add_formula('limit_reached',
                   'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
                   is_quarterly=False)
    df_limit_reached = dv.get_ts('limit_reached')
    mask_limit_reached = df_limit_reached > 0
    mask_all = np.logical_or(
        mask_sus, np.logical_or(mask_index_member, mask_limit_reached))
    # --------------------------------------------------------------------
    # Step.3 get signal, benchmark and price data
    # new_high: close at its 300-day maximum; sig fires only on the FIRST
    # new high after 300 days without one.
    dv.add_formula('new_high', 'close_adj >= Ts_Max(close_adj, 300)',
                   is_quarterly=False)
    dv.add_formula('new_high_delay', 'Delay(Ts_Max(new_high, 300), 1)',
                   is_quarterly=False)
    dv.add_formula('sig', 'new_high && (! new_high_delay)',
                   is_quarterly=False)
    # NOTE(review): shift(0) is a no-op, yet the comment below claims it
    # avoids look-ahead bias — shift(1) may have been intended; confirm.
    signal = dv.get_ts('sig').shift(0, axis=0)  # avoid look-ahead bias
    price = dv.get_ts('close_adj')
    price_bench = dv.data_benchmark
    # Step.4 analyze!
    obj = SignalDigger(output_folder=jutil.join_relative_path('../output'),
                       output_format='pdf')
    # NOTE(review): this call passes an extra positional argument (5)
    # compared with the other create_binary_event_report call sites in
    # this codebase — verify against the installed SignalDigger signature.
    obj.create_binary_event_report(signal, price, mask_all, 5, price_bench,
                                   periods=[5, 20, 40])
def test_analyze_signal():
    """Full jaqs_fxdayu signal analysis of a VWAP/volume divergence factor."""
    # Step 1: load the prepared dataview and the pre-built filters.
    dv = DataView()
    dv.load_dataview(dataview_folder)
    member_mask = mask_index_member(dv)
    enter_ok, exit_ok = limit_up_down(dv)

    # Step 3: factor, prices and benchmark.
    dv.add_formula('divert', '- Correlation(vwap_adj, volume, 10)',
                   is_quarterly=False, add_data=True)
    sig = dv.get_ts('divert')
    px = dv.get_ts('close_adj')
    bench_px = dv.data_benchmark

    # Step 4: preprocess then report.
    holding = 5
    digger = SignalDigger(output_folder='../output/test_signal',
                          output_format='pdf')
    digger.process_signal_before_analysis(
        signal=sig,
        price=px,
        high=dv.get_ts("high_adj"),   # optional
        low=dv.get_ts("low_adj"),     # optional
        group=dv.get_ts("sw1"),
        n_quantiles=5,                # number of quantile buckets
        mask=member_mask,             # filter condition
        can_enter=enter_ok,           # entry allowed?
        can_exit=exit_ok,             # exit allowed?
        period=holding,               # holding period
        benchmark_price=bench_px,     # omit for absolute holding-period return
        commission=0.0008,
    )

    processed = digger.signal_data
    result = analysis(processed, is_event=False, period=holding)

    # IC broken down by group, plus the grouped-IC plot.
    ic = pfm.calc_signal_ic(processed, by_group=True)
    grouped_mean_ic = pfm.mean_information_coefficient(ic, by_group=True)
    plotting.plot_ic_by_group(grouped_mean_ic)

    res = digger.create_full_report()
def test_DIY_signal():
    """Demonstrate two ways to add custom factors and define a crossover event.

    Fix: ``DataFrame.iteritems()`` was removed in pandas 2.0; ``items()``
    is the identical, long-supported replacement.
    """
    # Step.1 load dataview
    dv = DataView()
    dv.load_dataview(dataview_folder)

    # Method 1: add_formula — derive a factor from existing fields via an expression.
    dv.add_formula("momentum", "Return(close_adj, 20)",
                   is_quarterly=False, add_data=True)

    # Method 2: append_df — build a factor DataFrame and attach it directly.
    import pandas as pd
    import talib as ta
    close = dv.get_ts("close_adj").dropna(how='all', axis=1)
    # .items() replaces the removed DataFrame.iteritems() (pandas 2.0+).
    slope_df = pd.DataFrame(
        {
            sec_symbol: -ta.LINEARREG_SLOPE(value.values, 10)
            for sec_symbol, value in close.items()
        },
        index=close.index)
    dv.append_df(slope_df, 'slope')
    dv.get_ts("slope")

    # Define an event: MA5 crossing above MA10, computed on prices with
    # suspension periods masked out first.
    from jaqs_fxdayu.research.signaldigger import process
    Open = dv.get_ts("open_adj")
    High = dv.get_ts("high_adj")
    Low = dv.get_ts("low_adj")
    Close = dv.get_ts("close_adj")
    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status != 1  # mask suspended periods before computing indicators
    open_masked = process._mask_df(Open, mask=mask_sus)
    high_masked = process._mask_df(High, mask=mask_sus)
    low_masked = process._mask_df(Low, mask=mask_sus)
    close_masked = process._mask_df(Close, mask=mask_sus)

    from jaqs_fxdayu.data import signal_function_mod as sfm
    # NOTE(review): despite its name, MA5 is computed with timeperiod=10 —
    # confirm whether 5 was intended.
    MA5 = sfm.ta(ta_method='MA', ta_column=0,
                 Open=open_masked, High=high_masked,
                 Low=low_masked, Close=close_masked,
                 Volume=None, timeperiod=10)
    MA10 = sfm.ta('MA', Close=close_masked, timeperiod=10)
    dv.append_df(MA5, 'MA5')
    dv.append_df(MA10, 'MA10')
    dv.add_formula("Cross", "(MA5>=MA10)&&(Delay(MA5<MA10, 1))",
                   is_quarterly=False, add_data=True)
def test_analyze_signal():
    """Quantile analysis of the VWAP/volume divergence factor with full masks."""
    # Step 1: load the prepared dataview.
    dv = DataView()
    dv.load_dataview(dataview_folder)

    # Step 2: build the mask of unusable data points.
    status = dv.get_ts('trade_status')
    suspended = status == u'停牌'
    members = dv.get_ts('index_member')
    not_member = ~(members > 0)
    # ~10% overnight gap marks a limit-up/limit-down day.
    dv.add_formula('limit_reached',
                   'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
                   is_quarterly=False)
    hit_limit = dv.get_ts('limit_reached') > 0
    bad_points = np.logical_or(suspended,
                               np.logical_or(not_member, hit_limit))

    # Step 3: factor, prices and benchmark.
    dv.add_formula('divert', '- Correlation(vwap_adj, volume, 10)',
                   is_quarterly=False)
    sig = dv.get_ts('divert').shift(1, axis=0)  # lag to avoid look-ahead bias
    px = dv.get_ts('close_adj')
    bench_px = dv.data_benchmark

    # Step 4: preprocess and produce the full report.
    holding = 5
    digger = SignalDigger(output_folder='../output/test_signal',
                          output_format='pdf')
    digger.process_signal_before_analysis(sig,
                                          price=px,
                                          mask=bad_points,
                                          n_quantiles=5,
                                          period=holding,
                                          benchmark_price=bench_px,
                                          )
    res = digger.create_full_report()
def test_load():
    """Round-trip a saved dataview: metadata, snapshot and time-series access."""
    dv = DataView()
    dv.load_dataview(folder_path=daily_path)

    expected_symbols = set('000001.SZ,600030.SH,000063.SZ'.split(','))
    assert dv.start_date == 20160601 and set(dv.symbol) == expected_symbols

    # get_snapshot: one date, two symbols, two fields.
    snap = dv.get_snapshot(20170504, symbol='600030.SH,000063.SZ',
                           fields='close,pb')
    assert snap.shape == (2, 2)
    assert set(snap.columns.values) == {'close', 'pb'}
    assert set(snap.index.values) == {'600030.SH', '000063.SZ'}

    # get_ts: close prices over a date range for two symbols.
    ts = dv.get_ts('close', symbol='600030.SH,000063.SZ',
                   start_date=20170101, end_date=20170302)
    assert ts.shape == (38, 2)
    assert set(ts.columns.values) == {'600030.SH', '000063.SZ'}
    assert ts.index.values[-1] == 20170302
def save_dataview():
    """Build and persist a 2015-2017 CSI 300 dataview with derived fields."""
    ds = RemoteDataService()
    ds.init_from_config(data_config)

    settings = {'start_date': 20150101,
                'end_date': 20171001,
                'universe': '000300.SH',
                'fields': 'volume,turnover,float_mv,pb,total_mv',
                'freq': 1}
    dv = DataView()
    dv.init_from_config(settings, ds)
    dv.prepare_data()

    # Derived helper fields for later filtering.
    status = dv.get_ts('trade_status')
    dv.append_df(status == '停牌', 'suspended', is_quarterly=False)
    dv.add_formula('not_index_member', '!index_member', is_quarterly=False)
    dv.add_formula('limit_reached',
                   'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
                   is_quarterly=False)

    dv.save_dataview(dataview_folder)
def test_load():
    """Verify a persisted dataview loads with the expected contents."""
    dv = DataView()
    dv.load_dataview(folder_path=daily_path)

    # Metadata survived the round trip.
    assert dv.start_date == 20160601 and set(dv.symbol) == set(
        '000001.SZ,600030.SH,000063.SZ'.split(','))

    # Cross-sectional snapshot access.
    snapshot = dv.get_snapshot(20170504, symbol='600030.SH,000063.SZ',
                               fields='close,pb')
    assert snapshot.shape == (2, 2)
    assert set(snapshot.columns.values) == {'close', 'pb'}
    assert set(snapshot.index.values) == {'600030.SH', '000063.SZ'}

    # Time-series access.
    series = dv.get_ts('close', symbol='600030.SH,000063.SZ',
                       start_date=20170101, end_date=20170302)
    assert series.shape == (38, 2)
    assert set(series.columns.values) == {'600030.SH', '000063.SZ'}
    assert series.index.values[-1] == 20170302
# Chunk of a multi-factor research script: configuration, data-service setup
# and the (truncated) Shenwan level-1 industry-code mapping.
check_factor = ','.join(factor_list)
dataview_folder = '/Users/adam/Desktop/intern/test5/fxdayu_adam/data'
dataview_folder2 = 'muti_factor/'
dv = DataView()
#ds = LocalDataService(fp=dataview_folder)
# Remote data-service credentials (redacted).
data_config = {
    "remote.data.address": "tcp://data.tushare.org:8910",
    "remote.data.username": "******",
    "remote.data.password": "******"
}
ds = RemoteDataService()
ds.init_from_config(data_config)
# NOTE(review): get_ts is called before dv has been initialised with any
# data in this chunk — confirm dv is populated elsewhere first.
sw1 = dv.get_ts('sw1')
# Shenwan level-1 industry code -> Chinese industry name (dict continues
# beyond this chunk).
dict_classify = {
    '480000': '银行',
    '430000': '房地产',
    '460000': '休闲服务',
    '640000': '机械设备',
    '240000': '有色金属',
    '510000': '综合',
    '410000': '公用事业',
    '450000': '商业贸易',
    '730000': '通信',
    '330000': '家用电器',
    '720000': '传媒',
    '630000': '电气设备',
    '270000': '电子',
    '490000': '非银金融',
# Script chunk: compute 20-day ROCR100 momentum for a handful of symbols and
# plot it (the final plt.hlines call is truncated in this chunk).
warnings.filterwarnings("ignore")
dataview_folder = 'G:/data/hs300_2'
dv = DataView()
dv.load_dataview(dataview_folder)


# 1. compute and plot
# step1 Momentum: compute ROCR100 and plot it
def change_index(df):
    # Convert integer YYYYMMDD index values to datetime objects.
    df.index = pd.Index(
        map(lambda x: datetime.strptime(str(x), "%Y%m%d"), df.index))
    return df


data = change_index(dv.get_ts('close_adj').loc[20170105:])
# Note this pattern: a dict comprehension collects per-symbol series, and the
# last line turns the dict into a DataFrame.
symbol = ['000001.SZ', '600036.SH', '600050.SH', '000008.SZ', '000009.SZ']
# Each name becomes a dict key; data[name] is the matching price series.
price_dict = {name: data[name] for name in symbol}
# dropna(axis=0) drops empty rows; ta.ROCR100(values, 20) computes 20-day
# momentum, equivalent to e.g. ta.ROCR100(price_dict['000001.SZ'], 20).
data_mom = pd.DataFrame(
    {item: ta.ROCR100(value.values, 20) for item, value in price_dict.items()},
    index=data.index).dropna(axis=0)
fig = plt.figure(figsize=(15, 7))  # figure size
plt.plot(data_mom)
plt.hlines(100, data_mom.index[0],
# Script chunk: talib usage demo — plain ta.MA on a numpy array vs the
# abstract API on a DataFrame.  Ends inside an unclosed triple-quoted
# string that comments out further examples.
from jaqs.data import RemoteDataService
import os
import numpy as np
import warnings
warnings.filterwarnings("ignore")
dv = DataView()
dataview_folder = 'G:/data/hs300_2'
dv.load_dataview(dataview_folder)

# For Example
import talib as ta
from datetime import datetime
import talib.abstract as abstract

data = dv.get_ts('close_adj')
#data2 = dv.get_ts('close')
print (data.tail())
print (data['600036.SH'].values)
#X1=data['600036.SH']
print (type(data['600036.SH'].values))
#X2=data['600036.SH'].values
# plain API: operates on a 'numpy.ndarray'
A=ta.MA(data['600036.SH'].values, 2)
# abstract API: reads the DataFrame directly; by default it looks for a
# column named 'close', overridden here via price=.
B=ta.abstract.MA(data, 2, price='600036.SH').tail()
'''
data['SMA'] = ta.abstract.MA(data, 20, price='600036.SH') #普通均线与ta.abstract.MA一样
#data['SMA2'] = ta.abstract.SMA(data, 20, price='600036.SH') #与上面完全一样,普通均线
data['WMA'] = ta.abstract.WMA(data, 20, price='600036.SH') #权重均线(突出中间,用于周期分析)
data['TRIMA'] = ta.abstract.TRIMA(data, 20, price='600036.SH') #指数移动平均线
data['EMA'] = ta.abstract.EMA(data, 20, price='600036.SH') #指数移动平均线
# Script chunk (truncated at both ends): closes a config dict from a previous
# chunk, initialises the dataview from the remote service, then begins the
# Shenwan industry mapping (dict continues beyond this chunk).
}
ds = RemoteDataService()
ds.init_from_config(data_config)

# Dataview properties: date range, universe, factor fields, daily frequency,
# with the CSI 300 as benchmark.
dv_props = {
    'start_date': start,
    'end_date': end,
    'symbol': ','.join(stock_symbol),
    'fields': ','.join(factor_list),
    'freq': 1,
    "prepare_fields": True,
    "benchmark": '000300.SH'
}
dv.init_from_config(dv_props, data_api=ds)
dv.prepare_data()

sw1 = dv.get_ts('sw1')
# Shenwan level-1 industry code -> Chinese industry name (truncated here).
dict_classify = {
    '480000': '银行',
    '430000': '房地产',
    '460000': '休闲服务',
    '640000': '机械设备',
    '240000': '有色金属',
    '510000': '综合',
    '410000': '公用事业',
    '450000': '商业贸易',
    '730000': '通信',
    '330000': '家用电器',
    '720000': '传媒',
    '630000': '电气设备',
    '270000': '电子',
    '490000': '非银金融',
# Script chunk (truncated): dataview setup plus the start of the Shenwan
# industry mapping (dict continues beyond this chunk).
dv_props = {
    'start_date': start,
    'end_date': end,
    'symbol': ','.join(stock_symbol),
    'fields': check_factor,
    'freq': 1,
    "prepare_fields": True
}
dv.init_from_config(dv_props, data_api=ds)
dv.prepare_data()

# Fetch industry classification.
dv.add_field('sw1')
sw1 = dv.get_ts('sw1')
# Shenwan level-1 industry code -> Chinese industry name (truncated here).
dict_classify = {
    '480000': '银行',
    '430000': '房地产',
    '460000': '休闲服务',
    '640000': '机械设备',
    '240000': '有色金属',
    '510000': '综合',
    '410000': '公用事业',
    '450000': '商业贸易',
    '730000': '通信',
    '330000': '家用电器',
    '720000': '传媒',
    '630000': '电气设备',
    '270000': '电子',
    '490000': '非银金融',
# Script chunk: plot a +-3% moving-average percentage channel for 600036.SH.
import talib.abstract as abstract
warnings.filterwarnings("ignore")
dataview_folder = 'G:/data/hs300_2'
dv = DataView()
dv.load_dataview(dataview_folder)


def change_index(df):  # convert the integer date index to datetimes
    df.index = pd.Index(
        map(lambda x: datetime.strptime(str(x), "%Y%m%d"), df.index))
    return df


data = change_index(dv.get_ts(
    'close_adj').loc[20170105:])  #A=dv.get_ts('close_adj').loc[20170105:]

# example1: moving-average percentage channel
middleband = ta.abstract.MA(data, timeperiod=20, price='600036.SH')  # the MA itself
upperband = middleband * 1.03
lowerband = middleband * 0.97
# Merge the three bands into one DataFrame.
data_B = pd.concat([middleband, upperband, lowerband], axis=1)
data_B.columns = ['middleband', 'upperband', 'lowerband']

plt.figure(figsize=(15, 7))
plt.plot(data['600036.SH'])
plt.plot(data_B['middleband'], 'r', alpha=0.3)
plt.plot(data_B['upperband'], 'g', alpha=0.3)
plt.plot(data_B['lowerband'], 'g', alpha=0.3)
plt.show()
# Script chunk: compare several talib moving-average flavours on 600036.SH.
from datetime import datetime
import matplotlib.pyplot as plt
import warnings
import talib.abstract as abstract
warnings.filterwarnings("ignore")
dv = DataView()
dataview_folder = 'G:/data/hs300_2'
dv.load_dataview(dataview_folder)


def change_index(df):
    # Convert integer YYYYMMDD index values to datetime objects.
    df.index = pd.Index(map(lambda x: datetime.strptime(str(x),"%Y%m%d") , df.index))
    return df


# Read close prices.
data = change_index(dv.get_ts('close').loc[20170105:])
data['SMA'] = ta.abstract.MA(data, 20, price='600036.SH')
#data['SMA2'] = ta.abstract.SMA(data, 20, price='600036.SH')  # identical to the line above
data['WMA'] = ta.abstract.WMA(data, 20, price='600036.SH')
data['TRIMA'] = ta.abstract.TRIMA(data, 20, price='600036.SH')
data['EMA'] = ta.abstract.EMA(data, 20, price='600036.SH')
data['DEMA'] = ta.abstract.DEMA(data, 20, price='600036.SH')
data['KAMA'] = ta.abstract.KAMA(data, 20, price='600036.SH')

fig = plt.figure(figsize=(15, 7))
plt.plot(data['600036.SH'])
plt.plot(data['SMA'], alpha=0.5)    # simple moving average
plt.plot(data['WMA'], alpha=0.5)    # weighted MA (front-loaded)
plt.plot(data['TRIMA'], alpha=0.5)  # triangular MA (centre-weighted, cycle analysis)
# Script chunk: relative strength (RS) of 600036.SH vs the CSI 300 benchmark,
# plus its 20-day momentum and "momentum of momentum" (acceleration).
#
# Fix: the original assigned `Rs = ...` then referenced `RS` (uppercase) in
# `RS.dropna()` and `print(RS.tail())` — a NameError from a case mismatch.
# The name is now `Rs` throughout.
from datetime import datetime
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
dataview_folder = 'G:/data/hs300_2'
dv = DataView()
dv.load_dataview(dataview_folder)


def change_index(df):
    # Convert integer YYYYMMDD index values to datetime objects.
    df.index = pd.Index(map(lambda x: datetime.strptime(str(x),"%Y%m%d") , df.index))
    return df


# Compute and plot
stock = change_index(dv.get_ts('close_adj').loc[20170105:])
# dv.data_benchmark is the configured benchmark (presumably CSI 300 — confirm).
hs300 = change_index(dv.data_benchmark.loc[20170105:])
Rs = stock['600036.SH'] / hs300.close  # relative strength
Rs = Rs.dropna()                       # drop empty rows
print(Rs.tail())

# Momentum of RS
import talib as ta
MOM_Rs = ta.ROCR100(Rs.values, 20)  # 20-day rate of change of RS
MOM_Mom = ta.ROCR100(MOM_Rs, 20)    # rate of change of the rate of change — "acceleration"

data_s = stock['600036.SH']
data1 = pd.Series(MOM_Rs, index=Rs.index)  # re-attach the datetime index
data2 = pd.Series(MOM_Mom, index=Rs.index)
data = pd.concat([data_s, Rs, data1, data2], axis=1)
# Final result: close price, relative strength, its momentum, its acceleration.
data.columns = ['close', 'RS', 'MOM_RS', 'MOM_MOM']
print(data.tail())
def save_data_to_csv():
    """Export per-symbol OHLCV plus a battery of trendline indicators to CSV.

    Fix: the original repeated 27 near-identical ``_x = trendline.x(...)`` /
    ``join_frame`` pairs.  They are collapsed into a list and two loops; every
    indicator is still computed from the *raw* frame first (matching the
    original, where all indicators were computed before any join), then joined
    in the original order.
    """
    dv = DataView()
    dv.load_dataview(folder_path=dataview_store_folder)

    sh_000905 = get_index_info()
    for symbol in sh_000905['symbol']:
        # for symbol in ['600030.SH', '600104.SH']:
        print(symbol)
        ts_symbol = dv.get_ts(
            'open,close,high,low,volume,future_return_2,future_return_3,future_return_4,future_return_5',
            symbol=symbol, start_date=start_date, end_date=end_date)[symbol]

        # Clean up: fill gaps, drop zero-volume (non-trading) rows, and move
        # the integer date index into a proper datetime 'date' column.
        ts_symbol.fillna(0, inplace=True)
        ts_symbol = ts_symbol[(ts_symbol[['volume']] != 0).all(axis=1)]
        ts_symbol['date'] = ts_symbol.index
        ts_symbol['date'] = pd.to_datetime(ts_symbol['date'], format='%Y%m%d')
        ts_symbol = ts_symbol.reset_index(drop=True)

        # Indicators, in the original computation/join order.
        # (jdqs and jdrs were commented out in the original and stay excluded.)
        indicator_funcs = [
            trendline.kdj, trendline.macd, trendline.rsi, trendline.vrsi,
            trendline.boll, trendline.bbiboll, trendline.wr, trendline.bias,
            trendline.asi, trendline.vr_rate, trendline.vr, trendline.arbr,
            trendline.dpo, trendline.trix, trendline.bbi, trendline.mtm,
            trendline.obv, trendline.cci, trendline.priceosc, trendline.dbcd,
            trendline.roc, trendline.vroc, trendline.cr, trendline.psy,
            trendline.wad, trendline.mfi, trendline.vosc,
        ]
        # Compute everything against the pre-join frame first...
        results = [func(ts_symbol) for func in indicator_funcs]
        # ...then join the results one by one.
        for res in results:
            ts_symbol = trendline.join_frame(ts_symbol, res)

        save_csv(symbol, ts_symbol)
print(dv.fields) #查看dv中取得的数据 #1.数据预处理 def change_columns_index(signal): new_names = {} for c in signal.columns: #signal的每一列 if c.endswith('SZ'): new_names[c] = c.replace('SZ', 'XSHE') #将'SZ'转换为'XSHE'以满足需要的格式 elif c.endswith('SH'): new_names[c] = c.replace('SH', 'XSHG') #将'SH'转换为'XSHE' signal = signal.rename_axis(new_names, axis=1) signal.index = pd.Index(map(lambda x: datetime.strptime(str(x),"%Y%m%d") , signal.index)) signal.index = pd.Index(map(lambda x: x+timedelta(hours=15), signal.index)) return signal mask = dv.get_ts('mask_fundamental') #是否需要过滤该支股票:这里false表示不用过滤该股票,true表示需要过滤掉该股票??? group = change_columns_index(dv.get_ts('group')) ROE_Data = dv.get_ts('roe') ROE_Data = change_columns_index(dv.get_ts('roe').shift(1, axis=0)[mask==0]) #shift(1, axis=0)会自动提前1天,避免未来函数 prices = change_columns_index(dv.get_ts('close_adj')) def get_largest(df, n=20): #这个方法把ROE最大的20只股票赋值为1,每天都取ROE最大的20支股票 !!??? largest_list = [] for time_index, value in df.iterrows(): largest_list.append(dict.fromkeys(value.nlargest(n).index,1)) largest_df = pd.DataFrame(largest_list, index = df.index) return largest_df stock_df = get_largest(ROE_Data).dropna(how='all', axis=1) stock_df.to_excel('roe_backtest.xlsx') #保存至当前目录下
# Script chunk: helpers for a market-breadth calculation, then the start of a
# pd.Panel of masked OHLC data (the Panel literal is truncated in this chunk).
def change_index(df):
    # Convert integer YYYYMMDD index values to datetime objects.
    df.index = pd.Index(
        map(lambda x: datetime.strptime(str(x), "%Y%m%d"), df.index))
    return df


def formula(positive, negative, total):  # market-breadth calculation
    return (positive - negative) / total


def sumrows(frame):
    return frame.sum(axis=1)  # sum across each row


# 2. calculations — note the direct Panel-construction pattern below.
mask = dv.get_ts('index_member')  # which stocks are index members
A = dv.get_ts('close_adj').loc[20150105:][mask == 1]  # inspect the effect
# Note: an adjusted price of 0 indicates a suspension.
B = dv.get_ts('close_adj').loc[20150105:][mask == 1].dropna(
    how='all', axis=1)
PN = pd.Panel({
    'high':
    change_index(
        dv.get_ts('high_adj').loc[20150105:][mask == 1].dropna(how='all',
                                                               axis=1)),
    'low':
    change_index(
        dv.get_ts('low_adj').loc[20150105:][mask == 1].dropna(how='all',
                                                              axis=1)),
    'close':
    change_index(
        dv.get_ts('close_adj').loc[20150105:][mask == 1].dropna(how='all',
# Script chunk: price panel plus 10/20-day average-volume (ADV) overlays.
warnings.filterwarnings("ignore")
dataview_folder = 'G:/data/hs300_2'
dv = DataView()
dv.load_dataview(dataview_folder)


def change_index(df):
    # Convert integer YYYYMMDD index values to datetime objects.
    df.index = pd.Index(
        map(lambda x: datetime.strptime(str(x), "%Y%m%d"), df.index))
    return df


# 2. compute and plot
# step1: average daily volume (ADV)
close = change_index(dv.get_ts('close_adj').loc[20170105:])  # adjusted close
volume = change_index(dv.get_ts('volume').loc[20170105:])    # volume
# Note this abstract-API pattern: MA over the named column.
adv10 = ta.abstract.MA(volume, 10, price='600036.SH')
adv20 = ta.abstract.MA(volume, 20, price='600036.SH')

# Price on top, volume bars plus the two volume MAs below.
fig, (ax, ax1) = plt.subplots(2, 1, sharex=True, figsize=(15, 7))
ax.plot(close['600036.SH'], label='600036')
ax.legend(loc='upper left')  # legend placement; 'upper right' also works
ax1.bar(volume.index, volume['600036.SH'], color='g')
ax1.plot(adv10, label='Volume_MA10')
ax1.plot(adv20, label='Volume_MA20')
plt.legend(loc='upper left')
plt.show()

# step2: OBV & A/D computation and plots
# Script chunk: DataView basics — snapshot, time-series and formula access
# (ends inside a truncated remote-data config dict).
import warnings
warnings.filterwarnings("ignore")
dv = DataView()
dataview_folder = 'G:/data/hs300'  # data location
dv.load_dataview(dataview_folder)  # load the saved dataview

# 2_ read data indexed by symbol: get_snapshot
print(dv.get_snapshot(20170504, symbol='600036.SH,600050.SH', fields=''))
print(
    dv.get_snapshot(20170504, symbol='600036.SH,600050.SH',
                    fields='close_adj'))
# 20170504 is the date; symbol lists the stocks (more can be added);
# fields='' returns every available field.

# 3_ read time-series data: get_ts
# Returns a DataFrame covering the whole CSI 300; 'pb' is price-to-book.
data1 = dv.get_ts('pb')
print(dv.get_ts('pb').head())

# 4_ add a custom derived field: add_formula
# 'roe_pb' is the new field name, 'roe/pb' the expression,
# is_quarterly=False means it is not quarterly data.
roe_pb = dv.add_formula('roe_pb', 'roe/pb', is_quarterly=False,
                        add_data=True)
print(dv.get_ts('roe_pb').head())  # the new field is fetched via get_ts as usual

# 5_ pull fresh data from the data service into the local store
# First set up the config.
data_config = {
    # Fixed service address — do not change.
    "remote.data.address": "tcp://data.tushare.org:8910",
    "remote.data.username": "******",  # quantos account (phone number)
    # quantos account API token
    # Script chunk (truncated at both ends): tail of a dataview properties
    # dict, data preparation, then the start of the Shenwan industry mapping.
    'symbol': ','.join(stock_symbol),
    'fields': check_factor,
    'freq': 1,
    "prepare_fields": True
}
dv.init_from_config(dv_props, data_api=ds)

# In[10]:

dv.prepare_data()

# In[11]:

# Fetch the Shenwan level-1 industry classification.
dv.add_field('sw1')
sw1 = dv.get_ts('sw1')
# Industry code -> Chinese industry name (dict continues beyond this chunk).
dict_classify = {
    '480000': '银行',
    '430000': '房地产',
    '460000': '休闲服务',
    '640000': '机械设备',
    '240000': '有色金属',
    '510000': '综合',
    '410000': '公用事业',
    '450000': '商业贸易',
    '730000': '通信',
    '330000': '家用电器',
    '720000': '传媒',
    '630000': '电气设备',
    '270000': '电子',
    '490000': '非银金融',
# Script chunk: rename dataview columns to the XSHE/XSHG convention and shift
# the index to the 15:00 close.
dataview_folder = 'G:/data/hs300'
dv = DataView()
dv.load_dataview(dataview_folder)

# 2_ adjust the index and the column names
from datetime import timedelta


def change_columns_index(signal):
    # Rename columns:
    new_names = {}
    for c in signal.columns:
        if c.endswith('SZ'):  # Shenzhen-listed stock
            new_names[c] = c.replace('SZ', 'XSHE')  # SZ -> XSHE
        elif c.endswith('SH'):  # Shanghai-listed stock
            new_names[c] = c.replace('SH', 'XSHG')  # SH -> XSHG
    signal = signal.rename_axis(new_names, axis=1)
    # Adjust the time index:
    # convert integer dates to datetimes via datetime.strptime:
    signal.index = pd.Index(
        map(lambda x: datetime.strptime(str(x), "%Y%m%d"), signal.index))
    # then add 15 hours: (x+timedelta(hours=15)):
    signal.index = pd.Index(
        map(lambda x: x + timedelta(hours=15), signal.index))
    return signal


factor = change_columns_index(dv.get_ts('roe_pb_Q5'))  # transform roe_pb_Q5
print(factor.tail())
def test_multi_factor():
    """Multi-factor workflow: per-factor IC, preprocessing, Gram-Schmidt
    orthogonalization and weighted factor combination (jaqs_fxdayu)."""
    from jaqs_fxdayu.research.signaldigger import multi_factor, process

    dv = DataView()
    dv.load_dataview(dataview_folder)
    # 20-day momentum factor computed on adjusted close.
    dv.add_formula("momentum",
                   "Return(close_adj, 20)",
                   is_quarterly=False,
                   add_data=True)
    mask = mask_index_member(dv)
    can_enter, can_exit = limit_up_down(dv)

    # --- IC of each raw factor over several holding periods ---
    ic = dict()
    factors_dict = {
        signal: dv.get_ts(signal)
        for signal in ["pb", "pe", "ps", "momentum"]
    }
    for period in [5, 15]:
        ic[period] = multi_factor.get_factors_ic_df(
            factors_dict,
            price=dv.get_ts("close_adj"),
            high=dv.get_ts("high_adj"),  # optional
            low=dv.get_ts("low_adj"),  # optional
            n_quantiles=5,  # number of quantile buckets
            mask=mask,  # filter condition (masked points are dropped)
            can_enter=can_enter,  # whether entry is possible
            can_exit=can_exit,  # whether exit is possible
            period=period,  # holding period
            benchmark_price=dv.
            data_benchmark,  # benchmark price; if omitted, returns are absolute
            commission=0.0008,
        )

    # --- factor preprocessing ---
    factor_dict = dict()
    index_member = dv.get_ts("index_member")
    for name in ["pb", "pe", "ps", "momentum"]:
        signal = -1 * dv.get_ts(name)  # flip the sign of the factor
        process.winsorize(factor_df=signal,
                          alpha=0.05,
                          index_member=index_member)  # clip extreme values
        signal = process.rank_standardize(
            signal, index_member)  # cross-sectional rank scaled to 0-1 (keeps ordering only)
        signal = process.standardize(
            signal, index_member)  # z-score: keeps ordering and distribution info
        # industry and float-market-cap neutralization
        signal = process.neutralize(
            signal,
            group=dv.get_ts("sw1"),
            float_mv=dv.get_ts("float_mv"),
            index_member=index_member,  # restrict processing to index constituents
        )
        factor_dict[name] = signal

    # When factors are strongly correlated, apply Gram-Schmidt
    # orthogonalization and use the residuals as the new factors.
    new_factors = multi_factor.orthogonalize(
        factors_dict=factor_dict,
        standardize_type="rank",  # input standardization: "rank" or "z_score"
        winsorization=False,  # whether to winsorize the inputs first
        index_member=index_member)  # restrict processing to index constituents

    # --- dynamic weighting configuration for the factor combination ---
    props = {
        'price': dv.get_ts("close_adj"),
        'high': dv.get_ts("high_adj"),  # optional
        'low': dv.get_ts("low_adj"),  # optional
        'ret_type': 'return',  # alternatives: upside_ret / downside_ret (optimize potential up/down room)
        'benchmark_price': dv.data_benchmark,  # None -> absolute returns; set -> relative returns
        'period': 30,  # 30-day holding period
        'mask': mask,
        'can_enter': can_enter,
        'can_exit': can_exit,
        'forward': True,
        'commission': 0.0008,
        "covariance_type": "shrink",  # covariance estimator; "simple" also supported
        "rollback_period": 120  # rolling-window length in days
    }
    comb_factors = dict()
    for method in [
            "equal_weight", "ic_weight", "ir_weight", "max_IR", "max_IC",
            "factors_ret_weight"
    ]:
        comb_factors[method] = multi_factor.combine_factors(
            factor_dict,
            standardize_type="rank",
            winsorization=False,
            weighted_method=method,
            props=props)
# 'universe': UNIVERSE, # Investment universe and performance benchmark # 'benchmark': '000300.SH', 'fields': 'open,high,low,close,volume', # Data fields that we need 'freq': 1 # freq = 1 means we use daily data. Please do not change this. } # RemoteDataService communicates with a remote server to fetch data ds = RemoteDataService() # Use username and password in data_config to login ds.init_from_config(data_config) # DataView utilizes RemoteDataService to get various data and store them dv = DataView() dv.init_from_config(dataview_props, ds) dv.prepare_data() print(dv.prepare_data()) dv.save_dataview(folder_path=dataview_store_folder) # save_data() dv = DataView() dv.load_dataview('/dataview') df = dv.get_ts('open,close,high,low', symbol='000300.SH', start_date=20170101, end_date=20170302) df.columns = ['open', 'close', 'high', 'low'] print(df)
# 跌停 down_limit = dv.add_formula( 'down_limit', '(close - Delay(close, 1)) / Delay(close, 1) < -0.095', is_quarterly=False) can_enter = np.logical_and(up_limit < 1, ~mask_sus) # 未涨停未停牌 can_exit = np.logical_and(down_limit < 1, ~mask_sus) # 未跌停未停牌 return can_enter, can_exit id_member = pd.concat([id_zz500[columns_500], id_hs300], axis=1) mask = ~id_member can_enter, can_exit = limit_up_down() alpha_signal = factor_lis price = dv.get_ts('close_adj') sw1 = dv.get_ts('sw1') dict_classify = { '480000': '银行', '430000': '房地产', '460000': '休闲服务', '640000': '机械设备', '240000': '有色金属', '510000': '综合', '410000': '公用事业', '450000': '商业贸易', '730000': '通信', '330000': '家用电器', '720000': '传媒', '630000': '电气设备', '270000': '电子',