def get_live(sec_id, block_size=400):
    """
    Load the 'LIVE' factor for the given securities through a FactorLoader.

    :param sec_id: list, Wind security codes; for the whole market, 'fulla' or 'ashare' may be passed
    :param block_size: number of securities fetched per internal call to the wsq interface, default 400
    :return: pd.DataFrame, index=sec id,
             header = [rt_open,rt_high,rt_low,rt_last,rt_vol,rt_amt,rt_vol_ratio,rt_pct_chg_5min]
    """
    # No date range is supplied for this factor: both dates are passed as None.
    live_loader = FactorLoader(
        start_date=None,
        end_date=None,
        factor_name='LIVE',
        sec_id=sec_id,
        block_size=block_size
    )
    return live_loader.load_data()
def factor_load(start_date, end_date, factor_name, save_file=None, **kwargs):
    """
    Load one factor, or a list of factors, and join the results column-wise.

    :param start_date: str, first date of the factor data to read
    :param end_date: str, last date of the factor data to read
    :param factor_name: str or list of str, factor name(s), case-insensitive.
           When a list is given, 'output_data_format': OutputFormat.MULTI_INDEX_DF is merged into kwargs
           before the loaders are created.
    :param save_file: str, optional, file name to save the data to, may be written as '*.csv' or '*.pkl'
    :param kwargs: dict, optional, forwarded unchanged to FactorLoader
           freq: str, optional, frequency of the factor data, one of 'M', 'W', 'Q', 'S', 'Y',
                 see enums.py - FreqType
           tenor: str, optional, period of the factor data; cross-sectional data (e.g. turnover, return)
                  needs a (backward-looking) data window, given as number + FreqType, e.g. '1Q'
           sec_id: str/list, optional, stock code(s) or index code(s)
           output_data_format: enum, optional, see enums.py (referred to as OutputFormat in this function)
                  MULTI_INDEX_DF: multi-index DataFrame, index=[date, secID], value = factor
                  PITVOT_TABLE_DF: DataFrame, index=date, columns = secID
           is_index: bool, optional,
                  True: the given sec_id is an index and the factor data of its constituents is read,
                  False: the factor data of sec_id itself is read
           date_format: str, optional, date format, default '%Y-%m-%d'
           reset_col_names: optional, when truthy the columns of the result are replaced by the
                  list of requested factor names
    :return: pd.DataFrame, the arranged factor data
    """
    requested_many = isinstance(factor_name, list)
    if requested_many:
        kwargs = merge(kwargs, {'output_data_format': OutputFormat.MULTI_INDEX_DF})
    names = factor_name if requested_many else [factor_name]

    combined = pd.DataFrame()
    for name in names:
        LOGGER.info('Loading factor data {0}'.format(name))
        loader = FactorLoader(start_date=start_date,
                              end_date=end_date,
                              factor_name=name,
                              **kwargs)
        loaded = loader.load_data()
        LOGGER.info('factor data {0} is loaded '.format(name))
        # Each factor is appended as additional column(s) next to what was loaded so far.
        combined = pd.concat([combined, loaded], axis=1)

    if kwargs.get('reset_col_names'):
        combined.columns = names

    if save_file:
        save_data_to_file(combined, save_file)
        LOGGER.critical('Data saved in {0}'.format(save_file))

    return combined
def test_query_data(self):
    # Checks the raw responses for the w.wsd, w.wss, w.wsi and w.wsq queries issued through
    # wind_data_provider.query_data, and for w.wset called directly, by wrapping each response
    # in WindData and comparing it with a fixed expectation.
    #
    # query_data(api, sec_id, indicator, extra_params=None, start_date=None, end_date=None)
    sec_id = ['000001.SZ', '000002.SZ']
    start_date = '2017-01-03'
    end_date = '2017-01-04'

    # w.wsd && factor_name = 'PB'
    api = 'w.wsd'
    indicator = 'pb_lf'
    raw_data = wind_data_provider.query_data(api, sec_id, indicator,
                                             start_date=start_date,
                                             end_date=end_date)
    calculated = WindData(data=raw_data.Data,
                          codes=raw_data.Codes,
                          error_code=raw_data.ErrorCode,
                          fields=raw_data.Fields,
                          times=raw_data.Times)
    expected = WindData(data=[[0.8822379112243652, 0.8822379112243652],
                              [2.279218912124634, 2.292412519454956]],
                        codes=['000001.SZ', '000002.SZ'],
                        error_code=0,
                        fields=['PB_LF'],
                        times=[date(2017, 1, 3), date(2017, 1, 4)])
    self.assertEqual(calculated, expected)

    # w.wss && factor_name = 'RETURN'
    api = 'w.wss'
    factor_name = 'RETURN'
    indicator = 'pct_chg_per'
    factor_loader = FactorLoader(start_date, end_date, factor_name,
                                 sec_id=sec_id, is_index=False, tenor='3M')
    # Only the extra parameters are needed here; the main parameters are discarded.
    _, extra_params = wind_query_helper.get_query_params(factor_name)
    extra_params[Header.TENOR.value] = (
        factor_loader._get_enum_value(factor_loader.tenor)
        if factor_loader.tenor is not None else None
    )
    merged_extra_params = factor_loader._merge_query_params(extra_params, start_date)
    # start_date is passed for both ends of the range; only the number of returned
    # times is compared below.
    raw_data = wind_data_provider.query_data(api, sec_id, indicator,
                                             extra_params=merged_extra_params,
                                             start_date=start_date,
                                             end_date=start_date)
    calculated = WindData(data=raw_data.Data,
                          codes=raw_data.Codes,
                          error_code=raw_data.ErrorCode,
                          fields=raw_data.Fields,
                          times=len(raw_data.Times))
    expected = WindData(data=[[0.43859649122806044, -20.177127454755485]],
                        codes=['000001.SZ', '000002.SZ'],
                        error_code=0,
                        fields=['PCT_CHG_PER'],
                        times=1)
    self.assertEqual(calculated, expected)

    # w.wsi && factor_name = 'OHLCV_MIN'
    api = 'w.wsi'
    indicator = 'open,high,low,close,volume'
    start_date = '2017-01-03 09:30:00'
    end_date = '2017-01-03 09:32:00'
    factor_name = 'OHLCV_MIN'
    factor_loader = FactorLoader(start_date, end_date, factor_name, sec_id=sec_id)
    _, extra_params = wind_query_helper.get_query_params(factor_name)
    merged_extra_params = factor_loader._merge_query_params(extra_params, start_date)
    raw_data = wind_data_provider.query_data(api, sec_id, indicator,
                                             extra_params=merged_extra_params,
                                             start_date=start_date,
                                             end_date=end_date)
    calculated = WindData(data=raw_data.Data,
                          codes=raw_data.Codes,
                          error_code=raw_data.ErrorCode,
                          fields=raw_data.Fields,
                          times=raw_data.Times)
    expected = WindData(
        data=[
            [
                datetime(2017, 1, 3, 9, 30),
                datetime(2017, 1, 3, 9, 31),
                datetime(2017, 1, 3, 9, 30),
                datetime(2017, 1, 3, 9, 31)
            ],
            ['000001.SZ', '000001.SZ', '000002.SZ', '000002.SZ'],
            [
                8.977128569075722, 8.967274421359942,
                19.87384420457807, 19.951211967904893
            ],
            [
                8.977128569075722, 8.977128569075722,
                19.970553908736598, 19.970553908736598
            ],
            [
                8.967274421359942, 8.967274421359942,
                19.87384420457807, 19.922199056657337
            ],
            [
                8.967274421359942, 8.967274421359942,
                19.951211967904893, 19.970553908736598
            ],
            # Plain int literals: the Python 2 'L' suffix is a SyntaxError on Python 3,
            # and int compares equal to long on Python 2.
            [673660, 433800, 119900, 146700]
        ],
        codes=['MultiCodes'],
        error_code=0,
        fields=['time', 'windcode', 'open', 'high', 'low', 'close', 'volume'],
        times=[
            datetime(2017, 1, 3, 9, 30),
            datetime(2017, 1, 3, 9, 31),
            datetime(2017, 1, 3, 9, 30),
            datetime(2017, 1, 3, 9, 31)
        ])
    self.assertEqual(calculated, expected)

    # # w.wsq && factor_name = 'LIVE'
    api = 'w.wsq'
    indicator = 'rt_open,rt_high,rt_low,rt_last,rt_vol,rt_amt,rt_vol_ratio,rt_pct_chg_5min'
    raw_data = wind_data_provider.query_data(api, sec_id, indicator)
    # Only the number of data rows and the number of times are compared for this query.
    calculated = WindData(data=len(raw_data.Data),
                          codes=raw_data.Codes,
                          error_code=raw_data.ErrorCode,
                          fields=raw_data.Fields,
                          times=len(raw_data.Times))
    expected = WindData(data=8,
                        codes=['000001.SZ', '000002.SZ'],
                        error_code=0,
                        fields=[
                            'RT_OPEN', 'RT_HIGH', 'RT_LOW', 'RT_LAST', 'RT_VOL',
                            'RT_AMT', 'RT_VOL_RATIO', 'RT_PCT_CHG_5MIN'
                        ],
                        times=1)
    self.assertEqual(calculated, expected)

    # # w.wset && factor_name = 'INDUSTRY_WEIGHT_C1'
    api = 'w.wset'
    index_id = '000300.SH'
    short_params = 'windcode=' + index_id
    # NOTE: start_date still holds the minute-level string '2017-01-03 09:30:00'
    # assigned in the w.wsi section above, so that is the value appended as ';date='.
    params = short_params if start_date is None else short_params + ';date=' + str(start_date)
    raw_data = w.wset('IndexConstituent', params)
    calculated = WindData(data=len(raw_data.Data),
                          codes=len(raw_data.Codes),
                          error_code=raw_data.ErrorCode,
                          fields=raw_data.Fields,
                          times=len(raw_data.Times))
    expected = WindData(data=4,
                        codes=300,
                        error_code=0,
                        fields=['date', 'wind_code', 'sec_name', 'i_weight'],
                        times=1)
    self.assertEqual(calculated, expected)

    # Same query without the ';date=' part.
    raw_data = w.wset('IndexConstituent', short_params)
    calculated = WindData(data=len(raw_data.Data),
                          codes=len(raw_data.Codes),
                          error_code=raw_data.ErrorCode,
                          fields=raw_data.Fields,
                          times=len(raw_data.Times))
    expected = WindData(data=4,
                        codes=300,
                        error_code=0,
                        fields=['date', 'wind_code', 'sec_name', 'i_weight'],
                        times=1)
    self.assertEqual(calculated, expected)