def _load_industry_weight(self):
    """Sum index constituent weights per industry class for each business day.

    For every date returned by ``WIND_DATA_PROVIDER.biz_days_list`` the
    constituents and weights of ``self.sec_id`` are fetched with
    ``get_universe(..., output_weight=True)``, the class code of each
    constituent is queried through ``w.wsd`` with indicator
    ``'indexcode_sw'``, and the weights are summed per class code.

    Returns:
        A DataFrame holding one row per date (labelled with the date
        string) and one column per class code.  Each per-date row is passed
        through ``WIND_QUERY_HELPER.convert_2_multi_index`` first when
        ``self.output_data_format`` is ``OutputFormat.MULTI_INDEX_DF``.
        An empty DataFrame is returned when there are no business days.
    """
    dates = WIND_DATA_PROVIDER.biz_days_list(start_date=self.start_date,
                                             end_date=self.end_date,
                                             freq=self.freq)
    extra_params = self._check_industry_params(self.factor_name)

    frames = []
    for biz_date in dates:
        date_str = date_convert_2_str(biz_date)
        index_info = WIND_DATA_PROVIDER.get_universe(self.sec_id,
                                                     date=date_str,
                                                     output_weight=True)
        class_info = WIND_DATA_PROVIDER.query_data(
            api='w.wsd',
            sec_id=index_info.index.tolist(),
            indicator='indexcode_sw',
            extra_params=extra_params,
            start_date=date_str,
            end_date=date_str)
        industry_weight = pd.DataFrame(
            data={
                'class_id': class_info.Data[0],
                'sec_weight': index_info['weight'],
            },
            index=index_info.index)

        # Aggregate the weight column only.  Summing every column would also
        # "sum" security-id strings on pandas versions that concatenate them,
        # yielding two rows after the transpose and breaking the single-label
        # index assignment below.
        tmp = industry_weight.groupby('class_id')[['sec_weight']].sum().T
        tmp.index = [date_str]
        if self.output_data_format == OutputFormat.MULTI_INDEX_DF:
            tmp = WIND_QUERY_HELPER.convert_2_multi_index(tmp)
        frames.append(tmp)

    # DataFrame.append was removed in pandas 2.0; a single concat is also
    # linear instead of re-copying the accumulated frame on every iteration.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
def _retrieve_data(self, main_params, extra_params, output_data_format):
    """Query the Wind API selected by ``main_params[Header.API]``.

    * ``'w.wsi'``: a single request covering ``self.start_date`` to
      ``self.end_date``; the reformatted result is returned directly.
    * ``'w.wsd'`` / ``'w.wss'``: one request per business day from
      ``biz_days_list``; the reformatted results are stacked row-wise.

    Args:
        main_params: mapping providing ``Header.API`` and ``Header.INDICATOR``.
        extra_params: mapping providing ``Header.MULTIFACTORS``,
            ``Header.REPORTADJ`` and (for ``'w.wss'``) ``Header.TENOR``.
        output_data_format: forwarded to ``reformat_wind_data`` for the
            per-day queries.

    Returns:
        The reformatted data; an empty DataFrame when no business day is
        returned for the per-day queries.

    Raises:
        ValueError: for an unsupported api value (raised inside the per-day
            loop), or through ``py_assert`` when the tenor is null for
            ``'w.wss'``.
    """
    result = pd.DataFrame()
    api = main_params[Header.API]

    if api == 'w.wsi':
        query_params = self._merge_query_params(extra_params,
                                                date=self.end_date)
        raw = WIND_DATA_PROVIDER.query_data(
            api=api,
            sec_id=self.sec_id,
            indicator=main_params[Header.INDICATOR],
            extra_params=query_params,
            start_date=self.start_date,
            end_date=self.end_date)
        is_multi = bool(extra_params[Header.MULTIFACTORS] == 'Y')
        return WIND_QUERY_HELPER.reformat_wind_data(raw_data=raw,
                                                    date=self.end_date,
                                                    multi_factors=is_multi)

    trading_days = WIND_DATA_PROVIDER.biz_days_list(
        start_date=self.start_date,
        end_date=self.end_date,
        freq=self.freq)
    for fetch_date in trading_days:
        # When a report adjustment is configured, query on the date given by
        # latest_report_date instead of the business day itself.
        if pd.isnull(extra_params[Header.REPORTADJ]):
            query_date = fetch_date
        else:
            query_date = WIND_QUERY_HELPER.latest_report_date(fetch_date)
        query_date = date_convert_2_str(query_date)

        if self.is_index:
            universe = WIND_DATA_PROVIDER.get_universe(self.sec_id,
                                                       date=query_date)
        else:
            universe = self.sec_id

        # Only 'w.wsd' receives an explicit start/end date.
        if api == 'w.wsd':
            date_window = {'start_date': query_date, 'end_date': query_date}
        elif api == 'w.wss':
            py_assert(not pd.isnull(extra_params[Header.TENOR]), ValueError,
                      'tenor must be given for query factor {0}'.format(self.factor_name))
            date_window = {}
        else:
            raise ValueError('FactorLoader._retrieve_data: unacceptable value of parameter api')

        query_params = self._merge_query_params(extra_params, date=query_date)
        raw = WIND_DATA_PROVIDER.query_data(
            api=api,
            sec_id=universe,
            indicator=main_params[Header.INDICATOR],
            extra_params=query_params,
            **date_window)

        is_multi = bool(extra_params[Header.MULTIFACTORS] == 'Y')
        # Rows are labelled with the business day, not the adjusted query date.
        frame = WIND_QUERY_HELPER.reformat_wind_data(
            raw_data=raw,
            date=fetch_date,
            output_data_format=output_data_format,
            multi_factors=is_multi)
        result = pd.concat([result, frame], axis=0)

    return result
def _retrieve_data(self, main_params, extra_params, output_data_format):
    """Query the Wind API selected by ``main_params[Header.API]``.

    * ``'w.wsq'``: ``self.sec_id`` is queried in slices of
      ``self.block_size``; the result has a ``'date'`` column followed by one
      column per requested indicator, named by the indicator with its first
      three characters dropped (presumably a ``rt_`` prefix -- confirm).
    * ``'w.wsi'``: a single request covering ``self.start_date`` to
      ``self.end_date``.
    * ``'w.wsd'`` / ``'w.wss'``: one request per business day from
      ``biz_days_list``; the reformatted results are stacked row-wise.

    Args:
        main_params: mapping providing ``Header.API`` and ``Header.INDICATOR``.
        extra_params: mapping providing ``Header.MULTIFACTORS`` and
            ``Header.REPORTADJ`` (not read for ``'w.wsq'``).
        output_data_format: forwarded to ``reformat_wind_data`` for the
            per-day queries.

    Returns:
        The collected data as a DataFrame (empty when there is nothing to
        query).

    Raises:
        ValueError: for an unsupported api value (raised inside the per-day
            loop).
    """
    api = main_params[Header.API]

    if api == 'w.wsq':
        indicator = main_params[Header.INDICATOR]
        columns = ['date'] + [field[3:] for field in indicator.split(',')]

        chunks = []
        # Step through the codes block by block.  The previous
        # ``int(len / block_size) + 1`` count issued one extra query with an
        # empty code list whenever the length was an exact multiple of
        # block_size (and for an empty list).
        for begin in range(0, len(self.sec_id), self.block_size):
            code_set = self.sec_id[begin:begin + self.block_size]
            raw_data = WIND_DATA_PROVIDER.query_data(api=api,
                                                     sec_id=code_set,
                                                     indicator=indicator)
            length = len(raw_data.Data[0])
            # Repeat the timestamp(s) so they line up with the data rows,
            # then transpose to one row per security.
            chunks.append(
                pd.DataFrame(
                    np.concatenate(
                        ([raw_data.Times * length], raw_data.Data))).T)

        if not chunks:
            return pd.DataFrame(columns=columns)
        output_data = pd.concat(chunks, axis=0)
        output_data.columns = columns
        return output_data

    if api == 'w.wsi':
        merged_extra_params = self._merge_query_params(extra_params,
                                                       date=self.end_date)
        raw_data = WIND_DATA_PROVIDER.query_data(
            api=api,
            sec_id=self.sec_id,
            indicator=main_params[Header.INDICATOR],
            extra_params=merged_extra_params,
            start_date=self.start_date,
            end_date=self.end_date)
        multi_factors = extra_params[Header.MULTIFACTORS] == 'Y'
        return WIND_QUERY_HELPER.reformat_wind_data(
            raw_data=raw_data,
            date=self.end_date,
            multi_factors=multi_factors)

    dates = WIND_DATA_PROVIDER.biz_days_list(start_date=self.start_date,
                                             end_date=self.end_date,
                                             freq=self.freq)
    frames = []
    for fetch_date in dates:
        # When a report adjustment is configured, query on the date given by
        # latest_report_date instead of the business day itself.
        if pd.isnull(extra_params[Header.REPORTADJ]):
            date = fetch_date
        else:
            date = WIND_QUERY_HELPER.latest_report_date(fetch_date)
        date = date_convert_2_str(date)

        if self.is_index:
            sec_id = WIND_DATA_PROVIDER.get_universe(self.sec_id, date=date)
        else:
            sec_id = self.sec_id

        if api == 'w.wsd':
            merged_extra_params = self._merge_query_params(extra_params,
                                                           date=date)
            raw_data = WIND_DATA_PROVIDER.query_data(
                api=api,
                sec_id=sec_id,
                indicator=main_params[Header.INDICATOR],
                extra_params=merged_extra_params,
                start_date=date,
                end_date=date)
        elif api == 'w.wss':
            # NOTE: no tenor-presence check is performed here; the check was
            # already disabled (commented out) in this revision.
            merged_extra_params = self._merge_query_params(extra_params,
                                                           date=date)
            raw_data = WIND_DATA_PROVIDER.query_data(
                api=api,
                sec_id=sec_id,
                indicator=main_params[Header.INDICATOR],
                extra_params=merged_extra_params)
        else:
            raise ValueError(
                'FactorLoader._retrieve_data: unacceptable value of parameter api'
            )

        multi_factors = extra_params[Header.MULTIFACTORS] == 'Y'
        # Rows are labelled with the business day, not the adjusted query date.
        frames.append(
            WIND_QUERY_HELPER.reformat_wind_data(
                raw_data=raw_data,
                date=fetch_date,
                output_data_format=output_data_format,
                multi_factors=multi_factors))

    # Concatenate once instead of re-copying the accumulated frame per day.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, axis=0)