def update_industry_name(start, end):
    """Rebuild the industry columns for the trade days in [start, end].

    For each ``(column, params)`` entry of ``industry_classes`` the result of
    ``get_stock_industryname`` is collected for every trade day (using the
    IDs returned by ``get_ashare`` for that day), concatenated into a
    single-column frame named ``column``, passed through
    ``get_industry_code`` and saved under ``/indexes/``.
    """
    trade_days = get_trade_days(start, end)
    for column, class_params in industry_classes.items():
        daily_names = [
            get_stock_industryname(get_ashare(day), day, *class_params)
            for day in trade_days
        ]
        named = pd.concat(daily_names).to_frame()
        named = named.rename(columns={0: column})
        h5.save_factor(get_industry_code(column, named), '/indexes/')
def update_trade_status(start, end):
    """Save the 0/1 trade-status flags for the trade days in [start, end].

    The ST, suspension, up-limit and down-limit tables from ``sec`` are
    joined column-wise; every non-null cell becomes 1 and every null cell 0.
    ``no_trading`` is 1 when any of the four flags is set on that row.
    The result is saved under ``/trade_status/``.
    """
    trade_days = get_trade_days(start, end)
    flag_frames = [
        sec.get_st(trade_days),
        sec.get_suspend(trade_days),
        sec.get_uplimit(trade_days),
        sec.get_downlimit(trade_days),
    ]
    flags = pd.concat(flag_frames, axis=1)
    # Keep nulls for now and overwrite everything else with 1, then turn the
    # remaining nulls into 0.
    flags = flags.where(flags.isnull(), other=1).fillna(0)
    flags.columns = ['st', 'suspend', 'uplimit', 'downlimit']
    flags['no_trading'] = flags.any(axis=1).astype('int32')
    h5.save_factor(flags, '/trade_status/')
def onlist(start, end):
    """Save the listing date (and ``backdoordate``) of the stocks.

    ``start`` is accepted but not used: the stock list and both ``w.wsd``
    queries are taken as of ``end``. Each result is stored as a one-column
    frame indexed by (fixed date 1900-01-01, first six characters of the
    code) and saved under ``/stocks/``. A missing ``backdoordate`` is
    stored as ``'21000101'``.
    """
    codes = get_ashare(end)
    anchor_date = DateStr2Datetime("19000101")
    short_ids = [code[:6] for code in codes]
    index = pd.MultiIndex.from_product(
        [[anchor_date], short_ids], names=['date', 'IDs'])

    ipo_result = w.wsd(codes, "ipo_date", end, end, "")
    list_date = pd.DataFrame(
        [day.strftime("%Y%m%d") for day in ipo_result.Data[0]],
        index=index,
        columns=['list_date'],
    )

    backdoor_result = w.wsd(codes, "backdoordate", end, end, "")
    backdoor_values = []
    for day in backdoor_result.Data[0]:
        if day is None:
            backdoor_values.append(np.nan)
        else:
            backdoor_values.append(day.strftime("%Y%m%d"))
    backdoordate = pd.DataFrame(
        backdoor_values, index=index, columns=['backdoordate'])
    # Stocks without a backdoor date get a far-future placeholder.
    backdoordate.fillna('21000101', inplace=True)

    h5.save_factor(list_date, '/stocks/')
    h5.save_factor(backdoordate, '/stocks/')
def update_price(start, end):
    """Update the price and volume market data between ``start`` and ``end``.

    Five datasets are fetched with ``get_history_bar`` and saved via
    ``h5.save_factor``, in this order: unadjusted stock bars, market
    values, the adjusted close, turnover rates and index bars.
    """
    # Stock price/volume data, unadjusted.
    stock_bars = get_history_bar(
        "收盘价 涨跌幅 最高价 最低价 成交量".split(), start, end,
        **{'复权方式': '不复权'})
    stock_bars.columns = ['close', 'daily_returns_%', 'high', 'low', 'volume']
    stock_bars['volume'] = stock_bars['volume'] / 100
    stock_bars['daily_returns'] = stock_bars['daily_returns_%'] / 100
    h5.save_factor(stock_bars, '/stocks/')

    # Total and float market value, scaled down by 10000.
    mkt_value = get_history_bar("总市值 A股市值(不含限售股)".split(), start, end)
    mkt_value.columns = ['total_mkt_value', 'float_mkt_value']
    h5.save_factor(mkt_value / 10000, '/stocks/')

    # Stock close price under the second adjustment mode ('后复权').
    adj_close = get_history_bar(
        "收盘价".split(), start, end, **{'复权方式': '后复权'})
    adj_close.columns = ['adj_close']
    h5.save_factor(adj_close, '/stocks/')

    # Turnover rates.
    turnover = get_history_bar(
        "换手率 换手率(基准.自由流通股本)".split(), start, end)
    turnover.columns = ['turn', 'freeturn']
    h5.save_factor(turnover, '/stock_liquidity/')

    # Index price/volume data.
    index_bars = get_history_bar(
        "开盘价 最高价 最低价 收盘价 成交量 成交额 涨跌幅".split(),
        start, end, id_type='index')
    index_bars.columns = [
        'open', 'high', 'low', 'close', 'vol', 'amt', 'daily_returns_%'
    ]
    index_bars['amt'] = index_bars['amt'] / 10000
    index_bars['vol'] = index_bars['vol'] / 100
    h5.save_factor(index_bars, '/indexprices/')
def update_sector(start, end):
    """Update the constituent information for indexes and sectors.

    Index constituents (``index_members``) are always saved under
    ``/indexes/``. Sector constituents (``sector_members``) go to
    ``/indexes/`` for the ``'ashare'`` column and to ``/stocks/`` otherwise.
    """
    trade_days = get_trade_days(start, end)

    for index_id in index_members:
        members = updateSectorConstituent(trade_days, index_id)
        h5.save_factor(members, '/indexes/')

    for column_mark, sectorid in sector_members.items():
        members = updateSectorConstituent2(trade_days, sectorid, column_mark)
        target = '/indexes/' if column_mark == 'ashare' else '/stocks/'
        h5.save_factor(members, target)
def update_industry_index_prices(start, end):
    """Fetch bars for ``CS_INDUSTRY_CODES`` and save them.

    The open/high/low/close/changeper/volume fields are requested through
    ``_updateHistoryBar`` for ``start``..``end`` and the result is stored
    under ``/indexprices/cs_level_1/``.
    """
    # Imported here, as in the original, so the dependencies are only needed
    # when this function is actually called.
    from data_source.update_data.ths_data_source import _updateHistoryBar
    from const import CS_INDUSTRY_CODES

    bar_fields = ['open', 'high', 'low', 'close', 'changeper', 'volume']
    bars = _updateHistoryBar(CS_INDUSTRY_CODES, start, end, bar_fields, 1)
    h5.save_factor(bars, '/indexprices/cs_level_1/')
def update_idx_weight(start, end):
    """Update the index weights for the trade days in [start, end].

    For every id in ``index_weights`` the panel returned by
    ``index_weight_panel`` is divided by 100 and saved under ``/indexes/``.
    """
    trade_days = get_trade_days(start, end)
    for index_id in index_weights:
        weights = index_weight_panel(trade_days, index_id)
        h5.save_factor(weights / 100, '/indexes/')
# -*- coding: utf-8 -*-
"""Read factors from the Xingye (XY) factor data files and save them in h5 format."""
from data_source import h5
import pandas as pd
import os

root = 'D:/data/XYData20170731/XYData'
# Every entry of ``root`` except '基础数据' is treated as a factor folder.
dirs = [x for x in os.listdir(root) if x not in ['基础数据']]
# Destination prefix '/XYData/'. Derived from ``root`` instead of the former
# hard-coded slice ``xy_path[22:]``, which silently broke if ``root`` changed.
h5_prefix = '/' + os.path.basename(root) + '/'

for d in dirs:
    print(d)
    xy_path = root + '/' + d + '/'
    # Read the data.
    all_files = os.listdir(xy_path)
    for file in all_files:
        data = pd.read_csv(os.path.join(xy_path, file), header=0,
                           index_col=0, parse_dates=True)
        # Keep only the first six characters of each column label.
        data.columns = data.columns.str[:6]
        # Factor name = file name without its extension, '-' replaced by '_'.
        factor_name = os.path.splitext(file)[0].replace('-', '_')
        data = data.stack().to_frame().rename_axis(['date', 'IDs'])
        # ``rename_axis`` no longer accepts a label mapper (it raises
        # ValueError in current pandas); ``rename`` is the supported way to
        # relabel the single stacked column.
        data = data.rename(columns={0: factor_name})
        h5.save_factor(data, h5_prefix + d + '/')
def _param2str(param_dict):
    """Serialise a dict as ``key=value`` pairs joined by ``;``."""
    return ";".join(
        "%s=%s" % (key, value) for key, value in param_dict.items()
    )


def _adjust_params(params, kwargs):
    """Parse ``params``, overlay ``kwargs`` on top and serialise the result."""
    merged = _params2dict(params)
    merged.update(kwargs)
    return _param2str(merged)


def _load_wsd_data(ids, fields, start, end, **kwargs):
    """Query ``w.wsd`` once per field and join the results column-wise.

    ``fields`` may be a single field name or a list of names. ``ids`` is
    joined with commas before being passed to ``w.wsd``. Extra keyword
    arguments are serialised into the option string of the query.
    """
    field_list = [fields] if isinstance(fields, str) else fields
    options = _adjust_params("", kwargs)
    id_string = ",".join(ids)
    frames = [
        _bar_to_dataframe(w.wsd(id_string, field, start, end, options))
        for field in field_list
    ]
    return pd.concat(frames, axis=1)


if __name__ == '__main__':
    from const import CS_INDUSTRY_DICT

    codes = [code + '.WI' for code in CS_INDUSTRY_DICT]
    pct_change = _load_wsd_data(codes, 'pct_chg', '20110101', '20170709')
    pct_change.columns = ['changeper']
    h5.save_factor(pct_change, '/indexprices/cs_level_1/')