def import_trade_date(): """ 增量导入交易日数据导数据库表 wind_trade_date,默认导入未来300天的交易日数据 :return: """ w = WindRest(WIND_REST_URL) engine = get_db_engine() trade_date_start = None with get_db_session(engine) as session: try: trade_date_max = session.execute( 'select max(trade_date) from wind_trade_date').first()[0] if trade_date_max is not None: trade_date_start = ( trade_date_max + timedelta(days=1)).strftime(STR_FORMAT_DATE) except Exception as exp: logging.exception("交易日获取异常") if trade_date_start is None: trade_date_start = '1990-01-01' end_date_str = (date.today() + timedelta(days=310)).strftime(STR_FORMAT_DATE) date_df = w.tdays(trade_date_start, end_date_str) if date_df is not None: print(date_df.shape) date_df = date_df.set_index('date').rename( columns={'date': 'trade_date'}) date_df.to_sql('wind_trade_date', engine, if_exists='append') logging.info('%d trade date has been imported', date_df.shape[0])
def update_factors(): rest = WindRest(WIND_REST_URL) sql_str = 'SELECT max(Trade_Date) as trade_date_latest FROM stock_facexposure' with get_db_session() as session: trade_date_latest = session.execute(sql_str).fetchone()[0] date_start_str = rest.tdaysoffset(1, trade_date_latest) date_today_str = date.today().strftime('%Y-%m-%d') date_end_str = rest.tdaysoffset(-1, date_today_str) if datetime.strptime(date_start_str, STR_FORMAT_DATE) <= datetime.strptime(date_end_str, STR_FORMAT_DATE): save_factor_explore(date_start_str, date_end_str)
def update_wind_fund_info(get_df=False, mode='replace_insert'): # 更新 基金信息 table_name = 'wind_fund_info' rest = WindRest(WIND_REST_URL) # 初始化服务器接口,用于下载万得数据 engine = get_db_engine() # 初始化数据库,并获取旧表信息 old = pd.read_sql_query('select wind_code from %s' % table_name, engine) old_set = set(old['wind_code']) # 从万得获取最新基金列表 types = { u'股票多头策略': 1000023122000000, u'股票多空策略': 1000023123000000, u'其他股票策略': 1000023124000000, u'阿尔法策略': 1000023125000000, u'其他市场中性策略': 1000023126000000, u'事件驱动策略': 1000023113000000, u'债券策略': 1000023114000000, u'套利策略': 1000023115000000, u'宏观策略': 1000023116000000, u'管理期货策略': 1000023117000000, u'组合基金策略': 1000023118000000, u'货币市场策略': 1000023119000000, u'多策略': 100002312000000, u'其他策略': 1000023121000000 } df = pd.DataFrame() yestday = (date.today() - timedelta(days=1)).strftime('%Y-%m-%d') for i in types.keys(): temp = rest.wset("sectorconstituent", "date=%s;sectorid=%s" % (yestday, str(types[i]))) temp['strategy_type'] = i logging.info('%s sectorconstituent %s df.shape:%s', yestday, i, temp.shape) df = pd.concat([df, temp], axis=0) fund_types_df = df[['wind_code', 'strategy_type']] # , 'sec_name' 后续wss接口可以获得 new_set = set(fund_types_df['wind_code']) target_set = new_set.difference(old_set) # in new_set but not old_set fund_types_df.set_index('wind_code', inplace=True) # # 获取新成立基金各项基本面信息 fund_info_df = get_fund_info_df_by_wind(list(target_set)) fund_info_df = fund_types_df.join(fund_info_df, how='right') return save_fund_info(fund_info_df, get_df, mode)
def get_fund_info_df_by_wind(wind_code_list): """ 获取新成立基金各项基本面信息 :param wind_code_list: :return: """ rest = WindRest(WIND_REST_URL) # 初始化服务器接口,用于下载万得数据 code_count = len(wind_code_list) seg_count = 2500 # fund_info_df = None fund_info_df = pd.DataFrame() for n in range(math.ceil(float(code_count) / seg_count)): num_start = n * seg_count num_end = (n + 1) * seg_count num_end = num_end if num_end <= code_count else code_count if num_start <= code_count: codes = ','.join(wind_code_list[num_start:num_end]) # 分段获取基金成立日期数据 info2_df = rest.wss( codes, "sec_name,fund_setupdate,fund_maturitydate,fund_mgrcomp,fund_existingyear,fund_ptmyear,fund_type,fund_fundmanager" ) logging.info('%05d ) [%d %d]' % (n, num_start, num_end)) fund_info_df = fund_info_df.append(info2_df) else: break if fund_info_df.shape[0] > 0: # 整理数据插入 wind_fund_info 表 # fund_info_df['FUND_SETUPDATE'] = pd.to_datetime(fund_info_df['FUND_SETUPDATE']).apply(lambda x: x.date()) fund_info_df['FUND_SETUPDATE'] = fund_info_df['FUND_SETUPDATE'].apply( str_2_date) # fund_info_df['FUND_MATURITYDATE'] = pd.to_datetime(fund_info_df['FUND_MATURITYDATE']).apply(lambda x: x.date()) fund_info_df['FUND_MATURITYDATE'] = fund_info_df[ 'FUND_MATURITYDATE'].apply(str_2_date) return fund_info_df
def save_factor_explore(date_start, date_end): logging.info('save_factor_explore', date_start, date_end) rest = WindRest(WIND_REST_URL) date_start = rest.tdaysoffset(0, date_start) # 获取起始交易日 date_end = rest.tdaysoffset(0, date_end) # 获取结束交易日 # 提取数据 # 全部数据按照20天计算 lagnum = 20 date_start_getdata = rest.tdaysoffset(-1 * (lagnum - 1), date_start) # 首先,提取股票代码 options_set = "date = %s; windcode = 881001.WI" % date_end stock_info_df = rest.wset("sectorconstituent", options_set) stock_code_s = stock_info_df['wind_code'] stock_name_s = stock_info_df['sec_name'] stock_count = len(stock_name_s) # 获取各个因子 options_stock = "unit=1;currencyType=;PriceAdj=B" data_shortnames = "close,pe_ttm,mkt_cap_float,turn" logging.info('date between %s and %s', date_start_getdata, date_end) stock_num = 0 logging.info('Need To update %d Stocks' % len(stock_code_s)) for stock_code, stock_name in zip(stock_code_s, stock_name_s): stock_factor_df = rest.wsd(stock_code, data_shortnames, date_start_getdata, date_end, options_stock) stock_factor_df['fac_Inverse_Moment'] = stock_factor_df[ 'CLOSE'] / stock_factor_df['CLOSE'].shift(19) # 19日涨跌判断 stock_factor_df['fac_Mv'] = np.log10(stock_factor_df.MKT_CAP_FLOAT) stock_factor_df['fac_Pe'] = stock_factor_df['PE_TTM'] stock_factor_df['fac_Turn'] = stock_factor_df['TURN'].rolling( lagnum).mean() stock_factor_df['fac_Vol'] = ( stock_factor_df['CLOSE'] / stock_factor_df['CLOSE'].shift(1)).rolling(lagnum).std() stock_factor_df['Stock_Name'] = stock_name stock_factor_df['Stock_Code'] = stock_code stock_factor_df.dropna(how='any', inplace=True) factor_name_saved = [ 'Stock_Name', 'Stock_Code', 'fac_Inverse_Moment', 'fac_Mv', 'fac_Pe', 'fac_Turn', 'fac_Vol' ] engine = get_db_engine() stock_factor_df[factor_name_saved].to_sql('Stock_FacExposure', engine, if_exists='append', index_label=['Trade_Date']) stock_num += 1 logging.info('Successful Input %s [%d / %d] stock' % (stock_name, stock_num, stock_count))
# -*- coding: utf-8 -*- """ Created on 2017/4/14 @author: MG """ import logging import math from sqlalchemy.types import String, Date from fh_tools.windy_utils_rest import WindRest import pandas as pd from datetime import datetime, date, timedelta from config_fh import WIND_REST_URL, STR_FORMAT_DATE, get_db_engine, get_db_session w = WindRest(WIND_REST_URL) def get_stock_code_set(date_fetch): date_fetch_str = date_fetch.strftime(STR_FORMAT_DATE) stock_df = w.wset("sectorconstituent", "date=%s;sectorid=a001010100000000" % date_fetch_str) if stock_df is None: logging.warning('%s 获取股票代码失败', date_fetch_str) return None stock_count = stock_df.shape[0] logging.info('get %d stocks on %s', stock_count, date_fetch_str) return set(stock_df['wind_code']) def import_wind_stock_info(refresh=False): # 获取全市场股票代码及名称 if refresh:
def update_wind_fund_nav(get_df=False, wind_code_list=None): table_name = 'wind_fund_nav' rest = WindRest(WIND_REST_URL) # 初始化数据下载端口 # 初始化数据库engine engine = get_db_engine() # 链接数据库,并获取fundnav旧表 with get_db_session(engine) as session: table = session.execute( 'select wind_code, max(trade_date) from wind_fund_nav group by wind_code' ) fund_trade_date_latest_in_nav_dic = dict(table.fetchall()) wind_code_set_existed_in_nav = set( fund_trade_date_latest_in_nav_dic.keys()) # 获取wind_fund_info表信息 fund_info_df = pd.read_sql_query( """SELECT DISTINCT wind_code as wind_code,fund_setupdate,fund_maturitydate,trade_date_latest from fund_info group by wind_code""", engine) trade_date_latest_dic = { wind_code: trade_date_latest for wind_code, trade_date_latest in zip(fund_info_df['wind_code'], fund_info_df['fund_setupdate']) } fund_info_df.set_index('wind_code', inplace=True) if wind_code_list is None: wind_code_list = list(fund_info_df.index) else: wind_code_list = list(set(wind_code_list) & set(fund_info_df.index)) date_end = date.today() - timedelta(days=1) date_end_str = date_end.strftime(STR_FORMAT_DATE) fund_nav_all_df = [] no_data_count = 0 code_count = len(wind_code_list) # 对每个新获取的基金名称进行判断,若存在 fundnav 中,则只获取部分净值 wind_code_trade_date_latest = {} try: for i, wind_code in enumerate(wind_code_list): # 设定数据获取的起始日期 wind_code_trade_date_latest[wind_code] = date_end if wind_code in wind_code_set_existed_in_nav: trade_latest = fund_trade_date_latest_in_nav_dic[wind_code] if trade_latest >= date_end: continue date_begin = trade_latest + timedelta(days=1) else: date_begin = trade_date_latest_dic[wind_code] if date_begin is None: continue elif isinstance(date_begin, str): date_begin = datetime.strptime(date_begin, STR_FORMAT_DATE).date() if isinstance(date_begin, date): if date_begin.year < 1900: continue if date_begin > date_end: continue date_begin = date_begin.strftime('%Y-%m-%d') else: continue # 尝试获取 fund_nav 数据 for k in range(2): try: fund_nav_tmp_df = rest.wsd(codes=wind_code, fields='nav,NAV_acc,NAV_date', begin_time=date_begin, end_time=date_end_str, options='Fill=Previous') break except Exception as exp: logger.error("%s Failed, ErrorMsg: %s" % (wind_code, str(exp))) continue else: del wind_code_trade_date_latest[wind_code] fund_nav_tmp_df = None if fund_nav_tmp_df is None: logger.info('%s No data', wind_code) del wind_code_trade_date_latest[wind_code] no_data_count += 1 logger.warning('%d funds no data', no_data_count) else: fund_nav_tmp_df.dropna(how='all', inplace=True) df_len = fund_nav_tmp_df.shape[0] if df_len == 0: continue fund_nav_tmp_df['wind_code'] = wind_code # 此处删除 trade_date_latest 之后再加上,主要是为了避免因抛出异常而导致的该条数据也被记录更新 del wind_code_trade_date_latest[wind_code] trade_date_latest = fund_nav_df_2_sql(table_name, fund_nav_tmp_df, engine, is_append=True) if trade_date_latest is None: logger.error('%s[%d] data insert failed', wind_code) else: wind_code_trade_date_latest[wind_code] = trade_date_latest logger.info('%d) %s updated, %d funds left', i, wind_code, code_count - i) if get_df: fund_nav_all_df = fund_nav_all_df.append( fund_nav_tmp_df) finally: import_wind_fund_nav_to_fund_nav() update_trade_date_latest(wind_code_trade_date_latest) try: update_fund_mgrcomp_info() except: # 新功能上线前由于数据库表不存在,可能导致更新失败,属于正常现象 logger.exception('新功能上线前由于数据库表不存在,可能导致更新失败,属于正常现象') return fund_nav_all_df
# -*- coding: utf-8 -*- """ Created on Thu Apr 6 11:11:26 2017 @author: Yupeng Guo """ from fh_tools.windy_utils_rest import WindRest from fh_tools.fh_utils import get_first, get_last import pandas as pd from datetime import date, timedelta from sqlalchemy.types import String, Date from config_fh import get_db_engine, WIND_REST_URL, get_db_session import logging rest = WindRest(WIND_REST_URL) # 初始化服务器接口,用于下载万得数据 def import_wind_index_daily_first(table_name, wind_codes): # wind_codes 可以是字符串,也可以是字符串的集合, table_name是数据库的表名 engine = get_db_engine() yestday = (date.today() - timedelta(days=1)).strftime('%Y-%m-%d') info = rest.wss(wind_codes, "basedate,sec_name") for code in info.index: begin_date = info.loc[code, 'BASEDATE'].strftime('%Y-%m-%d') index_name = info.loc[code, 'SEC_NAME'] temp = rest.wsd(code, "open,high,low,close,volume,amt", begin_date, yestday) temp.reset_index(inplace=True) temp.rename(columns={'index': 'trade_date'}, inplace=True) temp.trade_date = pd.to_datetime(temp.trade_date)
def update_wind_fund_info(get_df=False, mode='replace_insert'): table_name = 'wind_fund_info' rest = WindRest(WIND_REST_URL) # 初始化服务器接口,用于下载万得数据 engine = get_db_engine() # 初始化数据库,并获取旧表信息 old = pd.read_sql_query('select wind_code from %s' % table_name, engine) old_set = set(old['wind_code']) # 从万得获取最新基金列表 types = {u'股票多头策略': 1000023122000000, u'股票多空策略': 1000023123000000, u'其他股票策略': 1000023124000000, u'阿尔法策略': 1000023125000000, u'其他市场中性策略': 1000023126000000, u'事件驱动策略': 1000023113000000, u'债券策略': 1000023114000000, u'套利策略': 1000023115000000, u'宏观策略': 1000023116000000, u'管理期货策略': 1000023117000000, u'组合基金策略': 1000023118000000, u'货币市场策略': 1000023119000000, u'多策略': 100002312000000, u'其他策略': 1000023121000000} df = pd.DataFrame() yestday = (date.today() - timedelta(days=1)).strftime('%Y-%m-%d') for i in types.keys(): temp = rest.wset("sectorconstituent", "date=%s;sectorid=%s" % (yestday, str(types[i]))) temp['strategy_type'] = i logging.info('%s sectorconstituent %s df.shape:%s', yestday, i, temp.shape) df = pd.concat([df, temp], axis=0) fund_types_df = df[['wind_code', 'sec_name', 'strategy_type']] new_set = set(fund_types_df['wind_code']) target_set = new_set.difference(old_set) # in new_set but not old_set fund_types_df.set_index('wind_code', inplace=True) # 获取新成立基金各项基本面信息 code_list = list(target_set) code_count = len(code_list) seg_count = 2500 # info_df = None info_df = pd.DataFrame() for n in range(math.ceil(float(code_count) / seg_count)): num_start = n * seg_count num_end = (n + 1) * seg_count num_end = num_end if num_end <= code_count else code_count if num_start <= code_count: codes = ','.join(code_list[num_start:num_end]) # 分段获取基金成立日期数据 info2_df = rest.wss(codes, "fund_setupdate,fund_maturitydate,fund_mgrcomp,fund_existingyear,fund_ptmyear,fund_type,fund_fundmanager") logging.info('%05d ) [%d %d]' % (n, num_start, num_end)) info_df = info_df.append(info2_df) else: break # 没有新增基金,直接退出 if info_df.shape[0] == 0: return # 整理数据插入 wind_fund_info 表 # info_df['FUND_SETUPDATE'] = pd.to_datetime(info_df['FUND_SETUPDATE']).apply(lambda x: x.date()) info_df['FUND_SETUPDATE'] = info_df['FUND_SETUPDATE'].apply(str_2_date) # info_df['FUND_MATURITYDATE'] = pd.to_datetime(info_df['FUND_MATURITYDATE']).apply(lambda x: x.date()) info_df['FUND_MATURITYDATE'] = info_df['FUND_MATURITYDATE'].apply(str_2_date) info_df = fund_types_df.join(info_df, how='right') info_df.rename(columns={'FUND_SETUPDATE': 'fund_setupdate', 'FUND_MATURITYDATE': 'fund_maturitydate', 'FUND_MGRCOMP': 'fund_mgrcomp', 'FUND_EXISTINGYEAR': 'fund_existingyear', 'FUND_PTMYEAR': 'fund_ptmyear', 'FUND_TYPE': 'fund_type', 'FUND_FUNDMANAGER': 'fund_fundmanager' }, inplace=True) info_df.index.names = ['wind_code'] info_df.drop_duplicates(inplace=True) if mode == 'append': info_df.to_sql(table_name, engine, if_exists='append', dtype={ 'wind_code': String(200), 'sec_name': String(200), 'strategy_type': String(200), 'fund_setupdate': Date, 'fund_maturitydate': Date }) elif mode == 'replace_insert': info_df.reset_index(inplace=True) data_list = list(info_df.T.to_dict().values()) sql_str = """REPLACE INTO wind_fund_info (wind_code,sec_name,strategy_type,fund_setupdate,fund_maturitydate,fund_mgrcomp,fund_existingyear,fund_ptmyear,fund_type,fund_fundmanager) VALUES (:wind_code,:sec_name,:strategy_type,:fund_setupdate,:fund_maturitydate,:fund_mgrcomp,:fund_existingyear,:fund_ptmyear,:fund_type,:fund_fundmanager); """ with get_db_session() as session: session.execute(sql_str, data_list) else: raise ValueError('mode="%s" is not available' % mode) logging.info('%d funds inserted' % info_df.shape[0]) # wind_fund_info 表中增量数据插入到 fund_info sql_str = """insert into fund_info(wind_code, sec_name, strategy_type, fund_setupdate, fund_maturitydate, fund_mgrcomp, fund_existingyear, fund_ptmyear, fund_type, fund_fundmanager) select wfi.wind_code, wfi.sec_name, wfi.strategy_type, wfi.fund_setupdate, wfi.fund_maturitydate, wfi.fund_mgrcomp, wfi.fund_existingyear, wfi.fund_ptmyear, wfi.fund_type, wfi.fund_fundmanager from wind_fund_info wfi left outer join fund_info fi on wfi.wind_code=fi.wind_code where fi.wind_code is null""" with get_db_session(engine) as session: table = session.execute(sql_str) logging.info('new data was inserted into fund_info from wind_fund_info table') if get_df: return info_df
def wind_fund_info_import(table_name, get_df=False): rest = WindRest(WIND_REST_URL) # 初始化服务器接口,用于下载万得数据 types = { u'股票多头策略': 1000023122000000, u'股票多空策略': 1000023123000000, u'其他股票策略': 1000023124000000, u'阿尔法策略': 1000023125000000, u'其他市场中性策略': 1000023126000000, u'事件驱动策略': 1000023113000000, u'债券策略': 1000023114000000, u'套利策略': 1000023115000000, u'宏观策略': 1000023116000000, u'管理期货': 1000023117000000, u'组合基金策略': 1000023118000000, u'货币市场策略': 1000023119000000, u'多策略': 100002312000000, u'其他策略': 1000023121000000 } df = pd.DataFrame() today = date.today().strftime('%Y-%m-%d') for i in types.keys(): temp = rest.wset("sectorconstituent", "date=%s;sectorid=%s" % (today, str(types[i]))) temp['strategy_type'] = i df = pd.concat([df, temp], axis=0) # 插入数据库 # 初始化数据库engine engine = get_db_engine() # 整理数据 fund_types_df = df[['wind_code', 'sec_name', 'strategy_type']] fund_types_df.set_index('wind_code', inplace=True) # 获取基金基本面信息 code_list = list(fund_types_df.index) # df['wind_code'] code_count = len(code_list) seg_count = 5000 info_df = pd.DataFrame() for n in range(int(code_count / seg_count) + 1): num_start = n * seg_count num_end = (n + 1) * seg_count num_end = num_end if num_end <= code_count else code_count if num_start <= code_count: codes = ','.join(code_list[num_start:num_end]) # 分段获取基金成立日期数据 info2_df = rest.wss( codes, "fund_setupdate,fund_maturitydate,fund_mgrcomp,\ fund_existingyear,fund_ptmyear,fund_type,fund_fundmanager" ) logging.info('%05d ) [%d %d]' % (n, num_start, num_end)) info_df = info_df.append(info2_df) else: break # 整理数据插入数据库 info_df['FUND_SETUPDATE'] = info_df['FUND_SETUPDATE'].apply( lambda x: x.date()) info_df['FUND_MATURITYDATE'] = info_df['FUND_MATURITYDATE'].apply( lambda x: x.date()) info_df = fund_types_df.join(info_df, how='right') info_df.rename(columns={ 'FUND_SETUPDATE': 'fund_setupdate', 'FUND_MATURITYDATE': 'fund_maturitydate', 'FUND_MGRCOMP': 'fund_mgrcomp', 'FUND_EXISTINGYEAR': 'fund_existingyear', 'FUND_PTMYEAR': 'fund_ptmyear', 'FUND_TYPE': 'fund_type', 'FUND_FUNDMANAGER': 'fund_fundmanager' }, inplace=True) info_df.index.names = ['wind_code'] info_df.drop_duplicates(inplace=True) info_df.to_sql(table_name, engine, if_exists='append', dtype={ 'wind_code': String(200), 'sec_name': String(200), 'strategy_type': String(200), 'fund_setupdate': Date, 'fund_maturitydate': Date }) logging.info('%d funds inserted' % len(info_df)) if get_df: return info_df
def import_wind_future_info(): # 获取已存在合约列表 sql_str = 'select wind_code, ipo_date from wind_future_info' engine = get_db_engine() with get_db_session(engine) as session: table = session.execute(sql_str) wind_code_ipo_date_dic = dict(table.fetchall()) # 通过wind获取合约列表 # w.start() rest = WindRest(WIND_REST_URL) # 初始化服务器接口,用于下载万得数据 future_sectorid_dic_list = [ { 'subject_name': 'CFE 沪深300', 'regex': r"IF\d{4}\.CFE", 'sectorid': 'a599010102000000', 'date_establish': '2010-4-16' }, { 'subject_name': 'CFE 上证50', 'regex': r"IH\d{4}\.CFE", 'sectorid': '1000014871000000', 'date_establish': '2015-4-16' }, { 'subject_name': 'CFE 中证500', 'regex': r"IC\d{4}\.CFE", 'sectorid': '1000014872000000', 'date_establish': '2015-4-16' }, { 'subject_name': 'SHFE 黄金', 'regex': r"AU\d{4}\.SHF", 'sectorid': 'a599010205000000', 'date_establish': '2008-01-09' }, { 'subject_name': 'SHFE 沪银', 'regex': r"AG\d{4}\.SHF", 'sectorid': '1000006502000000', 'date_establish': '2012-05-10' }, { 'subject_name': 'SHFE 螺纹钢', 'regex': r"RB\d{4}\.SHF", 'sectorid': 'a599010206000000', 'date_establish': '2009-03-27' }, { 'subject_name': 'SHFE 热卷', 'regex': r"HC\d{4}\.SHF", 'sectorid': '1000011455000000', 'date_establish': '2014-03-21' }, { 'subject_name': 'DCE 焦炭', 'regex': r"J\d{4}\.SHF", 'sectorid': '1000002976000000', 'date_establish': '2011-04-15' }, { 'subject_name': 'DCE 焦煤', 'regex': r"JM\d{4}\.SHF", 'sectorid': '1000009338000000', 'date_establish': '2013-03-22' }, { 'subject_name': '铁矿石', 'regex': r"I\d{4}\.SHF", 'sectorid': '1000006502000000', 'date_establish': '2013-10-18' }, ] wind_code_set = set() ndays_per_update = 60 # 获取历史期货合约列表信息 for future_sectorid_dic in future_sectorid_dic_list: subject_name = future_sectorid_dic['subject_name'] sector_id = future_sectorid_dic['sectorid'] regex_str = future_sectorid_dic['regex'] date_establish = datetime.strptime( future_sectorid_dic['date_establish'], STR_FORMAT_DATE).date() date_since = get_date_since(wind_code_ipo_date_dic, regex_str, date_establish) date_yestoday = date.today() - timedelta(days=1) while date_since <= date_yestoday: date_since_str = date_since.strftime(STR_FORMAT_DATE) # w.wset("sectorconstituent","date=2017-05-02;sectorid=a599010205000000") # future_info_df = wset_cache(w, "sectorconstituent", "date=%s;sectorid=%s" % (date_since_str, sector_id)) future_info_df = rest.wset( "sectorconstituent", "date=%s;sectorid=%s" % (date_since_str, sector_id)) wind_code_set |= set(future_info_df['wind_code']) # future_info_df = future_info_df[['wind_code', 'sec_name']] # future_info_dic_list = future_info_df.to_dict(orient='records') # for future_info_dic in future_info_dic_list: # wind_code = future_info_dic['wind_code'] # if wind_code not in wind_code_future_info_dic: # wind_code_future_info_dic[wind_code] = future_info_dic if date_since >= date_yestoday: break else: date_since += timedelta(days=ndays_per_update) if date_since > date_yestoday: date_since = date_yestoday # 获取合约列表 wind_code_list = [ wc for wc in wind_code_set if wc not in wind_code_ipo_date_dic ] # 获取合约基本信息 # w.wss("AU1706.SHF,AG1612.SHF,AU0806.SHF", "ipo_date,sec_name,sec_englishname,exch_eng,lasttrade_date,lastdelivery_date,dlmonth,lprice,sccode,margin,punit,changelt,mfprice,contractmultiplier,ftmargins,trade_code") # future_info_df = wss_cache(w, wind_code_list, # "ipo_date,sec_name,sec_englishname,exch_eng,lasttrade_date,lastdelivery_date,dlmonth,lprice,sccode,margin,punit,changelt,mfprice,contractmultiplier,ftmargins,trade_code") if len(wind_code_list) > 0: future_info_df = rest.wss( wind_code_list, "ipo_date,sec_name,sec_englishname,exch_eng,lasttrade_date,lastdelivery_date,dlmonth,lprice,sccode,margin,punit,changelt,mfprice,contractmultiplier,ftmargins,trade_code" ) future_info_df['MFPRICE'] = future_info_df['MFPRICE'].apply( mfprice_2_num) future_info_count = future_info_df.shape[0] future_info_df.rename( columns={c: str.lower(c) for c in future_info_df.columns}, inplace=True) future_info_df.index.rename('wind_code', inplace=True) future_info_df.to_sql('wind_future_info', engine, if_exists='append', dtype={ 'wind_code': String(20), 'trade_code': String(20), 'sec_name': String(50), 'sec_englishname': String(50), 'exch_eng': String(50), 'ipo_date': Date, 'lasttrade_date': Date, 'lastdelivery_date': Date, 'dlmonth': String(20), 'lprice': Float, 'sccode': String(20), 'margin': Float, 'punit': String(20), 'changelt': Float, 'mfprice': Float, 'contractmultiplier': Float, 'ftmargins': String(100), }) print('%d data import' % future_info_count)
def import_stock_daily(): w = WindRest(WIND_REST_URL) engine = get_db_engine() with get_db_session(engine) as session: # 获取每只股票最新交易日数据 sql_str = 'select wind_code, max(Trade_date) from wind_stock_daily group by wind_code' table = session.execute(sql_str) stock_trade_date_latest_dic = dict(table.fetchall()) # 获取市场有效交易日数据 sql_str = "select trade_date from wind_trade_date where trade_date > '2005-1-1'" table = session.execute(sql_str) trade_date_sorted_list = [t[0] for t in table.fetchall()] trade_date_sorted_list.sort() # 获取每只股票上市日期、退市日期 table = session.execute('SELECT wind_code, ipo_date, delist_date FROM wind_stock_info') stock_date_dic = {wind_code: (ipo_date, delist_date if delist_date is None or delist_date > UN_AVAILABLE_DATE else None) for wind_code, ipo_date, delist_date in table.fetchall()} today_t_1 = date.today() - ONE_DAY data_df_list = [] try: for wind_code, date_pair in stock_date_dic.items(): date_ipo, date_delist = date_pair # 获取 date_from if wind_code in stock_trade_date_latest_dic: date_latest_t1 = stock_trade_date_latest_dic[wind_code] + ONE_DAY date_from = max([date_latest_t1, DATE_BASE, date_ipo]) else: date_from = max([DATE_BASE, date_ipo]) date_from = get_first(trade_date_sorted_list, lambda x: x >= date_from) # 获取 date_to if date_delist is None: date_to = today_t_1 else: date_to = min([date_delist, today_t_1]) date_to = get_last(trade_date_sorted_list, lambda x: x <= date_to) if date_from is None or date_to is None or date_from > date_to: continue # 获取股票量价等行情数据 wind_indictor_str = "open,high,low,close,adjfactor,volume,amt,pct_chg,maxupordown," + \ "swing,turn,free_turn,trade_status,susp_days" data_df = w.wsd(wind_code, wind_indictor_str, date_from, date_to) if data_df is None: logging.warning('%s has no data during %s %s', wind_code, date_from, date_to) continue logging.info('%d data of %s', data_df.shape[0], wind_code) data_df['wind_code'] = wind_code data_df_list.append(data_df) finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_df_all.index.rename('trade_date', inplace=True) data_df_all.reset_index(inplace=True) data_df_all.set_index(['wind_code', 'trade_date'], inplace=True) data_df_all.to_sql('wind_stock_daily', engine, if_exists='append', dtype={ 'wind_code': String(20), 'trade_date': Date, 'open': Float, 'high': Float, 'low': Float, 'close': Float, 'adjfactor': Float, 'volume': Float, 'amt': Float, 'pct_chg': Float, 'maxupordown': Integer, 'swing': Float, 'turn': Float, 'free_turn': Float, 'trade_status': String(20), 'susp_days': Integer, } ) logging.info('%d data imported', data_df_all.shape[0])
def fill_history(): """补充历史股票日线数据""" # 获取每只股票ipo 日期 及 最小的交易日前一天 sql_str = """select si.wind_code, ipo_date, td_to from wind_stock_info si, (select wind_code, max(trade_date) td_max, date_sub(min(trade_date), interval 1 day) td_to from wind_stock_daily group by wind_code) sd where si.wind_code = sd.wind_code and ipo_date < td_to""" w = WindRest(WIND_REST_URL) engine = get_db_engine() with get_db_session(engine) as session: table = session.execute(sql_str) stock_ipo_trade_date_min_dic = { content[0]: (content[1], content[2]) for content in table.fetchall() } data_df_list = [] try: for wind_code, date_pair in stock_ipo_trade_date_min_dic.items(): date_from, date_to = date_pair # 获取股票量价等行情数据 wind_indictor_str = "open,high,low,close,adjfactor,volume,amt,pct_chg,maxupordown," + \ "swing,turn,free_turn,trade_status,susp_days,total_shares,free_float_shares" data_df = w.wsd(wind_code, wind_indictor_str, date_from, date_to) if data_df is None: logger.warning('%s has no data during %s %s', wind_code, date_from, date_to) continue logger.info('%d data of %s between %s and %s', data_df.shape[0], wind_code, date_from, date_to) data_df['wind_code'] = wind_code data_df_list.append(data_df) finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_df_all.index.rename('trade_date', inplace=True) data_df_all.reset_index(inplace=True) data_df_all.set_index(['wind_code', 'trade_date'], inplace=True) data_df_all.to_sql('wind_stock_daily', engine, if_exists='append', dtype={ 'wind_code': String(20), 'trade_date': Date, 'open': Float, 'high': Float, 'low': Float, 'close': Float, 'adjfactor': Float, 'volume': Float, 'amt': Float, 'pct_chg': Float, 'maxupordown': Integer, 'swing': Float, 'turn': Float, 'free_turn': Float, 'trade_status': String(20), 'susp_days': Integer, 'total_shares': Float, 'free_float_shares': Float, }) logger.info('%d data imported', data_df_all.shape[0])
def fill_col(): """补充历史col数据""" col_name = 'ev2_to_ebitda' # 获取每只股票ipo 日期 及 最小的交易日前一天 # sql_str = """select si.wind_code, td_from, td_to # from wind_stock_info si, # (select wind_code, min(trade_date) td_from, max(trade_date) td_to from wind_stock_daily where ev2_to_ebitda is null group by wind_code) sd # where si.wind_code = sd.wind_code""" sql_str = """select wind_code, if(min_trade_date<'1998-12-31','1998-12-31',min_trade_date) date_from , if(min_date_ev2_to_ebitda<max_trade_date, min_date_ev2_to_ebitda,max_trade_date) date_to, min_date_ev2_to_ebitda, min_trade_date, max_trade_date FROM ( select wind_code, min(IF(ev2_to_ebitda is null, '2018-01-01', trade_date)) min_date_ev2_to_ebitda, min(trade_date) min_trade_date, max(trade_date) max_trade_date from wind_stock_daily group by wind_code HAVING min_date_ev2_to_ebitda>'1998-12-31' ) aaa where if(min_trade_date<'1998-12-31','1998-12-31',min_trade_date) < if(min_date_ev2_to_ebitda<max_trade_date, min_date_ev2_to_ebitda,max_trade_date)""" w = WindRest(WIND_REST_URL) engine = get_db_engine() with get_db_session(engine) as session: table = session.execute(sql_str) stock_trade_date_range_dic = { content[0]: (content[1], content[2]) for content in table.fetchall() } data_df_list = [] try: for n, (wind_code, (date_from, date_to)) in enumerate(stock_trade_date_range_dic.items()): # 获取股票量价等行情数据 wind_indictor_str = col_name data_df = w.wsd(wind_code, wind_indictor_str, date_from, date_to) if data_df is None: logger.warning('%d) %s has no data during %s %s', n, wind_code, date_from, date_to) continue logger.info('%d) %d data of %s between %s and %s', n, data_df.shape[0], wind_code, date_from, date_to) data_df['wind_code'] = wind_code data_df_list.append(data_df) # if n > 5: # break finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_df_all.index.rename('trade_date', inplace=True) data_df_all.reset_index(inplace=True) data_df_all.rename(columns={col_name.upper(): col_name}, inplace=True) data_df_all.dropna(inplace=True) data_dic_list = data_df_all.to_dict(orient='records') sql_str = "update wind_stock_daily set %s=:%s where wind_code=:wind_code and trade_date=:trade_date" % ( col_name, col_name) with get_db_session(engine) as session: table = session.execute(sql_str, params=data_dic_list) # data_df_all.set_index(['wind_code', 'trade_date'], inplace=True) # data_df_all.to_sql('wind_stock_daily', engine, if_exists='append', # dtype={ # 'wind_code': String(20), # 'trade_date': Date, # 'open': Float, # 'high': Float, # 'low': Float, # 'close': Float, # 'adjfactor': Float, # 'volume': Float, # 'amt': Float, # 'pct_chg': Float, # 'maxupordown': Integer, # 'swing': Float, # 'turn': Float, # 'free_turn': Float, # 'trade_status': String(20), # 'susp_days': Integer, # 'total_shares': Float, # 'free_float_shares': Float, # } # ) logger.info('%d data imported', data_df_all.shape[0]) else: logger.warning('no data for update')
def import_wind_bonus(): w = WindRest(WIND_REST_URL) engine = get_db_engine() with get_db_session(engine) as session: table = session.execute( 'SELECT wind_code, ipo_date, delist_date FROM wind_stock_info') stock_date_dic = { wind_code: (ipo_date, delist_date if delist_date > UN_AVAILABLE_DATE else None) for wind_code, ipo_date, delist_date in table.fetchall() } print(len(stock_date_dic)) DATE_LIST = [ datetime.strptime('2010-12-31', STR_FORMAT_DATE).date(), datetime.strptime('2011-12-31', STR_FORMAT_DATE).date(), datetime.strptime('2012-12-31', STR_FORMAT_DATE).date(), datetime.strptime('2013-12-31', STR_FORMAT_DATE).date(), datetime.strptime('2014-12-31', STR_FORMAT_DATE).date(), datetime.strptime('2015-12-31', STR_FORMAT_DATE).date(), ] dic_exdate_df_list = [] for rep_date in DATE_LIST: rep_date_str = rep_date.strftime('%Y%m%d') stock_list = [ s for s, date_pair in stock_date_dic.items() if date_pair[0] < rep_date and ( rep_date < date_pair[1] if date_pair[1] is not None else True) ] dic_exdate_df = w.wss( stock_list, "div_cashbeforetax2,div_cashaftertax2,div_stock2,div_capitalization2,div_capitalization,div_stock,div_cashaftertax,div_cashbeforetax,div_cashandstock,div_recorddate,div_exdate,div_paydate,div_trddateshareb,div_preDisclosureDate,div_prelandate,div_smtgdate,div_impdate", "rptDate=%s;currencyType=BB" % rep_date_str) dic_exdate_df_list.append(dic_exdate_df) dic_exdate_df_all = pd.concat(dic_exdate_df_list) dic_exdate_df_all.index.rename('wind_code', inplace=True) dic_exdate_df_all.drop_duplicates(inplace=True) dic_exdate_df_all['DIV_EXDATE'] = dic_exdate_df_all['DIV_EXDATE'].apply( str_date) dic_exdate_df_all['DIV_PAYDATE'] = dic_exdate_df_all['DIV_PAYDATE'].apply( str_date) dic_exdate_df_all['DIV_IMPDATE'] = dic_exdate_df_all['DIV_IMPDATE'].apply( str_date) dic_exdate_df_all['DIV_RECORDDATE'] = dic_exdate_df_all[ 'DIV_RECORDDATE'].apply(str_date) dic_exdate_df_all['DIV_PREDISCLOSUREDATE'] = dic_exdate_df_all[ 'DIV_PREDISCLOSUREDATE'].apply(str_date) dic_exdate_df_all['DIV_PRELANDATE'] = dic_exdate_df_all[ 'DIV_PRELANDATE'].apply(str_date) dic_exdate_df_all['DIV_SMTGDATE'] = dic_exdate_df_all[ 'DIV_SMTGDATE'].apply(str_date) dic_exdate_df_all['DIV_TRDDATESHAREB'] = dic_exdate_df_all[ 'DIV_TRDDATESHAREB'].apply(str_date) condition = ~( dic_exdate_df_all['DIV_EXDATE'].apply(lambda x: x is None) & dic_exdate_df_all['DIV_PAYDATE'].apply(lambda x: x is None) & dic_exdate_df_all['DIV_IMPDATE'].apply(lambda x: x is None) & dic_exdate_df_all['DIV_RECORDDATE'].apply(lambda x: x is None)) dic_exdate_df_available = dic_exdate_df_all[condition] dic_exdate_df_available.to_sql('wind_stock_bonus', engine, if_exists='append', dtype={ 'wind_code': String(20), 'div_cashbeforetax2': FLOAT, 'div_cashaftertax2': FLOAT, 'div_stock2': FLOAT, 'div_capitalization2': FLOAT, 'div_capitalization': FLOAT, 'div_stock': FLOAT, 'div_cashaftertax': FLOAT, 'div_cashbeforetax': FLOAT, 'div_cashandstock': FLOAT, 'div_recorddate': Date, 'div_exdate': Date, 'div_paydate': Date, 'div_trddateshareb': Date, 'div_preDisclosureDate': Date, 'div_prelandate': Date, 'div_smtgdate': Date, 'div_impdate': Date })