예제 #1
0
def import_trade_date():
    """
    增量导入交易日数据导数据库表 wind_trade_date,默认导入未来300天的交易日数据
    :return: 
    """
    w = WindRest(WIND_REST_URL)
    engine = get_db_engine()
    trade_date_start = None
    with get_db_session(engine) as session:
        try:
            trade_date_max = session.execute(
                'select max(trade_date) from wind_trade_date').first()[0]
            if trade_date_max is not None:
                trade_date_start = (
                    trade_date_max +
                    timedelta(days=1)).strftime(STR_FORMAT_DATE)
        except Exception as exp:
            logging.exception("交易日获取异常")
        if trade_date_start is None:
            trade_date_start = '1990-01-01'

    end_date_str = (date.today() +
                    timedelta(days=310)).strftime(STR_FORMAT_DATE)
    date_df = w.tdays(trade_date_start, end_date_str)
    if date_df is not None:
        print(date_df.shape)
        date_df = date_df.set_index('date').rename(
            columns={'date': 'trade_date'})
        date_df.to_sql('wind_trade_date', engine, if_exists='append')
        logging.info('%d trade date has been imported', date_df.shape[0])
예제 #2
0
def update_factors():
    rest = WindRest(WIND_REST_URL)
    sql_str = 'SELECT max(Trade_Date) as trade_date_latest FROM stock_facexposure'
    with get_db_session() as session:
        trade_date_latest = session.execute(sql_str).fetchone()[0]
        date_start_str = rest.tdaysoffset(1, trade_date_latest)
        date_today_str = date.today().strftime('%Y-%m-%d')
        date_end_str = rest.tdaysoffset(-1, date_today_str)

    if datetime.strptime(date_start_str, STR_FORMAT_DATE) <= datetime.strptime(date_end_str, STR_FORMAT_DATE):
        save_factor_explore(date_start_str, date_end_str)
예제 #3
0
def update_wind_fund_info(get_df=False, mode='replace_insert'):
    # 更新 基金信息
    table_name = 'wind_fund_info'
    rest = WindRest(WIND_REST_URL)  # 初始化服务器接口,用于下载万得数据
    engine = get_db_engine()
    # 初始化数据库,并获取旧表信息
    old = pd.read_sql_query('select wind_code from %s' % table_name, engine)
    old_set = set(old['wind_code'])
    # 从万得获取最新基金列表
    types = {
        u'股票多头策略': 1000023122000000,
        u'股票多空策略': 1000023123000000,
        u'其他股票策略': 1000023124000000,
        u'阿尔法策略': 1000023125000000,
        u'其他市场中性策略': 1000023126000000,
        u'事件驱动策略': 1000023113000000,
        u'债券策略': 1000023114000000,
        u'套利策略': 1000023115000000,
        u'宏观策略': 1000023116000000,
        u'管理期货策略': 1000023117000000,
        u'组合基金策略': 1000023118000000,
        u'货币市场策略': 1000023119000000,
        u'多策略': 100002312000000,
        u'其他策略': 1000023121000000
    }
    df = pd.DataFrame()
    yestday = (date.today() - timedelta(days=1)).strftime('%Y-%m-%d')
    for i in types.keys():
        temp = rest.wset("sectorconstituent",
                         "date=%s;sectorid=%s" % (yestday, str(types[i])))
        temp['strategy_type'] = i
        logging.info('%s sectorconstituent %s df.shape:%s', yestday, i,
                     temp.shape)
        df = pd.concat([df, temp], axis=0)
    fund_types_df = df[['wind_code',
                        'strategy_type']]  # , 'sec_name' 后续wss接口可以获得
    new_set = set(fund_types_df['wind_code'])
    target_set = new_set.difference(old_set)  # in new_set but not old_set
    fund_types_df.set_index('wind_code', inplace=True)
    # # 获取新成立基金各项基本面信息
    fund_info_df = get_fund_info_df_by_wind(list(target_set))
    fund_info_df = fund_types_df.join(fund_info_df, how='right')
    return save_fund_info(fund_info_df, get_df, mode)
예제 #4
0
def get_fund_info_df_by_wind(wind_code_list):
    """
    获取新成立基金各项基本面信息
    :param wind_code_list: 
    :return: 
    """
    rest = WindRest(WIND_REST_URL)  # 初始化服务器接口,用于下载万得数据
    code_count = len(wind_code_list)
    seg_count = 2500
    #    fund_info_df = None
    fund_info_df = pd.DataFrame()
    for n in range(math.ceil(float(code_count) / seg_count)):
        num_start = n * seg_count
        num_end = (n + 1) * seg_count
        num_end = num_end if num_end <= code_count else code_count
        if num_start <= code_count:
            codes = ','.join(wind_code_list[num_start:num_end])
            # 分段获取基金成立日期数据
            info2_df = rest.wss(
                codes,
                "sec_name,fund_setupdate,fund_maturitydate,fund_mgrcomp,fund_existingyear,fund_ptmyear,fund_type,fund_fundmanager"
            )
            logging.info('%05d ) [%d %d]' % (n, num_start, num_end))
            fund_info_df = fund_info_df.append(info2_df)
        else:
            break
    if fund_info_df.shape[0] > 0:
        # 整理数据插入 wind_fund_info 表
        # fund_info_df['FUND_SETUPDATE'] = pd.to_datetime(fund_info_df['FUND_SETUPDATE']).apply(lambda x: x.date())
        fund_info_df['FUND_SETUPDATE'] = fund_info_df['FUND_SETUPDATE'].apply(
            str_2_date)
        # fund_info_df['FUND_MATURITYDATE'] = pd.to_datetime(fund_info_df['FUND_MATURITYDATE']).apply(lambda x: x.date())
        fund_info_df['FUND_MATURITYDATE'] = fund_info_df[
            'FUND_MATURITYDATE'].apply(str_2_date)

    return fund_info_df
예제 #5
0
def save_factor_explore(date_start, date_end):
    logging.info('save_factor_explore', date_start, date_end)
    rest = WindRest(WIND_REST_URL)
    date_start = rest.tdaysoffset(0, date_start)  # 获取起始交易日
    date_end = rest.tdaysoffset(0, date_end)  # 获取结束交易日
    # 提取数据
    # 全部数据按照20天计算
    lagnum = 20
    date_start_getdata = rest.tdaysoffset(-1 * (lagnum - 1), date_start)
    # 首先,提取股票代码
    options_set = "date = %s; windcode = 881001.WI" % date_end
    stock_info_df = rest.wset("sectorconstituent", options_set)
    stock_code_s = stock_info_df['wind_code']
    stock_name_s = stock_info_df['sec_name']
    stock_count = len(stock_name_s)
    # 获取各个因子
    options_stock = "unit=1;currencyType=;PriceAdj=B"
    data_shortnames = "close,pe_ttm,mkt_cap_float,turn"
    logging.info('date between %s and %s', date_start_getdata, date_end)
    stock_num = 0
    logging.info('Need To update %d Stocks' % len(stock_code_s))
    for stock_code, stock_name in zip(stock_code_s, stock_name_s):
        stock_factor_df = rest.wsd(stock_code, data_shortnames,
                                   date_start_getdata, date_end, options_stock)
        stock_factor_df['fac_Inverse_Moment'] = stock_factor_df[
            'CLOSE'] / stock_factor_df['CLOSE'].shift(19)  # 19日涨跌判断
        stock_factor_df['fac_Mv'] = np.log10(stock_factor_df.MKT_CAP_FLOAT)
        stock_factor_df['fac_Pe'] = stock_factor_df['PE_TTM']
        stock_factor_df['fac_Turn'] = stock_factor_df['TURN'].rolling(
            lagnum).mean()
        stock_factor_df['fac_Vol'] = (
            stock_factor_df['CLOSE'] /
            stock_factor_df['CLOSE'].shift(1)).rolling(lagnum).std()
        stock_factor_df['Stock_Name'] = stock_name
        stock_factor_df['Stock_Code'] = stock_code
        stock_factor_df.dropna(how='any', inplace=True)
        factor_name_saved = [
            'Stock_Name', 'Stock_Code', 'fac_Inverse_Moment', 'fac_Mv',
            'fac_Pe', 'fac_Turn', 'fac_Vol'
        ]
        engine = get_db_engine()
        stock_factor_df[factor_name_saved].to_sql('Stock_FacExposure',
                                                  engine,
                                                  if_exists='append',
                                                  index_label=['Trade_Date'])
        stock_num += 1
        logging.info('Successful Input %s [%d / %d] stock' %
                     (stock_name, stock_num, stock_count))
예제 #6
0
# -*- coding: utf-8 -*-
"""
Created on 2017/4/14
@author: MG
"""
import logging
import math
from sqlalchemy.types import String, Date
from fh_tools.windy_utils_rest import WindRest
import pandas as pd
from datetime import datetime, date, timedelta
from config_fh import WIND_REST_URL, STR_FORMAT_DATE, get_db_engine, get_db_session

w = WindRest(WIND_REST_URL)


def get_stock_code_set(date_fetch):
    date_fetch_str = date_fetch.strftime(STR_FORMAT_DATE)
    stock_df = w.wset("sectorconstituent", "date=%s;sectorid=a001010100000000" % date_fetch_str)
    if stock_df is None:
        logging.warning('%s 获取股票代码失败', date_fetch_str)
        return None
    stock_count = stock_df.shape[0]
    logging.info('get %d stocks on %s', stock_count, date_fetch_str)
    return set(stock_df['wind_code'])


def import_wind_stock_info(refresh=False):

    # 获取全市场股票代码及名称
    if refresh:
예제 #7
0
def update_wind_fund_nav(get_df=False, wind_code_list=None):
    table_name = 'wind_fund_nav'
    rest = WindRest(WIND_REST_URL)  # 初始化数据下载端口
    # 初始化数据库engine
    engine = get_db_engine()
    # 链接数据库,并获取fundnav旧表
    with get_db_session(engine) as session:
        table = session.execute(
            'select wind_code, max(trade_date) from wind_fund_nav group by wind_code'
        )
        fund_trade_date_latest_in_nav_dic = dict(table.fetchall())
        wind_code_set_existed_in_nav = set(
            fund_trade_date_latest_in_nav_dic.keys())
    # 获取wind_fund_info表信息
    fund_info_df = pd.read_sql_query(
        """SELECT DISTINCT wind_code as wind_code,fund_setupdate,fund_maturitydate,trade_date_latest
from fund_info group by wind_code""", engine)
    trade_date_latest_dic = {
        wind_code: trade_date_latest
        for wind_code, trade_date_latest in zip(fund_info_df['wind_code'],
                                                fund_info_df['fund_setupdate'])
    }
    fund_info_df.set_index('wind_code', inplace=True)
    if wind_code_list is None:
        wind_code_list = list(fund_info_df.index)
    else:
        wind_code_list = list(set(wind_code_list) & set(fund_info_df.index))
    date_end = date.today() - timedelta(days=1)
    date_end_str = date_end.strftime(STR_FORMAT_DATE)
    fund_nav_all_df = []
    no_data_count = 0
    code_count = len(wind_code_list)
    # 对每个新获取的基金名称进行判断,若存在 fundnav 中,则只获取部分净值
    wind_code_trade_date_latest = {}
    try:
        for i, wind_code in enumerate(wind_code_list):
            # 设定数据获取的起始日期
            wind_code_trade_date_latest[wind_code] = date_end
            if wind_code in wind_code_set_existed_in_nav:
                trade_latest = fund_trade_date_latest_in_nav_dic[wind_code]
                if trade_latest >= date_end:
                    continue
                date_begin = trade_latest + timedelta(days=1)
            else:
                date_begin = trade_date_latest_dic[wind_code]
            if date_begin is None:
                continue
            elif isinstance(date_begin, str):
                date_begin = datetime.strptime(date_begin,
                                               STR_FORMAT_DATE).date()

            if isinstance(date_begin, date):
                if date_begin.year < 1900:
                    continue
                if date_begin > date_end:
                    continue
                date_begin = date_begin.strftime('%Y-%m-%d')
            else:
                continue

            # 尝试获取 fund_nav 数据
            for k in range(2):
                try:
                    fund_nav_tmp_df = rest.wsd(codes=wind_code,
                                               fields='nav,NAV_acc,NAV_date',
                                               begin_time=date_begin,
                                               end_time=date_end_str,
                                               options='Fill=Previous')
                    break
                except Exception as exp:
                    logger.error("%s Failed, ErrorMsg: %s" %
                                 (wind_code, str(exp)))
                    continue
            else:
                del wind_code_trade_date_latest[wind_code]
                fund_nav_tmp_df = None

            if fund_nav_tmp_df is None:
                logger.info('%s No data', wind_code)
                del wind_code_trade_date_latest[wind_code]
                no_data_count += 1
                logger.warning('%d funds no data', no_data_count)
            else:
                fund_nav_tmp_df.dropna(how='all', inplace=True)
                df_len = fund_nav_tmp_df.shape[0]
                if df_len == 0:
                    continue
                fund_nav_tmp_df['wind_code'] = wind_code
                # 此处删除 trade_date_latest 之后再加上,主要是为了避免因抛出异常而导致的该条数据也被记录更新
                del wind_code_trade_date_latest[wind_code]
                trade_date_latest = fund_nav_df_2_sql(table_name,
                                                      fund_nav_tmp_df,
                                                      engine,
                                                      is_append=True)
                if trade_date_latest is None:
                    logger.error('%s[%d] data insert failed', wind_code)
                else:
                    wind_code_trade_date_latest[wind_code] = trade_date_latest
                    logger.info('%d) %s updated, %d funds left', i, wind_code,
                                code_count - i)
                    if get_df:
                        fund_nav_all_df = fund_nav_all_df.append(
                            fund_nav_tmp_df)
    finally:
        import_wind_fund_nav_to_fund_nav()
        update_trade_date_latest(wind_code_trade_date_latest)
        try:
            update_fund_mgrcomp_info()
        except:
            # 新功能上线前由于数据库表不存在,可能导致更新失败,属于正常现象
            logger.exception('新功能上线前由于数据库表不存在,可能导致更新失败,属于正常现象')
    return fund_nav_all_df
예제 #8
0
# -*- coding: utf-8 -*-
"""
Created on Thu Apr  6 11:11:26 2017

@author: Yupeng Guo
"""

from fh_tools.windy_utils_rest import WindRest
from fh_tools.fh_utils import get_first, get_last
import pandas as pd
from datetime import date, timedelta
from sqlalchemy.types import String, Date
from config_fh import get_db_engine, WIND_REST_URL, get_db_session
import logging

rest = WindRest(WIND_REST_URL)  # 初始化服务器接口,用于下载万得数据


def import_wind_index_daily_first(table_name, wind_codes):
    # wind_codes 可以是字符串,也可以是字符串的集合, table_name是数据库的表名
    engine = get_db_engine()
    yestday = (date.today() - timedelta(days=1)).strftime('%Y-%m-%d')
    info = rest.wss(wind_codes, "basedate,sec_name")
    for code in info.index:
        begin_date = info.loc[code, 'BASEDATE'].strftime('%Y-%m-%d')
        index_name = info.loc[code, 'SEC_NAME']
        temp = rest.wsd(code, "open,high,low,close,volume,amt", begin_date,
                        yestday)
        temp.reset_index(inplace=True)
        temp.rename(columns={'index': 'trade_date'}, inplace=True)
        temp.trade_date = pd.to_datetime(temp.trade_date)
예제 #9
0
def update_wind_fund_info(get_df=False, mode='replace_insert'):
    table_name = 'wind_fund_info'
    rest = WindRest(WIND_REST_URL)  # 初始化服务器接口,用于下载万得数据
    engine = get_db_engine()
    # 初始化数据库,并获取旧表信息
    old = pd.read_sql_query('select wind_code from %s' % table_name, engine)
    old_set = set(old['wind_code'])
    # 从万得获取最新基金列表
    types = {u'股票多头策略': 1000023122000000,
             u'股票多空策略': 1000023123000000,
             u'其他股票策略': 1000023124000000,
             u'阿尔法策略': 1000023125000000,
             u'其他市场中性策略': 1000023126000000,
             u'事件驱动策略': 1000023113000000,
             u'债券策略': 1000023114000000,
             u'套利策略': 1000023115000000,
             u'宏观策略': 1000023116000000,
             u'管理期货策略': 1000023117000000,
             u'组合基金策略': 1000023118000000,
             u'货币市场策略': 1000023119000000,
             u'多策略': 100002312000000,
             u'其他策略': 1000023121000000}
    df = pd.DataFrame()
    yestday = (date.today() - timedelta(days=1)).strftime('%Y-%m-%d')
    for i in types.keys():
        temp = rest.wset("sectorconstituent", "date=%s;sectorid=%s" % (yestday, str(types[i])))
        temp['strategy_type'] = i
        logging.info('%s sectorconstituent %s df.shape:%s', yestday, i, temp.shape)
        df = pd.concat([df, temp], axis=0)
    fund_types_df = df[['wind_code', 'sec_name', 'strategy_type']]
    new_set = set(fund_types_df['wind_code'])
    target_set = new_set.difference(old_set)  # in new_set but not old_set

    fund_types_df.set_index('wind_code', inplace=True)

    # 获取新成立基金各项基本面信息
    code_list = list(target_set)
    code_count = len(code_list)
    seg_count = 2500
    #    info_df = None
    info_df = pd.DataFrame()
    for n in range(math.ceil(float(code_count) / seg_count)):
        num_start = n * seg_count
        num_end = (n + 1) * seg_count
        num_end = num_end if num_end <= code_count else code_count
        if num_start <= code_count:
            codes = ','.join(code_list[num_start:num_end])
            # 分段获取基金成立日期数据
            info2_df = rest.wss(codes, "fund_setupdate,fund_maturitydate,fund_mgrcomp,fund_existingyear,fund_ptmyear,fund_type,fund_fundmanager")
            logging.info('%05d ) [%d %d]' % (n, num_start, num_end))
            info_df = info_df.append(info2_df)
        else:
            break

    # 没有新增基金,直接退出
    if info_df.shape[0] == 0:
        return

    # 整理数据插入 wind_fund_info 表
    # info_df['FUND_SETUPDATE'] = pd.to_datetime(info_df['FUND_SETUPDATE']).apply(lambda x: x.date())
    info_df['FUND_SETUPDATE'] = info_df['FUND_SETUPDATE'].apply(str_2_date)
    # info_df['FUND_MATURITYDATE'] = pd.to_datetime(info_df['FUND_MATURITYDATE']).apply(lambda x: x.date())
    info_df['FUND_MATURITYDATE'] = info_df['FUND_MATURITYDATE'].apply(str_2_date)
    info_df = fund_types_df.join(info_df, how='right')

    info_df.rename(columns={'FUND_SETUPDATE': 'fund_setupdate',
                            'FUND_MATURITYDATE': 'fund_maturitydate',
                            'FUND_MGRCOMP': 'fund_mgrcomp',
                            'FUND_EXISTINGYEAR': 'fund_existingyear',
                            'FUND_PTMYEAR': 'fund_ptmyear',
                            'FUND_TYPE': 'fund_type',
                            'FUND_FUNDMANAGER': 'fund_fundmanager'
                            }, inplace=True)

    info_df.index.names = ['wind_code']
    info_df.drop_duplicates(inplace=True)
    if mode == 'append':
        info_df.to_sql(table_name, engine, if_exists='append',
                       dtype={
                           'wind_code': String(200),
                           'sec_name': String(200),
                           'strategy_type': String(200),
                           'fund_setupdate': Date,
                           'fund_maturitydate': Date
                       })
    elif mode == 'replace_insert':
        info_df.reset_index(inplace=True)
        data_list = list(info_df.T.to_dict().values())
        sql_str = """REPLACE INTO wind_fund_info
(wind_code,sec_name,strategy_type,fund_setupdate,fund_maturitydate,fund_mgrcomp,fund_existingyear,fund_ptmyear,fund_type,fund_fundmanager)
VALUES
(:wind_code,:sec_name,:strategy_type,:fund_setupdate,:fund_maturitydate,:fund_mgrcomp,:fund_existingyear,:fund_ptmyear,:fund_type,:fund_fundmanager);
"""
        with get_db_session() as session:
            session.execute(sql_str, data_list)
    else:
        raise ValueError('mode="%s" is not available' % mode)
    logging.info('%d funds inserted' % info_df.shape[0])

    # wind_fund_info 表中增量数据插入到 fund_info
    sql_str = """insert into fund_info(wind_code, sec_name, strategy_type, fund_setupdate, fund_maturitydate, fund_mgrcomp, 
fund_existingyear, fund_ptmyear, fund_type, fund_fundmanager)
select wfi.wind_code, wfi.sec_name, wfi.strategy_type, wfi.fund_setupdate, wfi.fund_maturitydate, wfi.fund_mgrcomp, 
wfi.fund_existingyear, wfi.fund_ptmyear, wfi.fund_type, wfi.fund_fundmanager
from wind_fund_info wfi left outer join fund_info fi on wfi.wind_code=fi.wind_code
where fi.wind_code is null"""
    with get_db_session(engine) as session:
        table = session.execute(sql_str)
    logging.info('new data was inserted into fund_info from wind_fund_info table')
    if get_df:
        return info_df
예제 #10
0
def wind_fund_info_import(table_name, get_df=False):
    rest = WindRest(WIND_REST_URL)  # 初始化服务器接口,用于下载万得数据
    types = {
        u'股票多头策略': 1000023122000000,
        u'股票多空策略': 1000023123000000,
        u'其他股票策略': 1000023124000000,
        u'阿尔法策略': 1000023125000000,
        u'其他市场中性策略': 1000023126000000,
        u'事件驱动策略': 1000023113000000,
        u'债券策略': 1000023114000000,
        u'套利策略': 1000023115000000,
        u'宏观策略': 1000023116000000,
        u'管理期货': 1000023117000000,
        u'组合基金策略': 1000023118000000,
        u'货币市场策略': 1000023119000000,
        u'多策略': 100002312000000,
        u'其他策略': 1000023121000000
    }
    df = pd.DataFrame()
    today = date.today().strftime('%Y-%m-%d')
    for i in types.keys():
        temp = rest.wset("sectorconstituent",
                         "date=%s;sectorid=%s" % (today, str(types[i])))
        temp['strategy_type'] = i
        df = pd.concat([df, temp], axis=0)

    # 插入数据库
    # 初始化数据库engine
    engine = get_db_engine()

    # 整理数据
    fund_types_df = df[['wind_code', 'sec_name', 'strategy_type']]
    fund_types_df.set_index('wind_code', inplace=True)

    # 获取基金基本面信息
    code_list = list(fund_types_df.index)  # df['wind_code']
    code_count = len(code_list)
    seg_count = 5000
    info_df = pd.DataFrame()
    for n in range(int(code_count / seg_count) + 1):
        num_start = n * seg_count
        num_end = (n + 1) * seg_count
        num_end = num_end if num_end <= code_count else code_count
        if num_start <= code_count:
            codes = ','.join(code_list[num_start:num_end])
            # 分段获取基金成立日期数据
            info2_df = rest.wss(
                codes, "fund_setupdate,fund_maturitydate,fund_mgrcomp,\
                                  fund_existingyear,fund_ptmyear,fund_type,fund_fundmanager"
            )
            logging.info('%05d ) [%d %d]' % (n, num_start, num_end))
            info_df = info_df.append(info2_df)
        else:
            break
            # 整理数据插入数据库
    info_df['FUND_SETUPDATE'] = info_df['FUND_SETUPDATE'].apply(
        lambda x: x.date())
    info_df['FUND_MATURITYDATE'] = info_df['FUND_MATURITYDATE'].apply(
        lambda x: x.date())
    info_df = fund_types_df.join(info_df, how='right')

    info_df.rename(columns={
        'FUND_SETUPDATE': 'fund_setupdate',
        'FUND_MATURITYDATE': 'fund_maturitydate',
        'FUND_MGRCOMP': 'fund_mgrcomp',
        'FUND_EXISTINGYEAR': 'fund_existingyear',
        'FUND_PTMYEAR': 'fund_ptmyear',
        'FUND_TYPE': 'fund_type',
        'FUND_FUNDMANAGER': 'fund_fundmanager'
    },
                   inplace=True)

    info_df.index.names = ['wind_code']
    info_df.drop_duplicates(inplace=True)
    info_df.to_sql(table_name,
                   engine,
                   if_exists='append',
                   dtype={
                       'wind_code': String(200),
                       'sec_name': String(200),
                       'strategy_type': String(200),
                       'fund_setupdate': Date,
                       'fund_maturitydate': Date
                   })
    logging.info('%d funds inserted' % len(info_df))
    if get_df:
        return info_df
예제 #11
0
def import_wind_future_info():

    # 获取已存在合约列表
    sql_str = 'select wind_code, ipo_date from wind_future_info'
    engine = get_db_engine()
    with get_db_session(engine) as session:
        table = session.execute(sql_str)
        wind_code_ipo_date_dic = dict(table.fetchall())

    # 通过wind获取合约列表
    # w.start()
    rest = WindRest(WIND_REST_URL)  # 初始化服务器接口,用于下载万得数据
    future_sectorid_dic_list = [
        {
            'subject_name': 'CFE 沪深300',
            'regex': r"IF\d{4}\.CFE",
            'sectorid': 'a599010102000000',
            'date_establish': '2010-4-16'
        },
        {
            'subject_name': 'CFE 上证50',
            'regex': r"IH\d{4}\.CFE",
            'sectorid': '1000014871000000',
            'date_establish': '2015-4-16'
        },
        {
            'subject_name': 'CFE 中证500',
            'regex': r"IC\d{4}\.CFE",
            'sectorid': '1000014872000000',
            'date_establish': '2015-4-16'
        },
        {
            'subject_name': 'SHFE 黄金',
            'regex': r"AU\d{4}\.SHF",
            'sectorid': 'a599010205000000',
            'date_establish': '2008-01-09'
        },
        {
            'subject_name': 'SHFE 沪银',
            'regex': r"AG\d{4}\.SHF",
            'sectorid': '1000006502000000',
            'date_establish': '2012-05-10'
        },
        {
            'subject_name': 'SHFE 螺纹钢',
            'regex': r"RB\d{4}\.SHF",
            'sectorid': 'a599010206000000',
            'date_establish': '2009-03-27'
        },
        {
            'subject_name': 'SHFE 热卷',
            'regex': r"HC\d{4}\.SHF",
            'sectorid': '1000011455000000',
            'date_establish': '2014-03-21'
        },
        {
            'subject_name': 'DCE 焦炭',
            'regex': r"J\d{4}\.SHF",
            'sectorid': '1000002976000000',
            'date_establish': '2011-04-15'
        },
        {
            'subject_name': 'DCE 焦煤',
            'regex': r"JM\d{4}\.SHF",
            'sectorid': '1000009338000000',
            'date_establish': '2013-03-22'
        },
        {
            'subject_name': '铁矿石',
            'regex': r"I\d{4}\.SHF",
            'sectorid': '1000006502000000',
            'date_establish': '2013-10-18'
        },
    ]
    wind_code_set = set()
    ndays_per_update = 60
    # 获取历史期货合约列表信息
    for future_sectorid_dic in future_sectorid_dic_list:
        subject_name = future_sectorid_dic['subject_name']
        sector_id = future_sectorid_dic['sectorid']
        regex_str = future_sectorid_dic['regex']
        date_establish = datetime.strptime(
            future_sectorid_dic['date_establish'], STR_FORMAT_DATE).date()
        date_since = get_date_since(wind_code_ipo_date_dic, regex_str,
                                    date_establish)
        date_yestoday = date.today() - timedelta(days=1)
        while date_since <= date_yestoday:
            date_since_str = date_since.strftime(STR_FORMAT_DATE)
            # w.wset("sectorconstituent","date=2017-05-02;sectorid=a599010205000000")
            # future_info_df = wset_cache(w, "sectorconstituent", "date=%s;sectorid=%s" % (date_since_str, sector_id))
            future_info_df = rest.wset(
                "sectorconstituent",
                "date=%s;sectorid=%s" % (date_since_str, sector_id))
            wind_code_set |= set(future_info_df['wind_code'])
            # future_info_df = future_info_df[['wind_code', 'sec_name']]
            # future_info_dic_list = future_info_df.to_dict(orient='records')
            # for future_info_dic in future_info_dic_list:
            #     wind_code = future_info_dic['wind_code']
            #     if wind_code not in wind_code_future_info_dic:
            #         wind_code_future_info_dic[wind_code] = future_info_dic
            if date_since >= date_yestoday:
                break
            else:
                date_since += timedelta(days=ndays_per_update)
                if date_since > date_yestoday:
                    date_since = date_yestoday

    # 获取合约列表
    wind_code_list = [
        wc for wc in wind_code_set if wc not in wind_code_ipo_date_dic
    ]
    # 获取合约基本信息
    # w.wss("AU1706.SHF,AG1612.SHF,AU0806.SHF", "ipo_date,sec_name,sec_englishname,exch_eng,lasttrade_date,lastdelivery_date,dlmonth,lprice,sccode,margin,punit,changelt,mfprice,contractmultiplier,ftmargins,trade_code")
    # future_info_df = wss_cache(w, wind_code_list,
    #                            "ipo_date,sec_name,sec_englishname,exch_eng,lasttrade_date,lastdelivery_date,dlmonth,lprice,sccode,margin,punit,changelt,mfprice,contractmultiplier,ftmargins,trade_code")
    if len(wind_code_list) > 0:
        future_info_df = rest.wss(
            wind_code_list,
            "ipo_date,sec_name,sec_englishname,exch_eng,lasttrade_date,lastdelivery_date,dlmonth,lprice,sccode,margin,punit,changelt,mfprice,contractmultiplier,ftmargins,trade_code"
        )

        future_info_df['MFPRICE'] = future_info_df['MFPRICE'].apply(
            mfprice_2_num)
        future_info_count = future_info_df.shape[0]

        future_info_df.rename(
            columns={c: str.lower(c)
                     for c in future_info_df.columns},
            inplace=True)
        future_info_df.index.rename('wind_code', inplace=True)
        future_info_df.to_sql('wind_future_info',
                              engine,
                              if_exists='append',
                              dtype={
                                  'wind_code': String(20),
                                  'trade_code': String(20),
                                  'sec_name': String(50),
                                  'sec_englishname': String(50),
                                  'exch_eng': String(50),
                                  'ipo_date': Date,
                                  'lasttrade_date': Date,
                                  'lastdelivery_date': Date,
                                  'dlmonth': String(20),
                                  'lprice': Float,
                                  'sccode': String(20),
                                  'margin': Float,
                                  'punit': String(20),
                                  'changelt': Float,
                                  'mfprice': Float,
                                  'contractmultiplier': Float,
                                  'ftmargins': String(100),
                              })
        print('%d data import' % future_info_count)
예제 #12
0
def import_stock_daily():
    w = WindRest(WIND_REST_URL)
    engine = get_db_engine()
    with get_db_session(engine) as session:
        # 获取每只股票最新交易日数据
        sql_str = 'select wind_code, max(Trade_date) from wind_stock_daily group by wind_code'
        table = session.execute(sql_str)
        stock_trade_date_latest_dic = dict(table.fetchall())
        # 获取市场有效交易日数据
        sql_str = "select trade_date from wind_trade_date where trade_date > '2005-1-1'"
        table = session.execute(sql_str)
        trade_date_sorted_list = [t[0] for t in table.fetchall()]
        trade_date_sorted_list.sort()
        # 获取每只股票上市日期、退市日期
        table = session.execute('SELECT wind_code, ipo_date, delist_date FROM wind_stock_info')
        stock_date_dic = {wind_code: (ipo_date, delist_date if delist_date is None or delist_date > UN_AVAILABLE_DATE else None) for
                          wind_code, ipo_date, delist_date in table.fetchall()}
    today_t_1 = date.today() - ONE_DAY
    data_df_list = []

    try:
        for wind_code, date_pair in stock_date_dic.items():
            date_ipo, date_delist = date_pair
            # 获取 date_from
            if wind_code in stock_trade_date_latest_dic:
                date_latest_t1 = stock_trade_date_latest_dic[wind_code] + ONE_DAY
                date_from = max([date_latest_t1, DATE_BASE, date_ipo])
            else:
                date_from = max([DATE_BASE, date_ipo])
            date_from = get_first(trade_date_sorted_list, lambda x: x >= date_from)
            # 获取 date_to
            if date_delist is None:
                date_to = today_t_1
            else:
                date_to = min([date_delist, today_t_1])
            date_to = get_last(trade_date_sorted_list, lambda x: x <= date_to)
            if date_from is None or date_to is None or date_from > date_to:
                continue
            # 获取股票量价等行情数据
            wind_indictor_str = "open,high,low,close,adjfactor,volume,amt,pct_chg,maxupordown," + \
                                "swing,turn,free_turn,trade_status,susp_days"
            data_df = w.wsd(wind_code, wind_indictor_str, date_from, date_to)
            if data_df is None:
                logging.warning('%s has no data during %s %s', wind_code, date_from, date_to)
                continue
            logging.info('%d data of %s', data_df.shape[0], wind_code)
            data_df['wind_code'] = wind_code
            data_df_list.append(data_df)
    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_df_all.index.rename('trade_date', inplace=True)
            data_df_all.reset_index(inplace=True)
            data_df_all.set_index(['wind_code', 'trade_date'], inplace=True)
            data_df_all.to_sql('wind_stock_daily', engine, if_exists='append',
                               dtype={
                                   'wind_code': String(20),
                                   'trade_date': Date,
                                   'open': Float,
                                   'high': Float,
                                   'low': Float,
                                   'close': Float,
                                   'adjfactor': Float,
                                   'volume': Float,
                                   'amt': Float,
                                   'pct_chg': Float,
                                   'maxupordown': Integer,
                                   'swing': Float,
                                   'turn': Float,
                                   'free_turn': Float,
                                   'trade_status': String(20),
                                   'susp_days': Integer,
                               }
                               )
            logging.info('%d data imported', data_df_all.shape[0])
예제 #13
0
def fill_history():
    """补充历史股票日线数据"""
    # 获取每只股票ipo 日期 及 最小的交易日前一天
    sql_str = """select si.wind_code, ipo_date, td_to
from wind_stock_info si,
(select wind_code, max(trade_date) td_max, date_sub(min(trade_date), interval 1 day) td_to from wind_stock_daily group by wind_code) sd
where si.wind_code = sd.wind_code
and ipo_date < td_to"""
    w = WindRest(WIND_REST_URL)
    engine = get_db_engine()
    with get_db_session(engine) as session:
        table = session.execute(sql_str)
        stock_ipo_trade_date_min_dic = {
            content[0]: (content[1], content[2])
            for content in table.fetchall()
        }
    data_df_list = []
    try:
        for wind_code, date_pair in stock_ipo_trade_date_min_dic.items():
            date_from, date_to = date_pair
            # 获取股票量价等行情数据
            wind_indictor_str = "open,high,low,close,adjfactor,volume,amt,pct_chg,maxupordown," + \
                                "swing,turn,free_turn,trade_status,susp_days,total_shares,free_float_shares"
            data_df = w.wsd(wind_code, wind_indictor_str, date_from, date_to)
            if data_df is None:
                logger.warning('%s has no data during %s %s', wind_code,
                               date_from, date_to)
                continue
            logger.info('%d data of %s between %s and %s', data_df.shape[0],
                        wind_code, date_from, date_to)
            data_df['wind_code'] = wind_code
            data_df_list.append(data_df)
    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_df_all.index.rename('trade_date', inplace=True)
            data_df_all.reset_index(inplace=True)
            data_df_all.set_index(['wind_code', 'trade_date'], inplace=True)
            data_df_all.to_sql('wind_stock_daily',
                               engine,
                               if_exists='append',
                               dtype={
                                   'wind_code': String(20),
                                   'trade_date': Date,
                                   'open': Float,
                                   'high': Float,
                                   'low': Float,
                                   'close': Float,
                                   'adjfactor': Float,
                                   'volume': Float,
                                   'amt': Float,
                                   'pct_chg': Float,
                                   'maxupordown': Integer,
                                   'swing': Float,
                                   'turn': Float,
                                   'free_turn': Float,
                                   'trade_status': String(20),
                                   'susp_days': Integer,
                                   'total_shares': Float,
                                   'free_float_shares': Float,
                               })
            logger.info('%d data imported', data_df_all.shape[0])
예제 #14
0
def fill_col():
    """补充历史col数据"""
    col_name = 'ev2_to_ebitda'
    # 获取每只股票ipo 日期 及 最小的交易日前一天
    #     sql_str = """select si.wind_code, td_from, td_to
    # from wind_stock_info si,
    # (select wind_code, min(trade_date) td_from, max(trade_date) td_to from wind_stock_daily where ev2_to_ebitda is null group by wind_code) sd
    # where si.wind_code = sd.wind_code"""
    sql_str = """select wind_code, if(min_trade_date<'1998-12-31','1998-12-31',min_trade_date) date_from , if(min_date_ev2_to_ebitda<max_trade_date, min_date_ev2_to_ebitda,max_trade_date) date_to, min_date_ev2_to_ebitda, min_trade_date, max_trade_date FROM
(
select wind_code, min(IF(ev2_to_ebitda is null, '2018-01-01', trade_date)) min_date_ev2_to_ebitda, min(trade_date) min_trade_date, max(trade_date) max_trade_date 
from wind_stock_daily group by wind_code
HAVING min_date_ev2_to_ebitda>'1998-12-31'
) aaa
where if(min_trade_date<'1998-12-31','1998-12-31',min_trade_date) < if(min_date_ev2_to_ebitda<max_trade_date, min_date_ev2_to_ebitda,max_trade_date)"""
    w = WindRest(WIND_REST_URL)
    engine = get_db_engine()
    with get_db_session(engine) as session:
        table = session.execute(sql_str)
        stock_trade_date_range_dic = {
            content[0]: (content[1], content[2])
            for content in table.fetchall()
        }
    data_df_list = []
    try:
        for n, (wind_code,
                (date_from,
                 date_to)) in enumerate(stock_trade_date_range_dic.items()):
            # 获取股票量价等行情数据
            wind_indictor_str = col_name
            data_df = w.wsd(wind_code, wind_indictor_str, date_from, date_to)
            if data_df is None:
                logger.warning('%d) %s has no data during %s %s', n, wind_code,
                               date_from, date_to)
                continue
            logger.info('%d) %d data of %s between %s and %s', n,
                        data_df.shape[0], wind_code, date_from, date_to)
            data_df['wind_code'] = wind_code
            data_df_list.append(data_df)
            # if n > 5:
            #     break
    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_df_all.index.rename('trade_date', inplace=True)
            data_df_all.reset_index(inplace=True)
            data_df_all.rename(columns={col_name.upper(): col_name},
                               inplace=True)
            data_df_all.dropna(inplace=True)
            data_dic_list = data_df_all.to_dict(orient='records')
            sql_str = "update wind_stock_daily set %s=:%s where wind_code=:wind_code and trade_date=:trade_date" % (
                col_name, col_name)
            with get_db_session(engine) as session:
                table = session.execute(sql_str, params=data_dic_list)
            # data_df_all.set_index(['wind_code', 'trade_date'], inplace=True)
            # data_df_all.to_sql('wind_stock_daily', engine, if_exists='append',
            #                    dtype={
            #                        'wind_code': String(20),
            #                        'trade_date': Date,
            #                        'open': Float,
            #                        'high': Float,
            #                        'low': Float,
            #                        'close': Float,
            #                        'adjfactor': Float,
            #                        'volume': Float,
            #                        'amt': Float,
            #                        'pct_chg': Float,
            #                        'maxupordown': Integer,
            #                        'swing': Float,
            #                        'turn': Float,
            #                        'free_turn': Float,
            #                        'trade_status': String(20),
            #                        'susp_days': Integer,
            #                        'total_shares': Float,
            #                        'free_float_shares': Float,
            #                    }
            #                    )
            logger.info('%d data imported', data_df_all.shape[0])
        else:
            logger.warning('no data for update')
예제 #15
0
def import_wind_bonus():
    w = WindRest(WIND_REST_URL)
    engine = get_db_engine()
    with get_db_session(engine) as session:
        table = session.execute(
            'SELECT wind_code, ipo_date, delist_date FROM wind_stock_info')
        stock_date_dic = {
            wind_code:
            (ipo_date,
             delist_date if delist_date > UN_AVAILABLE_DATE else None)
            for wind_code, ipo_date, delist_date in table.fetchall()
        }
    print(len(stock_date_dic))
    DATE_LIST = [
        datetime.strptime('2010-12-31', STR_FORMAT_DATE).date(),
        datetime.strptime('2011-12-31', STR_FORMAT_DATE).date(),
        datetime.strptime('2012-12-31', STR_FORMAT_DATE).date(),
        datetime.strptime('2013-12-31', STR_FORMAT_DATE).date(),
        datetime.strptime('2014-12-31', STR_FORMAT_DATE).date(),
        datetime.strptime('2015-12-31', STR_FORMAT_DATE).date(),
    ]
    dic_exdate_df_list = []
    for rep_date in DATE_LIST:
        rep_date_str = rep_date.strftime('%Y%m%d')
        stock_list = [
            s for s, date_pair in stock_date_dic.items()
            if date_pair[0] < rep_date and (
                rep_date < date_pair[1] if date_pair[1] is not None else True)
        ]
        dic_exdate_df = w.wss(
            stock_list,
            "div_cashbeforetax2,div_cashaftertax2,div_stock2,div_capitalization2,div_capitalization,div_stock,div_cashaftertax,div_cashbeforetax,div_cashandstock,div_recorddate,div_exdate,div_paydate,div_trddateshareb,div_preDisclosureDate,div_prelandate,div_smtgdate,div_impdate",
            "rptDate=%s;currencyType=BB" % rep_date_str)
        dic_exdate_df_list.append(dic_exdate_df)

    dic_exdate_df_all = pd.concat(dic_exdate_df_list)
    dic_exdate_df_all.index.rename('wind_code', inplace=True)
    dic_exdate_df_all.drop_duplicates(inplace=True)
    dic_exdate_df_all['DIV_EXDATE'] = dic_exdate_df_all['DIV_EXDATE'].apply(
        str_date)
    dic_exdate_df_all['DIV_PAYDATE'] = dic_exdate_df_all['DIV_PAYDATE'].apply(
        str_date)
    dic_exdate_df_all['DIV_IMPDATE'] = dic_exdate_df_all['DIV_IMPDATE'].apply(
        str_date)
    dic_exdate_df_all['DIV_RECORDDATE'] = dic_exdate_df_all[
        'DIV_RECORDDATE'].apply(str_date)
    dic_exdate_df_all['DIV_PREDISCLOSUREDATE'] = dic_exdate_df_all[
        'DIV_PREDISCLOSUREDATE'].apply(str_date)
    dic_exdate_df_all['DIV_PRELANDATE'] = dic_exdate_df_all[
        'DIV_PRELANDATE'].apply(str_date)
    dic_exdate_df_all['DIV_SMTGDATE'] = dic_exdate_df_all[
        'DIV_SMTGDATE'].apply(str_date)
    dic_exdate_df_all['DIV_TRDDATESHAREB'] = dic_exdate_df_all[
        'DIV_TRDDATESHAREB'].apply(str_date)

    condition = ~(
        dic_exdate_df_all['DIV_EXDATE'].apply(lambda x: x is None)
        & dic_exdate_df_all['DIV_PAYDATE'].apply(lambda x: x is None)
        & dic_exdate_df_all['DIV_IMPDATE'].apply(lambda x: x is None)
        & dic_exdate_df_all['DIV_RECORDDATE'].apply(lambda x: x is None))
    dic_exdate_df_available = dic_exdate_df_all[condition]
    dic_exdate_df_available.to_sql('wind_stock_bonus',
                                   engine,
                                   if_exists='append',
                                   dtype={
                                       'wind_code': String(20),
                                       'div_cashbeforetax2': FLOAT,
                                       'div_cashaftertax2': FLOAT,
                                       'div_stock2': FLOAT,
                                       'div_capitalization2': FLOAT,
                                       'div_capitalization': FLOAT,
                                       'div_stock': FLOAT,
                                       'div_cashaftertax': FLOAT,
                                       'div_cashbeforetax': FLOAT,
                                       'div_cashandstock': FLOAT,
                                       'div_recorddate': Date,
                                       'div_exdate': Date,
                                       'div_paydate': Date,
                                       'div_trddateshareb': Date,
                                       'div_preDisclosureDate': Date,
                                       'div_prelandate': Date,
                                       'div_smtgdate': Date,
                                       'div_impdate': Date
                                   })