Example #1
0
def import_pub_fund_daily(wind_code_set, chain_param=None):
    """
    Import daily public-fund NAV data into wind_pub_fund_daily.

    :param wind_code_set: iterable of wind codes to import; None imports all funds
    :param chain_param: used by celery task chains to pass the previous task's result on
    :return:
    """
    logging.info("更新 wind_pub_fund_daily 开始")
    table_name = "wind_pub_fund_daily"
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Per-fund date range: from the day after the latest stored nav_date
        # (fund_setupdate when no data exists yet) up to maturity date or today
        # (yesterday before 16:00, when today's NAV may not be published yet).
        sql_str = """
        SELECT wind_code, date_frm, if(fund_maturitydate<end_date, fund_maturitydate, end_date) date_to
            FROM
            (
            SELECT info.wind_code, ifnull(nav_date, fund_setupdate) date_frm, fund_maturitydate,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM 
                wind_pub_fund_info info 
            LEFT OUTER JOIN
                (SELECT wind_code, adddate(max(nav_date),1) nav_date FROM {table_name} GROUP BY wind_code) daily
            ON info.wind_code = daily.wind_code
            ) tt
            WHERE date_frm <= if(fund_maturitydate<end_date, fund_maturitydate, end_date) 
            ORDER BY wind_code;""".format(table_name=table_name)
    else:
        logger.warning(
            'wind_pub_fund_daily 不存在,仅使用 wind_pub_fund_info 表进行计算日期范围')
        sql_str = """
            SELECT wind_code, date_frm, if(fund_maturitydate<end_date, fund_maturitydate, end_date) date_to
            FROM
              (
                SELECT info.wind_code, fund_setupdate date_frm, fund_maturitydate,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_pub_fund_info info 
              ) tt
            WHERE date_frm <= if(fund_maturitydate<end_date, fund_maturitydate, end_date) 
            ORDER BY wind_code
        """

    with with_db_session(engine_md) as session:
        # (date_from, date_to) range still missing daily data, keyed by fund code
        table = session.execute(sql_str)
        trade_date_latest_dic = {
            wind_code: (date_from, date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    data_df_list = []
    wind_code_date_count = len(trade_date_latest_dic)

    logger.info('%d pub fund will been import into wind_pub_fund_daily',
                wind_code_date_count)
    # NAV fields to fetch and their target column types
    field_col_name_list = [
        ('NAV_date', Date),
        ('NAV_acc', String(20)),
        ('netasset_total', String(20)),
    ]
    wind_indictor_str = ",".join(key.lower() for key, _ in field_col_name_list)
    upper_col_2_name_dic = {
        name.upper(): name.lower()
        for name, _ in field_col_name_list
    }
    dtype = {key.lower(): val for key, val in field_col_name_list}
    dtype['wind_code'] = String(20)
    try:
        data_tot = 0
        for data_num, (wind_code,
                       (date_from,
                        date_to)) in enumerate(trade_date_latest_dic.items()):
            try:
                data_df = invoker.wsd(wind_code, wind_indictor_str, date_from,
                                      date_to, "unit=1;Days=Weekdays")
            except APIError as exp:
                logger.exception("%d/%d) %s 执行异常", data_num,
                                 wind_code_date_count, wind_code)
                if exp.ret_dic.setdefault('error_code', 0) in (
                        -40520007,  # no data available
                        -40521009,  # data decoding failed; check input params (e.g. month-end / short-February dates)
                ):
                    continue
                else:
                    break

            if data_df is None:
                logger.warning('%d/%d) %s has no ohlc data during %s %s',
                               data_num, wind_code_date_count, wind_code,
                               date_from, date_to)
                continue
            # clean and normalize the fetched data
            data_df = data_df.drop_duplicates().dropna()
            data_df.rename(columns=upper_col_2_name_dic, inplace=True)
            logger.info('%d/%d) %d data of %s between %s and %s', data_num,
                        wind_code_date_count, data_df.shape[0], wind_code,
                        date_from, date_to)
            data_df['wind_code'] = wind_code
            data_tot += data_df.shape[0]
            data_df_list.append(data_df)
            if data_tot > 10000:
                # BUGFIX: previously only the most recent data_df was inserted
                # here while the whole accumulated list was cleared, silently
                # dropping every earlier chunk.
                for df in data_df_list:
                    bunch_insert_on_duplicate_update(df,
                                                     table_name,
                                                     engine_md,
                                                     dtype=dtype)
                data_df_list = []
                data_tot = 0
            # for debugging only
            # if DEBUG and len(data_df) > 2000:
            #     break

    finally:
        # flush whatever is still buffered, even when the loop aborted
        if len(data_df_list) > 0:
            # BUGFIX: previously only the last data_df was inserted instead of
            # the accumulated list, and the log reported the number of frames
            # rather than the number of rows.
            insert_count = 0
            for df in data_df_list:
                insert_count += df.shape[0]
                bunch_insert_on_duplicate_update(df,
                                                 table_name,
                                                 engine_md,
                                                 dtype=dtype)
            logging.info("更新 wind_pub_fund_daily 结束 %d 条信息被更新",
                         insert_count)
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
Example #2
0
def merge_stock_daily(date_from=None):
    """
    Merge wind and ifind daily stock data into the stock_daily table.

    :param date_from: start date; when None and the target table exists, defaults
        to the day after the latest stored trade_date
    :return: merged DataFrame that was written to the database
    """
    table_name = 'stock_daily'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(trade_date),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    ifind_table_ds_name = 'ifind_{table_name}_ds'.format(table_name=table_name)
    ifind_table_his_name = 'ifind_{table_name}_his'.format(
        table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    if date_from is None:
        # BUGFIX: the his/ds table names were swapped in both query pairs below,
        # so the "his" DataFrame was read from the ds table and vice versa.
        ifind_his_sql_str = "select * from {table_name}".format(
            table_name=ifind_table_his_name)
        ifind_ds_sql_str = "select * from {table_name}".format(
            table_name=ifind_table_ds_name)
        wind_sql_str = "select * from {table_name}".format(
            table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str,
                                   engine_md)  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str,
                                  engine_md)  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str,
                              engine_md)  # , index_col='wind_code'
    else:
        ifind_his_sql_str = "select * from {table_name} where time >= %s".format(
            table_name=ifind_table_his_name)
        ifind_ds_sql_str = "select * from {table_name} where time >= %s".format(
            table_name=ifind_table_ds_name)
        wind_sql_str = "select * from {table_name} where trade_date >= %s".format(
            table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str,
                                   engine_md,
                                   params=[date_from
                                           ])  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str,
                                  engine_md,
                                  params=[date_from])  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md,
                              params=[date_from])  # , index_col='wind_code'

    # join the two ifind tables first, then join against the wind table
    ifind_df = pd.merge(ifind_his_df,
                        ifind_ds_df,
                        how='outer',
                        on=['ths_code', 'time'])
    joined_df = pd.merge(ifind_df,
                         wind_df,
                         how='outer',
                         left_on=['ths_code', 'time'],
                         right_on=['wind_code', 'trade_date'],
                         indicator='indicator_column')
    # per-column merge rule: {target column: (sql type, merge func, func kwargs)}
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {
            'left_key': 'ths_code',
            'right_key': 'wind_code'
        }),
        'trade_date': (Date, prefer_left, {
            'left_key': 'time',
            'right_key': 'trade_date'
        }),
        'open': (DOUBLE, mean_value, {
            'left_key': 'open_x',
            'right_key': 'open_y',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'high': (DOUBLE, mean_value, {
            'left_key': 'high_x',
            'right_key': 'high_y',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'low': (DOUBLE, mean_value, {
            'left_key': 'low_x',
            'right_key': 'low_y',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        # TODO: for unknown reasons some close values from the wind interface
        # are inaccurate, so prefer the ifind side
        'close': (DOUBLE, prefer_left, {
            'left_key': 'close_x',
            'right_key': 'close_y',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'volume': (DOUBLE, mean_value, {
            'left_key': 'volume_x',
            'right_key': 'volume_y',
            'warning_accuracy': 1,
            'primary_keys': ('ths_code', 'time')
        }),
        'amount': (DOUBLE, mean_value, {
            'left_key': 'amount',
            'right_key': 'amt',
            'warning_accuracy': 1,
            'primary_keys': ('ths_code', 'time')
        }),
        # total shares: ifind totalShares is dated by the change date while
        # wind total_shares is dated by the announcement date, so on conflict
        # the wind value wins
        'total_shares': (DOUBLE, prefer_right, {
            'left_key': 'totalShares',
            'right_key': 'total_shares'
        }),
        # 'susp_days': (Integer, '***', {
        #     'left_key': 'ths_up_and_down_status_stock', 'right_key': 'susp_days', 'other_key': 'trade_status',
        #  'primary_keys': ('ths_code', 'time')}),
        'max_up_or_down': (Integer, max_up_or_down, {
            'ths_key': 'ths_up_and_down_status_stock',
            'wind_key': 'maxupordown',
            'primary_keys': ('ths_code', 'time')
        }),
        'total_capital': (DOUBLE, get_value, {
            'key': 'totalCapital'
        }),
        'float_capital': (DOUBLE, get_value, {
            'key': 'floatCapitalOfAShares'
        }),
        'pct_chg': (DOUBLE, mean_value, {
            'left_key': 'changeRatio',
            'right_key': 'pct_chg',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'float_a_shares': (DOUBLE, get_value, {
            'key': 'floatSharesOfAShares'
        }),  # maps to wind float_a_shares
        'free_float_shares': (DOUBLE, get_value, {
            'key': 'free_float_shares'
        }),  # maps to ths ths_free_float_shares_stock
        # PE_TTM: ths ths_pe_ttm_stock is based on the financial statement date
        # while wind pe_ttm is based on the report date, so on conflict the
        # wind value wins
        'pe_ttm': (DOUBLE, prefer_right, {
            'left_key': 'ths_pe_ttm_stock',
            'right_key': 'pe_ttm',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'pe': (DOUBLE, get_value, {
            'key': 'pe'
        }),
        'pb': (DOUBLE, get_value, {
            'key': 'pb'
        }),
        'ps': (DOUBLE, get_value, {
            'key': 'ps'
        }),
        'pcf': (DOUBLE, get_value, {
            'key': 'pcf'
        }),
    }

    col_merge_rule_dic = {
        key: (val[1], val[2])
        for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name,
                                                  engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    return data_df
Example #3
0
def merge_ifind_stock_daily(ths_code_set: set = None, date_from=None):
    """
    Merge ds/his market data and financial data into the ifind_stock_daily table.

    :param ths_code_set: restrict processing to these ths codes; None means all
    :param date_from: start date; when None and the target table exists, defaults
        to the day after the latest stored `time`
    :return:
    """
    table_name = 'ifind_stock_daily'
    logging.info("合成 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(`time`),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    # load the source tables
    ifind_his_df = get_ifind_daily_df('ifind_stock_daily_his', date_from)
    ifind_ds_df = get_ifind_daily_df('ifind_stock_daily_ds', date_from)
    ifind_report_date_df = get_ifind_report_date_df('ifind_stock_report_date',
                                                    None)
    ifind_fin_df = get_ifind_daily_df('ifind_stock_fin', None)
    ifind_fin_df_g = ifind_fin_df.groupby('ths_code')
    ths_code_set_4_daily = set(ifind_fin_df_g.size().index)
    # merge ds and his data (outer join; unmatched rows become NaN)
    ifind_his_ds_df = pd.merge(ifind_his_df,
                               ifind_ds_df,
                               how='outer',
                               on=['ths_code', 'time'])
    ifind_his_ds_df_g = ifind_his_ds_df.groupby('ths_code')
    logger.debug("提取数据完成")
    # build {ths_code: {report_date: financial record Series}} disclosure mapping
    report_date_dic_dic = {}
    for report_date_g in [
            ifind_report_date_df.groupby(
                ['ths_code', 'ths_regular_report_actual_dd_stock'])
    ]:
        for num, ((ths_code, report_date), data_df) in enumerate(report_date_g,
                                                                 start=1):
            if ths_code_set is not None and ths_code not in ths_code_set:
                continue
            if is_nan_or_none(report_date):
                continue
            report_date_dic = report_date_dic_dic.setdefault(ths_code, {})
            if ths_code not in ths_code_set_4_daily:
                logger.error('fin 表中不存在 %s 的財務數據', ths_code)
                continue
            ifind_fin_df_temp = ifind_fin_df_g.get_group(ths_code)
            # BUGFIX: was `report_date not in report_date_dic_dic`, which tested
            # a date against a dict keyed by ths_code and therefore was always
            # True; test the per-code date dict instead.
            if report_date not in report_date_dic:
                ifind_fin_df_temp = ifind_fin_df_temp[
                    ifind_fin_df_temp['time'] <= report_date]
                if ifind_fin_df_temp.shape[0] > 0:
                    # NOTE(review): iloc[0] after an ascending sort picks the
                    # EARLIEST financial record on or before report_date; the
                    # latest one (iloc[-1]) looks more plausible — confirm
                    # intent before changing.
                    report_date_dic[
                        report_date] = ifind_fin_df_temp.sort_values(
                            'time').iloc[0]

    # assemble the dtype mapping for the merged table
    dtype = {'report_date': Date}
    for dic in [
            DTYPE_STOCK_DAILY_DS, DTYPE_STOCK_REPORT_DATE,
            DTYPE_STOCK_DAILY_FIN, DTYPE_STOCK_DAILY_HIS
    ]:
        for key, val in dic.items():
            dtype[key] = val

    logger.debug("计算财报日期完成")
    # stitch market-data segments with the financial record valid in each period
    tot_data_count, data_count, data_df_list, for_count = 0, 0, [], len(
        report_date_dic_dic)
    try:
        for num, (ths_code,
                  report_date_dic) in enumerate(report_date_dic_dic.items(),
                                                start=1):  # key:ths_code
            # TODO: checking membership via the groupby size Series is a
            # temporary measure; improve later
            if ths_code not in ifind_his_ds_df_g.size():
                # BUGFIX: the message previously claimed the fin table was
                # missing data, but this branch means the his/ds market data
                # is missing for the code.
                logger.error('his/ds 表中不存在 %s 的行情數據', ths_code)
                continue
            # open/low etc. may be NaN in the joined frame
            ifind_his_ds_df_cur_ths_code = ifind_his_ds_df_g.get_group(
                ths_code)  # shape[1] 30
            logger.debug('%d/%d) 处理 %s %d 条数据', num, for_count, ths_code,
                         ifind_his_ds_df_cur_ths_code.shape[0])
            report_date_list = list(report_date_dic.keys())
            report_date_list.sort()
            for report_date_from, report_date_to in iter_2_range(
                    report_date_list):
                logger.debug('%d/%d) 处理 %s [%s - %s]', num, for_count,
                             ths_code, date_2_str(report_date_from),
                             date_2_str(report_date_to))
                # build the mask for the [report_date_from, report_date_to)
                # window; an open end means unbounded on that side
                if report_date_from is None:
                    is_fit = ifind_his_ds_df_cur_ths_code[
                        'time'] < report_date_to
                elif report_date_to is None:
                    is_fit = ifind_his_ds_df_cur_ths_code[
                        'time'] >= report_date_from
                else:
                    is_fit = (ifind_his_ds_df_cur_ths_code['time'] <
                              report_date_to) & (
                                  ifind_his_ds_df_cur_ths_code['time'] >=
                                  report_date_from)
                # slice the market data falling inside the window
                ifind_his_ds_df_segment = ifind_his_ds_df_cur_ths_code[
                    is_fit].copy()
                segment_count = ifind_his_ds_df_segment.shape[0]
                if segment_count == 0:
                    continue
                fin_s = report_date_dic[
                    report_date_from] if report_date_from is not None else None
                # broadcast the financial record of this period onto every row
                for key in DTYPE_STOCK_DAILY_FIN.keys():
                    if key in ('ths_code', 'time'):
                        continue
                    ifind_his_ds_df_segment[key] = fin_s[
                        key] if fin_s is not None and key in fin_s else None
                ifind_his_ds_df_segment['report_date'] = report_date_from
                data_df_list.append(ifind_his_ds_df_segment)
                data_count += segment_count

            if DEBUG and len(data_df_list) > 1:
                break

            # flush once the buffer passes the threshold
            if data_count > 10000:
                data_df = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md, dtype)
                tot_data_count += data_count
                data_count, data_df_list = 0, []

    finally:
        # flush whatever is still buffered, even when the loop aborted
        if len(data_df_list) > 0:
            data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, dtype)
            tot_data_count += data_count

        logger.info('%s 新增或更新记录 %d 条', table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
Example #4
0
def import_smfund_info(chain_param=None):
    """
    Import structured-fund (分级基金) information into wind_smfund_info.

    :param chain_param: used by celery task chains to pass the previous task's result on
    :return:
    """
    table_name = "wind_smfund_info"
    has_table = engine_md.has_table(table_name)
    # w.start()
    col_name_param_list = [
        ('wind_code', String(20)),
        ('fund_type', String(20)),
        ('sec_name', String(50)),
        ('class_a_code', String(20)),
        ('class_a_name', String(50)),
        ('class_b_code', String(20)),
        ('class_b_name', String(50)),
        ('track_indexcode', String(20)),
        ('track_indexname', String(50)),
        ('a_pct', DOUBLE),
        ('b_pct', DOUBLE),
        ('upcv_nav', DOUBLE),
        ('downcv_nav', DOUBLE),
        ('max_purchasefee', DOUBLE),
        ('max_redemptionfee', DOUBLE),
    ]
    field = ",".join(col_name for col_name, _ in col_name_param_list)
    # snapshot dates used to collect all structured funds ever listed
    dates = [
        '2011-01-01', '2013-01-01', '2015-01-01', '2017-01-01', '2018-01-01'
    ]
    df = pd.DataFrame()
    # fetch the sector constituents at each snapshot date
    for date_p in dates:
        temp_df = invoker.wset("sectorconstituent",
                               "date=%s;sectorid=1000006545000000" % date_p)
        df = df.append(temp_df)
    wind_code_all = df['wind_code'].unique()
    # column types for the insert
    dtype = {key: val for key, val in col_name_param_list}
    dtype["tradable"] = String(20)
    dtype["fund_setupdate"] = Date
    dtype["fund_maturitydate"] = Date
    # query the database and drop codes that already exist
    if has_table:
        with with_db_session(engine_md) as session:
            table = session.execute("SELECT wind_code FROM wind_smfund_info")
            wind_code_existed = set(
                [content[0] for content in table.fetchall()])
        wind_code_new = list(set(wind_code_all) - wind_code_existed)
    else:
        wind_code_new = list(set(wind_code_all))
    # drop pseudo codes containing '!'
    wind_code_new = [code for code in wind_code_new if '!' not in code]
    info_df = invoker.wss(wind_code_new, 'fund_setupdate, fund_maturitydate')
    if info_df is None:
        raise Exception('no data')
    info_df['FUND_SETUPDATE'] = info_df['FUND_SETUPDATE'].apply(
        lambda x: str_2_date(x))
    info_df['FUND_MATURITYDATE'] = info_df['FUND_MATURITYDATE'].apply(
        lambda x: str_2_date(x))
    info_df.rename(columns={
        'FUND_SETUPDATE': 'fund_setupdate',
        'FUND_MATURITYDATE': 'fund_maturitydate'
    },
                   inplace=True)
    # fetch leveraged-fund details per code, starting from its setup date
    for code in info_df.index:
        beginDate = info_df.loc[code, 'fund_setupdate'].strftime('%Y-%m-%d')
        temp_df = invoker.wset("leveragedfundinfo",
                               "date=%s;windcode=%s;field=%s" %
                               (beginDate, code, field))  # ;field=%s  , field
        df = df.append(temp_df)
        if DEBUG and len(df) > 10:
            break
    df.set_index('wind_code', inplace=True)
    # tradable flag: keep the code itself when it contains 'S'
    df['tradable'] = df.index.map(lambda x: x if 'S' in x else None)
    # df.index = df.index.map(lambda x: x[:-2] + 'OF')
    info_df = info_df.join(df, how='outer')
    # TODO: double-check this renaming against the actual wset column names
    # BUGFIX: the rename result was previously discarded (no inplace / no
    # assignment), so the columns were never actually renamed.
    info_df.rename(
        columns={
            'a_nav': 'nav_a',
            'b_nav': 'nav_b',
            'a_fs_inc': 'fs_inc_a',
            'b_fs_inc': 'fs_inc_b'
        },
        inplace=True)
    info_df.index.rename('wind_code', inplace=True)
    info_df.reset_index(inplace=True)
    bunch_insert_on_duplicate_update(info_df,
                                     table_name,
                                     engine_md,
                                     dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, len(info_df))
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    # refresh the code_mapping table
    update_from_info_table(table_name)
Example #5
0
def import_index_daily_ds(chain_param=None,
                          ths_code_set: set = None,
                          begin_time=None):
    """
    Fetch index history through the date-series interface and store it into
    ifind_index_daily_ds; supplements the History data with fields such as
    the adjust factor (af), limit-up/down flags and suspension status/reason.

    :param chain_param: only used to pass parameters along a task.chain pipeline
    :param ths_code_set: restrict processing to these codes; None means all
    :param begin_time: optional earliest start date
    :return:
    """
    table_name = 'ifind_index_daily_ds'
    has_table = engine_md.has_table(table_name)
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_INDEX_DAILY_DS],
        sep=';')
    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                    ifind_index_info info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code""".format(table_name=table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info 
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code;"""
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围' % table_name)
    # per-code (date_from, date_to) range that still needs daily data
    with with_db_session(engine_md) as session:
        code_date_range_dic = {}
        for ths_code, date_from, date_to in session.execute(sql_str).fetchall():
            if ths_code_set is not None and ths_code not in ths_code_set:
                continue
            start = date_from if begin_time is None else min([date_from, begin_time])
            code_date_range_dic[ths_code] = (start, date_to)

    if TRIAL:
        # trial accounts may only fetch the most recent 5 years of data
        date_from_min = date.today() - timedelta(days=(365 * 5))
        trimmed_dic = {}
        for ths_code, (date_from, date_to) in code_date_range_dic.items():
            if date_to is None or date_to < date_from_min:
                continue
            trimmed_dic[ths_code] = (max([date_from, date_from_min]), date_to)
        code_date_range_dic = trimmed_dic

    pending_dfs, buffered_rows, total_rows = [], 0, 0
    code_count = len(code_date_range_dic)
    try:
        for seq, (ths_code, (begin_time, end_time)) in enumerate(
                code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', seq, code_count, ths_code,
                         begin_time, end_time)
            end_time = date_2_str(end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:D', begin_time,
                end_time)
            if data_df is not None and data_df.shape[0] > 0:
                pending_dfs.append(data_df)
                buffered_rows += data_df.shape[0]
            # flush once the buffer passes the threshold
            if buffered_rows >= 10000:
                inserted = bunch_insert_on_duplicate_update(
                    pd.concat(pending_dfs), table_name, engine_md,
                    DTYPE_INDEX_DAILY_DS)
                total_rows += inserted
                pending_dfs, buffered_rows = [], 0

            # for debugging only
            if DEBUG and len(pending_dfs) > 1:
                break
    finally:
        # flush whatever is still buffered, even when the loop aborted
        if buffered_rows > 0:
            total_rows += bunch_insert_on_duplicate_update(
                pd.concat(pending_dfs), table_name, engine_md,
                DTYPE_INDEX_DAILY_DS)

        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, total_rows)
def import_stock_hk_report_date(chain_param=None,
                                ths_code_set: set = None,
                                begin_time=None,
                                interval='Q'):
    """
    Save historical report-date data to ifind_stock_hk_report_date via the
    date-series (THS_DateSerial) interface.  Financial statements are released
    quarterly, so the fetch interval defaults to quarterly.
    :param chain_param: only used to pass parameters up/down the chain when run serially via task.chain
    :param ths_code_set: optional whitelist of THS codes; None means every code
    :param begin_time: optional start date; when given, each code's start becomes min(stored start, begin_time)
    :param interval: Q quarterly, M monthly, W weekly, D daily
    :return:
    """
    table_name = 'ifind_stock_hk_report_date'
    info_table_name = 'ifind_stock_hk_info'
    has_table = engine_md.has_table(table_name)
    # jsonIndicator='ths_perf_briefing_fore_dsclsr_date_hks;ths_perf_brief_actual_dd_hks;ths_perf_report_foredsclsr_date_hks;ths_perf_report_actual_dd_hks'
    # jsonparam=';'
    json_indicator, json_param = unzip_join(
        [(key, val)
         for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_REPORT_DATE],
        sep=';')
    if has_table:
        # Resume each code from the day after its latest stored `time`, falling
        # back to the IPO date; cap at delisting date / yesterday (today after 19:00).
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM 
                    {info_table_name} info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info 
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # Date range of data still to be fetched for each stock:
        # {ths_code: (date_from, date_to)}
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))
        # Trial accounts can only fetch the most recent 5 years of data
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to
        }

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code,
                  (begin_time,
                   end_time)) in enumerate(code_date_range_dic.items(),
                                           start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         begin_time, end_time)
            # NOTE(review): begin_time/end_time are passed without date_2_str
            # conversion here, unlike the daily importer — confirm the API
            # accepts date objects for this call.
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                "Days:Tradedays,Fill:Previous,Interval:{interval}".format(
                    interval=interval), begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Flush to DB once the buffered row count exceeds the threshold
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                data_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md,
                    DTYPE_STOCK_HK_REPORT_DATE)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

            # Debugging only
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # Flush whatever is still buffered, even on error/interrupt
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_STOCK_HK_REPORT_DATE)
            tot_data_count += data_count

        if not has_table and engine_md.has_table(table_name):
            # Table was created on this run: convert to MyISAM and add the primary key
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def import_stock_hk_fin_by_report_date_weekly(chain_param=None,
                                              ths_code_set: set = None,
                                              begin_time=None,
                                              refresh=False):
    """
    Save historical financial data to ifind_stock_hk_fin via the date-series
    (THS_DateSerial) interface.  The data are fetched at weekly frequency,
    anchored on financial-report publication dates: for each report the window
    [publication date - 14 days, publication date] is queried.
    :param chain_param: only used to pass parameters up/down the chain when run serially via task.chain
    :param ths_code_set: optional whitelist of THS codes; None means every code
    :param begin_time: optional start date; when given, each code's start becomes min(stored start, begin_time)
    :param refresh: True refreshes everything (ignores the per-code date-range limits)
    :return:
    """
    table_name = 'ifind_stock_hk_fin'
    info_table_name = 'ifind_stock_hk_info'
    # ths_cce_hks;ths_total_liab_hks;ths_ebit_ttm_hks
    # jsonparam='2013,100,OC;2013,100,OC;OC,101'
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_FIN],
        sep=';')
    has_table = engine_md.has_table(table_name)
    # Distinct (code, report_date - 14 days, report_date) windows built from
    # both the performance-brief and the report actual disclosure dates.
    ths_code_report_date_str = """select distinct ths_code, subdate(report_date, 14), report_date from
        (
        select ths_code, ths_perf_brief_actual_dd_hks report_date from ifind_stock_hk_report_date
        union
        select ths_code, ths_perf_report_actual_dd_hks report_date from ifind_stock_hk_report_date
        ) tt
        where report_date is not null
        order by ths_code, report_date"""

    if has_table:
        # Resume each code from the day after its latest stored `time`, falling
        # back to the IPO date; cap at delisting date / yesterday (today after 19:00).
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM 
                    {info_table_name} info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info 
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # Collect, per code, the list of [report_date - 14 days, report_date] windows
        table = session.execute(ths_code_report_date_str)
        ths_code_report_date_range_list_dic, ths_code_report_date_range_list_dic_tmp = {}, {}
        for ths_code, date_from, date_to in table.fetchall():
            if ths_code_set is None or ths_code in ths_code_set:
                ths_code_report_date_range_list_dic_tmp.setdefault(
                    ths_code, []).append((date_from, date_to))

        # Date range of data still to be fetched for each stock
        if not refresh:
            # When refreshing everything, the code_date_range_dic limits are ignored
            table = session.execute(sql_str)
            code_date_range_dic = {
                ths_code: (date_from if begin_time is None else min(
                    [date_from, begin_time]), date_to)
                for ths_code, date_from, date_to in table.fetchall()
                if ths_code_set is None or ths_code in ths_code_set
            }

            if TRIAL:
                date_from_min = date.today() - timedelta(days=(365 * 5))
                # Trial accounts can only fetch the most recent 5 years of data
                code_date_range_dic = {
                    ths_code: (max([date_from, date_from_min]), date_to)
                    for ths_code, (date_from,
                                   date_to) in code_date_range_dic.items()
                    if date_from_min <= date_to
                }
        else:
            code_date_range_dic = {}

    # Merge overlapping date windows
    for ths_code, date_range_list in ths_code_report_date_range_list_dic_tmp.items(
    ):
        if not refresh and ths_code in code_date_range_dic:
            code_date_range = code_date_range_dic[ths_code]
        else:
            code_date_range = None

        # date_range_list is ordered by start date; this inner loop merges
        # windows whose date ranges overlap
        date_range_list_new, date_from_last, date_to_last = [], None, None
        for date_from, date_to in date_range_list:
            if code_date_range is not None:
                # When refreshing everything, the code_date_range_dic limits are
                # ignored; otherwise drop windows entirely outside the limits
                if not refresh and (date_to < code_date_range[0]
                                    or code_date_range[1] < date_from):
                    continue

            if date_from_last is None:
                # First iteration: initialise date_from_last
                date_from_last = date_from
            elif date_from < date_to_last:
                # Windows overlap: merge by leaving the current range open
                # NOTE(review): strict `<` means windows that merely touch
                # (date_from == date_to_last) are NOT merged — confirm intended
                pass
            else:
                # No overlap: close and save the current range
                date_range_list_new.append((date_from_last, date_to_last))
                date_from_last = date_from

            # Loop tail: advance date_to_last
            date_to_last = date_to

        # Loop finished: save the trailing range
        if date_from_last is not None and date_to_last is not None:
            date_range_list_new.append((date_from_last, date_to_last))

        if len(date_range_list_new) > 0:
            ths_code_report_date_range_list_dic[ths_code] = date_range_list_new

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        ths_code_report_date_range_list_dic)
    try:
        for num, (ths_code, date_range_list) in enumerate(
                ths_code_report_date_range_list_dic.items(), start=1):
            for begin_time, end_time in date_range_list:
                logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                             begin_time, end_time)
                data_df = invoker.THS_DateSerial(
                    ths_code, json_indicator, json_param,
                    'Days:Tradedays,Fill:Previous,Interval:W', begin_time,
                    end_time)
                if data_df is not None and data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)

                # Debugging only
                # NOTE(review): this break sits BEFORE the threshold flush below,
                # unlike the sibling importers where it comes after — confirm intended
                if DEBUG and len(data_df_list) > 0:
                    break

                # Flush to DB once the buffered row count exceeds the threshold
                if data_count >= 2000:
                    tot_data_df = pd.concat(data_df_list)
                    # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                    bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                                     engine_md,
                                                     DTYPE_STOCK_HK_FIN)
                    tot_data_count += data_count
                    data_df_list, data_count = [], 0

    finally:
        # Flush whatever is still buffered, even on error/interrupt
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                             engine_md, DTYPE_STOCK_HK_FIN)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)

        if not has_table and engine_md.has_table(table_name):
            # Table was created on this run: convert to MyISAM and add the primary key
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
# Example #8
def import_stock_quertarly_hk(chain_param=None,
                              wind_code_set=None,
                              begin_time=None):
    """
    Import HK stock quarterly financial data up to the most recent workday - 1.
    :param chain_param: in celery, passes the previous task's result on to the next task
    :param wind_code_set: optional whitelist of wind codes; None means every code
    :param begin_time: optional start date; when given, each code's start becomes min(stored start, begin_time)
    :return:
    """
    table_name = "wind_stock_quertarly_hk"
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume each code from the day after its latest stored trade_date,
        # falling back to the IPO date; cap at delist date / yesterday
        # (today after 16:00).
        # BUG FIX: the sub-query previously grouped by `ths_code`, a column that
        # does not exist in this wind_* table; group by `wind_code` instead.
        sql_str = """
           SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM 
                   wind_stock_info_hk info 
               LEFT OUTER JOIN
                   (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) quertarly
               ON info.wind_code = quertarly.wind_code
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY wind_code""".format(table_name=table_name)
    else:
        sql_str = """
           SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.wind_code, ipo_date date_frm, delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM wind_stock_info_hk info 
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY wind_code"""
        # FIX: this warning used to be logged twice (once with a hard-coded
        # table name); log it once, parameterized.
        logger.warning('%s 不存在,仅使用 wind_stock_info_hk 表进行计算日期范围', table_name)
    with with_db_session(engine_md) as session:
        # Date range of data still to be fetched for each stock:
        # {wind_code: (date_from, date_to)}
        table = session.execute(sql_str)
        stock_date_dic = {
            wind_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    data_df_list = []
    logger.info('%d data will been import into %s', len(stock_date_dic),
                table_name)
    # Quarterly indicator fields to request, with their SQL column types
    field_col_name_list = [
        ('roic_ttm', String(20)),
        ('yoyprofit', String(20)),
        ('ebit', String(20)),
        ('ebit2', String(20)),
        ('ebit2_ttm', String(20)),
        ('surpluscapitalps', String(20)),
        ('undistributedps', String(20)),
        ('stm_issuingdate', Date),
    ]
    # Column -> SQL type mapping for the bulk insert
    dtype = {key: val for key, val in field_col_name_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date
    wind_indictor_str = ",".join(key for key, _ in field_col_name_list)
    # The API returns upper-case column names; map them back to our lower-case
    # names.  BUG FIX: this dict previously mapped the upper-case name to the
    # SQLAlchemy *type object* (val) instead of the lower-case column name, so
    # the rename below produced type objects as column labels and the columns
    # never matched `dtype`.
    upper_col_2_name_dic = {
        name.upper(): name.lower()
        for name, _ in field_col_name_list
    }
    # Fetch the data from the API
    try:
        for stock_num, (wind_code,
                        (date_from,
                         date_to)) in enumerate(stock_date_dic.items()):
            data_df = invoker.wsd(wind_code, wind_indictor_str, date_from,
                                  date_to, "unit=1;rptType=1;Period=Q")
            if data_df is None:
                logger.warning('%d) %s has no data during %s %s', stock_num,
                               wind_code, date_from, date_to)
                continue
            data_df.rename(columns=upper_col_2_name_dic, inplace=True)
            # Drop rows where every value is empty
            # NOTE(review): `x is None` does not catch NaN — confirm the API
            # returns None (not NaN) for missing values.
            for trade_date in list(data_df.index):
                is_all_none = data_df.loc[trade_date].apply(
                    lambda x: x is None).all()
                if is_all_none:
                    logger.warning("%s %s 数据全部为空", wind_code, trade_date)
                    data_df.drop(trade_date, inplace=True)
            logger.info('%d) %d data of %s between %s and %s', stock_num,
                        data_df.shape[0], wind_code, date_from, date_to)
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df_list.append(data_df)
            # Debugging only
            if DEBUG and len(data_df_list) > 5:
                break
    finally:
        # Persist whatever was collected, even on error/interrupt
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            bunch_insert_on_duplicate_update(data_df_all,
                                             table_name,
                                             engine_md,
                                             dtype=dtype)
            logging.info("更新 wind_stock_quertarly_hk 结束 %d 条信息被更新",
                         data_df_all.shape[0])
            if not has_table and engine_md.has_table(table_name):
                # Table was created on this run: convert to MyISAM and add the primary key
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
# Example #9
def import_stock_info_hk(chain_param=None, refresh=False):
    """
    Fetch the whole-market list of HK stock codes and load the per-code stock
    info (name, IPO/delist dates, market, exchange, previous names) into
    wind_stock_info_hk.
    :param chain_param: in celery, passes the previous task's result on to the next task
    :param refresh: defaults to False; True forces a full refresh starting from DATE_BASE
    :return:
    """
    table_name = 'wind_stock_info_hk'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    col_def_list = [
        ('sec_name', String(20)),
        ('trade_code', String(20)),
        ('ipo_date', Date),
        ('delist_date', Date),
        ('mkt', String(20)),
        ('exch_city', String(20)),
        ('exch_eng', String(20)),
        ('prename', String(2000)),
    ]
    # Comma-separated field list for the wss call
    field_str = ",".join(name for name, _ in col_def_list)
    # The API returns upper-case column names; map them back to lower case
    upper_2_lower_dic = {name.upper(): name.lower() for name, _ in col_def_list}
    # Column -> SQL type mapping for the bulk insert
    col_dtype = dict(col_def_list)
    col_dtype['wind_code'] = String(20)
    # A full refresh scans from DATE_BASE; an incremental run only samples today
    fetch_date = DATE_BASE if refresh else date.today()
    today = date.today()
    code_set = set()
    # Sample the market roughly once a year so every code that existed at any
    # point in the window is collected
    while fetch_date < today:
        sub_set = get_stock_code_set(fetch_date)
        if sub_set is not None:
            code_set |= sub_set
        fetch_date += timedelta(days=365)
    sub_set = get_stock_code_set(today)
    if sub_set is not None:
        code_set |= sub_set
    # Query IPO date, delist date etc. for the collected codes, 1000 at a time
    # e.g. w.wss("300005.SZ,300372.SZ,000003.SZ", "ipo_date,trade_code,mkt,exch_city,exch_eng")
    seg_count = 1000
    df_list = []
    for code_list_sub in split_chunk(list(code_set), seg_count):
        df_list.append(invoker.wss(code_list_sub, field_str))
        if DEBUG:
            break
    info_df = pd.concat(df_list)
    info_df.index.rename('wind_code', inplace=True)
    info_df.rename(columns=upper_2_lower_dic, inplace=True)
    logging.info('%d data will be import', info_df.shape[0])
    info_df.reset_index(inplace=True)

    # Upsert into the target table
    data_count = bunch_insert_on_duplicate_update(info_df,
                                                  table_name,
                                                  engine_md,
                                                  dtype=col_dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        # Table was created on this run: convert to MyISAM and add the primary key
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    # Keep the code_mapping table in sync with the refreshed info table
    update_from_info_table(table_name)
def import_private_fund_nav_daily(chain_param=None, wind_code_list=None):
    """
    Import private-fund daily NAV data into wind_fund_nav_daily.
    :param chain_param: in celery, passes the previous task's result on to the next task
    :param wind_code_list: optional whitelist of fund codes; None means every fund in fund_info
    :return: fund_nav_all_df (currently always an empty list; kept for interface compatibility)
    """
    table_name = 'wind_fund_nav_daily'
    # Column -> SQL type mapping for the insert
    col_name_param_list = [
        ('trade_date', Date),
        ('nav', DOUBLE),
        ('nav_acc', DOUBLE),
        ('nav_date', Date),
    ]
    dtype = {col_name: val for col_name, val in col_name_param_list}
    dtype['wind_code'] = String(200)
    has_table = engine_md.has_table(table_name)
    if has_table:
        fund_info_df = pd.read_sql_query(
            """SELECT DISTINCT fi.wind_code AS wind_code, 
                IFNULL(fund_setupdate, if(trade_date_latest BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1), ADDDATE(trade_date_latest,1) , fund_setupdate) ) date_from,
                if(fund_maturitydate BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1),fund_maturitydate,ADDDATE(CURDATE(), -1)) date_to 
                FROM fund_info fi
                LEFT JOIN
                (
                SELECT wind_code, ADDDATE(max(trade_date),1) trade_date_from FROM wind_fund_nav_daily
                GROUP BY wind_code
                ) wfn
                ON fi.wind_code = wfn.wind_code""",
            engine_md)
    else:
        fund_info_df = pd.read_sql_query(
            """SELECT DISTINCT fi.wind_code AS wind_code, 
                IFNULL(fund_setupdate, if(trade_date_latest BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1), ADDDATE(trade_date_latest,1) , fund_setupdate) ) date_from,
                if(fund_maturitydate BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1),fund_maturitydate,ADDDATE(CURDATE(), -1)) date_to 
                FROM fund_info fi
                ORDER BY wind_code;""",
            engine_md)

    # BUG FIX: everything from here down used to be indented inside the `else`
    # branch above, so whenever the target table already existed the function
    # skipped all the work and raised NameError on `return fund_nav_all_df`.
    wind_code_date_frm_to_dic = {wind_code: (str_2_date(date_from), str_2_date(date_to)) for
                                 wind_code, date_from, date_to in
                                 zip(fund_info_df['wind_code'], fund_info_df['date_from'], fund_info_df['date_to'])}
    fund_info_df.set_index('wind_code', inplace=True)
    if wind_code_list is None:
        wind_code_list = list(fund_info_df.index)
    else:
        wind_code_list = list(set(wind_code_list) & set(fund_info_df.index))

    fund_nav_all_df = []
    no_data_count = 0
    code_count = len(wind_code_list)
    # Latest trade date successfully processed per fund; handed to
    # update_trade_date_latest in the finally block so progress survives errors
    wind_code_trade_date_latest_dic = {}
    date_gap = timedelta(days=10)
    try:
        for num, wind_code in enumerate(wind_code_list):
            date_begin, date_end = wind_code_date_frm_to_dic[wind_code]
            if date_begin > date_end:
                continue

            # Validate / format the start date
            if isinstance(date_begin, date):
                if date_begin.year < 1900:
                    continue
                date_begin_str = date_begin.strftime('%Y-%m-%d')
            else:
                logger.error("%s date_begin:%s", wind_code, date_begin)
                continue

            # Validate / format the end date
            # BUG FIX: these guards previously re-checked date_begin (copy-paste)
            if isinstance(date_end, date):
                if date_end.year < 1900:
                    continue
                date_end_str = date_end.strftime('%Y-%m-%d')
            else:
                logger.error("%s date_end:%s", wind_code, date_end)
                continue

            # Try to fetch fund NAV data, at most 2 attempts
            for k in range(2):
                try:
                    fund_nav_tmp_df = invoker.wsd(codes=wind_code, fields='nav,NAV_acc,NAV_date',
                                                  beginTime=date_begin_str,
                                                  endTime=date_end_str, options='Fill=Previous')
                    trade_date_latest = datetime.strptime(date_end_str, '%Y-%m-%d').date() - date_gap
                    wind_code_trade_date_latest_dic[wind_code] = trade_date_latest
                    break
                except APIError as exp:
                    # -40520007: still record progress so the code is not re-fetched forever
                    # (use .get, not setdefault — no need to mutate the API's return dict)
                    if exp.ret_dic.get('error_code', 0) == -40520007:
                        trade_date_latest = datetime.strptime(date_end_str, '%Y-%m-%d').date() - date_gap
                        wind_code_trade_date_latest_dic[wind_code] = trade_date_latest
                    logger.error("%s Failed, ErrorMsg: %s" % (wind_code, str(exp)))
                    continue
                except Exception as exp:
                    logger.error("%s Failed, ErrorMsg: %s" % (wind_code, str(exp)))
                    continue
            else:
                # Both attempts failed without a break
                fund_nav_tmp_df = None

            if fund_nav_tmp_df is None:
                logger.info('%s No data', wind_code)
                no_data_count += 1
                logger.warning('%d funds no data', no_data_count)
            else:
                fund_nav_tmp_df.dropna(how='all', inplace=True)
                if fund_nav_tmp_df.shape[0] == 0:
                    continue
                fund_nav_tmp_df['wind_code'] = wind_code
                trade_date_latest = fund_nav_df_2_sql(table_name, fund_nav_tmp_df, engine_md, is_append=True)
                if trade_date_latest is None:
                    # BUG FIX: format string had two placeholders ('%s[%d]') for one argument
                    logger.error('%s data insert failed', wind_code)
                else:
                    wind_code_trade_date_latest_dic[wind_code] = trade_date_latest
                    logger.info('%d) %s updated, %d funds left', num, wind_code, code_count - num)

            if DEBUG and num > 1:  # debugging only
                break
    finally:
        # Always propagate fetched NAVs and record per-fund progress, even on error
        import_wind_fund_nav_to_nav()
        update_trade_date_latest(wind_code_trade_date_latest_dic)
        if not has_table and engine_md.has_table(table_name):
            # Table was created on this run: convert to MyISAM and add the primary key
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
    return fund_nav_all_df
# Example #11
def import_stock_daily_hk(chain_param=None,
                          wind_code_set=None,
                          begin_time=None):
    """
    Import HK stock daily bar data up to the most recent completed trading day.
    Data for the current day is included only after 16:00 (the cut-off is
    enforced by the ``hour(now())<16`` clause in the SQL below).
    :param chain_param: used only to pass the previous task's result forward in a celery chain
    :param wind_code_set: optional container of wind codes; None means all codes
    :param begin_time: optional earliest fetch date, merged (min) with each stock's own start date
    :return: None
    """
    table_name = 'wind_stock_daily_hk'
    logging.info("更新 %s 开始", table_name)
    # (wind field name, SQL column type) pairs for the daily table
    param_list = [
        ('OPEN', DOUBLE),
        ('HIGH', DOUBLE),
        ('LOW', DOUBLE),
        ('CLOSE', DOUBLE),
        ('ADJFACTOR', DOUBLE),
        ('VOLUME', DOUBLE),
        ('AMT', DOUBLE),
        ('PCT_CHG', DOUBLE),
        ('MAXUPORDOWN', Integer),
        ('SWING', DOUBLE),
        ('TURN', DOUBLE),
        ('FREE_TURN', DOUBLE),
        ('TRADE_STATUS', String(20)),
        ('SUSP_DAYS', Integer),
        ('TOTAL_SHARES', DOUBLE),
        ('FREE_FLOAT_SHARES', DOUBLE),
        ('EV2_TO_EBITDA', DOUBLE),
        ('PS_TTM', DOUBLE),
        ('PE_TTM', DOUBLE),
        ('PB_MRQ', DOUBLE),
    ]
    # Map upper-case wind indicator names to lower-case db column names
    col_name_dic = {
        col_name.upper(): col_name.lower()
        for col_name, _ in param_list
    }
    # Lower-case column list, also used as the wsd indicator request string
    col_name_list = [col_name.lower() for col_name in col_name_dic.keys()]
    wind_indictor_str = ",".join(col_name_list)
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """
           SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM 
                   wind_stock_info_hk info 
               LEFT OUTER JOIN
                   (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) daily
               ON info.wind_code = daily.wind_code
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY wind_code""".format(table_name=table_name)
    else:
        logger.warning('%s 不存在,仅使用 wind_stock_info_hk 表进行计算日期范围', table_name)
        sql_str = """
           SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.wind_code, ipo_date date_frm, delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM wind_stock_info_hk info 
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY wind_code"""

    with with_db_session(engine_md) as session:
        # Compute the [date_from, date_to] range to fetch for each stock
        table = session.execute(sql_str)
        stock_date_dic = {
            wind_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    # dtype mapping for the db insert.
    # FIX: keys must be lower case — the DataFrame is renamed via col_name_dic
    # to lower-case columns before insertion, so upper-case dtype keys would
    # never match a column and the declared SQL types would be ignored
    # (consistent with import_private_fund_info, which lowercases them too).
    dtype = {key.lower(): val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date
    data_df_list = []
    data_len = len(stock_date_dic)
    logger.info('%d data will been import into %s', data_len, table_name)
    try:
        for data_num, (wind_code,
                       (date_from,
                        date_to)) in enumerate(stock_date_dic.items()):
            logger.debug('%d/%d) %s [%s - %s]', data_num, data_len, wind_code,
                         date_from, date_to)
            try:
                data_df = invoker.wsd(wind_code, wind_indictor_str, date_from,
                                      date_to)
            except APIError as exp:
                logger.exception("%d/%d) %s 执行异常", data_num, data_len,
                                 wind_code)
                # Recoverable wind error codes: skip this stock, keep going
                if exp.ret_dic.setdefault('error_code', 0) in (
                        -40520007,  # no data available
                        -40521009,  # decode failure (check date parameters)
                ):
                    continue
                else:
                    break
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', data_num,
                               data_len, wind_code, date_from, date_to)
                continue
            logger.info('%d/%d) %d data of %s between %s and %s', data_num,
                        data_len, data_df.shape[0], wind_code, date_from,
                        date_to)
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df.rename(columns=col_name_dic, inplace=True)
            data_df_list.append(data_df)
            # debug only: stop early
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # Insert whatever was collected, even if the loop aborted
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)

            data_count = bunch_insert_on_duplicate_update(data_df_all,
                                                          table_name,
                                                          engine_md,
                                                          dtype=dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
def import_private_fund_info(table_name, chain_param=None, get_df=False):
    """
    Import private fund basic info for every strategy sector into table_name.
    :param table_name: target table name, e.g. 'fund_info'
    :param chain_param: used only to pass the previous task's result forward in a celery chain
    :param get_df: when True, return the assembled info DataFrame
    :return: info DataFrame when get_df is True, otherwise None
    """
    has_table = engine_md.has_table(table_name)
    # Wind sector ids per strategy type.
    # NOTE(review): u'多策略' (100002312000000) has one digit fewer than its
    # siblings (pattern 10000231XX000000) — looks like a missing trailing
    # zero; confirm against the wind sector id list before changing it.
    types = {u'股票多头策略': 1000023122000000,
             u'股票多空策略': 1000023123000000,
             u'其他股票策略': 1000023124000000,
             u'阿尔法策略': 1000023125000000,
             u'其他市场中性策略': 1000023126000000,
             u'事件驱动策略': 1000023113000000,
             u'债券策略': 1000023114000000,
             u'套利策略': 1000023115000000,
             u'宏观策略': 1000023116000000,
             u'管理期货': 1000023117000000,
             u'组合基金策略': 1000023118000000,
             u'货币市场策略': 1000023119000000,
             u'多策略': 100002312000000,
             u'其他策略': 1000023121000000}
    df = pd.DataFrame()
    today = date.today().strftime('%Y-%m-%d')
    # (wind field name, SQL column type) pairs for the info table
    param_list = [
        ('FUND_SETUPDATE', Date),
        ('FUND_MATURITYDATE', Date),
        ('FUND_MGRCOMP', String(800)),
        ('FUND_EXISTINGYEAR', String(500)),
        ('FUND_PTMYEAR', String(30)),
        ('FUND_TYPE', String(20)),
        ('FUND_FUNDMANAGER', String(500)),
        ('SEC_NAME', String(2000)),
        ('STRATEGY_TYPE', String(200)),
        ('TRADE_DATE_LATEST', String(200)),
    ]
    col_name_dic = {col_name.upper(): col_name.lower() for col_name, _ in param_list}
    col_name_list = [col_name.lower() for col_name in col_name_dic.keys()]
    # only the first 8 fields are requested from wss
    param_str = ",".join(col_name_list[:8])
    # dtype mapping for the db insert
    dtype = {key.lower(): val for key, val in param_list}
    dtype['wind_code'] = String(20)
    # Fetch the constituents of each strategy sector as of today
    for strategy_name, sector_id in types.items():
        temp = invoker.wset("sectorconstituent", "date=%s;sectorid=%s" % (today, str(sector_id)))
        temp['strategy_type'] = strategy_name
        df = pd.concat([df, temp], axis=0)
        if DEBUG and len(df) > 1000:
            break
    # .copy() so set_index(inplace=True) does not operate on a slice view
    # (avoids pandas SettingWithCopy warnings)
    fund_types_df = df[['wind_code', 'sec_name', 'strategy_type']].copy()
    fund_types_df.set_index('wind_code', inplace=True)
    # Fetch fund fundamentals in segments of seg_count codes per wss call
    code_list = list(fund_types_df.index)
    code_count = len(code_list)
    seg_count = 5000
    info_df_list = []
    for n in range(int(code_count / seg_count) + 1):
        num_start = n * seg_count
        num_end = (n + 1) * seg_count
        num_end = num_end if num_end <= code_count else code_count
        if num_start > code_count:
            break
        codes = ','.join(code_list[num_start:num_end])
        # Segment-fetch fund setup dates etc.
        info2_df = invoker.wss(codes, param_str)
        logging.info('%05d ) [%d %d]' % (n, num_start, num_end))
        info_df_list.append(info2_df)
        if DEBUG and sum(sub_df.shape[0] for sub_df in info_df_list) > 1000:
            break
    # FIX: DataFrame.append was removed in pandas 2.0 — assemble with pd.concat
    info_df = pd.concat(info_df_list) if len(info_df_list) > 0 else pd.DataFrame()
    # Normalize dates, merge with strategy types, then insert
    info_df['FUND_SETUPDATE'] = info_df['FUND_SETUPDATE'].apply(lambda x: str_2_date(x))
    info_df['FUND_MATURITYDATE'] = info_df['FUND_MATURITYDATE'].apply(lambda x: str_2_date(x))
    info_df = fund_types_df.join(info_df, how='right')
    info_df.rename(columns=col_name_dic, inplace=True)
    info_df['trade_date_latest'] = None
    info_df.index.names = ['wind_code']
    info_df.reset_index(inplace=True)
    info_df.drop_duplicates(inplace=True)
    bunch_insert_on_duplicate_update(info_df, table_name, engine_md, dtype=dtype)
    logging.info('%d funds inserted' % len(info_df))
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    # Refresh the code_mapping table from the info table
    update_from_info_table(table_name)
    if get_df:
        return info_df
Example #13
0
def import_wind_code_kv(keys: list, chain_param=None, wind_code_list=None):
    """
    Import per-stock key/value indicator data (via wsd) into wind_code_kv.
    Each indicator name in *keys* is stored as the `key` column value, the
    indicator reading as `value`.
    Example request: w.wsd("600000.SH", "west_eps", "2018-05-05", "2018-06-03", "year=2018;westPeriod=180")
    :param keys: list of wind indicator names
    :param chain_param: used only to pass the previous task's result forward in a celery chain
    :param wind_code_list: optional list of wind codes; None means all codes
    :return: None
    """
    table_name = "wind_code_kv"
    wind_code_set = set(wind_code_list) if wind_code_list is not None else None
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # dtype mapping for the db insert
    param_list = [
        ('trade_date', Date),
        ('key', String(45)),
        ('value', DOUBLE),
    ]
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    for item_key in keys:
        item_key = item_key.lower()
        # For each stock, start from the day after the last stored trade_date
        # for this table, falling back to the ipo date
        if has_table:
            sql_str = """
                       SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
                       FROM
                       (
                           SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
                           if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                           FROM 
                               wind_stock_info info 
                           LEFT OUTER JOIN
                               (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) kv
                           ON info.wind_code = kv.wind_code
                       ) tt
                       WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
                       ORDER BY wind_code;""".format(table_name=table_name)
        else:
            logger.warning('%s 不存在,仅使用 wind_stock_info 表进行计算日期范围', table_name)
            sql_str = """
                       SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
                       FROM
                       (
                           SELECT info.wind_code, ipo_date date_frm, delist_date,
                           if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                           FROM wind_stock_info info 
                       ) tt
                       WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
                       ORDER BY wind_code"""
        with with_db_session(engine_md) as session:
            # Compute the [date_from, date_to] range to fetch for each stock
            table = session.execute(sql_str)
            stock_date_dic = {
                wind_code: (date_from, date_to)
                for wind_code, date_from, date_to in table.fetchall() if
                wind_code_set is None or wind_code in wind_code_set}

        data_df_list = []
        data_len = len(stock_date_dic)
        logger.info('%d stocks will been import into %s', data_len, table_name)
        try:
            for data_num, (wind_code, (date_from, date_to)) in enumerate(stock_date_dic.items(), start=1):
                # Fetch the indicator series, one wsd request per year
                data_df = get_wind_kv_per_year(wind_code, item_key, date_from, date_to,
                                               "year=%(year)d;westPeriod=180")
                if data_df is None:
                    logger.warning('%d/%d) %s has no data during %s %s', data_num, data_len, wind_code, date_from,
                                   date_to)
                    continue
                data_df.dropna(inplace=True)
                if data_df.shape[0] == 0:
                    logger.warning('%d/%d) %s has 0 data during %s %s', data_num, data_len, wind_code, date_from,
                                   date_to)
                    continue
                logger.info('%d/%d) %d data of %s between %s and %s', data_num, data_len, data_df.shape[0], wind_code,
                            date_from, date_to)
                # Reshape into the (wind_code, trade_date, key, value) layout
                data_df['wind_code'] = wind_code
                data_df.index.rename('trade_date', inplace=True)
                data_df.reset_index(inplace=True)
                data_df.rename(columns={item_key.upper(): 'value'}, inplace=True)
                data_df['key'] = item_key
                data_df_list.append(data_df)
                # FIX: this early break is a debugging aid and must not limit
                # production runs to 5 stocks — guard it with DEBUG
                if DEBUG and data_num >= 5:
                    break
        finally:
            # Insert whatever was collected, even if the loop aborted
            if len(data_df_list) > 0:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
                logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_df_all.shape[0])
                if not has_table and engine_md.has_table(table_name):
                    alter_table_2_myisam(engine_md, [table_name])
                    build_primary_key([table_name])
Example #14
0
def import_pub_fund_info(chain_param=None, first_time=False):
    """
    Import basic info for all public (mutual) funds on the market.
    :param chain_param: used only to pass the previous task's result forward in a celery chain
    :param first_time: when True, scan fund codes yearly starting from 2004-01-01
    :return: None
    """
    table_name = "wind_pub_fund_info"
    has_table = engine_md.has_table(table_name)
    if first_time:
        # One sample date per year from 2004 up to (and including) yesterday
        date_since = datetime.strptime('2004-01-01', STR_FORMAT_DATE).date()
        date_list = []
        one_year = timedelta(days=365)
        while date_since < date.today() - ONE_DAY:
            date_list.append(date_since)
            date_since += one_year
        date_list.append(date.today() - ONE_DAY)
    else:
        date_list = [date.today() - ONE_DAY]
    # Union of fund codes observed on each sample date
    wind_code_set = set()
    for fetch_date in date_list:
        data_set = get_wind_code_set(fetch_date)
        if data_set is not None:
            wind_code_set |= data_set
        if DEBUG and len(wind_code_set) > 6:
            break
    if has_table:
        # Only fetch info for codes not already present in the table
        with with_db_session(engine_md) as session:
            sql_str = "select wind_code from %s" % table_name
            table = session.execute(sql_str)
            wind_code_set_existed = {
                content[0]
                for content in table.fetchall()
            }
        wind_code_set -= wind_code_set_existed
    wind_code_list = list(wind_code_set)
    wind_code_count = len(wind_code_list)
    seg_count = 1000
    data_info_df_list = []
    # (wind field name, SQL column type) pairs for the info table.
    # FIX: FUND_FUNDMANAGER was listed twice (String(500) then String(50));
    # the later entry won in the dtype dict and silently shrank the column
    # to String(50). Keep the wider String(500), matching import_private_fund_info.
    fund_info_field_col_name_list = [
        ('FUND_FULLNAME', String(200)),
        ('FUND_EXCHANGESHORTNAME', String(50)),
        ('FUND_BENCHMARK', String(200)),
        ('FUND_SETUPDATE', Date),
        ('FUND_MATURITYDATE', Date),
        ('FUND_FUNDMANAGER', String(500)),
        ('FUND_MGRCOMP', String(200)),
        ('FUND_CUSTODIANBANK', String(50)),
        ('FUND_TYPE', String(50)),
        ('FUND_FIRSTINVESTTYPE', String(50)),
        ('FUND_INVESTTYPE', String(50)),
        ('FUND_STRUCTUREDFUNDORNOT', String(50)),
        ('FUND_BENCHINDEXCODE', String(50)),
    ]
    dtype = {key.lower(): val for key, val in fund_info_field_col_name_list}
    dtype['wind_code'] = String(20)
    # wss field list — loop-invariant, so build it once
    field_str = ",".join([
        col_name.lower() for col_name, _ in fund_info_field_col_name_list
    ])
    # Fetch fund info in segments of seg_count codes per wss call
    for n in range(0, wind_code_count, seg_count):
        sub_list = wind_code_list[n:(n + seg_count)]
        stock_info_df = invoker.wss(sub_list, field_str)
        data_info_df_list.append(stock_info_df)
        if DEBUG and len(data_info_df_list) > 1000:
            break
    if len(data_info_df_list) == 0:
        logger.info("%s 没有数据可以导入", table_name)
        return
    data_info_all_df = pd.concat(data_info_df_list)
    data_info_all_df.index.rename('wind_code', inplace=True)
    # Column names come back upper-case from wss; normalize to lower case
    data_info_all_df.rename(
        columns={col: col.lower()
                 for col in data_info_all_df.columns},
        inplace=True)
    logging.info('%d data will be import', data_info_all_df.shape[0])
    data_info_all_df.reset_index(inplace=True)
    bunch_insert_on_duplicate_update(data_info_all_df,
                                     table_name,
                                     engine_md,
                                     dtype=dtype)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # Refresh the code_mapping table from the info table
    update_from_info_table(table_name)
def import_stock_hk_info(chain_param=None, ths_code=None, refresh=False):
    """
    Import the ifind_stock_hk_info table (HK stock basic info via THS_BasicData).
    :param chain_param: used only when chained via task.chain to pass parameters along
    :param ths_code: comma-separated ths codes; None means the whole HK market
    :param refresh: when True, scan the code universe yearly starting from 1991-02-01
    :return: None
    """
    table_name = 'ifind_stock_hk_info'
    logging.info("更新 %s 开始", table_name)
    if ths_code is None:
        # Collect all HK stock codes on the market
        if refresh:
            date_fetch = datetime.strptime('1991-02-01',
                                           STR_FORMAT_DATE).date()
        else:
            date_fetch = date.today()

        date_end = date.today()
        stock_hk_code_set = set()
        while date_fetch < date_end:
            stock_hk_code_set_sub = get_stock_hk_code_set(date_fetch)
            if stock_hk_code_set_sub is not None:
                stock_hk_code_set |= stock_hk_code_set_sub
            date_fetch += timedelta(days=365)

        stock_hk_code_set_sub = get_stock_hk_code_set(date_end)
        if stock_hk_code_set_sub is not None:
            stock_hk_code_set |= stock_hk_code_set_sub

        if DEBUG:
            stock_hk_code_set = list(stock_hk_code_set)[:10]

        stock_hk_code_list = list(stock_hk_code_set)
        ths_code = ','.join(stock_hk_code_list)
    else:
        # FIX: stock_hk_code_set was only defined in the ths_code-is-None
        # branch, so passing ths_code explicitly raised NameError at the
        # DELETE statement below. Derive the code list from the argument.
        stock_hk_code_list = ths_code.split(',')

    # (indicator name, jsonparam, SQL column type) triples
    indicator_param_list = [
        ('ths_stock_short_name_hks', '', String(40)),
        ('ths_stock_code_hks', '', String(20)),
        ('ths_isin_code_hks', '', String(40)),
        ('ths_corp_ashare_short_name_hks', '', String(10)),
        ('ths_corp_ashare_code_hks', '', String(60)),
        ('ths_stock_varieties_hks', '', String(40)),
        ('ths_ipo_date_hks', '', Date),
        ('ths_listed_exchange_hks', '', String(60)),
        ('ths_stop_listing_date_hks', '', Date),
        ('ths_corp_cn_name_hks', '', String(120)),
        ('ths_corp_name_en_hks', '', String(120)),
        ('ths_established_date_hks', '', Date),
        ('ths_accounting_date_hks', '', String(20)),
        ('ths_general_manager_hks', '', String(40)),
        ('ths_secretary_hks', '', String(40)),
        ('ths_operating_scope_hks', '', Text),
        ('ths_mo_product_name_hks', '', String(200)),
        ('ths_district_hks', '', String(60)),
        ('ths_reg_address_hks', '', String(200)),
        ('ths_office_address_hks', '', String(200)),
        ('ths_corp_tel_hks', '', String(200)),
        ('ths_corp_fax_hks', '', String(200)),
        ('ths_corp_website_hks', '', String(200)),
        ('ths_auditor_hks', '', String(60)),
        ('ths_legal_counsel_hks', '', String(300)),
        ('ths_hs_industry_hks', '', String(40)),
    ]
    # Build the ';'-joined indicator and param strings for THS_BasicData
    indicator, param = unzip_join([(key, val)
                                   for key, val, _ in indicator_param_list],
                                  sep=';')
    param += '100'
    data_df = invoker.THS_BasicData(ths_code, indicator, param)
    if data_df is None or data_df.shape[0] == 0:
        logging.info("没有可用的 stock_hk info 可以更新")
        return
    # Delete existing rows for these codes, then insert the refreshed data
    has_table = engine_md.has_table(table_name)
    if has_table:
        with with_db_session(engine_md) as session:
            session.execute(
                "DELETE FROM {table_name} WHERE ths_code IN (".format(
                    table_name=table_name) + ','.join(
                        [':code%d' % n
                         for n in range(len(stock_hk_code_list))]) + ")",
                params={
                    'code%d' % n: val
                    for n, val in enumerate(stock_hk_code_list)
                })
            session.commit()
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name,
                                                  engine_md, dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    # Refresh the code_mapping table from the info table
    update_from_info_table(table_name)
Example #16
0
def import_future_daily_his(chain_param=None,
                            ths_code_set: set = None,
                            begin_time=None):
    """
    Update daily quotes for future contracts (ifind_future_daily).
    :param chain_param: used only when chained via task.chain to pass parameters along
    :param ths_code_set: optional set of ths codes; None means all contracts
    :param begin_time: optional earliest fetch date, merged (min) with each contract's own start date
    :return: None
    """
    table_name = 'ifind_future_daily'
    info_table_name = 'ifind_future_info'
    logger.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # (indicator name, SQL column type) pairs for THS_HistoryQuotes
    indicator_param_list = [
        ('preClose', String(20)),
        ('open', DOUBLE),
        ('high', DOUBLE),
        ('low', DOUBLE),
        ('close', DOUBLE),
        ('volume', DOUBLE),
        ('amount', DOUBLE),
        ('avgPrice', DOUBLE),
        ('change', DOUBLE),
        ('changeRatio', DOUBLE),
        ('preSettlement', DOUBLE),
        ('settlement', DOUBLE),
        ('change_settlement', DOUBLE),
        ('chg_settlement', DOUBLE),
        ('openInterest', DOUBLE),
        ('positionChange', DOUBLE),
        ('amplitude', DOUBLE),
    ]
    json_indicator = ','.join([key for key, _ in indicator_param_list])
    if has_table:
        # After 16:00 download today's close as well; before that only yesterday.
        # Contracts whose date_to is more than ~1 year old may be unrecoverable
        # (e.g. AL0202.SHF AL9902.SHF CU0202.SHF).
        # TODO: ths_ksjyr_future should be replaced by ths_contract_listed_date_future
        sql_str = """SELECT ths_code, date_frm, 
                if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
            SELECT fi.ths_code, ifnull(trade_date_max_1, ths_start_trade_date_future) date_frm, 
                ths_last_td_date_future lasttrade_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi LEFT OUTER JOIN
                (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) wfd
                ON fi.ths_code = wfd.ths_code
            ) tt
            WHERE date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date) 
            -- AND subdate(curdate(), 360) < if(lasttrade_date<end_date, lasttrade_date, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM 
            (
            SELECT fi.ths_code, ths_start_trade_date_future date_frm, 
                ths_last_td_date_future lasttrade_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi
            ) tt""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)

    with with_db_session(engine_md) as session:
        # Compute the [date_from, date_to] range to fetch for each contract
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        # Trial accounts can only access the last 5 years of data
        date_from_min = date.today() - timedelta(days=(365 * 5))
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to
        }

    # dtype mapping for the db insert
    dtype = {key: val for key, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    dtype['time'] = Date

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        logger.info("%d future instrument will be handled", code_count)
        # NOTE: loop variables renamed from begin_time/end_time so they no
        # longer shadow the begin_time parameter
        for num, (ths_code,
                  (date_from,
                   date_to)) in enumerate(code_date_range_dic.items(),
                                          start=1):
            if date_from is None:
                continue
            if date_from > date_to:
                logger.warning('%d/%d) %s [%s - %s] 日期范围无效,跳过', num,
                               code_count, ths_code, date_from, date_to)
                continue
            else:
                logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                             date_from, date_to)

            try:
                data_df = invoker.THS_HistoryQuotes(
                    ths_code,
                    json_indicator,
                    '',  # 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                    date_from,
                    date_to)
            except APIError as exp:
                if exp.ret_dic['error_code'] in (
                        -4001,
                        -4210,
                ):
                    # Recoverable: log and continue with the next contract
                    data_df = None
                    logger.exception(
                        "THS_HistoryQuotes 获取失败 '%s', '%s', %s, %s 不影响后续任务",
                        ths_code, json_indicator, date_from, date_to)
                else:
                    logger.exception(
                        "THS_HistoryQuotes 获取失败 '%s', '%s', %s, %s", ths_code,
                        json_indicator, date_from, date_to)
                    break

            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Flush to the db once the buffer exceeds the threshold
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                inserted_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, dtype)
                tot_data_count += inserted_count
                # FIX: log BEFORE resetting the counter — the original reset
                # data_count to 0 first and therefore always logged "新增数据 0 条"
                logging.info("%s 新增数据 %d 条", table_name, inserted_count)
                data_df_list, data_count = [], 0

            # debug only: stop early
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # Flush any remaining buffered rows, even if the loop aborted
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            inserted_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, dtype)
            tot_data_count += inserted_count
            logging.info("%s 新增数据 %d 条", table_name, inserted_count)

        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def import_stock_hk_daily_ds(chain_param=None,
                             ths_code_set: set = None,
                             begin_time=None):
    """
    Save historical HK stock data into ifind_stock_hk_daily_ds via the
    date-serial (THS_DateSerial) interface.  This supplements the
    History-quotes data with e.g. adjustment factor (af), limit up/down flag,
    trading/suspension status and suspension reason.
    :param chain_param: only used by celery task.chain to pass parameters between tasks
    :param ths_code_set: optional set of ths codes to restrict the import to; None = all
    :param begin_time: optional override; per code, min(date_from, begin_time) is used
    :return:
    """
    table_name = 'ifind_stock_hk_daily_ds'
    info_table_name = 'ifind_stock_hk_info'
    # jsonIndicator='ths_pre_close_stock;ths_open_price_stock;ths_high_price_stock;ths_low_stock;ths_close_price_stock;ths_chg_ratio_stock;ths_chg_stock;ths_vol_stock;ths_trans_num_stock;ths_amt_stock;ths_turnover_ratio_stock;ths_vaild_turnover_stock;ths_af_stock;ths_up_and_down_status_stock;ths_trading_status_stock;ths_suspen_reason_stock;ths_last_td_date_stock'
    # jsonparam='100;100;100;100;100;;100;100;;;;;;;;;'
    # Build the ';'-joined indicator / parameter strings from the
    # (indicator, param, dtype) triples declared at module level.
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_DAILY_DS],
        sep=';')
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume from the day after each code's latest stored date (falling
        # back to the IPO date when no rows exist); end date is yesterday
        # before 19:00 and today afterwards, capped by the stop-listing date.
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM 
                    {info_table_name} info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)
    else:
        # No data table yet: compute the date range from the info table alone.
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info 
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # Query the per-code date range that still needs daily data.
        table = session.execute(sql_str)

        # Map ths_code -> (date_from, date_to), optionally widened by
        # begin_time and filtered by ths_code_set.
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))
        # Trial accounts can only fetch roughly the most recent 5 years of data.
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to
        }

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        # NOTE(review): the loop rebinds `begin_time`, shadowing the parameter;
        # harmless here because the parameter is no longer read after this point.
        for num, (ths_code,
                  (begin_time,
                   end_time)) in enumerate(code_date_range_dic.items(),
                                           start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         begin_time, end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:D', begin_time,
                end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # For debugging only: stop after the first fetched DataFrame.
            if DEBUG and len(data_df_list) > 0:
                break

            # Flush to the DB once the buffered row count exceeds the threshold.
            if data_count >= 2000:
                tot_data_df = pd.concat(data_df_list)
                # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                                 engine_md,
                                                 DTYPE_STOCK_HK_DAILY_DS)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

    finally:
        # Always flush whatever is still buffered, even on error/interruption.
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                             engine_md,
                                             DTYPE_STOCK_HK_DAILY_DS)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)

        # First creation of the table: switch storage engine and build the
        # primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_wind_bonus(chain_param=None):
    """
    Import stock dividend / bonus-share (分红送转) data from Wind into
    wind_stock_bonus, querying a wss snapshot per year-end report date and
    keeping rows that have at least one of the four key dividend dates.
    :param chain_param: only used by celery task.chain to pass parameters between tasks
    :return:
    """
    table_name = 'wind_stock_bonus'
    has_table = engine_md.has_table(table_name)
    # (column, sqlalchemy dtype) pairs for the wss request and the DB insert
    param_list = [
        ('div_cashbeforetax2', DOUBLE),
        ('div_cashaftertax2', DOUBLE),
        ('div_stock2', DOUBLE),
        ('div_capitalization2', DOUBLE),
        ('div_capitalization', DOUBLE),
        ('div_stock', DOUBLE),
        ('div_cashaftertax', DOUBLE),
        ('div_cashbeforetax', DOUBLE),
        ('div_cashandstock', DOUBLE),
        ('div_recorddate', Date),
        ('div_exdate', Date),
        ('div_paydate', Date),
        ('div_trddateshareb', Date),
        ('div_preDisclosureDate', Date),
        ('div_prelandate', Date),
        ('div_smtgdate', Date),
        ('div_impdate', Date),
    ]
    param = ",".join([key for key, _ in param_list])
    with with_db_session(engine_md) as session:
        table = session.execute('SELECT wind_code, ipo_date, delist_date FROM wind_stock_info')
        # delist_date values at/before UN_AVAILABLE_DATE are placeholders
        # meaning "still listed" and are normalized to None.
        stock_date_dic = {wind_code: (ipo_date, delist_date if delist_date > UN_AVAILABLE_DATE else None) for
                          wind_code, ipo_date, delist_date in table.fetchall()}
    # was a bare print(); use the module logger like the sibling importers
    logger.info('%d stocks loaded from wind_stock_info', len(stock_date_dic))
    # Year-end report dates used as snapshot dates for the wss query.
    DATE_LIST = [date(year, 12, 31) for year in range(2010, 2016)]
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dic_exdate_df_list = []
    for rep_date in DATE_LIST:
        # Only query stocks already listed at rep_date and not yet delisted.
        stock_list = [s for s, date_pair in stock_date_dic.items() if
                      date_pair[0] < rep_date and (rep_date < date_pair[1] if date_pair[1] is not None else True)]
        dic_exdate_df = invoker.wss(stock_list, param)
        dic_exdate_df_list.append(dic_exdate_df)

    dic_exdate_df_all = pd.concat(dic_exdate_df_list)
    dic_exdate_df_all.index.rename('wind_code', inplace=True)
    dic_exdate_df_all.drop_duplicates(inplace=True)
    # Normalize every date column returned by Wind (str_date -> date or None).
    date_col_list = ['DIV_EXDATE', 'DIV_PAYDATE', 'DIV_IMPDATE',
                     'DIV_RECORDDATE', 'DIV_PREDISCLOSUREDATE',
                     'DIV_PRELANDATE', 'DIV_SMTGDATE', 'DIV_TRDDATESHAREB']
    for col_name in date_col_list:
        dic_exdate_df_all[col_name] = dic_exdate_df_all[col_name].apply(str_date)

    # Keep only rows where at least one of the four key dates is present.
    key_col_list = ['DIV_EXDATE', 'DIV_PAYDATE', 'DIV_IMPDATE', 'DIV_RECORDDATE']
    all_none = None
    for col_name in key_col_list:
        col_is_none = dic_exdate_df_all[col_name].apply(lambda x: x is None)
        all_none = col_is_none if all_none is None else (all_none & col_is_none)
    dic_exdate_df_available = dic_exdate_df_all[~all_none]
    bunch_insert_on_duplicate_update(dic_exdate_df_available, table_name, engine_md, dtype=dtype)
    # First creation of the table: switch storage engine and build the key.
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
def import_stock_hk_fin_quarterly(chain_param=None,
                                  ths_code_set: set = None,
                                  begin_time=None):
    """
    Save historical HK stock financial data into ifind_stock_hk_fin via the
    date-serial (THS_DateSerial) interface, fetched at quarterly intervals.
    :param chain_param: only used by celery task.chain to pass parameters between tasks
    :param ths_code_set: optional set of ths codes to restrict the import to; None = all
    :param begin_time: optional override; per code, min(date_from, begin_time) is used
    :return:
    """
    table_name = 'ifind_stock_hk_fin'
    info_table_name = 'ifind_stock_hk_info'
    # ths_cce_hks;ths_total_liab_hks;ths_ebit_ttm_hks
    # jsonparam='2013,100,OC;2013,100,OC;OC,101'
    # Build the ';'-joined indicator / parameter strings from the
    # (indicator, param, dtype) triples declared at module level.
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_FIN],
        sep=';')
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume from the day after each code's latest stored date (falling
        # back to the IPO date when no rows exist); end date is yesterday
        # before 19:00 and today afterwards, capped by the stop-listing date.
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM 
                    {info_table_name} info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)
    else:
        # No data table yet: compute the date range from the info table alone.
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info 
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # Query the per-code date range that still needs quarterly data, then
        # map ths_code -> (date_from, date_to), optionally widened by
        # begin_time and filtered by ths_code_set.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))
        # Trial accounts can only fetch roughly the most recent 5 years of data.
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to
        }

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        # NOTE(review): the loop rebinds `begin_time`, shadowing the parameter;
        # harmless here because the parameter is no longer read after this point.
        for num, (ths_code,
                  (begin_time,
                   end_time)) in enumerate(code_date_range_dic.items(),
                                           start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         begin_time, end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:Q', begin_time,
                end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # For debugging only: stop after the first fetched DataFrame.
            if DEBUG and len(data_df_list) > 0:
                break

            # Flush to the DB once the buffered row count exceeds the threshold.
            if data_count >= 2000:
                tot_data_df = pd.concat(data_df_list)
                # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                                 engine_md, DTYPE_STOCK_HK_FIN)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

    finally:
        # Always flush whatever is still buffered, even on error/interruption.
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                             engine_md, DTYPE_STOCK_HK_FIN)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)

        # First creation of the table: switch storage engine and build the
        # primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
# Example #20
def import_future_daily(chain_param=None, wind_code_set=None, begin_time=None):
    """
    Update daily quotes for future contracts into wind_future_daily.
    :param chain_param: only used by celery task.chain to pass parameters between tasks
    :param wind_code_set: optional set of wind codes to restrict the import to; None = all
    :param begin_time: optional override; per code, min(date_from, begin_time) is used
    :return:
    """
    table_name = "wind_future_daily"
    logger.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # (column, sqlalchemy dtype); the first 10 entries are the wind indicators.
    param_list = [("open", DOUBLE), ("high", DOUBLE), ("low", DOUBLE),
                  ("close", DOUBLE), ("volume", DOUBLE), ("amt", DOUBLE),
                  ("dealnum", DOUBLE), ("settle", DOUBLE), ("oi", DOUBLE),
                  ("st_stock", DOUBLE), ('position', DOUBLE),
                  ('instrument_id', String(20)), (
                      'trade_date',
                      Date,
                  )]
    wind_indictor_str = ",".join([key for key, _ in param_list[:10]])

    if has_table:
        # Resume from the day after each code's latest stored trade date
        # (falling back to the IPO date); end date is yesterday before 16:00
        # and today afterwards, capped by the contract's last trade date.
        sql_str = """
            select wind_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
            select fi.wind_code, ifnull(trade_date_max_1, ipo_date) date_frm, 
                lasttrade_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            from wind_future_info fi 
            left outer join
                (select wind_code, adddate(max(trade_date),1) trade_date_max_1 from {table_name} group by wind_code) wfd
            on fi.wind_code = wfd.wind_code
            ) tt
            where date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date) 
            -- and subdate(curdate(), 360) < if(lasttrade_date<end_date, lasttrade_date, end_date) 
            order by wind_code""".format(table_name=table_name)
    else:
        # No data table yet: compute the date range from the info table alone.
        sql_str = """
            SELECT wind_code, date_frm,
                if(lasttrade_date<end_date,lasttrade_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code,ipo_date date_frm, lasttrade_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_future_info info
            ) tt
            WHERE date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date)
            ORDER BY wind_code;
         """
        logger.warning('%s 不存在,仅使用 wind_future_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        # Map wind_code -> (date_from, date_to), optionally widened by
        # begin_time and filtered by wind_code_set.
        future_date_dic = {
            wind_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }

    # dtype mapping for bunch_insert_on_duplicate_update
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date

    data_df_list = []
    data_len = len(future_date_dic)
    try:
        logger.info("%d future instrument will be handled", data_len)
        # start=1 so progress logs read "1/N .. N/N" like the other importers
        for num, (wind_code,
                  (date_frm,
                   date_to)) in enumerate(future_date_dic.items(), start=1):
            if date_frm > date_to:
                continue
            date_frm_str = date_frm.strftime(STR_FORMAT_DATE)
            date_to_str = date_to.strftime(STR_FORMAT_DATE)
            logger.info('%d/%d) get %s between %s and %s', num, data_len,
                        wind_code, date_frm_str, date_to_str)
            try:
                data_df = invoker.wsd(wind_code, wind_indictor_str,
                                      date_frm_str, date_to_str, "")
            except APIError as exp:
                logger.exception("%d/%d) %s 执行异常", num, data_len, wind_code)
                # .get instead of .setdefault: no need to mutate the error dict
                if exp.ret_dic.get('error_code', 0) in (
                        -40520007,  # no data available
                        -40521009,  # data decode failure; check the date args
                ):
                    continue
                else:
                    break
            if data_df is None:
                # use date_to_str for consistency with the other placeholders
                logger.warning('%d/%d) %s has no data during %s %s', num,
                               data_len, wind_code, date_frm_str, date_to_str)
                continue
            logger.info('%d/%d) %d data of %s between %s and %s', num,
                        data_len, data_df.shape[0], wind_code, date_frm_str,
                        date_to_str)
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df.rename(columns={c: str.lower(c)
                                    for c in data_df.columns},
                           inplace=True)
            # wind's 'oi' (open interest) is stored as 'position'
            data_df.rename(columns={'oi': 'position'}, inplace=True)
            data_df['instrument_id'] = wind_code.split('.')[0]
            data_df_list.append(data_df)
            # for debugging only
            if DEBUG and len(data_df_list) >= 1:
                break
    finally:
        # Always flush whatever was collected, even on error/interruption.
        data_df_count = len(data_df_list)
        if data_df_count > 0:
            logger.info('merge data with %d df', data_df_count)
            data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df,
                                                          table_name,
                                                          engine_md,
                                                          dtype=dtype)
            logger.info("更新 %s 结束 %d 条记录被更新", table_name, data_count)
            # First creation of the table: switch storage engine, build the key.
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
        else:
            logger.info("更新 %s 结束 0 条记录被更新", table_name)
def import_stock_quertarly(chain_param=None, wind_code_set=None):
    """
    Import quarterly financial indicator data into wind_fina_indicator, up to
    the most recent workday minus one.
    :param chain_param: only used by celery task.chain to pass parameters between tasks
    :param wind_code_set: optional set of wind codes to restrict the import to; None = all
    :return:
    """
    table_name = 'wind_fina_indicator'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume from the day after each code's latest stored trade date
        # (falling back to the IPO date); end date is yesterday before 16:00
        # and today afterwards, capped by the delist date.
        sql_str = """
           SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM 
                   wind_stock_info info 
               LEFT OUTER JOIN
                   (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) quertarly
               ON info.wind_code = quertarly.wind_code
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY wind_code;""".format(table_name=table_name)
    else:
        logger.warning('wind_fina_indicator 不存在,仅使用 wind_stock_info 表进行计算日期范围')
        sql_str = """
           SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.wind_code, ipo_date date_frm, delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM wind_stock_info info 
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY wind_code"""
    with with_db_session(engine_md) as session:
        # Query the per-code date range that still needs quarterly data.
        table = session.execute(sql_str)
        # kept for symmetry with the other importers; no lower bound here
        begin_time = None
        # Map wind_code -> (date_from, date_to), filtered by wind_code_set.
        stock_date_dic = {
            wind_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    # financial indicator fields requested from wind
    param_list = [
        ('roic_ttm', DOUBLE),
        ('yoyprofit', DOUBLE),
        ('ebit', DOUBLE),
        ('ebit2', DOUBLE),
        ('ebit2_ttm', DOUBLE),
        ('surpluscapitalps', DOUBLE),
        ('undistributedps', DOUBLE),
        # NOTE(review): stm_issuingdate looks like a date field but is declared
        # DOUBLE — confirm against the existing DB schema before changing.
        ('stm_issuingdate', DOUBLE),
    ]

    # build the request string and the dtype mapping
    wind_indictor_str = ",".join(key for key, _ in param_list)
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date

    data_df_list = []
    # fixed: previous message referenced 'wind_stock_quertarly' although data
    # is inserted into wind_fina_indicator
    logger.info('%d stocks will be imported into %s',
                len(stock_date_dic), table_name)

    try:
        # start=1 so the progress counter reads "1..N"
        for stock_num, (wind_code,
                        (date_from,
                         date_to)) in enumerate(stock_date_dic.items(),
                                                start=1):
            # e.g. w.wsd("002122.SZ", "roic_ttm,...,stm_issuingdate",
            #            "2012-12-31", "2017-12-06", "unit=1;rptType=1;Period=Q")
            data_df = invoker.wsd(wind_code, wind_indictor_str, date_from,
                                  date_to, "unit=1;rptType=1;Period=Q")
            if data_df is None:
                logger.warning('%d) %s has no data during %s %s', stock_num,
                               wind_code, date_from, date_to)
                continue
            data_df.rename(
                columns={c: str(c).lower()
                         for c in data_df.columns},
                inplace=True)
            # Drop rows whose fields are all empty.
            # NOTE(review): only the first 10 index entries are inspected —
            # presumably enough to cover leading empty quarters; confirm.
            for trade_date in list(data_df.index[:10]):
                is_all_none = data_df.loc[trade_date].apply(
                    lambda x: x is None).all()
                if is_all_none:
                    logger.warning("%s %s 数据全部为空", wind_code, trade_date)
                    data_df.drop(trade_date, inplace=True)
            logger.info('%d) %d data of %s between %s and %s', stock_num,
                        data_df.shape[0], wind_code, date_from, date_to)
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df_list.append(data_df)
            # for debugging only
            if DEBUG and len(data_df_list) > 10:
                break
    finally:
        # Always flush whatever was collected, even on error/interruption.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            bunch_insert_on_duplicate_update(data_df_all,
                                             table_name,
                                             engine_md,
                                             dtype=dtype)
            # fixed: previous message referenced 'wind_stock_quertarly'
            logging.info("更新 %s 结束 %d 条信息被更新",
                         table_name, data_df_all.shape[0])
            # First creation of the table: switch storage engine, build the key.
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
# Example #22
def import_future_info(chain_param=None):
    """
    更新期货合约列表信息
    :param chain_param:  在celery 中將前面結果做爲參數傳給後面的任務
    :return:
    """
    table_name = "wind_future_info"
    has_table = engine_md.has_table(table_name)
    logger.info("更新 %s 开始", table_name)
    # 获取已存在合约列表
    if has_table:
        sql_str = 'select wind_code, ipo_date from {table_name}'.format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            table = session.execute(sql_str)
            wind_code_ipo_date_dic = dict(table.fetchall())
    else:
        wind_code_ipo_date_dic = {}

    # 通过wind获取合约列表
    # w.start()
    # 初始化服务器接口,用于下载万得数据
    future_sectorid_dic_list = [
        #中金所期货合约
        # {'subject_name': 'CFE 沪深300', 'regex': r"IF\d{4}\.CFE",
        #  'sectorid': 'a599010102000000', 'date_establish': '2010-4-16'},
        # {'subject_name': 'CFE 上证50', 'regex': r"IH\d{4}\.CFE",
        #  'sectorid': '1000014871000000', 'date_establish': '2015-4-16'},
        # {'subject_name': 'CFE 中证500', 'regex': r"IC\d{4}\.CFE",
        #  'sectorid': '1000014872000000', 'date_establish': '2015-4-16'},
        # {'subject_name': '2年期国债', 'regex': r"TS\d{4}\.CFE",
        #  'sectorid': '1000014880000000', 'date_establish': '2018-08-17'},
        # {'subject_name': '5年期国债', 'regex': r"TF\d{4}\.CFE",
        #  'sectorid': '1000010299000000', 'date_establish': '2013-09-06'},
        # {'subject_name': '10年期国债', 'regex': r"T\d{4}\.CFE",
        #  'sectorid': '1000014874000000', 'date_establish': '2015-03-20'},
        #中金所指数
        {
            'subject_name': 'CFE 沪深300',
            'regex': r"IF\.CFE",
            'sectorid': 'a599010102000000',
            'date_establish': '2010-4-16'
        },
        {
            'subject_name': 'CFE 上证50',
            'regex': r"IH\.CFE",
            'sectorid': '1000014871000000',
            'date_establish': '2015-4-16'
        },
        {
            'subject_name': 'CFE 中证500',
            'regex': r"IC\.CFE",
            'sectorid': '1000014872000000',
            'date_establish': '2015-4-16'
        },
        {
            'subject_name': '2年期国债',
            'regex': r"TS\.CFE",
            'sectorid': '1000014880000000',
            'date_establish': '2018-08-17'
        },
        {
            'subject_name': '5年期国债',
            'regex': r"TF\.CFE",
            'sectorid': '1000010299000000',
            'date_establish': '2013-09-06'
        },
        {
            'subject_name': '10年期国债',
            'regex': r"T\.CFE",
            'sectorid': '1000014874000000',
            'date_establish': '2015-03-20'
        },

        # 大连商品交易所合约
        # {'subject_name': 'DCE 焦炭', 'regex': r"J\d{4}\.DCE",
        #  'sectorid': '1000002976000000', 'date_establish': '2011-04-15'},
        # {'subject_name': 'DCE 焦煤', 'regex': r"JM\d{4}\.DCE",
        #  'sectorid': '1000009338000000', 'date_establish': '2013-03-22'},
        # {'subject_name': '铁矿石', 'regex': r"I\d{4}\.DCE",
        #  'sectorid': '1000006502000000', 'date_establish': '2013-10-18'},
        # {'subject_name': '豆粕', 'regex': r"M\d{4}\.DCE",
        #  'sectorid': 'a599010304000000', 'date_establish': '2000-07-17'},
        # {'subject_name': '豆油', 'regex': r"Y\d{4}\.DCE",
        #  'sectorid': 'a599010306000000', 'date_establish': '2006-01-09'},
        # {'subject_name': '棕榈油', 'regex': r"P\d{4}\.DCE",
        #  'sectorid': 'a599010307000000', 'date_establish': '2007-10-29'},
        # {'subject_name': '豆一', 'regex': r"A\d{4}\.DCE",
        #  'sectorid': 'a599010302000000', 'date_establish': '2004-07-15'},
        # {'subject_name': '豆二', 'regex': r"B\d{4}\.DCE",
        #  'sectorid': 'a599010303000000', 'date_establish': '2004-12-22'},
        # {'subject_name': '玉米', 'regex': r"C\d{4}\.DCE",
        #  'sectorid': 'a599010305000000', 'date_establish': '2004-09-22'},
        # {'subject_name': '玉米淀粉', 'regex': r"CS\d{4}\.DCE",
        #  'sectorid': '1000011469000000', 'date_establish': '2014-12-19'},
        # {'subject_name': '鸡蛋', 'regex': r"JD\d{4}\.DCE",
        #  'sectorid': '1000011464000000', 'date_establish': '2013-11-08'},
        # {'subject_name': '线型低密度聚乙烯', 'regex': r"L\d{4}\.DCE",
        #  'sectorid': 'a599010308000000', 'date_establish': '2007-07-31'},
        # {'subject_name': '聚氯乙烯', 'regex': r"V\d{4}\.DCE",
        #  'sectorid': 'a599010309000000', 'date_establish': '2009-05-25'},
        # {'subject_name': '聚丙烯', 'regex': r"PP\d{4}\.DCE",
        #  'sectorid': '1000011468000000', 'date_establish': '2014-02-28'},
        #大连指数
        {
            'subject_name': 'DCE 焦炭',
            'regex': r"J\.DCE",
            'sectorid': '1000002976000000',
            'date_establish': '2011-04-15'
        },
        {
            'subject_name': 'DCE 焦煤',
            'regex': r"JM\.DCE",
            'sectorid': '1000009338000000',
            'date_establish': '2013-03-22'
        },
        {
            'subject_name': '铁矿石',
            'regex': r"I\.DCE",
            'sectorid': '1000006502000000',
            'date_establish': '2013-10-18'
        },
        {
            'subject_name': '豆粕',
            'regex': r"M\.DCE",
            'sectorid': 'a599010304000000',
            'date_establish': '2000-07-17'
        },
        {
            'subject_name': '豆油',
            'regex': r"Y\.DCE",
            'sectorid': 'a599010306000000',
            'date_establish': '2006-01-09'
        },
        {
            'subject_name': '棕榈油',
            'regex': r"P\.DCE",
            'sectorid': 'a599010307000000',
            'date_establish': '2007-10-29'
        },
        {
            'subject_name': '豆一',
            'regex': r"A\.DCE",
            'sectorid': 'a599010302000000',
            'date_establish': '2004-07-15'
        },
        {
            'subject_name': '豆二',
            'regex': r"B\.DCE",
            'sectorid': 'a599010303000000',
            'date_establish': '2004-12-22'
        },
        {
            'subject_name': '玉米',
            'regex': r"C\.DCE",
            'sectorid': 'a599010305000000',
            'date_establish': '2004-09-22'
        },
        {
            'subject_name': '玉米淀粉',
            'regex': r"CS\.DCE",
            'sectorid': '1000011469000000',
            'date_establish': '2014-12-19'
        },
        {
            'subject_name': '鸡蛋',
            'regex': r"JD\.DCE",
            'sectorid': '1000011464000000',
            'date_establish': '2013-11-08'
        },
        {
            'subject_name': '线型低密度聚乙烯',
            'regex': r"L\.DCE",
            'sectorid': 'a599010308000000',
            'date_establish': '2007-07-31'
        },
        {
            'subject_name': '聚氯乙烯',
            'regex': r"V\.DCE",
            'sectorid': 'a599010309000000',
            'date_establish': '2009-05-25'
        },
        {
            'subject_name': '聚丙烯',
            'regex': r"PP\.DCE",
            'sectorid': '1000011468000000',
            'date_establish': '2014-02-28'
        },
        #上海期货交易所合约
        # {'subject_name': '天然橡胶', 'regex': r"RU\d{4}\.SHF",
        #  'sectorid': 'a599010208000000', 'date_establish': '1995-06-01'},
        # {'subject_name': '铜', 'regex': r"CU\d{4}\.SHF",
        #  'sectorid': 'a599010202000000', 'date_establish': '1995-05-01'},
        # {'subject_name': '铝', 'regex': r"AL\d{4}\.SHF",
        #  'sectorid': 'a599010203000000', 'date_establish': '1995-05-01'},
        # {'subject_name': '锌', 'regex': r"ZN\d{4}\.SHF",
        #  'sectorid': 'a599010204000000', 'date_establish': '2007-03-26'},
        # {'subject_name': '铅', 'regex': r"PB\d{4}\.SHF",
        #  'sectorid': '1000002892000000', 'date_establish': '2011-03-24'},
        # {'subject_name': '镍', 'regex': r"NI\d{4}\.SHF",
        #  'sectorid': '1000011457000000', 'date_establish': '2015-03-27'},
        # {'subject_name': '锡', 'regex': r"SN\d{4}\.SHF",
        #  'sectorid': '1000011458000000', 'date_establish': '2015-03-27'},
        # {'subject_name': 'SHFE 黄金', 'regex': r"AU\d{4}\.SHF",
        #  'sectorid': 'a599010205000000', 'date_establish': '2008-01-09'},
        # {'subject_name': 'SHFE 沪银', 'regex': r"AG\d{4}\.SHF",
        #  'sectorid': '1000006502000000', 'date_establish': '2012-05-10'},
        # {'subject_name': 'SHFE 螺纹钢', 'regex': r"RB\d{4}\.SHF",
        #  'sectorid': 'a599010206000000', 'date_establish': '2009-03-27'},
        # {'subject_name': 'SHFE 热卷', 'regex': r"HC\d{4}\.SHF",
        #  'sectorid': '1000011455000000', 'date_establish': '2014-03-21'},
        # {'subject_name': 'SHFE 沥青', 'regex': r"BU\d{4}\.SHF",
        #  'sectorid': '1000011013000000', 'date_establish': '2013-10-09'},
        # {'subject_name': '原油', 'regex': r"SC\d{4}\.SHF",
        #  'sectorid': '1000011463000000', 'date_establish': '2018-03-26'},
        #上海期货交易所指数
        {
            'subject_name': '天然橡胶',
            'regex': r"RU\.SHF",
            'sectorid': 'a599010208000000',
            'date_establish': '1995-06-01'
        },
        {
            'subject_name': '铜',
            'regex': r"CU\.SHF",
            'sectorid': 'a599010202000000',
            'date_establish': '1995-05-01'
        },
        {
            'subject_name': '铝',
            'regex': r"AL\.SHF",
            'sectorid': 'a599010203000000',
            'date_establish': '1995-05-01'
        },
        {
            'subject_name': '锌',
            'regex': r"ZN\.SHF",
            'sectorid': 'a599010204000000',
            'date_establish': '2007-03-26'
        },
        {
            'subject_name': '铅',
            'regex': r"PB\.SHF",
            'sectorid': '1000002892000000',
            'date_establish': '2011-03-24'
        },
        {
            'subject_name': '镍',
            'regex': r"NI\.SHF",
            'sectorid': '1000011457000000',
            'date_establish': '2015-03-27'
        },
        {
            'subject_name': '锡',
            'regex': r"SN\.SHF",
            'sectorid': '1000011458000000',
            'date_establish': '2015-03-27'
        },
        {
            'subject_name': 'SHFE 黄金',
            'regex': r"AU\.SHF",
            'sectorid': 'a599010205000000',
            'date_establish': '2008-01-09'
        },
        {
            'subject_name': 'SHFE 沪银',
            'regex': r"AG\.SHF",
            'sectorid': '1000006502000000',
            'date_establish': '2012-05-10'
        },
        {
            'subject_name': 'SHFE 螺纹钢',
            'regex': r"RB\.SHF",
            'sectorid': 'a599010206000000',
            'date_establish': '2009-03-27'
        },
        {
            'subject_name': 'SHFE 热卷',
            'regex': r"HC\.SHF",
            'sectorid': '1000011455000000',
            'date_establish': '2014-03-21'
        },
        {
            'subject_name': 'SHFE 沥青',
            'regex': r"BU\\.SHF",
            'sectorid': '1000011013000000',
            'date_establish': '2013-10-09'
        },
        {
            'subject_name': '原油',
            'regex': r"SC\.SHF",
            'sectorid': '1000011463000000',
            'date_establish': '2018-03-26'
        },

        #郑商所合约
        # {'subject_name': '白糖', 'regex': r"SR\d{3,4}\.CZC",
        #  'sectorid': 'a599010405000000', 'date_establish': '2006-01-06'},
        # {'subject_name': '棉花', 'regex': r"CF\d{3,4}\.CZC",
        #  'sectorid': 'a599010404000000', 'date_establish': '2004-06-01'},
        # {'subject_name': '动力煤', 'regex': r"(ZC|TC)\d{3,4}\.CZC",
        #  'sectorid': '1000011012000000', 'date_establish': '2013-09-26'},
        # {'subject_name': '玻璃', 'regex': r"FG\d{3,4}\.CZC",
        #  'sectorid': '1000008549000000', 'date_establish': '2013-12-03'},
        # {'subject_name': '精对苯二甲酸', 'regex': r"TA\d{3,4}\.CZC",
        #  'sectorid': 'a599010407000000', 'date_establish': '2006-12-18'},
        # {'subject_name': '甲醇', 'regex': r"(ME|MA)\d{3,4}\.CZC",
        #  'sectorid': '1000005981000000', 'date_establish': '2011-10-28'},
        # {'subject_name': '菜籽油', 'regex': r"OI\d{3,4}\.CZC",
        #  'sectorid': 'a599010408000000', 'date_establish': '2007-06-08'},
        # {'subject_name': '菜籽粕', 'regex': r"RM\d{3,4}\.CZC",
        #  'sectorid': '1000008622000000', 'date_establish': '2012-12-28'},
        #郑商所指数
        {
            'subject_name': '白糖',
            'regex': r"SR\.CZC",
            'sectorid': 'a599010405000000',
            'date_establish': '2006-01-06'
        },
        {
            'subject_name': '棉花',
            'regex': r"CF\.CZC",
            'sectorid': 'a599010404000000',
            'date_establish': '2004-06-01'
        },
        {
            'subject_name': '动力煤',
            'regex': r"(ZC|TC)\.CZC",
            'sectorid': '1000011012000000',
            'date_establish': '2013-09-26'
        },
        {
            'subject_name': '玻璃',
            'regex': r"FG\.CZC",
            'sectorid': '1000008549000000',
            'date_establish': '2013-12-03'
        },
        {
            'subject_name': '精对苯二甲酸',
            'regex': r"TA\.CZC",
            'sectorid': 'a599010407000000',
            'date_establish': '2006-12-18'
        },
        {
            'subject_name': '甲醇',
            'regex': r"(ME|MA)\.CZC",
            'sectorid': '1000005981000000',
            'date_establish': '2011-10-28'
        },
        {
            'subject_name': '菜籽油',
            'regex': r"OI\.CZC",
            'sectorid': 'a599010408000000',
            'date_establish': '2007-06-08'
        },
        {
            'subject_name': '菜籽粕',
            'regex': r"RM\.CZC",
            'sectorid': '1000008622000000',
            'date_establish': '2012-12-28'
        },
    ]
    wind_code_set = set()
    ndays_per_update = 60
    # 获取接口参数以及参数列表
    col_name_param_list = [
        ("ipo_date", Date),
        ("sec_name", String(50)),
        ("sec_englishname", String(200)),
        ("exch_eng", String(200)),
        ("lasttrade_date", Date),
        ("lastdelivery_date", Date),
        ("dlmonth", String(20)),
        ("lprice", DOUBLE),
        ("sccode", String(20)),
        ("margin", DOUBLE),
        ("punit", String(200)),
        ("changelt", DOUBLE),
        ("mfprice", DOUBLE),
        ("contractmultiplier", DOUBLE),
        ("ftmargins", String(100)),
        ("trade_code", String(200)),
    ]
    wind_indictor_str = ",".join(col_name
                                 for col_name, _ in col_name_param_list)
    dtype = {key: val for key, val in col_name_param_list}
    dtype['wind_code'] = String(20)
    # 获取历史期货合约列表信息
    logger.info("获取历史期货合约列表信息")
    for future_sectorid_dic in future_sectorid_dic_list:
        subject_name = future_sectorid_dic['subject_name']
        sector_id = future_sectorid_dic['sectorid']
        regex_str = future_sectorid_dic['regex']
        date_establish = datetime.strptime(
            future_sectorid_dic['date_establish'], STR_FORMAT_DATE).date()
        date_since = get_date_since(wind_code_ipo_date_dic, regex_str,
                                    date_establish)
        date_yestoday = date.today() - timedelta(days=1)
        logger.info("%s[%s] %s ~ %s", subject_name, sector_id, date_since,
                    date_yestoday)
        while date_since <= date_yestoday:
            date_since_str = date_since.strftime(STR_FORMAT_DATE)
            future_info_df = invoker.wset(
                "sectorconstituent",
                "date=%s;sectorid=%s" % (date_since_str, sector_id))
            data_count = 0 if future_info_df is None else future_info_df.shape[
                0]
            logger.info("subject_name=%s[%s] %s 返回 %d 条数据", subject_name,
                        sector_id, date_since_str, data_count)
            if data_count > 0:
                wind_code_set |= set(future_info_df['wind_code'])

            if date_since >= date_yestoday:
                break
            else:
                date_since += timedelta(days=ndays_per_update)
                if date_since > date_yestoday:
                    date_since = date_yestoday

    # 获取合约列表
    wind_code_list = [
        wc for wc in wind_code_set if wc not in wind_code_ipo_date_dic
    ]
    # 获取合约基本信息
    # w.wss("AU1706.SHF,AG1612.SHF,AU0806.SHF", "ipo_date,sec_name,sec_englishname,exch_eng,lasttrade_date,lastdelivery_date,dlmonth,lprice,sccode,margin,punit,changelt,mfprice,contractmultiplier,ftmargins,trade_code")
    if len(wind_code_list) > 0:
        logger.info("%d wind_code will be invoked by wss, wind_code_list=%s",
                    len(wind_code_list), wind_code_list)
        future_info_df = invoker.wss(wind_code_list, wind_indictor_str)
        future_info_df['MFPRICE'] = future_info_df['MFPRICE'].apply(
            mfprice_2_num)
        future_info_count = future_info_df.shape[0]

        future_info_df.rename(
            columns={c: str.lower(c)
                     for c in future_info_df.columns},
            inplace=True)
        future_info_df.index.rename('wind_code', inplace=True)
        future_info_df.reset_index(inplace=True)
        data_count = bunch_insert_on_duplicate_update(future_info_df,
                                                      table_name,
                                                      engine_md,
                                                      dtype=dtype)
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])

        logger.info("更新 wind_future_info 结束 %d 条记录被更新", future_info_count)
        update_from_info_table(table_name)
Example #23
0
def import_tushare_stock_index_weight(chain_param=None, ts_code_set=None):
    """
    Import index constituent weights into tushare_stock_index_weight.

    For every index code found in tushare_stock_index_daily_md, iterate over
    its known trade dates and fetch/upsert the constituent weights per date.
    :param chain_param: used by celery chains to pass the previous task's result forward
    :param ts_code_set: reserved filter parameter; currently unused
    :return:
    """
    table_name = 'tushare_stock_index_weight'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # Collect, per index code, the full list of trade dates needing weights
    sql_str = """SELECT ts_code index_code,trade_date trade_date_list FROM md_integration.tushare_stock_index_daily_md """
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        code_date_range_dic = {}
        for index_code, trade_date_list in table.fetchall():
            code_date_range_dic.setdefault(index_code,
                                           []).append(trade_date_list)

    data_len = len(code_date_range_dic)
    logger.info(
        '%d index weight will been import into tushare_stock_index_weight table',
        data_len)
    cycles = 1
    # Pre-initialize so the finally block cannot raise NameError when the
    # loop body never runs (empty source table) or the first fetch raises.
    data_df = None
    try:
        for num, (index_code,
                  trade_date_list) in enumerate(code_date_range_dic.items(),
                                                start=1):
            trade_date_list_len = len(trade_date_list)
            for i, trade_date in enumerate(trade_date_list):
                logger.debug('%d/%d) %d/%d) %s [%s]', num, data_len, i,
                             trade_date_list_len, index_code, trade_date)
                data_df = invoke_index_weight(index_code=index_code,
                                              trade_date=trade_date)
                if len(data_df) > 0:
                    data_count = bunch_insert_on_duplicate_update(
                        data_df, table_name, engine_md,
                        DTYPE_TUSHARE_STOCK_INDEX_WEIGHT)
                    logging.info("%s 更新 %s  %d 条信息被更新", trade_date, table_name,
                                 data_count)
                else:
                    # Empty result: stop scanning later dates for this index
                    break
            cycles += 1
            if DEBUG and cycles > 10:
                break
    finally:
        # Safety net: upsert the last fetched batch even on early exit.
        # Re-inserting a batch already written in-loop is harmless because
        # the insert is an insert-on-duplicate-update.
        if data_df is not None and len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md,
                DTYPE_TUSHARE_STOCK_INDEX_WEIGHT)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
def import_stock_daily(chain_param=None, wind_code_set=None):
    """
    Import stock daily bars up to the most recent workday minus one.
    If the current time is past the base-line hour (16:00), today's data is
    included in the fetch window as well.
    :param chain_param: used by celery chains to pass the previous task's result forward
    :param wind_code_set: optional whitelist of wind codes; None means all codes
    :return:
    """
    table_name = 'wind_stock_daily'
    logging.info("更新 %s 开始", table_name)
    # (column name, SQLAlchemy column type) pairs requested from the wsd API
    param_list = [
        ('open', DOUBLE),
        ('high', DOUBLE),
        ('low', DOUBLE),
        ('close', DOUBLE),
        ('adjfactor', DOUBLE),
        ('volume', DOUBLE),
        ('amt', DOUBLE),
        ('pct_chg', DOUBLE),
        ('maxupordown', Integer),
        ('swing', DOUBLE),
        ('turn', DOUBLE),
        ('free_turn', DOUBLE),
        ('trade_status', String(30)),
        ('susp_days', Integer),
        ('total_shares', DOUBLE),
        ('free_float_shares', DOUBLE),
        ('ev2_to_ebitda', DOUBLE),
    ]
    # Comma-joined indicator string passed to invoker.wsd
    wind_indictor_str = ",".join([key for key, _ in param_list])
    # wsd returns upper-case column names; map them back to lower-case
    rename_col_dic = {key.upper(): key.lower() for key, _ in param_list}
    has_table = engine_md.has_table(table_name)
    # If the daily table already exists, resume each stock from the day after
    # its last stored trade_date; otherwise start from its IPO date.
    if has_table:
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
            SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM 
                wind_stock_info info 
            LEFT OUTER JOIN
                (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) daily
            ON info.wind_code = daily.wind_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY wind_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
              (
                SELECT info.wind_code, ipo_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info info 
              ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY wind_code"""
        logger.warning('%s 不存在,仅使用 wind_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Resolve, for each stock, the [date_from, date_to] window to fetch
        table = session.execute(sql_str)
        # begin_time is always None here, so the min() branch never fires;
        # kept for parity with the sibling importers that accept begin_time.
        begin_time = None
        # Map wind_code -> (date_from, date_to), optionally filtered by
        # wind_code_set
        code_date_range_dic = {
            wind_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    # Build the dtype mapping for the bulk upsert (data columns + keys)
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date

    data_df_list = []
    data_len = len(code_date_range_dic)
    logger.info('%d stocks will been import into wind_stock_daily', data_len)
    # Fetch per-stock frames and accumulate them in data_df_list
    try:
        for num, (wind_code,
                  (date_from,
                   date_to)) in enumerate(code_date_range_dic.items(),
                                          start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, wind_code,
                         date_from, date_to)
            try:
                data_df = invoker.wsd(wind_code, wind_indictor_str, date_from,
                                      date_to)
            except APIError as exp:
                logger.exception("%d/%d) %s 执行异常", num, data_len, wind_code)
                # Recoverable per-stock errors -> skip this stock; anything
                # else aborts the loop (accumulated data is still flushed in
                # the finally block below).
                if exp.ret_dic.setdefault('error_code', 0) in (
                        -40520007,  # no data available
                        -40521009,  # decode failure; check date params (short Feb / month-end)
                ):
                    continue
                else:
                    break
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num,
                               data_len, wind_code, date_from, date_to)
                continue
            logger.info('%d/%d) %d data of %s between %s and %s', num,
                        data_len, data_df.shape[0], wind_code, date_from,
                        date_to)
            data_df['wind_code'] = wind_code
            data_df_list.append(data_df)
            # Debug-only early exit
            if DEBUG and len(data_df_list) > 2:
                break
    finally:
        # Flush everything collected so far, even if the loop aborted
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_df_all.index.rename('trade_date', inplace=True)
            data_df_all.reset_index(inplace=True)
            data_df_all.rename(columns=rename_col_dic, inplace=True)
            # data_df_all.set_index(['wind_code', 'trade_date'], inplace=True)
            # data_df_all.to_sql('wind_stock_daily', engine_md, if_exists='append', dtype=dtype)
            # logging.info("更新 wind_stock_daily 结束 %d 条信息被更新", data_df_all.shape[0])
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
Example #25
0
def import_index_daily_his(chain_param=None,
                           ths_code_set: set = None,
                           begin_time=None):
    """
    Save historical index quotes to ifind_index_daily_his via the history API.
    :param chain_param: used only by task.chain to pass parameters between serial tasks
    :param ths_code_set: optional whitelist of ths codes; None means all codes
    :param begin_time: default None; when given, no data earlier than this date is fetched
    :return:
    """
    table_name = 'ifind_index_daily_his'
    # Normalize begin_time to a date. The original converted only when the
    # value was *already* a date, which inverted the intent of str_2_date.
    if begin_time is not None and not isinstance(begin_time, date):
        begin_time = str_2_date(begin_time)
    # THS_HistoryQuotes('600006.SH,600010.SH',
    # 'preClose,open,high,low,close,avgPrice,changeRatio,volume,amount,turnoverRatio,transactionAmount,totalShares,totalCapital,floatSharesOfAShares,floatSharesOfBShares,floatCapitalOfAShares,floatCapitalOfBShares,pe_ttm,pe,pb,ps,pcf',
    # 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
    # '2018-06-30','2018-07-30')
    json_indicator, _ = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_INDEX_DAILY_HIS],
        sep=';')
    has_table = engine_md.has_table(table_name)
    # NOTE: `if(NULL<end_date, NULL, end_date)` always yields end_date because
    # a NULL comparison is never true in MySQL — date_to is effectively end_date.
    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                    ifind_index_info info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM ifind_index_daily_his GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code;"""
    else:
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围', table_name)
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info 
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code"""

    # Resolve, for each index, the [date_from, date_to] window to fetch
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        # Trial accounts can only access the most recent 5 years of data
        date_from_min = date.today() - timedelta(days=(365 * 5))
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_to is not None and date_from_min <= date_to
        }

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code,
                  (begin_time,
                   end_time)) in enumerate(code_date_range_dic.items(),
                                           start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         begin_time, end_time)
            data_df = invoker.THS_HistoryQuotes(
                ths_code, json_indicator,
                'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Flush once the accumulated row count passes the threshold.
            # Insert ALL pending frames — the original inserted only the
            # latest data_df and silently dropped earlier list entries.
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_HIS)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

            # Debug-only early exit
            if DEBUG and len(data_df_list) > 5:
                break
    finally:
        # Flush whatever remains below the threshold, even on early exit
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_HIS)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def add_data_2_ckdvp(col_name,
                     param,
                     wind_code_set: set = None,
                     begin_time=None):
    """Fetch one wsd indicator for every stock and upsert it into wind_ckdvp_stock.

    :param col_name: indicator column name added as the 'key' value
    :param param: wsd parameter string, stored as the 'param' value
    :param wind_code_set: optional whitelist of wind codes; None means all codes
    :param begin_time: default None; when given, fetch starts no later than this date
    :return: True when all codes were processed without interruption
    """
    table_name = 'wind_ckdvp_stock'
    all_finished = False
    has_table = engine_md.has_table('wind_ckdvp_stock')
    # NOTE(review): col_name/param are interpolated with str.format — safe only
    # while they come from internal callers, never from untrusted input.
    if has_table:
        # Resume each stock from the day after its last stored 'time' for this
        # (key, param); the original never used the max(time) column, so every
        # run refetched from the IPO date.
        sql_str = """
            select wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                select info.wind_code,
                    ifnull(trade_date_max_1, ipo_date) date_frm, delist_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                from wind_stock_info info
                left outer join
                    (select wind_code, adddate(max(time),1) trade_date_max_1 from wind_ckdvp_stock
                    where wind_ckdvp_stock.key='{0}' and param='{1}' group by wind_code
                    ) daily
                on info.wind_code = daily.wind_code
            ) tt
            where date_frm <= if(delist_date<end_date,delist_date, end_date)
            order by wind_code""".format(col_name, param)
    else:
        logger.warning('wind_ckdvp_stock 不存在,仅使用 wind_stock_info 表进行计算日期范围')
        sql_str = """
            SELECT wind_code, date_frm,
                if(delist_date<end_date,delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code,ipo_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code"""
    with with_db_session(engine_md) as session:
        # Resolve, for each stock, the [date_from, date_to] window to fetch
        table = session.execute(sql_str)
        code_date_range_dic = {
            wind_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }

        # dtype mapping for the bulk upsert
        dtype = {
            'wind_code': String(20),
            'key': String(80),
            'time': Date,
            'value': String(80),
            'param': String(80),
        }
        data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
            code_date_range_dic)
        try:
            for num, (wind_code,
                      (date_from,
                       date_to)) in enumerate(code_date_range_dic.items(),
                                              start=1):
                logger.debug('%d/%d) %s [%s - %s]', num, code_count, wind_code,
                             date_from, date_to)
                data_df = invoker.wsd(wind_code, col_name, date_from, date_to,
                                      param)
                if data_df is not None and data_df.shape[0] > 0:
                    # Normalize the frame into the ckdvp long format:
                    # (wind_code, time, key, param, value)
                    data_df['key'] = col_name
                    data_df['param'] = param
                    data_df['wind_code'] = wind_code
                    data_df.rename(columns={col_name.upper(): 'value'},
                                   inplace=True)
                    data_df.index.rename('time', inplace=True)
                    data_df.reset_index(inplace=True)
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)

                # Flush once the accumulated row count passes the threshold
                if data_count >= 10000:
                    tot_data_df = pd.concat(data_df_list)
                    data_count = bunch_insert_on_duplicate_update(
                        tot_data_df, table_name, engine_md, dtype)
                    tot_data_count += data_count
                    data_df_list, data_count = [], 0

                # Debug-only early exit
                if DEBUG and len(data_df_list) > 1:
                    break
            else:
                # Only report success when the loop ran to completion; the
                # original set the flag inside every iteration, so a failure
                # after the first code still reported finished.
                all_finished = True
        finally:
            # Flush whatever remains below the threshold, even on early exit
            if data_count > 0:
                tot_data_df = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(
                    tot_data_df, table_name, engine_md, dtype)
                tot_data_count += data_count

            if not has_table and engine_md.has_table(table_name):
                create_pk_str = """ALTER TABLE {table_name}
                    CHANGE COLUMN `wind_code` `wind_code` VARCHAR(20) NOT NULL ,
                    CHANGE COLUMN `time` `time` DATE NOT NULL ,
                    CHANGE COLUMN `key` `key` VARCHAR(80) NOT NULL ,
                    CHANGE COLUMN `param` `param` VARCHAR(80) NOT NULL ,
                    ADD PRIMARY KEY (`wind_code`, `time`, `key`, `param`)""".format(
                    table_name=table_name)
                with with_db_session(engine_md) as session:
                    session.execute(create_pk_str)

            logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
        return all_finished
Example #27
0
def merge_stock_info():
    """Merge wind and ifind stock-info records into the unified stock_info table.

    Outer-joins the two source tables on the security code, resolves each
    target column through a merge rule, and upserts the result.
    :return: the merged DataFrame that was written to the table
    """
    table_name = 'stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    # ifind_model = TABLE_MODEL_DIC[ifind_table_name]
    # wind_model = TABLE_MODEL_DIC[wind_table_name]
    # with with_db_session(engine_md) as session:
    #     session.query(ifind_model, wind_model).filter(ifind_model.c.ths_code == wind_model.c.wind_code)
    ifind_sql_str = "select * from {table_name}".format(
        table_name=ifind_table_name)
    wind_sql_str = "select * from {table_name}".format(
        table_name=wind_table_name)
    # Load both source tables in full, then outer-join on the security code
    ifind_df = pd.read_sql(ifind_sql_str, engine_md)  # , index_col='ths_code'
    wind_df = pd.read_sql(wind_sql_str, engine_md)  # , index_col='wind_code'
    joined_df = pd.merge(ifind_df,
                         wind_df,
                         how='outer',
                         left_on='ths_code',
                         right_on='wind_code',
                         indicator='indicator_column')
    # One entry per target column:
    # (column name, SQLAlchemy type, merge function, merge-function kwargs)
    merge_spec = [
        ('unique_code', String(20), prefer_left,
         {'left_key': 'ths_code', 'right_key': 'wind_code'}),
        ('sec_name', String(20), prefer_left,
         {'left_key': 'ths_stock_short_name_stock', 'right_key': 'sec_name'}),
        ('cn_name', String(100), get_value,
         {'key': 'ths_corp_cn_name_stock'}),
        ('en_name', String(100), get_value,
         {'key': 'ths_corp_name_en_stock'}),
        ('delist_date', Date, prefer_left,
         {'left_key': 'ths_delist_date_stock', 'right_key': 'delist_date'}),
        ('ipo_date', Date, prefer_left,
         {'left_key': 'ths_ipo_date_stock', 'right_key': 'ipo_date'}),
        # NOTE(review): left_key is the same column used for en_name above —
        # looks like a copy-paste; confirm the intended ifind source column.
        ('pre_name', Text, prefer_left,
         {'left_key': 'ths_corp_name_en_stock', 'right_key': 'prename'}),
        ('established_date', Date, get_value,
         {'key': 'ths_established_date_stock'}),
        ('exch_city', String(20), get_value,
         {'key': 'exch_city'}),
        ('exch_cn', String(20), get_value,
         {'key': 'ths_listing_exchange_stock'}),
        ('exch_eng', String(20), get_value,
         {'key': 'exch_eng'}),
        ('stock_code', String(20), prefer_left,
         {'left_key': 'ths_stock_code_stock', 'right_key': 'trade_code'}),
        ('mkt', String(20), get_value,
         {'key': 'mkt'}),
    ]

    # Derive the merge-rule mapping and the upsert dtype mapping from the spec
    col_merge_rule_dic = {
        name: (func, kwargs)
        for name, _, func, kwargs in merge_spec
    }
    dtype = {name: col_type for name, col_type, _, _ in merge_spec}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name,
                                                  engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    return data_df
def import_wind_stock_info(chain_param=None, refresh=False):
    """Fetch the full-market stock code list from Wind and upsert basic
    listing info (name, IPO/delist dates, exchange, previous names) into
    the ``wind_stock_info`` table.

    :param chain_param: placeholder so the result of a previous celery task
        can be chained into this one
    :param refresh: when True, rescan the code universe year by year starting
        from 2005-01-01; otherwise only today's snapshot is used
    :return: None
    """
    table_name = 'wind_stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    indicator_params = [
        ('sec_name', String(20)),
        ('trade_code', String(20)),
        ('ipo_date', Date),
        ('delist_date', Date),
        ('mkt', String(20)),
        ('exch_city', String(20)),
        ('exch_eng', String(20)),
        ('prename', String(2000)),
    ]
    # comma-separated indicator names for the wss call,
    # e.g. "sec_name,trade_code,ipo_date,..."
    param = ",".join(name for name, _ in indicator_params)
    # column -> sqlalchemy type mapping used by the bulk upsert
    dtype = dict(indicator_params)
    dtype['wind_code'] = String(20)
    if refresh:
        date_fetch = datetime.strptime('2005-1-1', STR_FORMAT_DATE).date()
    else:
        date_fetch = date.today()
    date_end = date.today()
    # collect stock codes, sampling one snapshot per year when refreshing
    stock_code_set = set()
    while date_fetch < date_end:
        sub_set = get_stock_code_set(date_fetch)
        if sub_set is not None:
            stock_code_set |= sub_set
        date_fetch += timedelta(days=365)
    # always include the snapshot at the end date itself
    sub_set = get_stock_code_set(date_end)
    if sub_set is not None:
        stock_code_set |= sub_set
    # query listing info from Wind in chunks of at most 1000 codes,
    # e.g. w.wss("300005.SZ,300372.SZ", "ipo_date,trade_code,...")
    seg_count = 1000
    df_list = []
    for code_chunk in split_chunk(list(stock_code_set), seg_count):
        df_list.append(invoker.wss(code_chunk, param))
        if DEBUG:
            break
    # normalize: name the index column and flatten it into a regular column
    stock_info_all_df = pd.concat(df_list)
    stock_info_all_df.index.rename('wind_code', inplace=True)
    logging.info('%d data will be import', stock_info_all_df.shape[0])
    stock_info_all_df.reset_index(inplace=True)
    # insert-or-update the whole frame in one bulk statement
    data_count = bunch_insert_on_duplicate_update(stock_info_all_df,
                                                  table_name,
                                                  engine_md,
                                                  dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    # refresh the code_mapping table from the freshly updated info table
    update_from_info_table(table_name)
# Example #29
# 0
def import_tushare_stock_index_daily(chain_param=None, ts_code_set=None):
    """Import index daily bars into ``tushare_stock_index_daily_md`` up to the
    most recent workday (yesterday before 16:00, today after), paging
    backwards per index because the tushare API caps rows per call.

    :param chain_param: placeholder so the result of a previous celery task
        can be chained into this one
    :param ts_code_set: optional whitelist of ts_code values to update;
        ``None`` means every index found in ``tushare_stock_index_basic``
    :return: None
    """
    table_name = 'tushare_stock_index_daily_md'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # Build the per-index fetch window: from the day after the last stored
    # bar (or the index base_date when no data exists yet) up to
    # min(exp_date, end_date).
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
            (
            SELECT info.ts_code, ifnull(trade_date, base_date) date_frm, exp_date,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM 
                tushare_stock_index_basic info 
            LEFT OUTER JOIN
                (SELECT ts_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY ts_code) daily
            ON info.ts_code = daily.ts_code
            ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date) 
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
              (
                SELECT info.ts_code, base_date date_frm, exp_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_index_basic info 
              ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date) 
            ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # per-code fetch window {ts_code: (date_from, date_to)}
        table = session.execute(sql_str)
        # NOTE: the original also folded a `begin_time` variable into
        # date_from, but it was always None, so the expression reduced to
        # date_from itself; the dead branch has been removed.
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(
        code_date_range_dic)
    logger.info('%d stocks will been import into tushare_stock_index_daily_md',
                data_len)
    try:
        for num, (ts_code, (date_from,
                            date_to)) in enumerate(code_date_range_dic.items(),
                                                   start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code,
                         date_from, date_to)
            data_df = invoke_index_daily(
                ts_code=ts_code,
                start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            if len(data_df) > 0:
                # The API returns newest-first and caps rows per call, so keep
                # requesting older pages until the earliest date reaches
                # date_from or no further progress is possible.
                while try_2_date(data_df['trade_date'].iloc[-1]) > date_from:
                    last_date_prev = try_2_date(data_df['trade_date'].iloc[-1])
                    df2 = invoke_index_daily(
                        ts_code=ts_code,
                        start_date=datetime_2_str(date_from,
                                                  STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(
                            last_date_prev - timedelta(days=1),
                            STR_FORMAT_DATE_TS))
                    # BUG FIX: the original tested `len(df2 > 0)`, which
                    # compares the whole DataFrame element-wise against 0
                    # (TypeError on string columns such as ts_code) instead
                    # of testing the row count.
                    if len(df2) > 0:
                        last_date_cur = try_2_date(df2['trade_date'].iloc[-1])
                        if last_date_cur < last_date_prev:
                            data_df = pd.concat([data_df, df2])
                        elif last_date_cur == last_date_prev:
                            # no progress this round -> stop paging to avoid
                            # an endless loop
                            break
                        logger.info('%d/%d) %d data of %s between %s and %s',
                                    num, data_len, data_df.shape[0], ts_code,
                                    date_from, date_to)
                    else:
                        break

            # accumulate this index's bars for batched insertion
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # debug only: stop early after a few indexes
            if DEBUG and len(data_df_list) > 5:
                break

            # flush to the database once enough rows have accumulated
            if data_count >= 500:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md,
                    DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD)
                all_data_count += data_count
                data_df_list, data_count = [], 0

    finally:
        # flush whatever remains, even if the loop above raised
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md,
                DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
# Example #30
# 0
def import_future_info(chain_param=None):
    """Refresh the futures-contract master list from rqdatac into the
    ``rqdatac_future_info`` table (full load on first run, incremental by
    listed_date afterwards).

    :param chain_param: placeholder so the result of a previous celery task
        can be chained into this one
    :return: None
    """
    table_name = "rqdatac_future_info"
    has_table = engine_md.has_table(table_name)
    logger.info("更新 %s 开始", table_name)
    # incremental mode only makes sense when the table already holds data;
    # fall back to a full load if it is empty
    if has_table:
        sql_str = 'select max(listed_date) from {table_name}'.format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            listed_date = session.scalar(sql_str)
            if listed_date is None:
                has_table = False

    # window size (days) per rqdatac request in incremental mode
    ndays_per_update = 60
    # column name / sqlalchemy type pairs for the destination table
    col_name_param_list = [
        ("order_book_id", String(20)),
        # unique contract id (CZCE numeric parts are zero-padded, e.g.
        # 'ZC609' -> 'ZC1609'); dominant continuous = symbol+88 (e.g. 'IF88'),
        # index continuous = symbol+99
        ("underlying_symbol", String(20)),  # underlying code, e.g. 'IF' for IF1005
        ("market_tplus", TINYINT),  # trading regime: '0' = T+0, '1' = T+1, etc.
        ("symbol", String(100)),  # short name, e.g. '沪深 1005'
        ("margin_rate", DOUBLE),  # minimum margin rate of the contract
        ("maturity_date", Date),  # expiry date; '0000-00-00' for continuous contracts
        ("type", String(20)),  # instrument type, 'Future'
        ("trading_code", String(20)),  #
        (
            "exchange", String(10)
        ),  # exchange: 'DCE', 'SHFE', 'CFFEX', 'CZCE'
        ("product",
         String(20)),  # product class: 'Commodity', 'Index', 'Government'
        ("contract_multiplier", SMALLINT),  # contract multiplier, e.g. 300.0 for CSI 300
        ("round_lot", TINYINT),  # always 1.0 for futures
        ("trading_hours", String(100)),  # trading session description
        ("listed_date", Date),  # listing date; '0000-00-00' for continuous contracts
        ("industry_name", String(50)),
        ("de_listed_date", Date),  # appears identical to maturity_date
        ("underlying_order_book_id",
         String(20)),  # underlying id; 'null' except for index futures (IH, IF, IC)
    ]
    dtype = dict(col_name_param_list)
    if not has_table:
        # first run: one full snapshot of all futures contracts
        instrument_info_df = rqdatac.all_instruments(type='Future',
                                                     market='cn')
    else:
        # incremental: one snapshot per ndays_per_update window from the
        # latest stored listed_date through yesterday.
        # BUG FIX: the original called DataFrame.append() and discarded the
        # returned frame (append is not in-place, and was removed in pandas
        # 2.0), so only the first window's contracts ever survived; collect
        # the frames and concat them instead.
        date_yestoday = date.today() - timedelta(days=1)
        df_list = [
            rqdatac.all_instruments(type='Future', market='cn', date=param)
            for param in get_date_iter(listed_date, date_yestoday,
                                       ndays_per_update)
        ]
        instrument_info_df = pd.concat(df_list)

    instrument_info_df.drop_duplicates(inplace=True)
    # rqdatac reports missing underlying ids as the literal string 'null'
    instrument_info_df.loc[instrument_info_df['underlying_order_book_id'] ==
                           'null', 'underlying_order_book_id'] = None
    # normalize column names to lower case to match the table schema
    instrument_info_df.rename(
        columns={c: str.lower(c)
                 for c in instrument_info_df.columns},
        inplace=True)
    data_count = bunch_insert_on_duplicate_update(instrument_info_df,
                                                  table_name,
                                                  engine_md,
                                                  dtype=dtype)
    logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    logger.info("更新 wind_future_info 结束 %d 条记录被更新", data_count)