def import_info_table(type_name):
    """
    调用 get_all_securities 获取指定 type 的信息
    type: 'stock', 'fund', 'index', 'futures', 'etf', 'lof', 'fja', 'fjb'。types为空时返回所有股票, 不包括基金,指数和期货
    :param type_name:
    :return:
    """
    table_name = f'jq_{type_name}_info'
    logging.info("更新 %s 开始", table_name)
    # has_table = engine_md.has_table(table_name)
    param_list = [
        ('jq_code', String(20)),
        ('display_name', String(20)),
        ('name', String(20)),
        ('start_date', Date),
        ('end_date', Date),
    ]
    # 设置 dtype
    dtype = {key: val for key, val in param_list}

    # 数据提取
    # types: list: 用来过滤securities的类型, list元素可选:
    # 'stock', 'fund', 'index', 'futures', 'etf', 'lof', 'fja', 'fjb'。types为空时返回所有股票, 不包括基金,指数和期货
    # date: 日期, 一个字符串或者 [datetime.datetime]/[datetime.date] 对象,
    # 用于获取某日期还在上市的股票信息. 默认值为 None, 表示获取所有日期的股票信息
    stock_info_all_df = get_all_securities(types=type_name)
    stock_info_all_df.index.rename('jq_code', inplace=True)
    stock_info_all_df.reset_index(inplace=True)

    logging.info('%s 数据将被导入', stock_info_all_df.shape[0])
    data_count = bunch_insert(stock_info_all_df,
                              table_name=table_name,
                              dtype=dtype,
                              primary_keys=['jq_code'])
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
def import_tushare_index_basic(chain_param=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_stock_index_basic'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)

    fields = [
        'ts_code', 'name', 'fullname', 'market', 'publisher', 'index_type',
        'category', 'base_date', 'base_point', 'list_date', 'weight_rule',
        'desc', 'exp_date'
    ]
    market_list = list(
        ['MSCI', 'CSI', 'SSE', 'SZSE', 'CICC', 'SW', 'CNI', 'OTH'])

    for mkt in market_list:
        # trade_date = datetime_2_str(trddate[i], STR_FORMAT_DATE_TS)
        data_df = invoke_index_basic(market=mkt, fields=fields)
        if len(data_df) > 0:
            data_count = bunch_insert(data_df,
                                      table_name=table_name,
                                      dtype=DTYPE_TUSHARE_STOCK_INDEX_BASIC,
                                      primary_keys=['ts_code'])
            logging.info("%s 更新 %s 结束 %d 条信息被更新", mkt, table_name, data_count)
        else:
            logging.info("%s 无数据信息可被更新", mkt)
Beispiel #3
0
def import_fut_basic(chain_param=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_future_basic'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    exchange_list = ['DCE', 'CZCE', 'SHFE', 'CFFEX', 'INE']

    try:
        for i in range(len(exchange_list)):
            exchange_name = exchange_list[i]
            data_df = invoke_fut_basic(exchange=exchange_name)
            if len(data_df) > 0:
                data_count = bunch_insert(data_df,
                                          table_name=table_name,
                                          dtype=DTYPE_TUSHARE_FUTURE_BASIC,
                                          primary_keys=['ts_code'])
                logging.info("更新 %s 期货合约基础信息结束, %d 条信息被更新", exchange_name,
                             data_count)
            else:
                logging.info("无数据信息可被更新")
    finally:

        logger.info('%s 表 数据更新完成', table_name)
Beispiel #4
0
def save_data_2_daily_table(data_new_s_list: list, table_name, dtype: dict):
    df = pd.DataFrame(data_new_s_list)
    data_count = bunch_insert(df,
                              table_name,
                              dtype=dtype,
                              primary_keys=['id', 'trade_date'])
    return data_count
Beispiel #5
0
def import_tushare_adj_factor(chain_param=None, ):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_stock_daily_adj_factor'
    primary_keys = ["ts_code", "trade_date"]
    logging.info("更新 %s 开始", table_name)
    # 进行表格判断,确定是否含有 table_name
    has_table = engine_md.has_table(table_name)
    # sqlite_file_name = 'eDB_adjfactor.db'
    check_sqlite_db_primary_keys(table_name, primary_keys)

    if has_table:
        sql_str = """
           select cal_date            
           FROM
            (
             select * from tushare_trade_date trddate 
             where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
           )tt
           where (is_open=1 
                  and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                  and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
           SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
            AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
            AND exchange='SSE') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取交易日数据
        table = session.execute(sql_str)
        trade_date_list = [row[0] for row in table.fetchall()]

    trade_date_count, data_count_tot = len(trade_date_list), 0
    try:
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = pro.adj_factor(ts_code='', trade_date=trade_date)
            if data_df is not None and data_df.shape[0] > 0:
                data_count = bunch_insert(
                    data_df,
                    table_name=table_name,
                    dtype=DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR,
                    primary_keys=primary_keys)
                data_count_tot += data_count

                logging.info("%d/%d) %s 表 %s %d 条信息被更新", num, trade_date_count,
                             table_name, trade_date, data_count)
            else:
                logging.info("%d/%d) %s 表 %s 数据信息可被更新", num, trade_date_count,
                             table_name, trade_date)
    except:
        logger.exception("更新 %s 异常", table_name)
    finally:
        logging.info("%s 表 %d 条记录更新完成", table_name, data_count_tot)
Beispiel #6
0
def import_tushare_suspend(chain_param=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_stock_daily_suspend'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # 进行表格判断,确定是否含有tushare_suspend

    # 下面一定要注意引用表的来源,否则可能是串,提取混乱!!!比如本表是tushare_daily_basic,所以引用的也是这个,如果引用错误,就全部乱了l
    if has_table:
        sql_str = """
                  select cal_date            
                  FROM
                   (
                    select * from tushare_trade_date trddate 
                    where( cal_date>(SELECT max(suspend_date) FROM {table_name} ))
                  )tt
                  where (is_open=1 
                         and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                         and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
                  SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
               AND exchange='SSE') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取交易日数据
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())

    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = pro.suspend(ts_code='', suspend_date=trade_date, resume_date='', fields='')
            if len(data_df) > 0:
                data_count = bunch_insert(data_df, table_name=table_name, dtype=DTYPE_TUSHARE_SUSPEND,
                                          primary_keys=['ts_code', 'suspend_date'])
                logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新",
                             num, trade_date_list_len, trade_date, table_name, data_count)
            else:
                logging.info("%s 当日无停牌股票", trade_date_list_len)

    except:
        logger.exception('更新 %s 表异常', table_name)
Beispiel #7
0
def import_tushare_daily_basic(chain_param=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_stock_daily_basic'
    primary_keys = ["ts_code", "trade_date"]
    logging.info("更新 %s 开始", table_name)
    check_sqlite_db_primary_keys(table_name, primary_keys)
    has_table = engine_md.has_table(table_name)
    # 下面一定要注意引用表的来源,否则可能是串,提取混乱!!!
    # 比如本表是 tushare_daily_basic,所以引用的也是这个,如果引用错误,就全部乱了
    if has_table:
        sql_str = """
               select cal_date            
               FROM
                (
                 select * from tushare_trade_date trddate 
                 where( cal_date>(SELECT max(trade_date) FROM {table_name} ))
               )tt
               where (is_open=1 
                      and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                      and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
               SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
            AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
            AND exchange='SSE') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取交易日数据
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())

    try:
        for_count = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_daily_basic(ts_code='', trade_date=trade_date)
            if data_df is not None and data_df.shape[0] > 0:
                data_count = bunch_insert(
                    data_df, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_DAILY_BASIC,
                    primary_keys=primary_keys)

                logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新", num, for_count, trade_date, table_name, data_count)
            else:
                logging.info("%d/%d) %s 无数据信息可被更新",  num, for_count, trade_date)
    except:
        logger.exception("更新 %s 表异常", table_name)
Beispiel #8
0
def import_tushare_stock_index_daily(chain_param=None, ts_code_set=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_stock_index_daily_md'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # 进行表格判断,确定是否含有tushare_stock_daily
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
            (
            SELECT info.ts_code, ifnull(trade_date, base_date) date_frm, exp_date,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM 
                tushare_stock_index_basic info 
            LEFT OUTER JOIN
                (SELECT ts_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY ts_code) daily
            ON info.ts_code = daily.ts_code
            ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date) 
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
              (
                SELECT info.ts_code, base_date date_frm, exp_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_index_basic info 
              ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date) 
            ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 计算每只股票需要获取日线数据的日期区间
        begin_time = None
        # 获取date_from,date_to,将date_from,date_to做为value值
        code_date_range_dic = {
            ts_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    # data_len = len(code_date_range_dic)
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(
        code_date_range_dic)
    logger.info('%d stocks will been import into tushare_stock_index_daily_md',
                data_len)
    # 将data_df数据,添加到data_df_list
    try:
        for num, (ts_code, (date_from,
                            date_to)) in enumerate(code_date_range_dic.items(),
                                                   start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code,
                         date_from, date_to)
            data_df = invoke_index_daily(
                ts_code=ts_code,
                start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            # data_df = df
            if data_df is not None and data_df.shape[0] > 0:
                while try_2_date(data_df['trade_date'].iloc[-1]) > date_from:
                    last_date_in_df_last, last_date_in_df_cur = try_2_date(
                        data_df['trade_date'].iloc[-1]), None
                    df2 = invoke_index_daily(
                        ts_code=ts_code,
                        start_date=datetime_2_str(date_from,
                                                  STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(
                            try_2_date(data_df['trade_date'].iloc[-1]) -
                            timedelta(days=1), STR_FORMAT_DATE_TS))
                    if len(df2 > 0):
                        last_date_in_df_cur = try_2_date(
                            df2['trade_date'].iloc[-1])
                        if last_date_in_df_cur < last_date_in_df_last:
                            data_df = pd.concat([data_df, df2])
                            # df = df2
                        elif last_date_in_df_cur == last_date_in_df_last:
                            break
                        if data_df is None:
                            logger.warning(
                                '%d/%d) %s has no data during %s %s', num,
                                data_len, ts_code, date_from, date_to)
                            continue
                        logger.info('%d/%d) %d data of %s between %s and %s',
                                    num, data_len, data_df.shape[0], ts_code,
                                    date_from, date_to)
                    else:
                        break

                # 把数据攒起来
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # 仅调试使用
            if DEBUG and len(data_df_list) > 5:
                break

            # 大于阀值有开始插入
            if data_count >= 500:
                data_df_all = pd.concat(data_df_list)
                bunch_insert(data_df_all,
                             table_name=table_name,
                             dtype=DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD,
                             primary_keys=["ts_code", "trade_date"])
                all_data_count += data_count
                data_df_list, data_count = [], 0

    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(data_df_all,
                                      table_name=table_name,
                                      dtype=DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD,
                                      primary_keys=["ts_code", "trade_date"])
            all_data_count += data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
def import_tushare_moneyflow_hsgt(chain_param=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_moneyflow_hsgt'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('trade_date', Date),
        ('ggt_ss', DOUBLE),
        ('ggt_sz', DOUBLE),
        ('hgt', DOUBLE),
        ('sgt', DOUBLE),
        ('north_money', DOUBLE),
        ('south_money', DOUBLE),

    ]

    has_table = engine_md.has_table(table_name)
    # 进行表格判断,确定是否含有tushare_daily_basic

    # 下面一定要注意引用表的来源,否则可能是串,提取混乱!!!比如本表是tushare_daily_basic,所以引用的也是这个,如果引用错误,就全部乱了l
    if has_table:
        sql_str = """
               select cal_date            
               FROM
                (
                 select * from tushare_trade_date trddate 
                 where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
               )tt
               where (is_open=1 
                      and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                      and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
               SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
            AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
            AND exchange='SSE'  AND cal_date>='2014-11-17') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取交易日数据
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())
    # 设置 dtype
    dtype = {key: val for key, val in param_list}

    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_moneyflow_hsgt(trade_date=trade_date)
            if len(data_df) > 0:
                data_count = bunch_insert(
                    data_df, table_name=table_name, dtype=dtype, primary_keys=['trade_date'])
                logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新",
                             num, trade_date_list_len, trade_date, table_name, data_count)
            else:
                logging.info("无数据信息可被更新")
    except:
        logger.exception('更新 %s 表异常', table_name)
def import_tushare_stock_fina_indicator(chain_param=None, ts_code_set=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_stock_fin_indicator'
    logging.info("更新 %s 开始", table_name)
    primary_keys = ['ts_code', 'ann_date', 'end_date']
    has_table = engine_md.has_table(table_name)
    # 进行表格判断,确定是否含有tushare_stock_daily
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(ann_date, list_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                  tushare_stock_info info 
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(ann_date),1) ann_date 
                    FROM {table_name} GROUP BY ts_code) fina_indicator
                ON info.ts_code = fina_indicator.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
              (
                SELECT info.ts_code, list_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_info info 
              ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 计算每只股票需要获取日线数据的日期区间
        begin_time = None
        # 获取date_from,date_to,将date_from,date_to做为value值
        code_date_range_dic = {
            ts_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    fields = 'ts_code', 'ann_date', 'end_date', 'eps', 'dt_eps', 'total_revenue_ps', 'revenue_ps', 'capital_rese_ps', 'surplus_rese_ps', \
             'undist_profit_ps', 'extra_item', 'profit_dedt', 'gross_margin', 'current_ratio', 'quick_ratio', 'cash_ratio', 'invturn_days', 'arturn_days', \
             'inv_turn', 'ar_turn', 'ca_turn', 'fa_turn', 'assets_turn', 'op_income', 'valuechange_income', 'interst_income', 'daa', 'ebit', 'ebitda', 'fcff', \
             'fcfe', 'current_exint', 'noncurrent_exint', 'interestdebt', 'netdebt', 'tangible_asset', 'working_capital', 'networking_capital', 'invest_capital', \
             'retained_earnings', 'diluted2_eps', 'bps', 'ocfps', 'retainedps', 'cfps', 'ebit_ps', 'fcff_ps', 'fcfe_ps', 'netprofit_margin', 'grossprofit_margin', \
             'cogs_of_sales', 'expense_of_sales', 'profit_to_gr', 'saleexp_to_gr', 'adminexp_of_gr', 'finaexp_of_gr', 'impai_ttm', 'gc_of_gr', 'op_of_gr', \
             'ebit_of_gr', 'roe', 'roe_waa', 'roe_dt', 'roa', 'npta', 'roic', 'roe_yearly', 'roa2_yearly', 'roe_avg', 'opincome_of_ebt', 'investincome_of_ebt', \
             'n_op_profit_of_ebt', 'tax_to_ebt', 'dtprofit_to_profit', 'salescash_to_or', 'ocf_to_or', 'ocf_to_opincome', 'capitalized_to_da', 'debt_to_assets', \
             'assets_to_eqt', 'dp_assets_to_eqt', 'ca_to_assets', 'nca_to_assets', 'tbassets_to_totalassets', 'int_to_talcap', 'eqt_to_talcapital', 'currentdebt_to_debt', \
             'longdeb_to_debt', 'ocf_to_shortdebt', 'debt_to_eqt', 'eqt_to_debt', 'eqt_to_interestdebt', 'tangibleasset_to_debt', 'tangasset_to_intdebt', \
             'tangibleasset_to_netdebt', 'ocf_to_debt', 'ocf_to_interestdebt', 'ocf_to_netdebt', 'ebit_to_interest', 'longdebt_to_workingcapital', 'ebitda_to_debt', \
             'turn_days', 'roa_yearly', 'roa_dp', 'fixed_assets', 'profit_prefin_exp', 'non_op_profit', 'op_to_ebt', 'nop_to_ebt', 'ocf_to_profit', 'cash_to_liqdebt', \
             'cash_to_liqdebt_withinterest', 'op_to_liqdebt', 'op_to_debt', 'roic_yearly', 'total_fa_trun', 'profit_to_op', 'q_opincome', 'q_investincome', 'q_dtprofit', \
             'q_eps', 'q_netprofit_margin', 'q_gsprofit_margin', 'q_exp_to_sales', 'q_profit_to_gr', 'q_saleexp_to_gr', 'q_adminexp_to_gr', 'q_finaexp_to_gr', \
             'q_impair_to_gr_ttm', 'q_gc_to_gr', 'q_op_to_gr', 'q_roe', 'q_dt_roe', 'q_npta', 'q_opincome_to_ebt', 'q_investincome_to_ebt', 'q_dtprofit_to_profit', \
             'q_salescash_to_or', 'q_ocf_to_sales', 'q_ocf_to_or', 'basic_eps_yoy', 'dt_eps_yoy', 'cfps_yoy', 'op_yoy', 'ebt_yoy', 'netprofit_yoy', 'dt_netprofit_yoy', \
             'ocf_yoy', 'roe_yoy', 'bps_yoy', 'assets_yoy', 'eqt_yoy', 'tr_yoy', 'or_yoy', 'q_gr_yoy', 'q_gr_qoq', 'q_sales_yoy', 'q_sales_qoq', 'q_op_yoy', 'q_op_qoq', \
             'q_profit_yoy', 'q_profit_qoq', 'q_netprofit_yoy', 'q_netprofit_qoq', 'equity_yoy', 'rd_exp'

    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(
        code_date_range_dic)
    logger.info('%d 财务指标信息将被插入 tushare_stock_fin_indicator 表', data_len)
    # 将data_df数据,添加到data_df_list

    Cycles = 1
    try:
        for num, (ts_code, (date_from,
                            date_to)) in enumerate(code_date_range_dic.items(),
                                                   start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code,
                         date_from, date_to)
            data_df = invoke_fina_indicator(
                ts_code=ts_code,
                start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS),
                fields=fields)
            # logger.info(' %d data of %s between %s and %s', df.shape[0], ts_code, date_from, date_to)
            if data_df is not None and len(
                    data_df) > 0 and data_df['ann_date'].iloc[-1] is not None:
                while try_2_date(data_df['ann_date'].iloc[-1]) > date_from:
                    last_date_in_df_last = try_2_date(
                        data_df['ann_date'].iloc[-1])
                    df2 = invoke_fina_indicator(
                        ts_code=ts_code,
                        start_date=datetime_2_str(date_from,
                                                  STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(
                            try_2_date(data_df['ann_date'].iloc[-1]) -
                            timedelta(days=1), STR_FORMAT_DATE_TS),
                        fields=fields)
                    if len(df2) > 0 and df2['ann_date'].iloc[-1] is not None:
                        last_date_in_df_cur = try_2_date(
                            df2['ann_date'].iloc[-1])
                        if last_date_in_df_cur < last_date_in_df_last:
                            data_df = pd.concat([data_df, df2])
                        elif last_date_in_df_cur == last_date_in_df_last:
                            break
                    elif len(df2) <= 0:
                        break
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num,
                               data_len, ts_code, date_from, date_to)
                continue
            elif data_df is not None:
                logger.info('%d/%d) %d 条 %s 财务指标已提取,起止时间 %s 和 %s', num,
                            data_len, data_df.shape[0], ts_code, date_from,
                            date_to)
            # 把数据攒起来
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # 大于阀值有开始插入
            if data_count >= 1000 and len(data_df_list) > 0:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert(data_df_all,
                                          table_name=table_name,
                                          dtype=DTYPE_STOCK_FINA_INDICATOR,
                                          primary_keys=primary_keys)

                all_data_count += data_count
                logger.info('%d 条财务指标将数据插入 %s 表', data_count, table_name)
                data_df_list, data_count = [], 0
            # 仅调试使用
            Cycles = Cycles + 1
            if DEBUG and Cycles > 10:
                break
    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(data_df_all,
                                      table_name=table_name,
                                      dtype=DTYPE_STOCK_FINA_INDICATOR,
                                      primary_keys=primary_keys)

            all_data_count += data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
def import_tushare_stock_cashflow(chain_param=None, ts_code_set=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_stock_cashflow'
    primary_keys = ['ts_code', 'ann_date', 'end_date']
    logging.info("更新 %s 开始", table_name)
    check_sqlite_db_primary_keys(table_name, primary_keys)
    has_table = engine_md.has_table(table_name)
    # 进行表格判断,确定是否含有tushare_stock_daily
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(ann_date, list_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                  tushare_stock_info info 
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(ann_date),1) ann_date 
                    FROM {table_name} GROUP BY ts_code) cashflow
                ON info.ts_code = cashflow.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
              (
                SELECT info.ts_code, list_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_info info 
              ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 计算每只股票需要获取日线数据的日期区间
        begin_time = None
        # 获取date_from,date_to,将date_from,date_to做为value值
        code_date_range_dic = {
            ts_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(
        code_date_range_dic)
    logger.info('%d data will been import into %s', data_len, table_name)
    # 将data_df数据,添加到data_df_list

    cycles = 1
    try:
        for num, (ts_code, (date_from,
                            date_to)) in enumerate(code_date_range_dic.items(),
                                                   start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code,
                         date_from, date_to)
            df = invoke_cashflow(
                ts_code=ts_code,
                start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            # logger.info(' %d data of %s between %s and %s', df.shape[0], ts_code, date_from, date_to)
            data_df = df
            if data_df is not None and len(data_df) > 0:
                while try_2_date(df['ann_date'].iloc[-1]) > date_from:
                    last_date_in_df_last, last_date_in_df_cur = try_2_date(
                        df['ann_date'].iloc[-1]), None
                    df2 = invoke_cashflow(
                        ts_code=ts_code,
                        start_date=datetime_2_str(date_from,
                                                  STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(
                            try_2_date(df['ann_date'].iloc[-1]) -
                            timedelta(days=1), STR_FORMAT_DATE_TS))
                    if len(df2) > 0:
                        last_date_in_df_cur = try_2_date(
                            df2['ann_date'].iloc[-1])
                        if last_date_in_df_cur < last_date_in_df_last:
                            data_df = pd.concat([data_df, df2])
                            df = df2
                        elif last_date_in_df_cur == last_date_in_df_last:
                            break
                    elif len(df2) <= 0:
                        break
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num,
                               data_len, ts_code, date_from, date_to)
                continue
            elif data_df is not None:
                logger.info('%d/%d) %d 条 %s 的现金流被提取,起止时间为 %s 和 %s', num,
                            data_len, data_df.shape[0], ts_code, date_from,
                            date_to)

            # 把数据攒起来
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # 大于阀值有开始插入
            if data_count >= 1000 and len(data_df_list) > 0:
                data_df_all = pd.concat(data_df_list)
                bunch_insert(data_df_all,
                             table_name=table_name,
                             dtype=DTYPE_TUSHARE_CASHFLOW,
                             primary_keys=primary_keys)

                logger.info('%d 条现金流数据已插入 %s 表', data_count, table_name)
                all_data_count += data_count
                data_df_list, data_count = [], 0
                # # 数据插入数据库
                # data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, DTYPE_TUSHARE_CASHFLOW)
                # logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)

            # 仅调试使用
            cycles = cycles + 1
            if DEBUG and cycles > 10:
                break
    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(data_df_all,
                                      table_name=table_name,
                                      dtype=DTYPE_TUSHARE_CASHFLOW,
                                      primary_keys=primary_keys)

            all_data_count = all_data_count + data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
Beispiel #12
0
def import_tushare_fut_wsr(chain_param=None, ts_code_set=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_fut_wsr'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # 进行表格判断,确定是否含有tushare_stock_daily
    if has_table:
        sql_str = """
                  select cal_date            
                  FROM
                   (
                    select * from tushare_future_trade_cal trddate 
                    where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
                  )tt
                  where (is_open=1 
                         and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                         ) """.format(table_name=table_name)
    else:
        sql_str = """
                    SELECT cal_date FROM tushare_future_trade_cal trddate WHERE (trddate.is_open=1 
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
               AND cal_date>'19950414') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取交易日数据
        table = session.execute(sql_str)
        trddate = list(row[0] for row in table.fetchall())

    # data_len = len(code_date_range_dic)
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(trddate)
    logger.info('%d 日的期货仓单数据将被导入数据库', data_len)
    # 将data_df数据,添加到data_df_list
    fields = 'trade_date,symbol,fut_name,warehouse,wh_id,pre_vol,vol,vol_chg,area,year,grade,brand,place,pd,is_ct,unit,exchange'
    try:
        for i in range(len(trddate)):
            trade_date = datetime_2_str(trddate[i], STR_FORMAT_DATE_TS)
            data_df = invoke_fut_wsr(trade_date=trade_date, fields=fields)
            logging.info(" 提取 %s 日 %d 条期货仓单数据", trade_date, data_df.shape[0])

            # 把数据攒起来
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # 大于阀值有开始插入
            if data_count >= 1000:
                data_df_all = pd.concat(data_df_list)
                bunch_insert(data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_FUTURE_WSR,
                             primary_keys=['symbol', 'trade_date'])
                logging.info(" 更新%s表%d条期货仓单数据", table_name, data_count)
                all_data_count += data_count
                data_df_list, data_count = [], 0

    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_FUTURE_WSR,
                                      primary_keys=['symbol', 'trade_date'])
            all_data_count = all_data_count + data_count
            logging.info("更新 %s 结束 %d 条仓单信息被更新", table_name, all_data_count)
Beispiel #13
0
def import_jq_stock_daily(chain_param=None, code_set=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name_info = TABLE_NAME_INFO
    table_name = TABLE_NAME
    table_name_bak = get_bak_table_name(table_name)
    logging.info("更新 %s 开始", table_name)

    # 根据 info table 查询每只股票日期区间
    sql_info_str = f"""
        SELECT jq_code, date_frm, if(date_to<end_date, date_to, end_date) date_to
        FROM
          (
            SELECT info.jq_code, start_date date_frm, end_date date_to,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM {table_name_info} info
          ) tt
        WHERE date_frm <= if(date_to<end_date, date_to, end_date)
        ORDER BY jq_code"""

    has_table = engine_md.has_table(table_name)
    has_bak_table = engine_md.has_table(table_name_bak)
    # 进行表格判断,确定是否含有 jq_stock_daily_md
    if has_table:
        # 这里对原始的 sql语句进行了调整
        # 以前的逻辑:每只股票最大的一个交易日+1天作为起始日期
        # 现在的逻辑:每只股票最大一天的交易日作为起始日期
        # 主要原因在希望通过接口获取到数据库中现有最大交易日对应的 factor因子以进行比对
        sql_trade_date_range_str = f"""
            SELECT jq_code, date_frm, if(date_to<end_date, date_to, end_date) date_to
            FROM
            (
                SELECT info.jq_code, ifnull(trade_date, info.start_date) date_frm, info.end_date date_to,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                    {table_name_info} info 
                LEFT OUTER JOIN
                    (SELECT jq_code, max(trade_date) trade_date FROM {table_name} GROUP BY jq_code) daily
                ON info.jq_code = daily.jq_code
            ) tt
            WHERE date_frm < if(date_to<end_date, date_to, end_date) 
            ORDER BY jq_code"""

    else:
        sql_trade_date_range_str = sql_info_str
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, table_name_info)

    sql_trade_date_str = """SELECT trade_date FROM jq_trade_date trddate 
       WHERE trade_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) ORDER BY trade_date"""

    with with_db_session(engine_md) as session:
        # 获取截至当期全部交易日前
        table = session.execute(sql_trade_date_str)
        trade_date_list = [row[0] for row in table.fetchall()]
        trade_date_list.sort()
        # 获取每只股票日线数据的日期区间
        table = session.execute(sql_trade_date_range_str)
        # 计算每只股票需要获取日线数据的日期区间
        # 获取date_from,date_to,将date_from,date_to做为value值
        code_date_range_dic = {
            key_code: (date_from, date_to)
            for key_code, date_from, date_to in table.fetchall()
            if code_set is None or key_code in code_set
        }

        # 从 info 表中查询全部日期区间
        if sql_info_str == sql_trade_date_range_str:
            code_date_range_from_info_dic = code_date_range_dic
        else:
            # 获取每只股票日线数据的日期区间
            table = session.execute(sql_info_str)
            # 计算每只股票需要获取日线数据的日期区间
            # 获取date_from,date_to,将date_from,date_to做为value值
            code_date_range_from_info_dic = {
                key_code: (date_from, date_to)
                for key_code, date_from, date_to in table.fetchall()
                if code_set is None or key_code in code_set
            }

    # data_len = len(code_date_range_dic)
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(
        code_date_range_dic)
    logger.info('%d stocks will been import into %s', data_len, table_name)
    # 将data_df数据,添加到data_df_list

    try:
        for num, (key_code,
                  (date_from_tmp,
                   date_to_tmp)) in enumerate(code_date_range_dic.items(),
                                              start=1):
            data_df = None
            try:
                for loop_count in range(2):
                    # 根据交易日数据取交集,避免不用的请求耽误时间
                    date_from = get_first(trade_date_list,
                                          lambda x: x >= date_from_tmp)
                    date_to = get_last(trade_date_list,
                                       lambda x: x <= date_to_tmp)
                    if date_from is None or date_to is None or date_from >= date_to:
                        logger.debug('%d/%d) %s [%s - %s] 跳过', num, data_len,
                                     key_code, date_from, date_to)
                        break
                    logger.debug('%d/%d) %s [%s - %s] %s', num, data_len,
                                 key_code, date_from, date_to,
                                 '第二次查询' if loop_count > 0 else '')
                    data_df = invoke_daily(key_code=key_code,
                                           start_date=date_2_str(date_from),
                                           end_date=date_2_str(date_to))

                    # 该判断只在第一次循环时执行
                    if loop_count == 0 and has_table:
                        # 进行 factor 因子判断,如果发现最小的一个交易日的因子不为1,则删除数据库中该股票的全部历史数据,然后重新下载。
                        # 因为当期股票下载的数据为前复权价格,如果股票出现复权调整,则历史数据全部需要重新下载
                        factor_value = data_df.sort_values('trade_date').iloc[
                            0, :]['factor']
                        if factor_value != 1 and (
                                code_date_range_from_info_dic[key_code][0] !=
                                code_date_range_dic[key_code][0]):
                            # 删除该股屏历史数据
                            sql_str = f"delete from {table_name} where jq_code=:jq_code"
                            row_count = execute_sql_commit(
                                sql_str, params={'jq_code': key_code})
                            date_from_tmp, date_to_tmp = code_date_range_from_info_dic[
                                key_code]
                            if has_bak_table:
                                sql_str = f"delete from {table_name_bak} where jq_code=:jq_code"
                                row_count = execute_sql_commit(
                                    sql_str, params={'jq_code': key_code})
                                date_from_tmp, date_to_tmp = code_date_range_from_info_dic[
                                    key_code]
                                logger.info(
                                    '%d/%d) %s %d 条历史记录被清除,重新加载前复权历史数据 [%s - %s] 同时清除bak表中相应记录',
                                    num, data_len, key_code, row_count,
                                    date_from_tmp, date_to_tmp)
                            else:
                                logger.info(
                                    '%d/%d) %s %d 条历史记录被清除,重新加载前复权历史数据 [%s - %s]',
                                    num, data_len, key_code, row_count,
                                    date_from_tmp, date_to_tmp)

                            # 重新设置起止日期,进行第二次循环
                            continue

                    # 退出  for _ in range(2): 循环
                    break
            except Exception as exp:
                data_df = None
                logger.exception('%s [%s - %s]', key_code,
                                 date_2_str(date_from_tmp),
                                 date_2_str(date_to_tmp))
                if exp.args[0].find('超过了每日最大查询限制'):
                    break

            # 把数据攒起来
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # 大于阀值有开始插入
            if data_count >= 500:
                data_df_all = pd.concat(data_df_list)
                bunch_insert(data_df_all,
                             table_name,
                             dtype=DTYPE,
                             primary_keys=['jq_code', 'trade_date'])
                all_data_count += data_count
                data_df_list, data_count = [], 0

            if DEBUG and num >= 2:
                break

    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(data_df_all,
                                      table_name,
                                      dtype=DTYPE,
                                      primary_keys=['jq_code', 'trade_date'])
            all_data_count = all_data_count + data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
def import_tushare_future_daily(chain_param=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_future_daily_md'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # 进行表格判断,确定是否含有 table_name
    if has_table:
        sql_str = """
                  select cal_date            
                  FROM
                   (
                    select * from tushare_future_trade_cal trddate 
                    where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
                  )tt
                  where (is_open=1 
                         and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                         ) """.format(table_name=table_name)
    else:
        sql_str = """
                    SELECT cal_date FROM tushare_future_trade_cal trddate WHERE (trddate.is_open=1 
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
               AND cal_date>'19950414') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_future_trade_cal 表进行计算日期范围',
                       table_name)

    with with_db_session(engine_md) as session:
        # 获取交易日数据
        table = session.execute(sql_str)
        trddate = list(row[0] for row in table.fetchall())

    # data_len = len(code_date_range_dic)
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(trddate)
    logger.info('%d data will been import into %s', data_len, table_name)
    # 将data_df数据,添加到data_df_list
    # fields = 'ts_code,trade_date,pre_close,pre_settle,open,high,low,close,settle,change1,change2,vol,amount,oi,oi_chg,delv_settle,delv_settle'
    fields = ','.join(
        [_[0] for _ in INDICATOR_PARAM_LIST_TUSHARE_FUTURE_DAILY_MD])
    try:
        for i in range(len(trddate)):
            trade_date = datetime_2_str(trddate[i], STR_FORMAT_DATE_TS)
            data_df = invoke_future_daily(trade_date=trade_date, fields=fields)
            logging.info(" 提取 %s 日 %d 条期货行情数据", trade_date, data_df.shape[0])

            # 把数据攒起来
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # 大于阀值有开始插入
            if data_count >= 1000:
                data_df_all = pd.concat(data_df_list)
                bunch_insert(data_df_all,
                             table_name=table_name,
                             dtype=DTYPE_TUSHARE_FUTURE_DAILY_MD,
                             primary_keys=['ts_code', 'trade_date'])
                logging.info(" 更新%s表%d条期货行情数据", table_name, data_count)
                all_data_count += data_count
                data_df_list, data_count = [], 0

    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(data_df_all,
                                      table_name=table_name,
                                      dtype=DTYPE_TUSHARE_FUTURE_DAILY_MD,
                                      primary_keys=['ts_code', 'trade_date'])
            all_data_count = all_data_count + data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
Beispiel #15
0
def import_tushare_margin(chain_param=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_stock_margin'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('trade_date', Date),
        ('exchange_id', String(20)),
        ('rzye', DOUBLE),
        ('rzmre', DOUBLE),
        ('rzche', DOUBLE),
        ('rqye', DOUBLE),
        ('rqmcl', DOUBLE),
        ('rzrqye', DOUBLE),
    ]

    has_table = engine_md.has_table(table_name)
    # 进行表格判断,确定是否含有tushare_daily_basic

    if has_table:
        sql_str = """
                     select cal_date            
                     FROM
                      (
                       select * from tushare_trade_date trddate 
                       where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
                     )tt
                     where (is_open=1 
                            and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                            and exchange='SSE') """.format(
            table_name=table_name)
    else:
        sql_str = """
                     SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
                  AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                  AND exchange='SSE'  AND cal_date>='2010-03-31') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取交易日数据
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())
    # 设置 dtype
    dtype = {key: val for key, val in param_list}

    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            for exchange_id in list(['SSE', 'SZSE']):
                data_df = invoke_margin(trade_date=trade_date,
                                        exchange_id=exchange_id)
                if len(data_df) > 0:
                    # data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
                    # logging.info("%s更新 %s %s 结束 %d 条信息被更新", trade_date, table_name, exchange_id, data_count)
                    data_count = bunch_insert(
                        data_df,
                        table_name=table_name,
                        dtype=dtype,
                        primary_keys=['exchange_id', 'trade_date'])
                    logging.info("%d/%d) %s %s 更新 %s 结束 %d 条信息被更新", num,
                                 trade_date_list_len, exchange_id, trade_date,
                                 table_name, data_count)
                else:
                    logging.info("%d/%d) %s %s 无数据信息可被更新 %s", num,
                                 trade_date_list_len, exchange_id, trade_date,
                                 table_name)
    except:
        logger.exception('更新 %s 表异常', table_name)
Beispiel #16
0
def update_df_2_db(instrument_type, table_name, data_df):
    """将 DataFrame 数据保存到 数据库对应的表中"""
    dtype = {
        'trade_date': Date,
        'Contract': String(20),
        'ContractNext': String(20),
        'Close': DOUBLE,
        'CloseNext': DOUBLE,
        'Volume': DOUBLE,
        'VolumeNext': DOUBLE,
        'OI': DOUBLE,
        'OINext': DOUBLE,
        'Open': DOUBLE,
        'OpenNext': DOUBLE,
        'High': DOUBLE,
        'HighNext': DOUBLE,
        'Low': DOUBLE,
        'LowNext': DOUBLE,
        'Amount': DOUBLE,
        'AmountNext': DOUBLE,
        'adj_factor_main': DOUBLE,
        'adj_factor_secondary': DOUBLE,
        'instrument_type': String(20),
    }
    # 为了解决 AttributeError: 'numpy.float64' object has no attribute 'translate' 错误,需要将数据类型转换成 float
    data_df["Close"] = data_df["Close"].apply(str_2_float)
    data_df["CloseNext"] = data_df["CloseNext"].apply(str_2_float)
    data_df["Volume"] = data_df["Volume"].apply(str_2_float)
    data_df["VolumeNext"] = data_df["VolumeNext"].apply(str_2_float)
    data_df["OI"] = data_df["OI"].apply(str_2_float)
    data_df["OINext"] = data_df["OINext"].apply(str_2_float)
    data_df["Open"] = data_df["Open"].apply(str_2_float)
    data_df["OpenNext"] = data_df["OpenNext"].apply(str_2_float)
    data_df["High"] = data_df["High"].apply(str_2_float)
    data_df["HighNext"] = data_df["HighNext"].apply(str_2_float)
    data_df["Low"] = data_df["Low"].apply(str_2_float)
    data_df["LowNext"] = data_df["LowNext"].apply(str_2_float)
    data_df["Amount"] = data_df["Amount"].apply(str_2_float)
    data_df["AmountNext"] = data_df["AmountNext"].apply(str_2_float)
    data_df["adj_factor_main"] = data_df["adj_factor_main"].apply(str_2_float)
    data_df["adj_factor_secondary"] = data_df["adj_factor_secondary"].apply(
        str_2_float)
    # 清理历史记录
    with with_db_session(engine_md) as session:
        sql_str = """SELECT table_name FROM information_schema.TABLES 
            WHERE table_name = :table_name and TABLE_SCHEMA=(select database())"""
        # 复权数据表
        is_existed = session.execute(sql_str,
                                     params={
                                         "table_name": table_name
                                     }).fetchone()
        if is_existed is not None:
            session.execute(
                "delete from %s where instrument_type = :instrument_type" %
                table_name,
                params={"instrument_type": instrument_type})
            logger.debug("删除 %s 中的 %s 历史数据", table_name, instrument_type)

    # 插入数据库
    bunch_insert(data_df,
                 table_name=table_name,
                 dtype=dtype,
                 primary_keys=['trade_date', 'Contract'])
Beispiel #17
0
def import_tushare_top_list(chain_param=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_stock_top_list'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """
               select cal_date            
               FROM
                (
                 select * from tushare_trade_date trddate 
                 where( cal_date>(SELECT max(trade_date) FROM {table_name} ))
               )tt
               where (is_open=1 
                      and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                      and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
               SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
            AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
            AND exchange='SSE'
            and cal_date>'2005-05-31') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取交易日数据
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())

    # 定义相应的中间变量
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(trade_date_list)
    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_top_list(trade_date=trade_date)
            # 把数据攒起来
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # 大于阀值有开始插入
            if data_count >= 2000:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert(
                    data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_TOP_LIST,
                    primary_keys=['ts_code', 'trade_date', 'reason'])
                logging.info("%d/%d) 更新 %s 结束 ,截至%s日 %d 条信息被更新",
                             num, trade_date_list_len, table_name, trade_date, all_data_count)
                all_data_count += data_count
                data_df_list, data_count = [], 0
    except:
        logger.exception('更新 %s 表异常', table_name)
    finally:
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(
                data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_TOP_LIST,
                primary_keys=['ts_code', 'trade_date', 'reason'])
            all_data_count = all_data_count + data_count

        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
def import_tushare_hsgt_top10(chain_param=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_hsgt_top10'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('trade_date', Date),
        ('ts_code', String(20)),
        ('name', String(20)),
        ('close', DOUBLE),
        ('change', DOUBLE),
        ('rank', Integer),
        ('market_type', String(20)),
        ('amount', DOUBLE),
        ('net_amount', DOUBLE),
        ('buy', DOUBLE),
        ('sell', DOUBLE),
    ]

    has_table = engine_md.has_table(table_name)
    # 进行表格判断,确定是否含有tushare_daily_basic

    if has_table:
        sql_str = """
                  select cal_date            
                  FROM
                   (
                    select * from tushare_trade_date trddate 
                    where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
                  )tt
                  where (is_open=1 
                         and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                         and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
                  SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
               AND exchange='SSE'  AND cal_date>='2014-11-17') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取交易日数据
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())
    # 设置 dtype
    dtype = {key: val for key, val in param_list}

    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            for market_type in list(['1', '3']):
                data_df = invoke_hsgt_top10(trade_date=trade_date,
                                            market_type=market_type)
                if len(data_df) > 0:
                    data_count = bunch_insert(
                        data_df,
                        table_name=table_name,
                        dtype=dtype,
                        primary_keys=['ts_code', 'trade_date'])
                    logging.info("%d/%d) %s更新 %s 结束 %d 条信息被更新", num,
                                 trade_date_list_len, trade_date, table_name,
                                 data_count)
                else:
                    logging.info("无数据信息可被更新")
                    break
    except:
        logger.exception('更新 %s 表异常', table_name)
def import_data(table_name, dtype, invoke_api,
                primary_keys=["index_symbol", "trade_date", "jq_code"], ts_code_set=None, is_debug=False, is_monthly=False):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    info_table = 'jq_index_info'
    # table_name = 'jq_index_stocks'
    # primary_keys = ["index_symbol", "trade_date", "jq_code"]
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # 进行表格判断,确定是否含有tushare_stock_daily
    if has_table:
        sql_str = f"""
            SELECT jq_code, date_from, if(date_to<end_date, date_to, end_date) date_to
            FROM
            (
            SELECT info.jq_code, ifnull(trade_date, start_date) date_from, end_date date_to,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM 
                {info_table} info 
            LEFT OUTER JOIN
                (SELECT index_symbol, adddate(max(trade_date),1) trade_date 
                FROM {table_name} GROUP BY index_symbol) daily
            ON info.jq_code = daily.index_symbol
            ) tt
            WHERE date_from <= if(date_to<end_date, date_to, end_date) 
            ORDER BY jq_code"""
    else:
        sql_str = f"""
            SELECT jq_code, date_from, if(date_to<end_date, date_to, end_date) date_to
            FROM
              (
                SELECT info.jq_code, start_date date_from, end_date date_to,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table} info 
              ) tt
            WHERE date_from <= if(date_to<end_date, date_to, end_date) 
            ORDER BY jq_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    sql_trade_date_str = """
       SELECT trade_date FROM jq_trade_date trddate 
       WHERE trade_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
       ORDER BY trade_date"""

    with with_db_session(engine_md) as session:
        table = session.execute(sql_trade_date_str)
        trade_date_list = [row[0] for row in table.fetchall()]
        trade_date_list.sort()
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        begin_time = None
        # 获取date_from,date_to,将date_from,date_to做为value值
        code_date_range_dic = {
            ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ts_code, date_from, date_to in table.fetchall() if
            ts_code_set is None or ts_code in ts_code_set}

    # data_len = len(code_date_range_dic)
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(code_date_range_dic)
    logger.info('%d records will been import into %s', data_len, table_name)
    # 将data_df数据,添加到data_df_list

    try:
        for num, (index_symbol, (date_from_tmp, date_to_tmp)) in enumerate(code_date_range_dic.items(), start=1):
            date_from_idx = get_first_idx(trade_date_list, lambda x: x >= date_from_tmp)
            date_to_idx = get_last_idx(trade_date_list, lambda x: x <= date_to_tmp)
            if date_from_idx is None or date_to_idx is None or date_from_idx > date_to_idx:
                logger.debug('%d/%d) %s [%s - %s] 跳过', num, data_len, index_symbol,
                             trade_date_list[date_from_idx] if date_from_idx is not None else None,
                             trade_date_list[date_to_idx] if date_to_idx is not None else None)
                continue
            if is_monthly:
                date_sample = trade_date_list[date_from_idx: (date_to_idx + 1)]
                date_sample = list(
                    pd.Series(date_sample, index=pd.DatetimeIndex(date_sample)
                              ).resample(rule='M', convention='end').last()
                )
            else:
                date_sample = trade_date_list[date_from_idx: (date_to_idx + 1)]

            date_from, date_to = date_sample[0], date_sample[-1]
            trade_date_count = len(date_sample)
            logger.debug('%d/%d) 开始导入 %s [%s - %s] %d 个交易日的数据 %s',
                         num, data_len, index_symbol, date_from, date_to, trade_date_count,
                         '月度更新' if is_monthly else '')
            for trade_date in date_sample:
                data_df = invoke_api(index_symbol=index_symbol, trade_date=trade_date)

                # 把数据攒起来
                if data_df is not None and data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)

                # 大于阀值有开始插入
                if data_count >= 1000:
                    data_count = bunch_insert(data_df_list, table_name=table_name, dtype=dtype,
                                              primary_keys=primary_keys)
                    all_data_count += data_count
                    data_df_list, data_count = [], 0

                if is_debug and len(data_df_list) > 1:
                    break
    except:
        logger.exception("%s 获取数据异常", table_name)
    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_count = bunch_insert(data_df_list, table_name=table_name, dtype=dtype, primary_keys=primary_keys)
            all_data_count += data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
def import_tushare_block_trade(chain_param=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_block_trade'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('trade_date', Date),
        ('ts_code', String(20)),
        ('price', DOUBLE),
        ('vol', DOUBLE),
        ('amount', DOUBLE),
        ('buyer', String(100)),
        ('seller', String(100)),
    ]

    has_table = engine_md.has_table(table_name)
    # 进行表格判断,确定是否含有 table_name

    if has_table:
        sql_str = f"""select cal_date            
                 FROM
                  (
                   select * from tushare_trade_date trddate 
                   where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
                 )tt
                 where (is_open=1 
                        and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                        and exchange='SSE') """
    else:
        # 2003-08-02 大宗交易制度开始实施
        sql_str = """SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
                  AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                  AND exchange='SSE'  AND cal_date>='2003-08-02') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取交易日数据
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())
    # 设置 dtype
    dtype = {key: val for key, val in param_list}

    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_block_trade(trade_date=trade_date)
            if len(data_df) > 0:
                # 当前表不设置主键,由于存在重复记录,因此无法设置主键
                # 例如:002325.SZ 2014-11-17 华泰证券股份有限公司沈阳光荣街证券营业部 两笔完全相同的大宗交易
                data_count = bunch_insert(
                    data_df, table_name=table_name, dtype=dtype)
                logging.info("%d/%d) %s更新 %s 结束 %d 条信息被更新",
                             num, trade_date_list_len, trade_date, table_name, data_count)
            else:
                logging.info("%d/%d) %s 无数据信息可被更新", num, trade_date_list_len, trade_date)
    except:
        logger.exception('更新 %s 表异常', table_name)
def import_tushare_future_daily_by_ts_code(ts_code_set=None):
    """
    补充指定合约的行情数据
    :return:
    """
    table_name = 'tushare_future_daily_md'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    with with_db_session(engine_md) as session:
        # 进行表格判断,确定是否含有 table_name
        if has_table:
            latest_trade_date_sql_str = """SELECT max(trade_date) FROM  {table_name}""".format(
                table_name=table_name)
            trade_date_latest = session.scalar(latest_trade_date_sql_str)
        else:
            trade_date_latest = None

        curr_trade_date_sql_str = """
            SELECT max(cal_date) FROM tushare_future_trade_cal trddate 
            WHERE trddate.is_open=1 AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
            ORDER BY cal_date"""
        trade_date_curr = session.scalar(curr_trade_date_sql_str)

        if ts_code_set is None:
            sql_str = """select ts_code, list_date, delist_date from tushare_future_basic"""
            params = None
        else:
            in_clause = ', '.join(
                [f':arg{_}' for _ in range(len(ts_code_set))])
            sql_str = f"""select ts_code, list_date, delist_date from tushare_future_basic
                where ts_code in ({in_clause})"""
            params = {f'arg{_}': code for _, code in enumerate(ts_code_set)}

        table = session.execute(sql_str, params=params)
        ts_code_date_range_pair_list = list(table.fetchall())

    logger.warning('%s 不存在,仅使用 tushare_future_trade_cal 表进行计算日期范围', table_name)

    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(
        ts_code_date_range_pair_list)
    logger.info('%d data will been import into %s', data_len, table_name)
    # 将data_df数据,添加到data_df_list
    # fields = 'ts_code,trade_date,pre_close,pre_settle,open,high,low,close,settle,change1,change2,vol,amount,oi,oi_chg,delv_settle,delv_settle'
    fields = ','.join(
        [_[0] for _ in INDICATOR_PARAM_LIST_TUSHARE_FUTURE_DAILY_MD])
    try:
        for num, (ts_code, date_from,
                  date_to) in enumerate(ts_code_date_range_pair_list, start=1):
            # 计算截止日期
            if trade_date_latest is None:
                date_end = min([date_to, trade_date_curr])
            else:
                date_end = min([date_to, trade_date_curr, trade_date_latest])
            # 获取数据
            data_df = invoke_future_daily_by_ts_code(ts_code,
                                                     date_from,
                                                     date_end,
                                                     fields=fields)
            logging.info(" 提取 %s [%s - %s] %d 条期货行情数据", ts_code, date_from,
                         date_to, data_df.shape[0])

            # 数据合并
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # 大于阀值有开始插入
            if data_count >= 1000:
                data_df_all = pd.concat(data_df_list)
                bunch_insert(data_df_all,
                             table_name=table_name,
                             dtype=DTYPE_TUSHARE_FUTURE_DAILY_MD,
                             primary_keys=['ts_code', 'trade_date'])
                logging.info(" 更新%s表%d条期货行情数据", table_name, data_count)
                all_data_count += data_count
                data_df_list, data_count = [], 0

    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(data_df_all,
                                      table_name=table_name,
                                      dtype=DTYPE_TUSHARE_FUTURE_DAILY_MD,
                                      primary_keys=['ts_code', 'trade_date'])
            all_data_count = all_data_count + data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)