def min_to_vnpy_increment(chain_param=None, instrument_types=None):
    """
    Incrementally copy 1-minute bars from ``wind_future_min`` into vnpy's ``dbbardata`` table.

    For every wind code returned by ``get_wind_code_list_by_types`` the newest
    ``datetime`` already stored in vnpy for that symbol and interval is looked
    up. Only source rows newer than that are read and appended; when vnpy has
    nothing for the symbol, the whole history is copied. The source queries
    exclude rows whose close is null or zero, and rows with any NaN are dropped.

    :param chain_param: not used by this function
    :param instrument_types: forwarded to ``get_wind_code_list_by_types``
    :return: None
    """
    from tasks.config import config
    from tasks.backend import engine_dic
    interval = '1m'
    table_name = 'dbbardata'
    engine_vnpy = engine_dic[config.DB_SCHEMA_VNPY]
    if not engine_vnpy.has_table(table_name):
        logger.error('当前数据库 %s 没有 %s 表,建议使用 vnpy先建立相应的数据库表后再进行导入操作',
                     engine_vnpy, table_name)
        return

    # The full and the incremental query share the projection and the close filter.
    select_clause = ("select trade_datetime `datetime`, `open` open_price, high high_price, "
                     "`low` low_price, `close` close_price, volume, position as open_interest "
                     "from wind_future_min where wind_code = %s and ")
    close_filter = "`close` is not null and `close` <> 0"
    sql_increment_str = select_clause + "trade_datetime > %s and " + close_filter
    sql_whole_str = select_clause + close_filter
    # Newest bar already present in vnpy for one symbol / interval.
    max_datetime_sql = f"select max(`datetime`) from {table_name} where symbol=:symbol and `interval`=:interval"

    wind_code_list = get_wind_code_list_by_types(instrument_types)
    wind_code_count = len(wind_code_list)
    for n, wind_code in enumerate(wind_code_list, start=1):
        symbol, exchange = wind_code.split('.')
        if exchange not in WIND_VNPY_EXCHANGE_DIC:
            # Unknown exchange: fall back to the wind suffix unchanged.
            logger.warning('%s exchange: %s 在交易所列表中不存在', wind_code, exchange)
            exchange_vnpy = exchange
        else:
            exchange_vnpy = WIND_VNPY_EXCHANGE_DIC[exchange]

        with with_db_session(engine_vnpy) as session:
            datetime_exist = session.scalar(
                max_datetime_sql,
                params={'symbol': symbol, 'interval': interval})

        # Read the minute bars: everything, or only what is newer than vnpy's latest bar.
        if datetime_exist is None:
            query, query_params = sql_whole_str, [wind_code]
        else:
            query, query_params = sql_increment_str, [wind_code, datetime_exist]
        df = pd.read_sql(query, engine_md, params=query_params).dropna()

        df_len = df.shape[0]
        if df_len == 0:
            continue

        df['symbol'] = symbol
        df['exchange'] = exchange_vnpy
        df['interval'] = interval
        datetime_latest = df['datetime'].max().to_pydatetime()
        df.to_sql(table_name, engine_vnpy, if_exists='append', index=False)
        logger.info("%d/%d) %s (%s ~ %s] %d data -> %s interval %s", n,
                    wind_code_count, symbol, datetime_2_str(datetime_exist),
                    datetime_2_str(datetime_latest), df_len, table_name,
                    interval)
Esempio n. 2
0
def min_to_vnpy(chain_param=None, instrument_types=None):
    """
    Copy 1-minute bars from ``rqdatac_future_min`` into vnpy's ``dbbardata`` table.

    For each ``(order_book_id, exchange, symbol)`` returned by
    ``get_code_list_by_types`` the complete source history is read. If vnpy
    already holds bars for the symbol that are at least as recent as the
    source, the contract is skipped; otherwise the existing bars for that
    symbol/interval are deleted and the full history is re-inserted.

    :param chain_param: not used by this function
    :param instrument_types: forwarded to ``get_code_list_by_types``
    :return: None
    """
    from tasks.config import config
    from tasks.backend import engine_dic
    interval = '1m'
    table_name = 'dbbardata'
    engine_vnpy = engine_dic[config.DB_SCHEMA_VNPY]
    if not engine_vnpy.has_table(table_name):
        logger.error('当前数据库 %s 没有 %s 表,建议使用 vnpy先建立相应的数据库表后再进行导入操作',
                     engine_vnpy, table_name)
        return

    # Loop-invariant statements, built once. `interval` is bound as a parameter
    # (as min_to_vnpy_increment does) instead of being formatted into the SQL.
    md_sql_str = "select trade_date `datetime`, `open` open_price, high high_price, " \
                 "`low` low_price, `close` close_price, volume, open_interest " \
                 "from rqdatac_future_min where order_book_id = %s and `close` is not null"
    max_sql_str = f"select max(`datetime`) from {table_name} where symbol=:symbol and `interval`=:interval"
    del_sql_str = f"delete from {table_name} where symbol=:symbol and `interval`=:interval"

    code_list = get_code_list_by_types(instrument_types)
    code_count, do_count = len(code_list), 0
    logger.info("导入分钟级数据到 vnpy 数据库,预计 %d 条记录", code_count)
    data_count = 0
    for n, (order_book_id, exchange, symbol) in enumerate(code_list, start=1):
        # Read the k-line data for this contract.
        df = pd.read_sql(md_sql_str, engine_md, params=[order_book_id]).dropna()
        df_len = df.shape[0]
        if df_len == 0:
            continue

        df['symbol'] = symbol
        df['exchange'] = exchange
        df['interval'] = interval
        datetime_latest = df['datetime'].max().to_pydatetime()
        bind_params = {'symbol': symbol, 'interval': interval}
        with with_db_session(engine_vnpy) as session:
            datetime_exist = session.scalar(max_sql_str, params=bind_params)

            if datetime_exist is not None:
                if datetime_exist >= datetime_latest:
                    # vnpy is already up to date for this symbol.
                    continue
                # Stale data: wipe it so the full history can be re-inserted.
                session.execute(del_sql_str, params=bind_params)
                session.commit()

        df.to_sql(table_name, engine_vnpy, if_exists='append', index=False)
        logger.info(
            "%d/%d) %s %s -> %s %d data have been insert into table %s", n,
            code_count, symbol, datetime_2_str(datetime_exist),
            datetime_2_str(datetime_latest), df_len, table_name)
        # Count only contracts that were actually written, so the summary
        # below matches data_count.
        do_count += 1
        data_count += df_len

    logger.info(f"全部 {do_count:,d} 个合约 {data_count:,d} 条数据插入完成")
Esempio n. 3
0
def import_tushare_adj_factor(chain_param=None, ):
    """
    Download tushare adjustment factors day by day into ``tushare_stock_daily_adj_factor``.

    The open SSE calendar days in ``tushare_trade_date`` that are still
    missing are requested one at a time through ``pro.adj_factor`` and written
    with ``bunch_insert``. "Missing" means later than the newest ``trade_date``
    already stored, or every day when the table does not exist yet. The upper
    bound is yesterday before 16:00 database time and today afterwards.

    Errors raised while importing are logged and not re-raised; the total
    number of inserted rows is always logged at the end.

    :param chain_param: not used by this function
    :return: None
    """
    table_name = 'tushare_stock_daily_adj_factor'
    primary_keys = ["ts_code", "trade_date"]
    logging.info("更新 %s 开始", table_name)
    # Check whether the target table already exists.
    has_table = engine_md.has_table(table_name)
    check_sqlite_db_primary_keys(table_name, primary_keys)

    if has_table:
        # ORDER BY keeps the import chronological: if a run is interrupted,
        # max(trade_date) then still marks a gap-free prefix for the next run.
        sql_str = """
           select cal_date
           FROM
            (
             select * from tushare_trade_date trddate
             where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
           )tt
           where (is_open=1
                  and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                  and exchange='SSE') ORDER BY cal_date""".format(table_name=table_name)
    else:
        sql_str = """
           SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
            AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
            AND exchange='SSE') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Fetch the trade dates to import.
        table = session.execute(sql_str)
        trade_date_list = [row[0] for row in table.fetchall()]

    trade_date_count, data_count_tot = len(trade_date_list), 0
    try:
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = pro.adj_factor(ts_code='', trade_date=trade_date)
            if data_df is not None and data_df.shape[0] > 0:
                data_count = bunch_insert(
                    data_df,
                    table_name=table_name,
                    dtype=DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR,
                    primary_keys=primary_keys)
                data_count_tot += data_count

                logging.info("%d/%d) %s 表 %s %d 条信息被更新", num, trade_date_count,
                             table_name, trade_date, data_count)
            else:
                logging.info("%d/%d) %s 表 %s 无数据信息可被更新", num, trade_date_count,
                             table_name, trade_date)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still stop the job.
        logger.exception("更新 %s 异常", table_name)
    finally:
        logging.info("%s 表 %d 条记录更新完成", table_name, data_count_tot)
Esempio n. 4
0
def import_tushare_namechange(chain_param=None):
    """
    Download listed-company name changes from tushare into ``tushare_stock_namechange``.

    The request window starts at the newest ``start_date`` already stored (or,
    when the table does not exist, at the earliest ``list_date`` in
    ``tushare_stock_info``) and ends today. Rows are written with
    ``bunch_insert_on_duplicate_update``. If this call created the table, it is
    converted to MyISAM and given the primary key (``ts_code``, ``start_date``).

    :param chain_param: not used by this function
    :return: None
    """
    table_name = 'tushare_stock_namechange'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """select max(start_date) start_date   FROM md_integration.tushare_stock_namechange"""
    else:
        sql_str = """select min(list_date) start_date   FROM md_integration.tushare_stock_info"""

    with with_db_session(engine_md) as session:
        # Both statements are single-row aggregates, so a scalar fetch is enough.
        first_date = session.scalar(sql_str)
    start_date = datetime_2_str(first_date, STR_FORMAT_DATE_TS)
    end_date = datetime_2_str(date.today(), STR_FORMAT_DATE_TS)

    try:
        data_df = pro.namechange(
            start_date=start_date,
            end_date=end_date,
            fields='ts_code,name,start_date,end_date,change_reason')
        # Guard against a None response as well as an empty frame.
        if data_df is not None and len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_NAMECHANGE)
            logging.info("更新 %s 结束 %d 条上市公司更名信息被更新", table_name, data_count)
        else:
            logging.info("无数据信息可被更新")
    finally:
        # The table was created by this run: switch the engine and add the primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `start_date` `start_date` DATE NOT NULL AFTER `ts_code`,
                ADD PRIMARY KEY (`ts_code`, `start_date`)""".format(
                table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `ts_code`, `start_date` 主键设置完成', table_name)
Esempio n. 5
0
def import_tushare_suspend(chain_param=None):
    """
    Download daily stock suspension records from tushare into ``tushare_stock_daily_suspend``.

    The open SSE calendar days in ``tushare_trade_date`` that are still
    missing are requested one at a time through ``pro.suspend`` and written
    with ``bunch_insert_p``. "Missing" means later than the newest
    ``suspend_date`` already stored, or every day when the table does not
    exist yet. The upper bound is yesterday before 16:00 database time and
    today afterwards.

    Errors raised while importing are logged and not re-raised.

    :param chain_param: not used by this function
    :return: None
    """
    table_name = 'tushare_stock_daily_suspend'
    logging.info("更新 %s 开始", table_name)

    # Check whether the target table already exists.
    has_table = engine_md.has_table(table_name)

    # The sub-query must reference THIS table; pointing it at another table
    # would silently compute the wrong date range.
    if has_table:
        # ORDER BY keeps the import chronological: if a run is interrupted,
        # max(suspend_date) then still marks a gap-free prefix for the next run.
        sql_str = """
                  select cal_date
                  FROM
                   (
                    select * from tushare_trade_date trddate
                    where( cal_date>(SELECT max(suspend_date) FROM {table_name} ))
                  )tt
                  where (is_open=1
                         and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                         and exchange='SSE') ORDER BY cal_date""".format(table_name=table_name)
    else:
        sql_str = """
                  SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
               AND exchange='SSE') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Fetch the trade dates to import.
        table = session.execute(sql_str)
        trade_date_list = [row[0] for row in table.fetchall()]

    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = pro.suspend(ts_code='',
                                  suspend_date=trade_date,
                                  resume_date='',
                                  fields='')
            if data_df is not None and len(data_df) > 0:
                data_count = bunch_insert_p(
                    data_df,
                    table_name=table_name,
                    dtype=DTYPE_TUSHARE_SUSPEND,
                    primary_keys=['ts_code', 'suspend_date'])
                logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新", num,
                             trade_date_list_len, trade_date, table_name,
                             data_count)
            else:
                # Report the date that had no suspensions (not the list length).
                logging.info("%s 当日无停牌股票", trade_date)

    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still stop the job.
        logger.exception('更新 %s 表异常', table_name)
def import_tushare_adj_factor(chain_param=None, ):
    """
    Download tushare adjustment factors day by day into ``tushare_stock_daily_adj_factor``.

    The open SSE calendar days in ``tushare_trade_date`` that are still
    missing are requested one at a time through ``pro.adj_factor`` and written
    with ``bunch_insert_on_duplicate_update``. "Missing" means later than the
    newest ``trade_date`` already stored, or every day when the table does not
    exist yet. If this call created the table, it is converted to MyISAM and
    given the primary key (``ts_code``, ``trade_date``).

    :param chain_param: not used by this function
    :return: None
    """
    table_name = 'tushare_stock_daily_adj_factor'
    logging.info("更新 %s 开始", table_name)
    # Check whether the target table already exists.
    has_table = engine_md.has_table(table_name)

    # The sub-query must reference THIS table; pointing it at another table
    # would silently compute the wrong date range.
    if has_table:
        # ORDER BY keeps the import chronological: if a run is interrupted,
        # max(trade_date) then still marks a gap-free prefix for the next run.
        sql_str = """
               select cal_date
               FROM
                (
                 select * from tushare_trade_date trddate
                 where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
               )tt
               where (is_open=1
                      and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                      and exchange='SSE') ORDER BY cal_date""".format(table_name=table_name)
    else:
        sql_str = """
               SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
            AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
            AND exchange='SSE') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Fetch the trade dates to import.
        table = session.execute(sql_str)
        trddate = [row[0] for row in table.fetchall()]

    try:
        for cal_date in trddate:
            trade_date = datetime_2_str(cal_date, STR_FORMAT_DATE_TS)
            data_df = pro.adj_factor(ts_code='', trade_date=trade_date)
            # Guard against a None response as well as an empty frame.
            if data_df is not None and len(data_df) > 0:
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR)
                logging.info(" %s 表自 %s 日起的 %d 条信息被更新", table_name, trade_date, data_count)
            else:
                logging.info("无数据信息可被更新")
    finally:
        # The table was created by this run: switch the engine and add the primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `trade_date` `trade_date` DATE NOT NULL AFTER `ts_code`,
                ADD PRIMARY KEY (`ts_code`, `trade_date`)""".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `ts_code`, `trade_date` 主键设置完成', table_name)
Esempio n. 7
0
def import_tushare_daily_basic(chain_param=None):
    """
    Download tushare daily basic indicators day by day into ``tushare_stock_daily_basic``.

    The open SSE calendar days in ``tushare_trade_date`` that are still
    missing are requested one at a time through ``invoke_daily_basic`` and
    written with ``bunch_insert``. "Missing" means later than the newest
    ``trade_date`` already stored, or every day when the table does not exist
    yet. The upper bound is yesterday before 16:00 database time and today
    afterwards.

    Errors raised while importing are logged and not re-raised.

    :param chain_param: not used by this function
    :return: None
    """
    table_name = 'tushare_stock_daily_basic'
    primary_keys = ["ts_code", "trade_date"]
    logging.info("更新 %s 开始", table_name)
    check_sqlite_db_primary_keys(table_name, primary_keys)
    has_table = engine_md.has_table(table_name)
    # The sub-query must reference THIS table; pointing it at another table
    # would silently compute the wrong date range.
    if has_table:
        # ORDER BY keeps the import chronological: if a run is interrupted,
        # max(trade_date) then still marks a gap-free prefix for the next run.
        sql_str = """
               select cal_date
               FROM
                (
                 select * from tushare_trade_date trddate
                 where( cal_date>(SELECT max(trade_date) FROM {table_name} ))
               )tt
               where (is_open=1
                      and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                      and exchange='SSE') ORDER BY cal_date""".format(table_name=table_name)
    else:
        sql_str = """
               SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
            AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
            AND exchange='SSE') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Fetch the trade dates to import.
        table = session.execute(sql_str)
        trade_date_list = [row[0] for row in table.fetchall()]

    try:
        for_count = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_daily_basic(ts_code='', trade_date=trade_date)
            if data_df is not None and data_df.shape[0] > 0:
                data_count = bunch_insert(
                    data_df, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_DAILY_BASIC,
                    primary_keys=primary_keys)

                logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新", num, for_count, trade_date, table_name, data_count)
            else:
                logging.info("%d/%d) %s 无数据信息可被更新",  num, for_count, trade_date)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still stop the job.
        logger.exception("更新 %s 表异常", table_name)
Esempio n. 8
0
def _test_account2():
    """Run a short-then-long episode on ``Account`` and plot what ``plot_data`` returns."""
    steps_per_sample = 60
    price_columns = ["open", "high", "low", "close", "amount", "volume"]
    from ibats_common.example.data import load_data
    md_df = load_data('RB.csv').set_index('trade_date')[price_columns]
    md_df.index = pd.DatetimeIndex(md_df.index)
    from ibats_common.backend.factor import get_factor, transfer_2_batch
    factors_df = get_factor(md_df, dropna=True)
    df_index, df_columns, data_arr_batch = transfer_2_batch(
        factors_df, n_step=steps_per_sample)
    # Keep only the market-data rows that have a matching factor batch.
    md_df = md_df.loc[df_index, :]
    # Split the step axis into 5 groups, then move that group axis to the end.
    shape = [
        data_arr_batch.shape[0],
        5,
        steps_per_sample // 5,
        data_arr_batch.shape[2],
    ]
    reshaped = data_arr_batch.reshape(shape)
    data_factors = np.transpose(reshaped, [0, 2, 3, 1])
    print(data_arr_batch.shape, '->', shape, '->', data_factors.shape)

    # Build the Account environment.
    env = Account(md_df, data_factors)
    env.reset()
    # Go short, then send action 3 for the first half of the data.
    env.step(2)
    half_length = md_df.shape[0] // 2
    for _ in range(half_length):
        env.step(3)
    # Go long, then keep sending action 3 until the episode reports done.
    _, _, done = env.step(1)
    while not done:
        _, _, done = env.step(3)

    # Show the result.
    reward_df = env.plot_data()
    value_s = reward_df.iloc[:, 0]
    from ibats_utils.mess import datetime_2_str
    from datetime import datetime
    from ibats_common.analysis.plot import plot_twin
    dt_str = datetime_2_str(datetime.now(), '%Y-%m-%d %H_%M_%S')
    plot_twin(value_s, md_df["close"], name=f'test_account_{dt_str}')
def import_tdx_tick():
    """
    Download stock tick data through the pytdx interface into ``pytdx_stock_tick``.

    The (ts_code, trade_date) pairs to fetch come from
    ``tushare_stock_daily_md`` for stocks without a ``delist_date``, after
    2000-01-24, excluding days listed in ``tushare_stock_daily_suspend``. When
    the target table exists, only days later than each stock's newest stored
    ``trade_date`` are fetched. Frames are buffered and flushed with
    ``bunch_insert_on_duplicate_update`` once 200000 rows have accumulated;
    whatever is still buffered is flushed on exit, also after an error.

    :return: None
    """
    table_name = 'pytdx_stock_tick'
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """SELECT md.ts_code, md.trade_date 
            FROM 
            tushare_stock_daily_md md 
            inner join 
            (
                select ts_code, delist_date from tushare_stock_info where tushare_stock_info.delist_date is null
            ) info
            on info.ts_code = md.ts_code
            
            left outer join tushare_stock_daily_suspend suspend 
            on md.ts_code =suspend.ts_code 
            and md.trade_date =suspend.suspend_date 
            
             left outer join
            (
                select ts_code,max(trade_date) trade_date_max from {table_name} group by ts_code
            ) m
            on md.ts_code = m.ts_code
            where md.trade_date>'2000-01-24' 
            and suspend.suspend_date is null 
            and (m.trade_date_max is null or md.trade_date>m.trade_date_max)""".format(
            table_name=table_name)
    else:
        sql_str = """
            SELECT md.ts_code, md.trade_date 
            FROM 
            tushare_stock_daily_md md 
            INNER JOIN 
            (
                SELECT ts_code, delist_date FROM tushare_stock_info WHERE tushare_stock_info.delist_date IS NULL
            ) info
            ON info.ts_code = md.ts_code
            
            LEFT OUTER JOIN tushare_stock_daily_suspend suspend 
            ON md.ts_code =suspend.ts_code 
            AND md.trade_date =suspend.suspend_date
            WHERE md.trade_date>'2000-01-24' 
            AND suspend.suspend_date IS NULL """

    with with_db_session(engine_md) as session:
        # Group the trade dates still to be fetched by stock code.
        table = session.execute(sql_str)
        code_date_range_dic = {}
        for ts_code, trade_date in table.fetchall():
            code_date_range_dic.setdefault(ts_code, []).append(trade_date)

    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(
        code_date_range_dic)
    logger.info('%d stocks will been import into pytdx_stock_tick', data_len)
    try:
        for num, (index_code,
                  trade_date_list) in enumerate(code_date_range_dic.items(),
                                                start=1):
            trade_date_list_len = len(trade_date_list)
            # Both progress counters are 1-based.
            for i, trade_date in enumerate(trade_date_list, start=1):
                logger.debug('%d/%d) %d/%d) %s [%s]', num, data_len, i,
                             trade_date_list_len, index_code, trade_date)
                # Only the first six characters of the code are passed on.
                data_df = invoke_tdx_tick(code=index_code[0:6],
                                          date_str=datetime_2_str(
                                              trade_date, STR_FORMAT_DATE_TS))
                # Buffer the data.
                if data_df is not None and data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)

                # Flush once the buffer exceeds the threshold.
                if data_count >= 200000:
                    data_df_all = pd.concat(data_df_list)
                    bunch_insert_on_duplicate_update(data_df_all, table_name,
                                                     engine_md,
                                                     DTYPE_TDX_STOCK_TICK)
                    all_data_count += data_count
                    data_df_list, data_count = [], 0

    finally:
        # Flush whatever is still buffered.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TDX_STOCK_TICK)
            all_data_count = all_data_count + data_count
        # Always report the total, even when the last flush left nothing pending.
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
Esempio n. 10
0
def import_tushare_fut_wsr(chain_param=None, ts_code_set=None):
    """
    Download futures warehouse-receipt data from tushare into ``tushare_fut_wsr``.

    The open days in ``tushare_future_trade_cal`` that are still missing are
    requested one at a time through ``invoke_fut_wsr``. "Missing" means later
    than the newest ``trade_date`` already stored, or every day after
    1995-04-14 when the table does not exist yet. Frames are buffered and
    flushed with ``bunch_insert_on_duplicate_update`` once 1000 rows have
    accumulated; whatever is still buffered is flushed on exit, also after an
    error.

    :param chain_param: not used by this function
    :param ts_code_set: not used by this function
    :return: None
    """
    table_name = 'tushare_fut_wsr'
    logging.info("更新 %s 开始", table_name)

    # Check whether the target table already exists.
    has_table = engine_md.has_table(table_name)
    if has_table:
        # ORDER BY keeps the import chronological: if a run is interrupted,
        # max(trade_date) then still marks a gap-free prefix for the next run.
        sql_str = """
                  select cal_date
                  FROM
                   (
                    select * from tushare_future_trade_cal trddate
                    where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
                  )tt
                  where (is_open=1
                         and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                         ) ORDER BY cal_date""".format(table_name=table_name)
    else:
        sql_str = """
                    SELECT cal_date FROM tushare_future_trade_cal trddate WHERE (trddate.is_open=1 
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
               AND cal_date>'19950414') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_future_trade_cal 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Fetch the trade dates to import.
        table = session.execute(sql_str)
        trddate = [row[0] for row in table.fetchall()]

    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(trddate)
    logger.info('%d 日的期货仓单数据将被导入数据库', data_len)
    fields = 'trade_date,symbol,fut_name,warehouse,wh_id,pre_vol,vol,vol_chg,area,year,grade,brand,place,pd,is_ct,unit,exchange'
    try:
        for cal_date in trddate:
            trade_date = datetime_2_str(cal_date, STR_FORMAT_DATE_TS)
            data_df = invoke_fut_wsr(trade_date=trade_date, fields=fields)
            # Work out the row count first so a None response cannot raise in the log call.
            row_count = 0 if data_df is None else data_df.shape[0]
            logging.info(" 提取 %s 日 %d 条期货仓单数据", trade_date, row_count)

            # Buffer the data.
            if row_count > 0:
                data_count += row_count
                data_df_list.append(data_df)

            # Flush once the buffer exceeds the threshold.
            if data_count >= 1000:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(data_df_all, table_name,
                                                 engine_md,
                                                 DTYPE_TUSHARE_FUTURE_WSR)
                logging.info(" 更新%s表%d条期货仓单数据", table_name, data_count)
                all_data_count += data_count
                data_df_list, data_count = [], 0

    finally:
        # Flush whatever is still buffered.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_FUTURE_WSR)
            all_data_count = all_data_count + data_count
        # Always report the total, even when the last flush left nothing pending.
        logging.info("更新 %s 结束 %d 条仓单信息被更新", table_name, all_data_count)
def import_tushare_stock_fina_indicator(chain_param=None, ts_code_set=None):
    """
    Download per-stock financial indicators from tushare into ``tushare_stock_fin_indicator``.

    For every stock in ``tushare_stock_info`` a date range is computed. It
    starts the day after the newest stored ``ann_date`` (or at ``list_date``
    when nothing is stored or the table is missing). It ends at the delisting
    date or the current cut-off day, whichever is earlier; the cut-off is
    yesterday before 16:00 database time and today afterwards. Each range is
    requested through ``invoke_fina_indicator``. While the oldest
    announcement returned is still later than the range start, the request is
    repeated with an earlier end date to page backwards. Frames are buffered
    and flushed with ``bunch_insert`` once 1000 rows have accumulated;
    whatever is still buffered is flushed on exit, also after an error.

    :param chain_param: not used by this function
    :param ts_code_set: optional collection of ts_codes; when given, only these
        stocks are processed
    :return: None
    """
    table_name = 'tushare_stock_fin_indicator'
    logging.info("更新 %s 开始", table_name)
    primary_keys = ['ts_code', 'ann_date', 'end_date']
    # Check whether the target table already exists.
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(ann_date, list_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                  tushare_stock_info info 
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(ann_date),1) ann_date 
                    FROM {table_name} GROUP BY ts_code) fina_indicator
                ON info.ts_code = fina_indicator.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
              (
                SELECT info.ts_code, list_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_info info 
              ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Map each stock to the (date_from, date_to) range that still has to be fetched.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    fields = (
        'ts_code', 'ann_date', 'end_date', 'eps', 'dt_eps', 'total_revenue_ps', 'revenue_ps', 'capital_rese_ps', 'surplus_rese_ps',
        'undist_profit_ps', 'extra_item', 'profit_dedt', 'gross_margin', 'current_ratio', 'quick_ratio', 'cash_ratio', 'invturn_days', 'arturn_days',
        'inv_turn', 'ar_turn', 'ca_turn', 'fa_turn', 'assets_turn', 'op_income', 'valuechange_income', 'interst_income', 'daa', 'ebit', 'ebitda', 'fcff',
        'fcfe', 'current_exint', 'noncurrent_exint', 'interestdebt', 'netdebt', 'tangible_asset', 'working_capital', 'networking_capital', 'invest_capital',
        'retained_earnings', 'diluted2_eps', 'bps', 'ocfps', 'retainedps', 'cfps', 'ebit_ps', 'fcff_ps', 'fcfe_ps', 'netprofit_margin', 'grossprofit_margin',
        'cogs_of_sales', 'expense_of_sales', 'profit_to_gr', 'saleexp_to_gr', 'adminexp_of_gr', 'finaexp_of_gr', 'impai_ttm', 'gc_of_gr', 'op_of_gr',
        'ebit_of_gr', 'roe', 'roe_waa', 'roe_dt', 'roa', 'npta', 'roic', 'roe_yearly', 'roa2_yearly', 'roe_avg', 'opincome_of_ebt', 'investincome_of_ebt',
        'n_op_profit_of_ebt', 'tax_to_ebt', 'dtprofit_to_profit', 'salescash_to_or', 'ocf_to_or', 'ocf_to_opincome', 'capitalized_to_da', 'debt_to_assets',
        'assets_to_eqt', 'dp_assets_to_eqt', 'ca_to_assets', 'nca_to_assets', 'tbassets_to_totalassets', 'int_to_talcap', 'eqt_to_talcapital', 'currentdebt_to_debt',
        'longdeb_to_debt', 'ocf_to_shortdebt', 'debt_to_eqt', 'eqt_to_debt', 'eqt_to_interestdebt', 'tangibleasset_to_debt', 'tangasset_to_intdebt',
        'tangibleasset_to_netdebt', 'ocf_to_debt', 'ocf_to_interestdebt', 'ocf_to_netdebt', 'ebit_to_interest', 'longdebt_to_workingcapital', 'ebitda_to_debt',
        'turn_days', 'roa_yearly', 'roa_dp', 'fixed_assets', 'profit_prefin_exp', 'non_op_profit', 'op_to_ebt', 'nop_to_ebt', 'ocf_to_profit', 'cash_to_liqdebt',
        'cash_to_liqdebt_withinterest', 'op_to_liqdebt', 'op_to_debt', 'roic_yearly', 'total_fa_trun', 'profit_to_op', 'q_opincome', 'q_investincome', 'q_dtprofit',
        'q_eps', 'q_netprofit_margin', 'q_gsprofit_margin', 'q_exp_to_sales', 'q_profit_to_gr', 'q_saleexp_to_gr', 'q_adminexp_to_gr', 'q_finaexp_to_gr',
        'q_impair_to_gr_ttm', 'q_gc_to_gr', 'q_op_to_gr', 'q_roe', 'q_dt_roe', 'q_npta', 'q_opincome_to_ebt', 'q_investincome_to_ebt', 'q_dtprofit_to_profit',
        'q_salescash_to_or', 'q_ocf_to_sales', 'q_ocf_to_or', 'basic_eps_yoy', 'dt_eps_yoy', 'cfps_yoy', 'op_yoy', 'ebt_yoy', 'netprofit_yoy', 'dt_netprofit_yoy',
        'ocf_yoy', 'roe_yoy', 'bps_yoy', 'assets_yoy', 'eqt_yoy', 'tr_yoy', 'or_yoy', 'q_gr_yoy', 'q_gr_qoq', 'q_sales_yoy', 'q_sales_qoq', 'q_op_yoy', 'q_op_qoq',
        'q_profit_yoy', 'q_profit_qoq', 'q_netprofit_yoy', 'q_netprofit_qoq', 'equity_yoy', 'rd_exp',
    )

    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(
        code_date_range_dic)
    logger.info('%d 财务指标信息将被插入 tushare_stock_fin_indicator 表', data_len)

    # Counts processed stocks; only used to cut the run short when DEBUG is set.
    debug_cycles = 1
    try:
        for num, (ts_code, (date_from,
                            date_to)) in enumerate(code_date_range_dic.items(),
                                                   start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code,
                         date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            data_df = invoke_fina_indicator(
                ts_code=ts_code,
                start_date=start_date_str,
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS),
                fields=fields)
            if data_df is not None and len(
                    data_df) > 0 and data_df['ann_date'].iloc[-1] is not None:
                # Page backwards: the last row carries the oldest announcement
                # seen so far; keep requesting the window before it.
                oldest_ann_date = try_2_date(data_df['ann_date'].iloc[-1])
                while oldest_ann_date > date_from:
                    df2 = invoke_fina_indicator(
                        ts_code=ts_code,
                        start_date=start_date_str,
                        end_date=datetime_2_str(
                            oldest_ann_date - timedelta(days=1),
                            STR_FORMAT_DATE_TS),
                        fields=fields)
                    if df2 is None or len(df2) == 0:
                        break
                    page_last_raw = df2['ann_date'].iloc[-1]
                    if page_last_raw is None:
                        # No usable date to page from; stop instead of looping forever.
                        break
                    page_oldest = try_2_date(page_last_raw)
                    if page_oldest >= oldest_ann_date:
                        # The page made no progress backwards; stop.
                        break
                    data_df = pd.concat([data_df, df2])
                    oldest_ann_date = page_oldest
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num,
                               data_len, ts_code, date_from, date_to)
                continue
            logger.info('%d/%d) %d 条 %s 财务指标已提取,起止时间 %s 和 %s', num,
                        data_len, data_df.shape[0], ts_code, date_from,
                        date_to)
            # Buffer the data.
            if data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Flush once the buffer exceeds the threshold.
            if data_count >= 1000 and len(data_df_list) > 0:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert(data_df_all,
                                          table_name=table_name,
                                          dtype=DTYPE_STOCK_FINA_INDICATOR,
                                          primary_keys=primary_keys)

                all_data_count += data_count
                logger.info('%d 条财务指标将数据插入 %s 表', data_count, table_name)
                data_df_list, data_count = [], 0
            # Debugging aid only.
            debug_cycles += 1
            if DEBUG and debug_cycles > 10:
                break
    finally:
        # Flush whatever is still buffered.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(data_df_all,
                                      table_name=table_name,
                                      dtype=DTYPE_STOCK_FINA_INDICATOR,
                                      primary_keys=primary_keys)

            all_data_count += data_count
        # Always report the total, even when the last flush left nothing pending.
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
Esempio n. 12
0
def import_tushare_hsgt_top10(chain_param=None):
    """
    Fetch ``invoke_hsgt_top10`` data day by day and store it in ``tushare_hsgt_top10``.

    The trade dates to fetch are read from ``tushare_trade_date`` (rows with
    ``exchange='SSE'`` and ``is_open=1``):

    * table exists: every date after the table's latest ``trade_date``;
    * table missing: every date from 2014-11-17 on.

    In both cases the range ends yesterday when the database clock is before
    16:00 and today otherwise.  Each date is requested for market types '1'
    and '3'; every non-empty result is written immediately.

    :param chain_param: not used by this function
    :return: None; errors raised while fetching/writing are logged, not re-raised
    """
    table_name = 'tushare_hsgt_top10'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('trade_date', Date),
        ('ts_code', String(20)),
        ('name', String(20)),
        ('close', DOUBLE),
        ('change', DOUBLE),
        ('rank', Integer),
        ('market_type', String(20)),
        ('amount', DOUBLE),
        ('net_amount', DOUBLE),
        ('buy', DOUBLE),
        ('sell', DOUBLE),
    ]

    # Pick the incremental or the full date query depending on whether the
    # target table already exists.
    has_table = engine_md.has_table(table_name)
    if has_table:
        # ORDER BY matters: the next run resumes from max(trade_date), so the
        # dates must be processed oldest first or a failure midway would leave
        # earlier dates permanently skipped.
        sql_str = """
                  select cal_date
                  FROM
                   (
                    select * from tushare_trade_date trddate
                    where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
                  )tt
                  where (is_open=1
                         and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                         and exchange='SSE')
                  order by cal_date""".format(table_name=table_name)
    else:
        sql_str = """
                  SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
               AND exchange='SSE'  AND cal_date>='2014-11-17') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Load the trade dates that still need to be fetched.
        table = session.execute(sql_str)
        trade_date_list = [row[0] for row in table.fetchall()]
    # Column name -> SQL type mapping handed to the insert helper.
    dtype = dict(param_list)

    trade_date_list_len = len(trade_date_list)
    try:
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            for market_type in ('1', '3'):
                data_df = invoke_hsgt_top10(trade_date=trade_date,
                                            market_type=market_type)
                if len(data_df) > 0:
                    data_count = bunch_insert_p(
                        data_df,
                        table_name=table_name,
                        dtype=dtype,
                        primary_keys=['ts_code', 'trade_date'])
                    logging.info("%d/%d) %s更新 %s 结束 %d 条信息被更新", num,
                                 trade_date_list_len, trade_date, table_name,
                                 data_count)
                else:
                    # An empty result ends this date: the remaining market
                    # types are not requested.
                    logging.info("无数据信息可被更新")
                    break
    except Exception:
        # Best effort: log and return, but let KeyboardInterrupt/SystemExit
        # propagate instead of being swallowed.
        logger.exception('更新 %s 表异常', table_name)
Esempio n. 13
0
def import_tushare_margin(chain_param=None):
    """
    Fetch ``invoke_margin`` data day by day and store it in ``tushare_stock_margin``.

    The trade dates to fetch are read from ``tushare_trade_date`` (rows with
    ``exchange='SSE'`` and ``is_open=1``):

    * table exists: every date after the table's latest ``trade_date``;
    * table missing: every date from 2010-03-31 on.

    In both cases the range ends yesterday when the database clock is before
    16:00 and today otherwise.  Each date is requested for exchange ids 'SSE'
    and 'SZSE'; every non-empty result is written immediately.

    :param chain_param: not used by this function
    :return: None; errors raised while fetching/writing are logged, not re-raised
    """
    table_name = 'tushare_stock_margin'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('trade_date', Date),
        ('exchange_id', String(20)),
        ('rzye', DOUBLE),
        ('rzmre', DOUBLE),
        ('rzche', DOUBLE),
        ('rqye', DOUBLE),
        ('rqmcl', DOUBLE),
        ('rzrqye', DOUBLE),
    ]

    # Pick the incremental or the full date query depending on whether the
    # target table already exists.
    has_table = engine_md.has_table(table_name)
    if has_table:
        # ORDER BY matters: the next run resumes from max(trade_date), so the
        # dates must be processed oldest first or a failure midway would leave
        # earlier dates permanently skipped.
        sql_str = """
                     select cal_date
                     FROM
                      (
                       select * from tushare_trade_date trddate
                       where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
                     )tt
                     where (is_open=1
                            and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                            and exchange='SSE')
                     order by cal_date""".format(table_name=table_name)
    else:
        sql_str = """
                     SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
                  AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                  AND exchange='SSE'  AND cal_date>='2010-03-31') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Load the trade dates that still need to be fetched.
        table = session.execute(sql_str)
        trade_date_list = [row[0] for row in table.fetchall()]
    # Column name -> SQL type mapping handed to the insert helper.
    dtype = dict(param_list)

    trade_date_list_len = len(trade_date_list)
    try:
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            for exchange_id in ('SSE', 'SZSE'):
                data_df = invoke_margin(trade_date=trade_date,
                                        exchange_id=exchange_id)
                if len(data_df) > 0:
                    data_count = bunch_insert(
                        data_df,
                        table_name=table_name,
                        dtype=dtype,
                        primary_keys=['exchange_id', 'trade_date'])
                    logging.info("%d/%d) %s %s 更新 %s 结束 %d 条信息被更新", num,
                                 trade_date_list_len, exchange_id, trade_date,
                                 table_name, data_count)
                else:
                    logging.info("%d/%d) %s %s 无数据信息可被更新 %s", num,
                                 trade_date_list_len, exchange_id, trade_date,
                                 table_name)
    except Exception:
        # Best effort: log and return, but let KeyboardInterrupt/SystemExit
        # propagate instead of being swallowed.
        logger.exception('更新 %s 表异常', table_name)
Esempio n. 14
0
def import_tushare_stock_fina_indicator(ts_code_set=None):
    """
    Back-fill ``tushare_stock_fin_indicator`` with data from ``invoke_fina_indicator``.

    For every stock in ``tushare_stock_info`` the requested range runs from
    3650 days before ``list_date`` up to ``list_date``.  A single request may
    not return the whole range, so while the last ``ann_date`` received is
    still later than the range start, another request is made that ends one
    day before that date.  The rows collected for one stock are written
    together before moving on to the next stock.

    :param ts_code_set: optional container of ts_code values; when given, only
        those stocks are processed
    :return: None
    """
    table_name = 'tushare_stock_fin_indicator'
    logging.info("更新 %s 开始", table_name)

    # Columns requested from the API.  The same names define the table
    # columns, so the dtype mapping below is derived from this tuple instead
    # of repeating the list.
    fields = (
        'ts_code', 'ann_date', 'end_date', 'eps', 'dt_eps', 'total_revenue_ps', 'revenue_ps', 'capital_rese_ps',
        'surplus_rese_ps', 'undist_profit_ps', 'extra_item', 'profit_dedt', 'gross_margin', 'current_ratio',
        'quick_ratio', 'cash_ratio', 'invturn_days', 'arturn_days', 'inv_turn', 'ar_turn', 'ca_turn', 'fa_turn',
        'assets_turn', 'op_income', 'valuechange_income', 'interst_income', 'daa', 'ebit', 'ebitda', 'fcff',
        'fcfe', 'current_exint', 'noncurrent_exint', 'interestdebt', 'netdebt', 'tangible_asset',
        'working_capital', 'networking_capital', 'invest_capital', 'retained_earnings', 'diluted2_eps', 'bps',
        'ocfps', 'retainedps', 'cfps', 'ebit_ps', 'fcff_ps', 'fcfe_ps', 'netprofit_margin', 'grossprofit_margin',
        'cogs_of_sales', 'expense_of_sales', 'profit_to_gr', 'saleexp_to_gr', 'adminexp_of_gr', 'finaexp_of_gr',
        'impai_ttm', 'gc_of_gr', 'op_of_gr', 'ebit_of_gr', 'roe', 'roe_waa', 'roe_dt', 'roa', 'npta', 'roic',
        'roe_yearly', 'roa2_yearly', 'roe_avg', 'opincome_of_ebt', 'investincome_of_ebt', 'n_op_profit_of_ebt',
        'tax_to_ebt', 'dtprofit_to_profit', 'salescash_to_or', 'ocf_to_or', 'ocf_to_opincome',
        'capitalized_to_da', 'debt_to_assets', 'assets_to_eqt', 'dp_assets_to_eqt', 'ca_to_assets',
        'nca_to_assets', 'tbassets_to_totalassets', 'int_to_talcap', 'eqt_to_talcapital', 'currentdebt_to_debt',
        'longdeb_to_debt', 'ocf_to_shortdebt', 'debt_to_eqt', 'eqt_to_debt', 'eqt_to_interestdebt',
        'tangibleasset_to_debt', 'tangasset_to_intdebt', 'tangibleasset_to_netdebt', 'ocf_to_debt',
        'ocf_to_interestdebt', 'ocf_to_netdebt', 'ebit_to_interest', 'longdebt_to_workingcapital',
        'ebitda_to_debt', 'turn_days', 'roa_yearly', 'roa_dp', 'fixed_assets', 'profit_prefin_exp',
        'non_op_profit', 'op_to_ebt', 'nop_to_ebt', 'ocf_to_profit', 'cash_to_liqdebt',
        'cash_to_liqdebt_withinterest', 'op_to_liqdebt', 'op_to_debt', 'roic_yearly', 'total_fa_trun',
        'profit_to_op', 'q_opincome', 'q_investincome', 'q_dtprofit', 'q_eps', 'q_netprofit_margin',
        'q_gsprofit_margin', 'q_exp_to_sales', 'q_profit_to_gr', 'q_saleexp_to_gr', 'q_adminexp_to_gr',
        'q_finaexp_to_gr', 'q_impair_to_gr_ttm', 'q_gc_to_gr', 'q_op_to_gr', 'q_roe', 'q_dt_roe', 'q_npta',
        'q_opincome_to_ebt', 'q_investincome_to_ebt', 'q_dtprofit_to_profit', 'q_salescash_to_or',
        'q_ocf_to_sales', 'q_ocf_to_or', 'basic_eps_yoy', 'dt_eps_yoy', 'cfps_yoy', 'op_yoy', 'ebt_yoy',
        'netprofit_yoy', 'dt_netprofit_yoy', 'ocf_yoy', 'roe_yoy', 'bps_yoy', 'assets_yoy', 'eqt_yoy',
        'tr_yoy', 'or_yoy', 'q_gr_yoy', 'q_gr_qoq', 'q_sales_yoy', 'q_sales_qoq', 'q_op_yoy', 'q_op_qoq',
        'q_profit_yoy', 'q_profit_qoq', 'q_netprofit_yoy', 'q_netprofit_qoq', 'equity_yoy', 'rd_exp',
    )
    # Every column is DOUBLE except the three key columns overridden below
    # (overriding an existing key keeps the column order of ``fields``).
    dtype = {name: DOUBLE for name in fields}
    dtype['ts_code'] = String(20)
    dtype['ann_date'] = Date
    dtype['end_date'] = Date

    sql_str = """SELECT ts_code,subdate(list_date,365*10) date_frm,list_date date_to FROM tushare_stock_info"""
    logger.warning('%s 打补丁,使用 tushare_stock_info 表进行计算需要补充提取的日期范围', table_name)

    with with_db_session(engine_md) as session:
        # ts_code -> (date_from, date_to) for every stock to process.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_len = len(code_date_range_dic)
    logger.info('%d data will been import into %s', data_len, table_name)

    def _flush(frame):
        # Single write path, so the per-stock write and the final write in
        # ``finally`` pass the same primary-key / table-creation options.
        count = bunch_insert_on_duplicate_update(
            frame,
            table_name,
            engine_md,
            dtype,
            myisam_if_create_table=True,
            primary_keys=['ts_code', 'ann_date', 'end_date'],
            schema=config.DB_SCHEMA_MD)
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, count)

    cycles = 1
    # Rows fetched but not yet written.  Bound before ``try`` so the
    # ``finally`` block can always inspect it, even when the loop never runs
    # or the very first request fails.
    data_df = None
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(
                code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code,
                         date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            df = invoke_fina_indicator(
                ts_code=ts_code,
                start_date=start_date_str,
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS),
                fields=fields)
            data_df = df
            if data_df is not None and len(data_df) > 0:
                # Keep requesting earlier rows until the range start is reached.
                while try_2_date(df['ann_date'].iloc[-1]) > date_from:
                    last_date_in_df_last = try_2_date(df['ann_date'].iloc[-1])
                    df2 = invoke_fina_indicator(
                        ts_code=ts_code,
                        start_date=start_date_str,
                        end_date=datetime_2_str(
                            last_date_in_df_last - timedelta(days=1),
                            STR_FORMAT_DATE_TS),
                        fields=fields)
                    if df2 is None or len(df2) == 0:
                        break
                    last_date_in_df_cur = try_2_date(df2['ann_date'].iloc[-1])
                    if last_date_in_df_cur >= last_date_in_df_last:
                        # No progress towards date_from: stop, otherwise the
                        # identical request would be repeated forever.
                        break
                    data_df = pd.concat([data_df, df2])
                    df = df2
                    logger.info('%d/%d) %d data of %s between %s and %s',
                                num, data_len, data_df.shape[0], ts_code,
                                date_from, date_to)
                # Write this stock's rows.
                _flush(data_df)
                data_df = None
            # Debug only: stop after a few stocks.
            cycles += 1
            if DEBUG and cycles > 10:
                break
    finally:
        # Write whatever was fetched but not yet stored (e.g. after an
        # exception raised while paging).
        if data_df is not None and len(data_df) > 0:
            _flush(data_df)
Esempio n. 15
0
def import_tushare_top_list(chain_param=None):
    """
    Fetch ``invoke_top_list`` data day by day and store it in ``tushare_stock_top_list``.

    The trade dates to fetch are read from ``tushare_trade_date`` (rows with
    ``exchange='SSE'`` and ``is_open=1``):

    * table exists: every date after the table's latest ``trade_date``;
    * table missing: every date after 2005-05-31.

    In both cases the range ends yesterday when the database clock is before
    16:00 and today otherwise.  Results are buffered and written once at
    least 2000 rows have accumulated; the remainder is written at the end.

    :param chain_param: not used by this function
    :return: None; errors raised inside the fetch loop are logged, not re-raised
    """
    table_name = 'tushare_stock_top_list'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    if has_table:
        # ORDER BY matters: the next run resumes from max(trade_date), so the
        # dates must be processed oldest first or a failure midway would leave
        # earlier dates permanently skipped.
        sql_str = """
               select cal_date
               FROM
                (
                 select * from tushare_trade_date trddate
                 where( cal_date>(SELECT max(trade_date) FROM {table_name} ))
               )tt
               where (is_open=1
                      and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                      and exchange='SSE')
               order by cal_date""".format(table_name=table_name)
    else:
        sql_str = """
               SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
            AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
            AND exchange='SSE'
            and cal_date>'2005-05-31') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Load the trade dates that still need to be fetched.
        table = session.execute(sql_str)
        trade_date_list = [row[0] for row in table.fetchall()]

    # Buffer of fetched frames, rows currently buffered, rows written so far.
    data_df_list, data_count, all_data_count = [], 0, 0
    trade_date_list_len = len(trade_date_list)
    try:
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_top_list(trade_date=trade_date)
            # Accumulate non-empty results.
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Write the buffer once it is large enough.
            if data_count >= 2000:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert(
                    data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_TOP_LIST,
                    primary_keys=['ts_code', 'trade_date', 'reason'])
                # Add the batch before logging so the reported running total
                # includes the rows just written.
                all_data_count += data_count
                logging.info("%d/%d) 更新 %s 结束 ,截至%s日 %d 条信息被更新",
                             num, trade_date_list_len, table_name, trade_date, all_data_count)
                data_df_list, data_count = [], 0
    except Exception:
        # Best effort: log and fall through to the final flush, but let
        # KeyboardInterrupt/SystemExit propagate after the flush.
        logger.exception('更新 %s 表异常', table_name)
    finally:
        # Write whatever is still buffered.
        if data_df_list:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(
                data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_TOP_LIST,
                primary_keys=['ts_code', 'trade_date', 'reason'])
            all_data_count = all_data_count + data_count

        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
Esempio n. 16
0
def import_repurchase(chain_param=None):
    """
    Fetch ``invoke_repurchase`` data day by day and store it in ``tushare_stock_repurchase``.

    The dates to fetch are read from ``tushare_trade_date`` (rows with
    ``exchange='SSE'`` and ``is_open=1``):

    * table exists: every date after the table's latest ``ann_date``;
    * table missing: every date after 2012-06-05.

    In both cases the range ends yesterday when the database clock is before
    16:00 and today otherwise.  Results are buffered and written once at
    least 1000 rows have accumulated; the remainder is written at the end.

    :param chain_param: not used by this function
    :return: None
    """
    table_name = 'tushare_stock_repurchase'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # Take care that the sub-query below references this function's own table
    # ({table_name}); pointing it at another table would yield a wrong range.
    if has_table:
        # Select cal_date explicitly: the result is consumed as row[0], and
        # with ``select *`` that would be whichever column happens to come
        # first in tushare_trade_date.  ORDER BY keeps the dates oldest first,
        # which the resume-from-max(ann_date) logic relies on.
        sql_str = """
               select cal_date from
                (select * from tushare_trade_date trddate
                 where (cal_date>(SELECT max(ann_date) FROM {table_name} ))
               )tt
               where (is_open=1
                      and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                      and exchange='SSE')
               order by cal_date""".format(table_name=table_name)
    else:
        sql_str = """
               SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
            AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
            AND exchange='SSE') 
            AND cal_date>'20120605'
            ORDER BY cal_date"""
        # The range comes from tushare_trade_date; the message now says so.
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Load the dates that still need to be fetched.
        table = session.execute(sql_str)
        ann_date_list = [row[0] for row in table.fetchall()]
    logging.info("%d 个交易日的回购信息将被更新", len(ann_date_list))

    # Buffer of fetched frames, rows currently buffered, rows handled so far.
    data_df_list, data_count, all_data_count = [], 0, 0
    try:
        for raw_date in ann_date_list:
            ann_date = datetime_2_str(raw_date, STR_FORMAT_DATE_TS)
            data_df = invoke_repurchase(ann_date=ann_date)
            if data_df is not None and data_df.shape[0] > 0:
                logging.info("提取%s日%d条回购信息", ann_date, data_df.shape[0])
                # Accumulate non-empty results.
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            else:
                logging.info("%s日无股票回购公告", ann_date)

            # Write the buffer once it is large enough.
            if data_count >= 1000:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(data_df_all, table_name,
                                                 engine_md,
                                                 DTYPE_TUSHARE_REPURCHASE)
                logger.info('%d 条股票回购信息被插入 tushare_stock_repurchase 表',
                            data_count)
                all_data_count += data_count
                data_df_list, data_count = [], 0

    finally:
        # Write whatever is still buffered.
        if data_df_list:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_REPURCHASE)
            all_data_count = all_data_count + data_count
        # Report the total even when nothing was left to flush.
        logging.info("更新 %s 结束 %d 条回购信息被更新", table_name, all_data_count)
Esempio n. 17
0
def import_tushare_stock_fina_mainbz(chain_param=None, ts_code_set=None):
    """
    Fetch ``invoke_fina_mainbz`` data per stock and store it in ``tushare_stock_fin_mainbz``.

    The date range of each stock is computed from ``tushare_stock_info``:

    * start: the day after the latest ``end_date`` already stored for that
      stock, or ``list_date`` minus 3650 days when nothing is stored (or the
      table does not exist yet);
    * end: ``delist_date`` when it is earlier than the cutoff, otherwise the
      cutoff, which is yesterday when the database clock is before 16:00 and
      today otherwise.

    Only stocks whose start is not after their end are processed.  Each stock
    is requested for types 'P' and 'D'; the type is stored in a
    ``market_type`` column.  A single request may not return the whole range,
    so further requests are made while the last ``end_date`` received is still
    later than the range start.  Results are buffered and written once at
    least 100 rows have accumulated; the remainder is written at the end.

    :param chain_param: not used by this function
    :param ts_code_set: optional container of ts_code values; when given, only
        those stocks are processed
    :return: None
    """
    table_name = 'tushare_stock_fin_mainbz'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # Pick the incremental or the full range query depending on whether the
    # target table already exists.
    if has_table:
        sql_str = """
               SELECT ts_code, date_frm, if(delist_date<end_date2, delist_date, end_date2) date_to
               FROM
               (
                   SELECT info.ts_code, ifnull(end_date, subdate(list_date,365*10)) date_frm, delist_date,
                   if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2
                   FROM 
                     tushare_stock_info info 
                   LEFT OUTER JOIN
                       (SELECT ts_code, adddate(max(end_date),1) end_date 
                       FROM {table_name} GROUP BY ts_code) mainbz
                   ON info.ts_code = mainbz.ts_code
               ) tt
               WHERE date_frm <= if(delist_date<end_date2, delist_date, end_date2) 
               ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
               SELECT ts_code, date_frm, if(delist_date<end_date2, delist_date, end_date2) date_to
               FROM
                 (
                   SELECT info.ts_code, subdate(list_date,365*10) date_frm, delist_date,
                   if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2
                   FROM tushare_stock_info info 
                 ) tt
               WHERE date_frm <= if(delist_date<end_date2, delist_date, end_date2) 
               ORDER BY ts_code """
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # ts_code -> (date_from, date_to) for every stock to process.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    # Buffer of fetched frames, rows currently buffered, rows handled so far.
    data_df_list, data_count, all_data_count = [], 0, 0
    data_len = len(code_date_range_dic)
    logger.info('%d data will been import into %s', data_len, table_name)

    def _flush(frames):
        # Single write path shared by the threshold flush and the final flush.
        return bunch_insert_on_duplicate_update(
            pd.concat(frames),
            table_name,
            engine_md,
            DTYPE_TUSHARE_STOCK_FINA_MAINBZ,
            myisam_if_create_table=True,
            primary_keys=['ts_code', 'end_date', 'bz_item'],
            schema=config.DB_SCHEMA_MD)

    cycles = 1
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(
                code_date_range_dic.items(), start=1):
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            for mainbz_type in ('P', 'D'):
                logger.debug('%d/%d) %s [%s - %s] %s', num, data_len, ts_code,
                             date_from, date_to, mainbz_type)
                data_df = invoke_fina_mainbz(
                    ts_code=ts_code,
                    start_date=start_date_str,
                    end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS),
                    type=mainbz_type)
                # Check for a missing result before touching the frame;
                # assigning a column to None would raise TypeError.
                if data_df is None:
                    logger.warning('%d/%d) %s 在 %s 到 %s 这段时间如数据', num,
                                   data_len, ts_code, date_from, date_to)
                    continue
                data_df['market_type'] = mainbz_type

                if len(data_df) > 0:
                    # Keep requesting earlier rows until the range start is reached.
                    while try_2_date(data_df['end_date'].iloc[-1]) > date_from:
                        last_date_in_df_last = try_2_date(
                            data_df['end_date'].iloc[-1])
                        df2 = invoke_fina_mainbz(
                            ts_code=ts_code,
                            start_date=start_date_str,
                            end_date=datetime_2_str(last_date_in_df_last,
                                                    STR_FORMAT_DATE_TS),
                            type=mainbz_type)
                        if df2 is None or len(df2) == 0:
                            break
                        df2['market_type'] = mainbz_type
                        last_date_in_df_cur = try_2_date(
                            df2['end_date'].iloc[-1])
                        if last_date_in_df_cur >= last_date_in_df_last:
                            # No progress towards date_from: stop, otherwise
                            # the identical request would be repeated forever.
                            break
                        data_df = pd.concat([data_df, df2])

                logger.info(
                    '%d/%d), 提取出%d 条 %s 的主营业务数据,类型为%s,起止时间为 %s 和 %s', num,
                    data_len, data_df.shape[0], ts_code, mainbz_type,
                    date_from, date_to)

                # Accumulate non-empty results.
                if data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)
                # Write the buffer once it is large enough.
                if data_count >= 100 and data_df_list:
                    _flush(data_df_list)
                    all_data_count += data_count
                    data_df_list, data_count = [], 0
            # Debug only: stop after a few stocks.
            cycles += 1
            if DEBUG and cycles > 2:
                break
    finally:
        # Write whatever is still buffered.
        if data_df_list:
            data_count = _flush(data_df_list)
            all_data_count = all_data_count + data_count
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
        # Report the accumulated total, as the other import functions do.
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
Esempio n. 18
0
def import_tushare_stock_cashflow(ts_code_set=None):
    """
    Import cash-flow statements from tushare into ``tushare_stock_cashflow``.

    This is a back-fill ("patch") job: for every stock in
    ``tushare_stock_info`` the window from ``list_date - 3650 days`` up to
    ``list_date`` is requested through ``invoke_cashflow`` and upserted into
    the table one stock at a time.

    :param ts_code_set: optional collection of ts_code values to restrict the
        import to; ``None`` imports every stock found in ``tushare_stock_info``
    :return: None
    """
    table_name = 'tushare_stock_cashflow'
    logging.info("更新 %s 开始", table_name)
    sql_str = """SELECT ts_code,subdate(list_date,365*10) date_frm,list_date date_to FROM tushare_stock_info;"""
    logger.warning('%s 打补丁,使用 tushare_stock_info 表进行计算需要补充提取的日期范围', table_name)
    with with_db_session(engine_md) as session:
        # Date window to request for every stock: {ts_code: (date_from, date_to)}
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_len = len(code_date_range_dic)
    logger.info('%d data will been import into %s', data_len, table_name)

    # Rows fetched for the current stock that have not been stored yet.
    # Initialised here so the ``finally`` block is safe even when the loop
    # never runs or fails before the first fetch.
    data_df = None
    cycles = 1
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(
                code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code,
                         date_from, date_to)
            df = invoke_cashflow(
                ts_code=ts_code,
                start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            data_df = df
            if data_df is not None and len(data_df) > 0:
                # NOTE(review): the paging below assumes each response is
                # capped and ordered newest-first, so the last row carries the
                # oldest ann_date -- confirm against the tushare API.
                while try_2_date(df['ann_date'].iloc[-1]) > date_from:
                    last_date_in_df_last = try_2_date(df['ann_date'].iloc[-1])
                    df2 = invoke_cashflow(
                        ts_code=ts_code,
                        start_date=datetime_2_str(date_from,
                                                  STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(
                            last_date_in_df_last - timedelta(days=1),
                            STR_FORMAT_DATE_TS))
                    if df2 is None or len(df2) == 0:
                        break
                    last_date_in_df_cur = try_2_date(df2['ann_date'].iloc[-1])
                    # Stop as soon as a page makes no progress towards
                    # date_from; otherwise the same request would repeat
                    # forever.
                    if last_date_in_df_cur >= last_date_in_df_last:
                        break
                    data_df = pd.concat([data_df, df2])
                    df = df2
                    logger.info('%d/%d) %d data of %s between %s and %s',
                                num, data_len, data_df.shape[0], ts_code,
                                date_from, date_to)
                # Store this stock's rows
                data_count = bunch_insert_on_duplicate_update(
                    data_df,
                    table_name,
                    engine_md,
                    DTYPE_TUSHARE_STOCK_CASHFLOW,
                    myisam_if_create_table=True,
                    primary_keys=['ts_code', 'ann_date'],
                    schema=config.DB_SCHEMA_MD)
                logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
                # Mark as stored so the finally block does not write it again
                data_df = None
            # Debug only: stop after a handful of stocks
            cycles += 1
            if DEBUG and cycles > 10:
                break
    finally:
        # Only holds data when an exception interrupted the loop before the
        # current stock was stored; try once more so the rows are not lost.
        if data_df is not None and len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df,
                table_name,
                engine_md,
                DTYPE_TUSHARE_STOCK_CASHFLOW,
                myisam_if_create_table=True,
                primary_keys=['ts_code', 'ann_date'],
                schema=config.DB_SCHEMA_MD)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
Esempio n. 19
0
def import_tushare_stock_index_daily(chain_param=None, ts_code_set=None):
    """
    Import daily index quotes from tushare into ``tushare_stock_index_daily_md``.

    The date window per index is derived from ``tushare_stock_index_basic``:
    it starts the day after the latest stored ``trade_date`` (or at
    ``base_date`` when nothing is stored / the table does not exist) and ends
    at the earlier of ``exp_date`` and the latest finished day (yesterday
    before 16:00 database time, otherwise today).

    :param chain_param: not used by this function
    :param ts_code_set: optional collection of ts_code values to restrict the
        import to; ``None`` imports every index returned by the query
    :return: None
    """
    table_name = 'tushare_stock_index_daily_md'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # Incremental window when the target table exists, full history otherwise
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
            (
            SELECT info.ts_code, ifnull(trade_date, base_date) date_frm, exp_date,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM 
                tushare_stock_index_basic info 
            LEFT OUTER JOIN
                (SELECT ts_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY ts_code) daily
            ON info.ts_code = daily.ts_code
            ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date) 
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
              (
                SELECT info.ts_code, base_date date_frm, exp_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_index_basic info 
              ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date) 
            ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Date window to request for every index: {ts_code: (date_from, date_to)}
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_df_list, data_count, all_data_count = [], 0, 0
    data_len = len(code_date_range_dic)
    logger.info('%d stocks will been import into tushare_stock_index_daily_md',
                data_len)
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(
                code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code,
                         date_from, date_to)
            data_df = invoke_index_daily(
                ts_code=ts_code,
                start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            if data_df is not None and data_df.shape[0] > 0:
                # NOTE(review): the paging below assumes each response is
                # capped and ordered newest-first, so the last row carries the
                # oldest trade_date -- confirm against the tushare API.
                while try_2_date(data_df['trade_date'].iloc[-1]) > date_from:
                    last_date_in_df_last = try_2_date(
                        data_df['trade_date'].iloc[-1])
                    df2 = invoke_index_daily(
                        ts_code=ts_code,
                        start_date=datetime_2_str(date_from,
                                                  STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(
                            last_date_in_df_last - timedelta(days=1),
                            STR_FORMAT_DATE_TS))
                    # The row count is what matters here; the previous
                    # ``len(df2 > 0)`` compared every cell against 0 instead.
                    if df2 is None or len(df2) == 0:
                        break
                    last_date_in_df_cur = try_2_date(
                        df2['trade_date'].iloc[-1])
                    # Stop when a page makes no progress towards date_from,
                    # otherwise the same request would repeat forever.
                    if last_date_in_df_cur >= last_date_in_df_last:
                        break
                    data_df = pd.concat([data_df, df2])
                    logger.info('%d/%d) %d data of %s between %s and %s',
                                num, data_len, data_df.shape[0], ts_code,
                                date_from, date_to)

                # Buffer the rows until the batch threshold is reached
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # Debug only: stop after a handful of indexes
            if DEBUG and len(data_df_list) > 5:
                break

            # Flush the buffer once enough rows have been collected
            if data_count >= 500:
                data_df_all = pd.concat(data_df_list)
                bunch_insert(data_df_all,
                             table_name=table_name,
                             dtype=DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD,
                             primary_keys=["ts_code", "trade_date"])
                all_data_count += data_count
                data_df_list, data_count = [], 0

    finally:
        # Store whatever is still buffered, also when the loop was interrupted
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(data_df_all,
                                      table_name=table_name,
                                      dtype=DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD,
                                      primary_keys=["ts_code", "trade_date"])
            all_data_count += data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
Esempio n. 20
0
def import_tushare_moneyflow_hsgt(chain_param=None):
    """
    Import the daily HSGT money-flow figures into ``tushare_moneyflow_hsgt``.

    The SSE open days to fetch are read from ``tushare_trade_date``: every
    open day after the latest stored ``trade_date`` when the table exists,
    otherwise every open day from 2014-11-17 on. The upper bound is yesterday
    before 16:00 database time, otherwise today. Each day is fetched with
    ``invoke_moneyflow_hsgt`` and stored immediately.

    Errors are logged and end the run; they are not re-raised.

    :param chain_param: not used by this function
    :return: None
    """
    table_name = 'tushare_moneyflow_hsgt'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('trade_date', Date),
        ('ggt_ss', DOUBLE),
        ('ggt_sz', DOUBLE),
        ('hgt', DOUBLE),
        ('sgt', DOUBLE),
        ('north_money', DOUBLE),
        ('south_money', DOUBLE),
    ]

    has_table = engine_md.has_table(table_name)

    # Take care that the sub-query references this function's own table;
    # pointing it at another table silently produces a wrong date range.
    if has_table:
        # Rows are stored day by day and the next run resumes from
        # max(trade_date), so the days must be processed in ascending order.
        sql_str = """
               select cal_date            
               FROM
                (
                 select * from tushare_trade_date trddate 
                 where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
               )tt
               where (is_open=1 
                      and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                      and exchange='SSE') 
               ORDER BY cal_date""".format(table_name=table_name)
    else:
        sql_str = """
               SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
            AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
            AND exchange='SSE'  AND cal_date>='2014-11-17') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Trading days that still have to be fetched
        table = session.execute(sql_str)
        trade_date_list = [row[0] for row in table.fetchall()]
    # Column name -> SQL type mapping handed to the insert helper
    dtype = dict(param_list)

    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_moneyflow_hsgt(trade_date=trade_date)
            # A missing result for one day must not abort the remaining days
            if data_df is None or len(data_df) == 0:
                logging.info("无数据信息可被更新")
                continue
            data_count = bunch_insert_p(data_df,
                                        table_name=table_name,
                                        dtype=dtype,
                                        primary_keys=['trade_date'])
            logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新", num,
                         trade_date_list_len, trade_date, table_name,
                         data_count)
    except Exception:
        # Deliberately best-effort, but KeyboardInterrupt / SystemExit are
        # no longer swallowed as they were by the former bare ``except``.
        logger.exception('更新 %s 表异常', table_name)
Esempio n. 21
0
def import_tushare_dividend(chain_param=None):
    """
    Import dividend / bonus-share announcements into ``tushare_stock_dividend``.

    The SSE open days to fetch are read from ``tushare_trade_date``: every
    open day after the latest stored ``ann_date`` when the table exists,
    otherwise every open day. The upper bound is yesterday before 16:00
    database time, otherwise today. Each day is fetched with
    ``invoke_dividend``; rows are buffered and written in batches.

    When the table is created by this run it is converted to MyISAM and the
    primary key (``ts_code``, ``ann_date``) is added afterwards.

    :param chain_param: not used by this function
    :return: None
    """
    table_name = 'tushare_stock_dividend'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)

    if has_table:
        # The next run resumes from max(ann_date), so the days must be
        # processed in ascending order.
        sql_str = """
               select cal_date  ann_date          
               FROM
                (
                 select * from tushare_trade_date trddate 
                 where( cal_date>(SELECT max(ann_date) FROM  {table_name}))
               )tt
               where (is_open=1 
                      and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                      and exchange='SSE') 
               ORDER BY cal_date""".format(table_name=table_name)
    else:
        sql_str = """
               SELECT cal_date ann_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
            AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
            AND exchange='SSE') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Announcement days that still have to be fetched
        table = session.execute(sql_str)
        trddate = [row[0] for row in table.fetchall()]

    # Columns requested from the API. Built by implicit concatenation so the
    # value contains no embedded whitespace (a backslash continuation inside
    # the literal would include the indentation of the following line).
    fields = ('ts_code,end_date,ann_date,div_proc,stk_div,stk_bo_rate,stk_co_rate,cash_div,cash_div_tax,'
              'record_date,ex_date,pay_date,div_listdate,imp_ann_date,base_date,base_share')

    data_df_list, data_count, all_data_count = [], 0, 0
    try:
        for cal_date in trddate:
            ann_date = datetime_2_str(cal_date, STR_FORMAT_DATE_TS)
            data_df = invoke_dividend(ann_date=ann_date, fields=fields)
            # Guard against a missing result before touching its length
            row_count = 0 if data_df is None else data_df.shape[0]
            logging.info(" %s 日 提取 %d 条分红送股信息", ann_date, row_count)

            # Buffer the rows until the batch threshold is reached
            if row_count > 0:
                data_count += row_count
                data_df_list.append(data_df)

            # Debug only: stop after a handful of non-empty days
            if DEBUG and len(data_df_list) > 5:
                break
            # Flush the buffer once enough rows have been collected
            if data_count >= 500:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(data_df_all, table_name,
                                                 engine_md,
                                                 DTYPE_TUSHARE_DIVIDEND)
                all_data_count += data_count
                data_df_list, data_count = [], 0

    finally:
        # Store whatever is still buffered, also when the loop was interrupted
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_DIVIDEND)
            # Report the total of the whole run, not only the last batch
            all_data_count += data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
        if not has_table and engine_md.has_table(table_name):
            # The table was created by this run: switch engine and add the key
            alter_table_2_myisam(engine_md, [table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `ann_date` `ann_date` DATE NOT NULL AFTER `ts_code`,
                ADD PRIMARY KEY (`ts_code`, `ann_date`)""".format(
                table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `ts_code`, `ann_date` 主键设置完成', table_name)
def import_tushare_stock_top10_holders(ts_code_set=None, chain_param=None):
    """
    Import top-10 shareholder records into ``tushare_stock_top10_holders``.

    The date window per stock is derived from ``tushare_stock_info``: it
    starts the day after the latest stored ``ann_date`` (or 3650 days before
    ``list_date`` when nothing is stored / the table does not exist) and ends
    at the earlier of ``delist_date`` and the latest finished day (yesterday
    before 16:00 database time, otherwise today). Rows are buffered and
    written in batches.

    :param ts_code_set: optional collection of ts_code values to restrict the
        import to; ``None`` imports every stock returned by the query
    :param chain_param: not used by this function
    :return: None
    """
    table_name = 'tushare_stock_top10_holders'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # Incremental window when the target table exists, full history otherwise
    if has_table:
        sql_str = """
               SELECT ts_code, date_frm, if(delist_date<end_date2, delist_date, end_date2) date_to
               FROM
               (
                   SELECT info.ts_code, ifnull(end_date, subdate(list_date,365*10)) date_frm, delist_date,
                   if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2
                   FROM 
                     tushare_stock_info info 
                   LEFT OUTER JOIN
                       (SELECT ts_code, adddate(max(ann_date),1) end_date 
                       FROM {table_name} GROUP BY ts_code) top10_holders
                   ON info.ts_code = top10_holders.ts_code
               ) tt
               WHERE date_frm <= if(delist_date<end_date2, delist_date, end_date2) 
               ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
               SELECT ts_code, date_frm, if(delist_date<end_date2, delist_date, end_date2) date_to
               FROM
                 (
                   SELECT info.ts_code, subdate(list_date,365*10) date_frm, delist_date,
                   if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2
                   FROM tushare_stock_info info 
                 ) tt
               WHERE date_frm <= if(delist_date<end_date2, delist_date, end_date2) 
               ORDER BY ts_code """
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Date window to request for every stock: {ts_code: (date_from, date_to)}
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_df_list, data_count, all_data_count = [], 0, 0
    data_len = len(code_date_range_dic)
    # Report the table that is actually written to
    logger.info('%d stocks will been import into %s', data_len, table_name)

    cycles = 1
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            data_df = invoke_top10_holders(
                ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            if data_df is not None and len(data_df) > 0 and data_df['ann_date'].iloc[-1] is not None:
                # NOTE(review): the paging below assumes each response is
                # capped and ordered newest-first, so the last row carries the
                # oldest date -- confirm against the tushare API.
                last_date_in_df_last = try_2_date(data_df['ann_date'].iloc[-1])
                while last_date_in_df_last > date_from:
                    df2 = invoke_top10_holders(
                        ts_code=ts_code,
                        start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(last_date_in_df_last - timedelta(days=1),
                                                STR_FORMAT_DATE_TS))
                    if df2 is None or len(df2) == 0:
                        break
                    # Track progress by ann_date, falling back to end_date
                    # when the last row has no announcement date.
                    date_col = 'ann_date' if df2['ann_date'].iloc[-1] is not None else 'end_date'
                    last_date_in_df_cur = try_2_date(df2[date_col].iloc[-1])
                    if last_date_in_df_cur == last_date_in_df_last:
                        # No progress: the page ends where the previous one did
                        break
                    data_df = pd.concat([data_df, df2])
                    last_date_in_df_last = try_2_date(data_df[date_col].iloc[-1])
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, date_from, date_to)
            else:
                logger.info('整体进度:%d/%d), %d 条 %s 前10股东被提取,起止时间为 %s 和 %s', num, data_len, data_df.shape[0], ts_code,
                            date_from, date_to)
            # Buffer the rows until the batch threshold is reached
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Flush the buffer once enough rows have been collected
            if data_count >= 500 and len(data_df_list) > 0:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_TOP10_HOLDERS)
                all_data_count += data_count
                data_df_list, data_count = [], 0
            # Debug only: stop after a handful of stocks
            cycles += 1
            if DEBUG and cycles > 25:
                break
    finally:
        # Store whatever is still buffered, also when the loop was interrupted
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md,
                                                          DTYPE_TUSHARE_STOCK_TOP10_HOLDERS)
            all_data_count = all_data_count + data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
def import_tushare_stock_fina_audit(chain_param=None, ts_code_set=None):
    """
    Import financial audit opinions from tushare into ``tushare_stock_fin_audit``.

    The date window per stock is derived from ``tushare_stock_info``: it
    starts the day after the latest stored ``ann_date`` (or a fixed number of
    days before ``list_date`` when nothing is stored / the table does not
    exist) and ends at the earlier of ``delist_date`` and the latest finished
    day (yesterday before 16:00 database time, otherwise today). Rows are
    upserted one stock at a time.

    :param chain_param: not used by this function
    :param ts_code_set: optional collection of ts_code values to restrict the
        import to; ``None`` imports every stock returned by the query
    :return: None
    """
    table_name = 'tushare_stock_fin_audit'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('ts_code', String(20)),
        ('ann_date', Date),
        ('end_date', Date),
        ('audit_result', Text),
        ('audit_fees', DOUBLE),
        ('audit_agency', String(100)),
        ('audit_sign', String(100)),
    ]
    # One key definition for every write to this table
    primary_keys = ['ts_code', 'ann_date', 'end_date']

    has_table = engine_md.has_table(table_name)
    # Incremental window when the target table exists, full history otherwise
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(ann_date, subdate(list_date,365*8)) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                  tushare_stock_info info 
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(ann_date),1) ann_date 
                    FROM {table_name} GROUP BY ts_code) fina_audit
                ON info.ts_code = fina_audit.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
              (
                SELECT info.ts_code, subdate(list_date,365*10) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_info info 
              ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code DESC """
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Date window to request for every stock: {ts_code: (date_from, date_to)}
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }
    # Column name -> SQL type mapping handed to the insert helper
    dtype = dict(param_list)

    data_len = len(code_date_range_dic)
    logger.info('%d data will been import into %s', data_len, table_name)

    # Rows fetched for the current stock that have not been stored yet
    data_df = None
    cycles = 1
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            df = invoke_fina_audit(ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                                   end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            data_df = df
            if data_df is not None and len(data_df) > 0:
                # NOTE(review): the paging below assumes each response is
                # capped and ordered newest-first, so the last row carries the
                # oldest ann_date -- confirm against the tushare API.
                while try_2_date(df['ann_date'].iloc[-1]) > date_from:
                    last_date_in_df_last = try_2_date(df['ann_date'].iloc[-1])
                    df2 = invoke_fina_audit(ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                                            end_date=datetime_2_str(
                                                last_date_in_df_last - timedelta(days=1),
                                                STR_FORMAT_DATE_TS))
                    if df2 is None or len(df2) == 0:
                        break
                    last_date_in_df_cur = try_2_date(df2['ann_date'].iloc[-1])
                    # Stop when a page makes no progress towards date_from,
                    # otherwise the same request would repeat forever.
                    if last_date_in_df_cur >= last_date_in_df_last:
                        break
                    data_df = pd.concat([data_df, df2])
                    df = df2
                    logger.info('%d/%d) %d data of %s between %s and %s', num, data_len, data_df.shape[0], ts_code,
                                date_from, date_to)
                # Store this stock's rows
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md, dtype,
                    myisam_if_create_table=True, primary_keys=primary_keys, schema=config.DB_SCHEMA_MD)
                logging.info("成功更新 %s 结束 %d 条信息被更新", table_name, data_count)
                # Mark as stored so the finally block does not write it again
                data_df = None

            # Debug only: stop after a handful of stocks
            cycles += 1
            if DEBUG and cycles > 10:
                break
    finally:
        # Only holds data when an exception interrupted the loop before the
        # current stock was stored; try once more so the rows are not lost.
        if data_df is not None and len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, dtype,
                myisam_if_create_table=True, primary_keys=primary_keys, schema=config.DB_SCHEMA_MD)
            logging.info("成功更新 %s 结束 %d 条信息被更新", table_name, data_count)
def import_tushare_fut_holding(chain_param=None, ts_code_set=None):
    """
    Import the daily futures trading / holding rankings into ``tushare_future_holding``.

    The open days to fetch are read from ``tushare_future_trade_cal``: every
    open day after the latest stored ``trade_date`` when the table exists,
    otherwise every open day after 2002-01-06. The upper bound is yesterday
    before 16:00 database time, otherwise today. For each day every exchange
    in the fixed exchange list is requested through ``invoke_fut_holding``;
    rows are buffered and written in batches.

    :param chain_param: not used by this function
    :param ts_code_set: not used by this function
    :return: None
    """
    table_name = 'tushare_future_holding'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    if has_table:
        # The next run resumes from max(trade_date), so the days must be
        # processed in ascending order.
        sql_str = """
                  select cal_date            
                  FROM
                   (
                    select * from tushare_future_trade_cal trddate 
                    where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
                  )tt
                  where (is_open=1 
                         and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                         ) 
                  ORDER BY cal_date""".format(table_name=table_name)
    else:
        sql_str = """
                    SELECT cal_date FROM tushare_future_trade_cal trddate WHERE (trddate.is_open=1 
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
               AND cal_date>'20020106') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_cal 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Trading days that still have to be fetched
        table = session.execute(sql_str)
        trddate = [row[0] for row in table.fetchall()]
    # Exchanges queried for every trading day
    exchange_list = ['DCE', 'CZCE', 'SHFE', 'CFFEX', 'INE']
    data_df_list, data_count, all_data_count = [], 0, 0
    data_len = len(trddate)
    logger.info('%d 日的每日成交持仓排名数据将被导入数据库', data_len)

    try:
        for cal_date in trddate:
            trade_date = datetime_2_str(cal_date, STR_FORMAT_DATE_TS)
            for exchange in exchange_list:
                data_df = invoke_fut_holding(trade_date=trade_date,
                                             exchange=exchange)
                # Guard against a missing result before touching its shape
                row_count = 0 if data_df is None else data_df.shape[0]
                logging.info(" 提取%s  %s 日 %d 条每日成交持仓排名数据", exchange,
                             trade_date, row_count)

                # Buffer the rows until the batch threshold is reached
                if row_count > 0:
                    data_count += row_count
                    data_df_list.append(data_df)

                # Flush the buffer once enough rows have been collected
                if data_count >= 20000:
                    data_df_all = pd.concat(data_df_list)
                    bunch_insert_on_duplicate_update(
                        data_df_all, table_name, engine_md,
                        DTYPE_TUSHARE_FUTURE_HOLDING)
                    logging.info(" 更新%s表%d条期货仓单数据", table_name, data_count)
                    all_data_count += data_count
                    data_df_list, data_count = [], 0

    finally:
        # Store whatever is still buffered, also when the loop was interrupted
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md,
                DTYPE_TUSHARE_FUTURE_HOLDING)
            all_data_count = all_data_count + data_count
            logging.info("更新 %s 结束 %d 条仓单信息被更新", table_name, all_data_count)
def import_tushare_stock_forecast(chain_param=None, ts_code_set=None):
    """
    Fetch earnings-forecast rows per stock and upsert them into
    ``tushare_stock_forecast``.

    The date range requested for each ts_code is computed in SQL from
    ``tushare_stock_info``. When the target table already exists the range
    starts the day after the latest stored ``ann_date`` (falling back to
    ``list_date``); otherwise it starts at ``list_date``. The upper bound is
    ``delist_date`` when that is earlier than the end date, where the end date
    is today, or yesterday when the database clock is before 16:00.

    Fetched frames are buffered and written once at least 1000 rows have been
    collected. Whatever is still buffered is written in the ``finally`` clause,
    so rows already fetched are not lost when an exception interrupts the loop.

    :param chain_param: not referenced inside this function
    :param ts_code_set: optional container of ts_code values; when given, only
        those codes are processed
    :return: None
    """
    table_name = 'tushare_stock_forecast'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # Resume from the stored data when the target table exists, otherwise
    # start every stock from its listing date.
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(ann_date, list_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                  tushare_stock_info info 
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(ann_date),1) ann_date 
                    FROM {table_name} GROUP BY ts_code) forecast
                ON info.ts_code = forecast.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
              (
                SELECT info.ts_code, list_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_info info 
              ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code DESC """
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        # ts_code -> (date_from, date_to), optionally restricted to ts_code_set
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    def _flush(frames):
        """Upsert the buffered frames and return the helper's result."""
        return bunch_insert_on_duplicate_update(
            pd.concat(frames),
            table_name,
            engine_md,
            DTYPE_TUSHARE_STOCK_FORECAST,
            myisam_if_create_table=True,
            primary_keys=['ts_code', 'ann_date'],
            schema=config.DB_SCHEMA_MD)

    data_df_list, data_count, all_data_count = [], 0, 0
    data_len = len(code_date_range_dic)
    logger.info('%d stocks will be imported into %s', data_len, table_name)

    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(
                code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code,
                         date_from, date_to)
            data_df = invoke_forecast(
                ts_code=ts_code,
                start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))

            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num,
                               data_len, ts_code, date_from, date_to)
            else:
                logger.info('整体进度:%d/%d), %d 条 %s 业绩预告数据被提取,起止时间为 %s 和 %s',
                            num, data_len, data_df.shape[0], ts_code,
                            date_from, date_to)
                # Buffer non-empty results
                if data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)

                # Write the buffer once enough rows have been collected
                if data_count >= 1000:
                    _flush(data_df_list)
                    logger.info('%d 条业绩预告数据被插入 %s 表', data_count, table_name)
                    all_data_count += data_count
                    data_df_list, data_count = [], 0

            # Debug only: stop after 10 stocks, whether or not they had data
            if DEBUG and num >= 10:
                break
    finally:
        # Write whatever is still buffered, also when the loop was interrupted
        if len(data_df_list) > 0:
            data_count = _flush(data_df_list)
            all_data_count = all_data_count + data_count
        # Always report the total, not only when a last partial batch existed
        logging.info("更新 %s 结束 %d 条业绩预告信息被更新", table_name, all_data_count)
Esempio n. 26
0
def import_tushare_stock_balancesheet(ts_code_set=None):
    """
    Back-fill balance-sheet rows for the ten years before each stock's listing
    date into ``tushare_stock_balancesheet``.

    For every ts_code in ``tushare_stock_info`` the range
    ``[list_date - 3650 days, list_date]`` is requested. When the last row of a
    result is still later than the range start, further requests are issued
    with the end date moved to the day before that row, until the range is
    covered, a request returns nothing, or a request makes no progress. Each
    stock's rows are written as soon as they have been collected.

    If an exception interrupts the work on a stock, the rows already collected
    for that stock are written in the ``finally`` clause before the exception
    propagates.

    :param ts_code_set: optional container of ts_code values; when given, only
        those codes are processed
    :return: None
    """
    table_name = 'tushare_stock_balancesheet'
    logging.info("更新 %s 开始", table_name)
    # Column name -> SQL type used when writing the frame
    param_list = [
        ('ts_code', String(20)),
        ('ann_date', Date),
        ('f_ann_date', Date),
        ('end_date', Date),
        ('report_type', DOUBLE),
        ('comp_type', DOUBLE),
        ('total_share', DOUBLE),
        ('cap_rese', DOUBLE),
        ('undistr_porfit', DOUBLE),
        ('surplus_rese', DOUBLE),
        ('special_rese', DOUBLE),
        ('money_cap', DOUBLE),
        ('trad_asset', DOUBLE),
        ('notes_receiv', DOUBLE),
        ('accounts_receiv', DOUBLE),
        ('oth_receiv', DOUBLE),
        ('prepayment', DOUBLE),
        ('div_receiv', DOUBLE),
        ('int_receiv', DOUBLE),
        ('inventories', DOUBLE),
        ('amor_exp', DOUBLE),
        ('nca_within_1y', DOUBLE),
        ('sett_rsrv', DOUBLE),
        ('loanto_oth_bank_fi', DOUBLE),
        ('premium_receiv', DOUBLE),
        ('reinsur_receiv', DOUBLE),
        ('reinsur_res_receiv', DOUBLE),
        ('pur_resale_fa', DOUBLE),
        ('oth_cur_assets', DOUBLE),
        ('total_cur_assets', DOUBLE),
        ('fa_avail_for_sale', DOUBLE),
        ('htm_invest', DOUBLE),
        ('lt_eqt_invest', DOUBLE),
        ('invest_real_estate', DOUBLE),
        ('time_deposits', DOUBLE),
        ('oth_assets', DOUBLE),
        ('lt_rec', DOUBLE),
        ('fix_assets', DOUBLE),
        ('cip', DOUBLE),
        ('const_materials', DOUBLE),
        ('fixed_assets_disp', DOUBLE),
        ('produc_bio_assets', DOUBLE),
        ('oil_and_gas_assets', DOUBLE),
        ('intan_assets', DOUBLE),
        ('r_and_d', DOUBLE),
        ('goodwill', DOUBLE),
        ('lt_amor_exp', DOUBLE),
        ('defer_tax_assets', DOUBLE),
        ('decr_in_disbur', DOUBLE),
        ('oth_nca', DOUBLE),
        ('total_nca', DOUBLE),
        ('cash_reser_cb', DOUBLE),
        ('depos_in_oth_bfi', DOUBLE),
        ('prec_metals', DOUBLE),
        ('deriv_assets', DOUBLE),
        ('rr_reins_une_prem', DOUBLE),
        ('rr_reins_outstd_cla', DOUBLE),
        ('rr_reins_lins_liab', DOUBLE),
        ('rr_reins_lthins_liab', DOUBLE),
        ('refund_depos', DOUBLE),
        ('ph_pledge_loans', DOUBLE),
        ('refund_cap_depos', DOUBLE),
        ('indep_acct_assets', DOUBLE),
        ('client_depos', DOUBLE),
        ('client_prov', DOUBLE),
        ('transac_seat_fee', DOUBLE),
        ('invest_as_receiv', DOUBLE),
        ('total_assets', DOUBLE),
        ('lt_borr', DOUBLE),
        ('st_borr', DOUBLE),
        ('cb_borr', DOUBLE),
        ('depos_ib_deposits', DOUBLE),
        ('loan_oth_bank', DOUBLE),
        ('trading_fl', DOUBLE),
        ('notes_payable', DOUBLE),
        ('acct_payable', DOUBLE),
        ('adv_receipts', DOUBLE),
        ('sold_for_repur_fa', DOUBLE),
        ('comm_payable', DOUBLE),
        ('payroll_payable', DOUBLE),
        ('taxes_payable', DOUBLE),
        ('int_payable', DOUBLE),
        ('oth_payable', DOUBLE),
        ('acc_exp', DOUBLE),
        ('deferred_inc', DOUBLE),
        ('st_bonds_payable', DOUBLE),
        ('payable_to_reinsurer', DOUBLE),
        ('rsrv_insur_cont', DOUBLE),
        ('acting_trading_sec', DOUBLE),
        ('acting_uw_sec', DOUBLE),
        ('non_cur_liab_due_1y', DOUBLE),
        ('oth_cur_liab', DOUBLE),
        ('total_cur_liab', DOUBLE),
        ('bond_payable', DOUBLE),
        ('lt_payable', DOUBLE),
        ('specific_payables', DOUBLE),
        ('estimated_liab', DOUBLE),
        ('defer_tax_liab', DOUBLE),
        ('defer_inc_non_cur_liab', DOUBLE),
        ('oth_ncl', DOUBLE),
        ('total_ncl', DOUBLE),
        ('depos_oth_bfi', DOUBLE),
        ('deriv_liab', DOUBLE),
        ('depos', DOUBLE),
        ('agency_bus_liab', DOUBLE),
        ('oth_liab', DOUBLE),
        ('prem_receiv_adva', DOUBLE),
        ('depos_received', DOUBLE),
        ('ph_invest', DOUBLE),
        ('reser_une_prem', DOUBLE),
        ('reser_outstd_claims', DOUBLE),
        ('reser_lins_liab', DOUBLE),
        ('reser_lthins_liab', DOUBLE),
        ('indept_acc_liab', DOUBLE),
        ('pledge_borr', DOUBLE),
        ('indem_payable', DOUBLE),
        ('policy_div_payable', DOUBLE),
        ('total_liab', DOUBLE),
        ('treasury_share', DOUBLE),
        ('ordin_risk_reser', DOUBLE),
        ('forex_differ', DOUBLE),
        ('invest_loss_unconf', DOUBLE),
        ('minority_int', DOUBLE),
        ('total_hldr_eqy_exc_min_int', DOUBLE),
        ('total_hldr_eqy_inc_min_int', DOUBLE),
        ('total_liab_hldr_eqy', DOUBLE),
        ('lt_payroll_payable', DOUBLE),
        ('oth_comp_income', DOUBLE),
        ('oth_eqt_tools', DOUBLE),
        ('oth_eqt_tools_p_shr', DOUBLE),
        ('lending_funds', DOUBLE),
        ('acc_receivable', DOUBLE),
        ('st_fin_payable', DOUBLE),
        ('payables', DOUBLE),
        ('hfs_assets', DOUBLE),
        ('hfs_sales', DOUBLE),

    ]

    # Patch run: the range is derived from tushare_stock_info only
    sql_str = """SELECT ts_code,subdate(list_date,365*10) date_frm,list_date date_to FROM tushare_stock_info"""
    logger.warning('%s 打补丁,使用 tushare_stock_info 表进行计算需要补充提取的日期范围', table_name)
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        # ts_code -> (date_from, date_to), optionally restricted to ts_code_set
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set}
    dtype = dict(param_list)

    data_len = len(code_date_range_dic)
    logger.info('%d stocks will be imported into %s', data_len, table_name)

    # Rows collected for the stock currently being processed. Initialised here
    # so the ``finally`` clause can inspect it even if the loop never ran.
    data_df = None
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            data_df = invoke_balancesheet(ts_code=ts_code, start_date=start_date_str,
                                          end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            if data_df is not None and len(data_df) > 0:
                # Keep requesting older pages while the last row received is
                # still later than the start of the wanted range.
                last_date = try_2_date(data_df['ann_date'].iloc[-1])
                while last_date > date_from:
                    df2 = invoke_balancesheet(
                        ts_code=ts_code, start_date=start_date_str,
                        end_date=datetime_2_str(last_date - timedelta(days=1), STR_FORMAT_DATE_TS))
                    if df2 is None or len(df2) == 0:
                        break
                    page_last_date = try_2_date(df2['ann_date'].iloc[-1])
                    # No progress towards date_from: stop instead of looping forever
                    if page_last_date >= last_date:
                        break
                    data_df = pd.concat([data_df, df2])
                    last_date = page_last_date
                    logger.info('%d/%d) %d data of %s between %s and %s',
                                num, data_len, data_df.shape[0], ts_code, date_from, date_to)
                # Write this stock's rows
                data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
                logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
                # Mark as written so the finally clause does not write it again
                data_df = None
            else:
                logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, date_from,
                               date_to)
            # Debug only: stop after 10 stocks
            if DEBUG and num >= 10:
                break
    finally:
        # Only reached with pending rows when the loop was interrupted between
        # fetching and writing; save what was collected for that stock.
        if data_df is not None and len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, dtype,
                myisam_if_create_table=True, primary_keys=['ts_code', 'ann_date'],
                schema=config.DB_SCHEMA_MD)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
def import_tushare_tmt_twincome(chain_param=None, ts_code_set=None):
    """
    Fetch Taiwan TMT income rows per item and upsert them into
    ``tushare_tmt_twincome``.

    The items and their start dates come from ``tushare_tmt_twincome_info``.
    When the target table exists, an item resumes the day after its latest
    stored ``date``. The end date is today, or yesterday when the database
    clock is before 16:00.

    When the last row of a result is still later than the start date, further
    requests are issued with the end date moved to the day before that row,
    until the range is covered, a request returns nothing, or a request makes
    no progress. Frames are buffered and written once at least 1000 rows have
    been collected; the remainder is written in the ``finally`` clause.

    :param chain_param: not referenced inside this function
    :param ts_code_set: currently ignored, see the review note in the body
    :return: None
    """
    table_name = 'tushare_tmt_twincome'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # Resume from the stored data when the target table exists
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm start_date, end_date
            FROM
            (
            SELECT info.ts_code, ifnull(date, start_date) date_frm, 
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM 
                tushare_tmt_twincome_info info 
            LEFT OUTER JOIN
                (SELECT item, adddate(max(date),1) date FROM {table_name} GROUP BY item ) income
            ON info.ts_code = income.item
            ) tt
            order by ts_code""".format(table_name=table_name)
    else:
        sql_str = """SELECT ts_code, start_date ,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date 
            FROM tushare_tmt_twincome_info info """
        logger.warning('%s 不存在,仅使用 tushare_tmt_twincome_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        # NOTE(review): the caller-supplied ts_code_set is discarded here, so
        # every item is always processed. Kept as-is to avoid changing what
        # existing callers get - confirm whether the filter should be honoured.
        ts_code_set = None
        # item code -> (start_date, end_date)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set}

    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(code_date_range_dic)
    logger.info('%d Taiwan TMT information will been import into tushare_tmt_twincome', data_len)

    try:
        for num, (ts_code, (start_date, end_date)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, start_date, end_date)
            start_date_str = datetime_2_str(start_date, STR_FORMAT_DATE_TS)
            data_df = invoke_tmt_twincome(item=ts_code, start_date=start_date_str,
                                          end_date=datetime_2_str(end_date, STR_FORMAT_DATE_TS))
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, start_date,
                               end_date)
                continue

            if len(data_df) > 0:
                # Keep requesting older pages while the last row received is
                # still later than the start of the wanted range.
                range_start = try_2_date(start_date)
                last_date = try_2_date(data_df['date'].iloc[-1])
                while last_date > range_start:
                    df2 = invoke_tmt_twincome(
                        item=ts_code,
                        start_date=start_date_str,
                        end_date=datetime_2_str(last_date - timedelta(days=1), STR_FORMAT_DATE_TS))
                    if df2 is None or len(df2) == 0:
                        break
                    page_last_date = try_2_date(df2['date'].iloc[-1])
                    # No progress towards range_start: stop instead of looping forever
                    if page_last_date >= last_date:
                        break
                    data_df = pd.concat([data_df, df2])
                    last_date = page_last_date
                    logger.info('%d/%d) %d data of %s between %s and %s', num, data_len, data_df.shape[0], ts_code,
                                start_date, end_date)

            # Buffer non-empty results
            if data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Write the buffer once enough rows have been collected
            if data_count >= 1000:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, DTYPE_TUSHARE_TMT_TWINCOME)
                all_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # Write whatever is still buffered, also when the loop was interrupted
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, DTYPE_TUSHARE_TMT_TWINCOME)
            all_data_count = all_data_count + data_count
        # Always report the total, not only when a last partial batch existed
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
Esempio n. 28
0
def import_tushare_stock_cashflow(chain_param=None, ts_code_set=None):
    """
    Fetch cash-flow statement rows per stock and insert them into
    ``tushare_stock_cashflow``.

    The date range requested for each ts_code is computed in SQL from
    ``tushare_stock_info``. When the target table already exists the range
    starts the day after the latest stored ``ann_date`` (falling back to
    ``list_date``); otherwise it starts at ``list_date``. The upper bound is
    ``delist_date`` when that is earlier than the end date, where the end date
    is today, or yesterday when the database clock is before 16:00.

    When the last row of a result is still later than the range start, further
    requests are issued with the end date moved to the day before that row,
    until the range is covered, a request returns nothing, or a request makes
    no progress. Frames are buffered and written once at least 1000 rows have
    been collected; the remainder is written in the ``finally`` clause.

    :param chain_param: not referenced inside this function
    :param ts_code_set: optional container of ts_code values; when given, only
        those codes are processed
    :return: None
    """
    table_name = 'tushare_stock_cashflow'
    primary_keys = ['ts_code', 'ann_date', 'end_date']
    logging.info("更新 %s 开始", table_name)
    check_sqlite_db_primary_keys(table_name, primary_keys)
    has_table = engine_md.has_table(table_name)
    # Resume from the stored data when the target table exists, otherwise
    # start every stock from its listing date.
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(ann_date, list_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                  tushare_stock_info info 
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(ann_date),1) ann_date 
                    FROM {table_name} GROUP BY ts_code) cashflow
                ON info.ts_code = cashflow.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
              (
                SELECT info.ts_code, list_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_info info 
              ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        # ts_code -> (date_from, date_to), optionally restricted to ts_code_set
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_df_list, data_count, all_data_count = [], 0, 0
    data_len = len(code_date_range_dic)
    logger.info('%d data will been import into %s', data_len, table_name)

    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(
                code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code,
                         date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            data_df = invoke_cashflow(
                ts_code=ts_code,
                start_date=start_date_str,
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))

            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num,
                               data_len, ts_code, date_from, date_to)
            else:
                if len(data_df) > 0:
                    # Keep requesting older pages while the last row received
                    # is still later than the start of the wanted range.
                    last_date = try_2_date(data_df['ann_date'].iloc[-1])
                    while last_date > date_from:
                        df2 = invoke_cashflow(
                            ts_code=ts_code,
                            start_date=start_date_str,
                            end_date=datetime_2_str(
                                last_date - timedelta(days=1),
                                STR_FORMAT_DATE_TS))
                        if df2 is None or len(df2) == 0:
                            break
                        page_last_date = try_2_date(df2['ann_date'].iloc[-1])
                        # No progress towards date_from: stop instead of
                        # looping forever
                        if page_last_date >= last_date:
                            break
                        data_df = pd.concat([data_df, df2])
                        last_date = page_last_date

                logger.info('%d/%d) %d 条 %s 的现金流被提取,起止时间为 %s 和 %s', num,
                            data_len, data_df.shape[0], ts_code, date_from,
                            date_to)

                # Buffer non-empty results
                if data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)
                # Write the buffer once enough rows have been collected
                if data_count >= 1000:
                    data_df_all = pd.concat(data_df_list)
                    bunch_insert(data_df_all,
                                 table_name=table_name,
                                 dtype=DTYPE_TUSHARE_CASHFLOW,
                                 primary_keys=primary_keys)

                    logger.info('%d 条现金流数据已插入 %s 表', data_count, table_name)
                    all_data_count += data_count
                    data_df_list, data_count = [], 0

            # Debug only: stop after 10 stocks, whether or not they had data
            if DEBUG and num >= 10:
                break
    finally:
        # Write whatever is still buffered, also when the loop was interrupted
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(data_df_all,
                                      table_name=table_name,
                                      dtype=DTYPE_TUSHARE_CASHFLOW,
                                      primary_keys=primary_keys)

            all_data_count = all_data_count + data_count
        # Always report the total, not only when a last partial batch existed
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
def import_tushare_block_trade(chain_param=None):
    """
    Fetch block-trade rows one trading day at a time and insert them into
    ``tushare_block_trade``.

    The days to fetch are the open SSE days in ``tushare_trade_date`` up to
    today, or yesterday when the database clock is before 16:00. When the
    target table exists only days after its latest ``trade_date`` are fetched;
    otherwise fetching starts at 2003-08-02. Days are processed in ascending
    order because the resume point is ``max(trade_date)``: if a run stopped
    after writing a later day first, the earlier days would never be retried.

    Any exception raised while fetching or writing is logged and ends the run
    without propagating.

    :param chain_param: not referenced inside this function
    :return: None
    """
    table_name = 'tushare_block_trade'
    logging.info("更新 %s 开始", table_name)
    # Column name -> SQL type used when writing the frame
    param_list = [
        ('trade_date', Date),
        ('ts_code', String(20)),
        ('price', DOUBLE),
        ('vol', DOUBLE),
        ('amount', DOUBLE),
        ('buyer', String(100)),
        ('seller', String(100)),
    ]

    has_table = engine_md.has_table(table_name)

    if has_table:
        sql_str = f"""select cal_date            
                 FROM
                  (
                   select * from tushare_trade_date trddate 
                   where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
                 )tt
                 where (is_open=1 
                        and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                        and exchange='SSE')
                 order by cal_date"""
    else:
        # The block-trade regime came into force on 2003-08-02
        sql_str = """SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
                  AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                  AND exchange='SSE'  AND cal_date>='2003-08-02') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        trade_date_list = [row[0] for row in table.fetchall()]
    dtype = dict(param_list)

    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_block_trade(trade_date=trade_date)
            if data_df is not None and len(data_df) > 0:
                # The table has no primary key because identical records do
                # occur, e.g. 002325.SZ on 2014-11-17 has two block trades
                # through the same brokerage branch that are exactly the same.
                data_count = bunch_insert(
                    data_df, table_name=table_name, dtype=dtype)
                logging.info("%d/%d) %s更新 %s 结束 %d 条信息被更新",
                             num, trade_date_list_len, trade_date, table_name, data_count)
            else:
                logging.info("%d/%d) %s 无数据信息可被更新", num, trade_date_list_len, trade_date)
    except Exception:
        # Best effort: log and stop, but let KeyboardInterrupt/SystemExit through
        logger.exception('更新 %s 表异常', table_name)
Esempio n. 30
0
def import_tushare_stock_income(chain_param=None, ts_code_set=None):
    """
    Import income-statement rows into the ``tushare_stock_income`` table.

    A date range is computed in SQL for every stock in ``tushare_stock_info``:

    * start: the day after the latest ``ann_date`` already stored for that stock,
      or ``list_date`` minus 3650 days when nothing is stored (or the table does
      not exist yet);
    * end: the earlier of ``delist_date`` and the cut-off day, where the cut-off
      is yesterday when the database clock (``now()``) is before 16:00 and today
      otherwise.

    Each range is fetched through ``invoke_income``. While the oldest ``ann_date``
    of the last page is still later than the range start, an earlier window is
    requested and appended. Frames are buffered and upserted in batches once at
    least 1000 rows have accumulated; whatever is left is upserted in ``finally``
    so that already downloaded data is kept even if the loop raises.

    :param chain_param: not used inside this function
        (NOTE(review): presumably reserved for task chaining -- confirm with callers)
    :param ts_code_set: optional collection of ts_code values; when given, only
        these codes are processed
    :return: None
    """
    table_name = 'tushare_stock_income'
    logger.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Choose the range query depending on whether the target table already exists.
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(ann_date, subdate(list_date,365*10)) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                  tushare_stock_info info 
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(ann_date),1) ann_date 
                    FROM {table_name} GROUP BY ts_code) income
                ON info.ts_code = income.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
              (
                SELECT info.ts_code, subdate(list_date,365*10) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_info info 
              ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code DESC """
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Map every requested stock to the (date_from, date_to) range it still needs.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    def _save(df_list):
        """Concatenate the buffered frames, upsert them and return the insert helper's result."""
        return bunch_insert_on_duplicate_update(
            pd.concat(df_list),
            table_name,
            engine_md,
            DTYPE_TUSHARE_STOCK_INCOME,
            myisam_if_create_table=True,
            primary_keys=['ts_code', 'ann_date', 'end_date'],
            schema=config.DB_SCHEMA_MD)

    data_df_list, data_count, all_data_count = [], 0, 0
    data_len = len(code_date_range_dic)
    logger.info('%d stocks will been import into %s', data_len, table_name)

    # Counts stocks that returned a frame; only used to cut DEBUG runs short.
    cycles = 1
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(
                code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code,
                         date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            data_df = invoke_income(
                ts_code=ts_code,
                start_date=start_date_str,
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num,
                               data_len, ts_code, date_from, date_to)
                continue

            if len(data_df) > 0:
                # The last row of a page is treated as its oldest announcement; keep
                # requesting the window before it until the range start is reached.
                oldest_ann_date = try_2_date(data_df['ann_date'].iloc[-1])
                while oldest_ann_date > date_from:
                    page_df = invoke_income(
                        ts_code=ts_code,
                        start_date=start_date_str,
                        end_date=datetime_2_str(
                            oldest_ann_date - timedelta(days=1),
                            STR_FORMAT_DATE_TS))
                    if page_df is None or page_df.shape[0] == 0:
                        break
                    page_oldest = try_2_date(page_df['ann_date'].iloc[-1])
                    if page_oldest >= oldest_ann_date:
                        # No progress towards date_from: stop instead of repeating
                        # the same request forever.
                        break
                    data_df = pd.concat([data_df, page_df])
                    oldest_ann_date = page_oldest

            logger.info('%d/%d), %d 条 %s 的利润表数据被提取,起止时间为 %s 和 %s', num,
                        data_len, data_df.shape[0], ts_code, date_from,
                        date_to)

            # Buffer the frame for the next batch insert.
            if data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # Flush once enough rows are buffered.
            if data_count >= 1000 and len(data_df_list) > 0:
                _save(data_df_list)
                logger.info('%d 条财务指标将数据插入 %s 表', data_count, table_name)
                all_data_count += data_count
                data_df_list, data_count = [], 0

            # Debug only: stop after a handful of stocks.
            cycles += 1
            if DEBUG and cycles > 10:
                break
    finally:
        # Persist whatever is still buffered, even when the loop above raised.
        if len(data_df_list) > 0:
            data_count = _save(data_df_list)
            all_data_count = all_data_count + data_count
        # Always report the total so every start message has a matching end message.
        logger.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)