Ejemplo n.º 1
0
def import_tushare_adj_factor(chain_param=None, ):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_stock_daily_adj_factor'
    primary_keys = ["ts_code", "trade_date"]
    logging.info("更新 %s 开始", table_name)
    # 进行表格判断,确定是否含有 table_name
    has_table = engine_md.has_table(table_name)
    # sqlite_file_name = 'eDB_adjfactor.db'
    check_sqlite_db_primary_keys(table_name, primary_keys)

    if has_table:
        sql_str = """
           select cal_date            
           FROM
            (
             select * from tushare_trade_date trddate 
             where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
           )tt
           where (is_open=1 
                  and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                  and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
           SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
            AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
            AND exchange='SSE') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取交易日数据
        table = session.execute(sql_str)
        trade_date_list = [row[0] for row in table.fetchall()]

    trade_date_count, data_count_tot = len(trade_date_list), 0
    try:
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = pro.adj_factor(ts_code='', trade_date=trade_date)
            if data_df is not None and data_df.shape[0] > 0:
                data_count = bunch_insert(
                    data_df,
                    table_name=table_name,
                    dtype=DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR,
                    primary_keys=primary_keys)
                data_count_tot += data_count

                logging.info("%d/%d) %s 表 %s %d 条信息被更新", num, trade_date_count,
                             table_name, trade_date, data_count)
            else:
                logging.info("%d/%d) %s 表 %s 数据信息可被更新", num, trade_date_count,
                             table_name, trade_date)
    except:
        logger.exception("更新 %s 异常", table_name)
    finally:
        logging.info("%s 表 %d 条记录更新完成", table_name, data_count_tot)
Ejemplo n.º 2
0
def import_tushare_daily_basic(chain_param=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_stock_daily_basic'
    primary_keys = ["ts_code", "trade_date"]
    logging.info("更新 %s 开始", table_name)
    check_sqlite_db_primary_keys(table_name, primary_keys)
    has_table = engine_md.has_table(table_name)
    # 下面一定要注意引用表的来源,否则可能是串,提取混乱!!!
    # 比如本表是 tushare_daily_basic,所以引用的也是这个,如果引用错误,就全部乱了
    if has_table:
        sql_str = """
               select cal_date            
               FROM
                (
                 select * from tushare_trade_date trddate 
                 where( cal_date>(SELECT max(trade_date) FROM {table_name} ))
               )tt
               where (is_open=1 
                      and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                      and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
               SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
            AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
            AND exchange='SSE') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取交易日数据
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())

    try:
        for_count = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_daily_basic(ts_code='', trade_date=trade_date)
            if data_df is not None and data_df.shape[0] > 0:
                data_count = bunch_insert(
                    data_df, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_DAILY_BASIC,
                    primary_keys=primary_keys)

                logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新", num, for_count, trade_date, table_name, data_count)
            else:
                logging.info("%d/%d) %s 无数据信息可被更新",  num, for_count, trade_date)
    except:
        logger.exception("更新 %s 表异常", table_name)
Ejemplo n.º 3
0
def import_tushare_stock_daily(chain_param=None, ts_code_set=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_stock_daily_md'
    primary_keys = ["ts_code", "trade_date"]
    check_sqlite_db_primary_keys(table_name, primary_keys)
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # 进行表格判断,确定是否含有tushare_stock_daily
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
            SELECT info.ts_code, ifnull(trade_date, list_date) date_frm, delist_date,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM 
                tushare_stock_info info 
            LEFT OUTER JOIN
                (SELECT ts_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY ts_code) daily
            ON info.ts_code = daily.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
              (
                SELECT info.ts_code, list_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_info info 
              ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    sql_trade_date_str = """
           SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
        AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
        AND exchange='SSE') ORDER BY cal_date"""

    with with_db_session(engine_md) as session:
        table = session.execute(sql_trade_date_str)
        trade_date_list = [row[0] for row in table.fetchall()]
        trade_date_list.sort()
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 计算每只股票需要获取日线数据的日期区间
        begin_time = None
        # 获取date_from,date_to,将date_from,date_to做为value值
        code_date_range_dic = {
            ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ts_code, date_from, date_to in table.fetchall() if
            ts_code_set is None or ts_code in ts_code_set}

    # data_len = len(code_date_range_dic)
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(code_date_range_dic)
    logger.info('%d data will been import into %s', data_len, table_name)
    # 将data_df数据,添加到data_df_list

    try:
        for num, (ts_code, (date_from_tmp, date_to_tmp)) in enumerate(code_date_range_dic.items(), start=1):
            date_from = get_first(trade_date_list, lambda x: x >= date_from_tmp)
            date_to = get_last(trade_date_list, lambda x: x <= date_to_tmp)
            if date_from is None or date_to is None or date_from > date_to:
                logger.debug('%d/%d) %s [%s - %s] 跳过', num, data_len, ts_code, date_from, date_to)
                continue
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            data_df = invoke_daily(ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                                   end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            # data_df = df
            if len(data_df) > 0:
                while try_2_date(data_df['trade_date'].iloc[-1]) > date_from:
                    last_date_in_df_last, last_date_in_df_cur = try_2_date(data_df['trade_date'].iloc[-1]), None
                    df2 = invoke_daily(ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                                       end_date=datetime_2_str(
                                           try_2_date(data_df['trade_date'].iloc[-1]) - timedelta(days=1),
                                           STR_FORMAT_DATE_TS))
                    if df2 is not None and len(df2) > 0:
                        last_date_in_df_cur = try_2_date(df2['trade_date'].iloc[-1])
                        if last_date_in_df_cur < last_date_in_df_last:
                            data_df = pd.concat([data_df, df2])
                            # df = df2
                        elif last_date_in_df_cur == last_date_in_df_last:
                            break
                        if data_df is None:
                            logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, date_from,
                                           date_to)
                            continue
                        logger.info('%d/%d) %d data of %s between %s and %s', num, data_len, data_df.shape[0], ts_code,
                                    date_from, date_to)
                    else:
                        break
            # 把数据攒起来
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # 大于阀值有开始插入
            if data_count >= 500:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert(
                    data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_DAILY_MD,
                    primary_keys=primary_keys)
                all_data_count += data_count
                data_df_list, data_count = [], 0

            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(
                data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_DAILY_MD,
                primary_keys=primary_keys)

            all_data_count = all_data_count + data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
Ejemplo n.º 4
0
def import_tushare_stock_cashflow(chain_param=None, ts_code_set=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_stock_cashflow'
    primary_keys = ['ts_code', 'ann_date', 'end_date']
    logging.info("更新 %s 开始", table_name)
    check_sqlite_db_primary_keys(table_name, primary_keys)
    has_table = engine_md.has_table(table_name)
    # 进行表格判断,确定是否含有tushare_stock_daily
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(ann_date, list_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                  tushare_stock_info info 
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(ann_date),1) ann_date 
                    FROM {table_name} GROUP BY ts_code) cashflow
                ON info.ts_code = cashflow.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
              (
                SELECT info.ts_code, list_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_info info 
              ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 计算每只股票需要获取日线数据的日期区间
        begin_time = None
        # 获取date_from,date_to,将date_from,date_to做为value值
        code_date_range_dic = {
            ts_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(
        code_date_range_dic)
    logger.info('%d data will been import into %s', data_len, table_name)
    # 将data_df数据,添加到data_df_list

    cycles = 1
    try:
        for num, (ts_code, (date_from,
                            date_to)) in enumerate(code_date_range_dic.items(),
                                                   start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code,
                         date_from, date_to)
            df = invoke_cashflow(
                ts_code=ts_code,
                start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            # logger.info(' %d data of %s between %s and %s', df.shape[0], ts_code, date_from, date_to)
            data_df = df
            if data_df is not None and len(data_df) > 0:
                while try_2_date(df['ann_date'].iloc[-1]) > date_from:
                    last_date_in_df_last, last_date_in_df_cur = try_2_date(
                        df['ann_date'].iloc[-1]), None
                    df2 = invoke_cashflow(
                        ts_code=ts_code,
                        start_date=datetime_2_str(date_from,
                                                  STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(
                            try_2_date(df['ann_date'].iloc[-1]) -
                            timedelta(days=1), STR_FORMAT_DATE_TS))
                    if len(df2) > 0:
                        last_date_in_df_cur = try_2_date(
                            df2['ann_date'].iloc[-1])
                        if last_date_in_df_cur < last_date_in_df_last:
                            data_df = pd.concat([data_df, df2])
                            df = df2
                        elif last_date_in_df_cur == last_date_in_df_last:
                            break
                    elif len(df2) <= 0:
                        break
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num,
                               data_len, ts_code, date_from, date_to)
                continue
            elif data_df is not None:
                logger.info('%d/%d) %d 条 %s 的现金流被提取,起止时间为 %s 和 %s', num,
                            data_len, data_df.shape[0], ts_code, date_from,
                            date_to)

            # 把数据攒起来
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # 大于阀值有开始插入
            if data_count >= 1000 and len(data_df_list) > 0:
                data_df_all = pd.concat(data_df_list)
                bunch_insert(data_df_all,
                             table_name=table_name,
                             dtype=DTYPE_TUSHARE_CASHFLOW,
                             primary_keys=primary_keys)

                logger.info('%d 条现金流数据已插入 %s 表', data_count, table_name)
                all_data_count += data_count
                data_df_list, data_count = [], 0
                # # 数据插入数据库
                # data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, DTYPE_TUSHARE_CASHFLOW)
                # logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)

            # 仅调试使用
            cycles = cycles + 1
            if DEBUG and cycles > 10:
                break
    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(data_df_all,
                                      table_name=table_name,
                                      dtype=DTYPE_TUSHARE_CASHFLOW,
                                      primary_keys=primary_keys)

            all_data_count = all_data_count + data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)