Ejemplo n.º 1
0
def import_tushare_stock_info(chain_param=None, refresh=False):
    """Import the basic information of all listed stocks into ``tushare_stock_info``.

    The data is fetched with ``pro.stock_basic`` and written through
    ``bunch_insert_on_duplicate_update``. If the table did not exist before
    this call and exists afterwards, it is handed to ``alter_table_2_myisam``
    and ``build_primary_key``.

    :param chain_param: not read by this function
    :param refresh: not read by this function
    :return: None
    """
    table_name = 'tushare_stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Column name -> SQL type handed to the insert helper.
    # NOTE(review): `symbol` is declared DOUBLE; if symbols carry leading zeros
    # a numeric column would drop them -- confirm this is intended.
    # NOTE(review): `curr_type` is requested below but has no entry here.
    dtype = {
        'ts_code': String(20),
        'symbol': DOUBLE,
        'list_date': Date,
        'delist_date': Date,
        'name': String(30),
        'fullname': String(100),
        'enname': String(200),
        'exchange_id': String(30),
        'list_status': String(10),
        'is_hs': String(10),
    }

    # Fetch the data
    stock_info_all_df = pro.stock_basic(
        exchange_id='',
        fields='ts_code,symbol,name,fullname,enname,exchange_id,curr_type,list_date,list_status,delist_date,is_hs')

    logging.info('%s stock data will be import', stock_info_all_df.shape[0])
    data_count = bunch_insert_on_duplicate_update(stock_info_all_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    # Only a table created by this very call needs its engine / key set up.
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
Ejemplo n.º 2
0
def import_coin_info():
    """Fetch the coin list via ``pro.coinlist`` and upsert it into ``tushare_coin_info``.

    The request covers 2017-01-01 up to today. If the table did not exist
    before this call and exists afterwards, it is passed to
    ``alter_table_2_myisam`` and receives a composite primary key on
    (``coin``, ``en_name``).
    """
    table_name = 'tushare_coin_info'
    has_table = engine_md.has_table(table_name)
    # Column name -> SQL type
    column_types = {
        'coin': String(60),
        'en_name': String(60),
        'cn_name': String(60),
        'issue_date': Date,
        'amount': DOUBLE,
    }
    today_str = date_2_str(date.today(), DATE_FORMAT_STR)
    coinlist_df = pro.coinlist(start_date='20170101', end_date=today_str)
    data_count = bunch_insert_on_duplicate_update(
        coinlist_df, table_name, engine_md, column_types)
    logging.info("更新 %s 完成 新增数据 %d 条", table_name, data_count)

    # Nothing more to do unless this call created the table.
    table_created = not has_table and engine_md.has_table(table_name)
    if not table_created:
        return

    alter_table_2_myisam(engine_md, [table_name])
    pk_sql_template = """ALTER TABLE {table_name}
            CHANGE COLUMN `coin` `coin` VARCHAR(60) NOT NULL FIRST,
            CHANGE COLUMN `en_name` `en_name` VARCHAR(60) NOT NULL AFTER `coin`,
            ADD PRIMARY KEY (`coin`, `en_name`)"""
    create_pk_str = pk_sql_template.format(table_name=table_name)
    with with_db_session(engine_md) as session:
        session.execute(create_pk_str)
def import_tushare_index_basic(chain_param=None):
    """Import index basic information, market by market, into ``tushare_stock_index_basic``.

    For each market in a fixed list, ``invoke_index_basic`` is called and any
    returned rows are upserted. Whatever happens in the loop, a table that was
    created by this call is passed to ``alter_table_2_myisam`` afterwards.

    :param chain_param: not read by this function
    :return: None
    """
    table_name = 'tushare_stock_index_basic'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)

    fields = (
        'ts_code', 'name', 'fullname', 'market', 'publisher', 'index_type',
        'category', 'base_date', 'base_point', 'list_date', 'weight_rule',
        'desc', 'exp_date',
    )
    markets = ['MSCI', 'CSI', 'SSE', 'SZSE', 'CICC', 'SW', 'CNI', 'OTH']

    try:
        for mkt in markets:
            data_df = invoke_index_basic(market=mkt, fields=fields)
            if len(data_df) == 0:
                logging.info("无数据信息可被更新")
                continue
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_INDEX_BASIC)
            logging.info("%s更新 %s 结束 %d 条信息被更新", mkt, table_name, data_count)
    finally:
        # Runs even after a failure so a freshly created table is still converted.
        table_created = not has_table and engine_md.has_table(table_name)
        if table_created:
            alter_table_2_myisam(engine_md, [table_name])
Ejemplo n.º 4
0
def import_macroeconomy_info(chain_param=None):
    """Upsert the hard-coded macro-economy indicator definitions into ``wind_macroeconomy_info``.

    :param chain_param: in celery, the result of the preceding task passed on
        to the following task; not read by this function
    :return: None
    """
    table_name = 'wind_macroeconomy_info'
    has_table = engine_md.has_table(table_name)
    # One row per indicator: key, en_name, cn_name, begin_date, end_date, remark
    indicator_rows = [
        # RMB exchange rate
        ["M0067855", "us2rmb", "美元兑人民币即期汇率", "1994-01-04", None, '中国货币网'],
    ]
    dtype = {
        'key': String(20),
        'en_name': String(120),
        'cn_name': String(120),
        'begin_date': Date,
        'end_date': Date,
        'remark': Text,
    }
    # The frame columns are exactly the dtype keys, in declaration order.
    column_names = list(dtype)
    info_df = pd.DataFrame(data=indicator_rows, columns=column_names)
    data_count = bunch_insert_on_duplicate_update(
        info_df, table_name, engine_md, dtype)
    logger.info('%d 条记录被更新', data_count)

    # Only a table created by this call gets its engine and primary key set.
    if has_table or not engine_md.has_table(table_name):
        return

    alter_table_2_myisam(engine_md, [table_name])
    pk_sql_template = """ALTER TABLE {table_name}
            CHANGE COLUMN `key` `key` VARCHAR(20) NOT NULL FIRST,
            ADD PRIMARY KEY (`key`)"""
    create_pk_str = pk_sql_template.format(table_name=table_name)
    with with_db_session(engine_md) as session:
        session.execute(create_pk_str)
    logger.info('%s 表 `key` 主键设置完成', table_name)
Ejemplo n.º 5
0
def insert_into_db(data_df_list, engine_md):
    """Concatenate the given frames and upsert them into ``wind_stock_tick``.

    The index of the concatenated frame is renamed to ``datetime`` and turned
    into a regular column before the insert.

    :param data_df_list: list of DataFrames to store
    :param engine_md: database engine used for the table check and the insert
    :return: the number of frames in ``data_df_list`` (not the number of rows)
    """
    table_name = 'wind_stock_tick'
    frame_count = len(data_df_list)
    has_table = engine_md.has_table(table_name)

    # Every column except the timestamp and the code is stored as DOUBLE.
    double_columns = (
        'open', 'high', 'low', 'close', 'ask1', 'bid1',
        'asize1', 'bsize1', 'volume', 'amount', 'preclose',
    )
    dtype = {'datetime': DateTime}
    dtype.update({column: DOUBLE for column in double_columns})
    dtype['wind_code'] = String(20)

    if frame_count == 0:
        return frame_count

    merged_df = pd.concat(data_df_list)
    merged_df.index.rename('datetime', inplace=True)
    merged_df.reset_index(inplace=True)
    bunch_insert_on_duplicate_update(merged_df, table_name, engine_md, dtype=dtype)
    logger.info('%d data imported', merged_df.shape[0])

    # A table created by this call still needs its engine / key set up.
    table_created = not has_table and engine_md.has_table(table_name)
    if table_created:
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    return frame_count
Ejemplo n.º 6
0
def import_tushare_adj_factor(chain_param=None):
    """Import adjustment factors day by day into ``tushare_stock_daily_adj_factor``.

    The dates to load are open SSE days from ``tushare_trade_date`` up to
    today (yesterday when the database clock is before 16:00). When the target
    table already exists, only dates after its latest ``trade_date`` are
    loaded. For each date ``pro.adj_factor`` is called and the rows upserted.
    A table created by this call is passed to ``alter_table_2_myisam`` and gets
    a primary key on (``ts_code``, ``trade_date``), even if the loop failed.

    :param chain_param: not read by this function
    :return: None
    """
    table_name = 'tushare_stock_daily_adj_factor'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)

    # The sub-query must reference this function's own table: pointing it at a
    # different table would produce a wrong date range.
    if has_table:
        sql_str = """
               select cal_date            
               FROM
                (
                 select * from tushare_trade_date trddate 
                 where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
               )tt
               where (is_open=1 
                      and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                      and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
               SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
            AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
            AND exchange='SSE') ORDER BY cal_date"""
        # The fallback query reads tushare_trade_date, so the message names that table.
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Fetch the trading dates to load
        table = session.execute(sql_str)
        trddate = [row[0] for row in table.fetchall()]

    try:
        for cal_date in trddate:
            trade_date = datetime_2_str(cal_date, STR_FORMAT_DATE_TS)
            data_df = pro.adj_factor(ts_code='', trade_date=trade_date)
            if len(data_df) > 0:
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md,
                    DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR)
                logging.info(" %s 表 %s 日 %d 条信息被更新", table_name, trade_date,
                             data_count)
            else:
                logging.info("无数据信息可被更新")
    finally:
        # Runs even after a failure so a freshly created table still gets its key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `trade_date` `trade_date` DATE NOT NULL AFTER `ts_code`,
                ADD PRIMARY KEY (`ts_code`, `trade_date`)""".format(
                table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `ts_code`, `trade_date` 主键设置完成', table_name)
Ejemplo n.º 7
0
def import_tushare_stock_fund_holdings():
    """Import quarterly fund-holdings data for 2013-2018 into ``tushare_stock_fund_holdings``.

    For every (year, quarter) pair ``invoke_fund_holdings`` is called. Codes
    starting with ``6`` get the ``.SH`` suffix, all others ``.SZ``, and the
    ``code`` / ``name`` / ``date`` columns are renamed to ``ts_code`` /
    ``sec_name`` / ``end_date``. Frames are buffered and written once at least
    50 rows have accumulated. Any remainder is written in ``finally``, where a
    table created by this call is also passed to ``alter_table_2_myisam`` and
    ``build_primary_key``.

    :return: None
    """
    table_name = 'tushare_stock_fund_holdings'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Column name -> SQL type
    tushare_fund_holdings_dtype = {
        'ts_code': String(20),
        'sec_name': String(20),
        'end_date': Date,
        'nums': DOUBLE,
        'nlast': DOUBLE,
        'count': DOUBLE,
        'clast': DOUBLE,
        'amount': DOUBLE,
        'ratio': DOUBLE,
    }
    data_df_list, data_count, all_data_count = [], 0, 0
    try:
        for year in range(2013, 2019):
            for quarter in (1, 2, 3, 4):
                logging.debug("fund holdings %d Q%d", year, quarter)
                data_df = invoke_fund_holdings(year, quarter)
                # Check for missing data before touching any column of the frame.
                if data_df is None or data_df.shape[0] == 0:
                    continue
                data_df['code'] = [
                    code + ('.SH' if code.startswith('6') else '.SZ')
                    for code in data_df['code']
                ]
                data_df = data_df.rename(
                    columns={'code': 'ts_code', 'name': 'sec_name', 'date': 'end_date'})
                # Buffer the frame
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
                # Flush once the buffered row count reaches the threshold
                if data_count >= 50:
                    data_df_all = pd.concat(data_df_list)
                    bunch_insert_on_duplicate_update(
                        data_df_all, table_name, engine_md, tushare_fund_holdings_dtype)
                    all_data_count += data_count
                    data_df_list, data_count = [], 0
    finally:
        if data_df_list:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, tushare_fund_holdings_dtype)
            all_data_count += data_count
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
        # Independent of leftover data: a table created by an in-loop flush
        # must still be converted and receive its primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
Ejemplo n.º 8
0
def import_coin_info(chain_param=None, ):
    """Download the CoinMarketCap v1 ticker list and upsert it into ``cmc_coin_v1_info``.

    ``last_updated`` values are converted with ``datetime.fromtimestamp``
    (``None`` stays ``None``). If the table did not exist before this call and
    exists afterwards, it is passed to ``alter_table_2_myisam`` and gets a
    primary key on ``id``.

    :param chain_param: not read by this function
    :return: None
    :raises ValueError: when the HTTP response status is not 200
    """
    table_name = "cmc_coin_v1_info"
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)

    url = 'https://api.coinmarketcap.com/v1/ticker/?limit=0'
    # Column name -> SQL type
    dtype = {
        'id': String(60),
        'name': String(60),
        'symbol': String(20),
        'rank': Integer,
        'price_usd': DOUBLE,
        'price_btc': DOUBLE,
        '24h_volume_usd': DOUBLE,
        'market_cap_usd': DOUBLE,
        'available_supply': DOUBLE,
        'total_supply': DOUBLE,
        'max_supply': DOUBLE,
        'percent_change_1h': DOUBLE,
        'percent_change_24h': DOUBLE,
        'percent_change_7d': DOUBLE,
        'last_updated': DATETIME,
    }
    rsp = requests.get(url)
    if rsp.status_code != 200:
        raise ValueError('请求 listings 相应失败')
    payload = rsp.json()
    data_df = pd.DataFrame(payload)

    def _to_datetime(raw):
        # Missing values stay None; anything else is read as a numeric timestamp.
        if raw is None:
            return None
        return datetime.datetime.fromtimestamp(float(raw))

    data_df['last_updated'] = data_df['last_updated'].apply(_to_datetime)
    data_count = bunch_insert_on_duplicate_update(
        data_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)

    # Only a table created by this call needs further setup.
    if has_table or not engine_md.has_table(table_name):
        return

    alter_table_2_myisam(engine_md, [table_name])
    pk_sql_template = """ALTER TABLE {table_name}
        CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST ,
        ADD PRIMARY KEY (`id`)"""
    create_pk_str = pk_sql_template.format(table_name=table_name)
    with with_db_session(engine_md) as session:
        session.execute(create_pk_str)
Ejemplo n.º 9
0
def import_index_info(chain_param=None, ths_code=None):
    """Import index basic data into ``ifind_index_info``.

    When ``ths_code`` is not given, the codes returned by
    ``get_stock_code_set`` for today are joined with commas and used instead.
    The data is requested through ``invoker.THS_BasicData`` and upserted.
    Afterwards ``update_from_info_table`` is called for the table. If no data
    comes back the function returns early and skips that final step.

    :param chain_param: only used to hand parameters along when tasks are run
        serially through task.chain; not read by this function
    :param ths_code: comma-separated codes to query; ``None`` means look them up
    :return: None
    """
    table_name = 'ifind_index_info'
    has_table = engine_md.has_table(table_name)
    logging.info("更新 ifind_index_info 开始")
    if ths_code is None:
        # Collect the codes available as of today
        code_set = set()
        code_subset = get_stock_code_set(date.today())
        if code_subset is not None:
            code_set |= code_subset
        ths_code = ','.join(code_set)

    # (indicator name, indicator parameter, SQL type)
    indicator_param_list = [
        ('ths_index_short_name_index', '', String(20)),
        ('ths_index_code_index', '', String(10)),
        ('ths_index_category_index', '', String(20)),
        ('ths_index_base_period_index', '', Date),
        ('ths_index_base_point_index', '', DOUBLE),
        ('ths_publish_org_index', '', String(20)),
    ]
    name_param_pairs = [(name, arg) for name, arg, _ in indicator_param_list]
    indicator, param = unzip_join(name_param_pairs, sep=';')
    data_df = invoker.THS_BasicData(ths_code, indicator, param)
    if data_df is None or data_df.shape[0] == 0:
        logging.info("没有可用的 index info 可以更新")
        return

    dtype = {name: sql_type for name, _, sql_type in indicator_param_list}
    dtype['ths_code'] = String(20)
    data_count = bunch_insert_on_duplicate_update(
        data_df, table_name, engine_md, dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    table_created = not has_table and engine_md.has_table(table_name)
    if table_created:
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    # Refresh the code_mapping table
    update_from_info_table(table_name)
Ejemplo n.º 10
0
def init(alter_table=False):
    """Create all declared tables, build primary keys and register table models.

    Every table name reported by the engine is reflected into a ``Table``
    object and stored in ``TABLE_MODEL_DIC`` under its name.

    :param alter_table: when true, ``alter_table_2_myisam`` is called for the engine
    :return: None
    """
    # Create the tables
    Base.metadata.create_all(engine_md)
    logger.info("所有表结构建立完成")

    if alter_table:
        alter_table_2_myisam(engine_md)

    existing_tables = engine_md.table_names()
    build_primary_key(existing_tables)
    logger.info("所有表结构调整完成")

    for name in existing_tables:
        model = Table(name, Base.metadata, autoload=True)
        TABLE_MODEL_DIC[name] = model

    logger.info("所有表Model动态加载完成")
Ejemplo n.º 11
0
def import_index_info(wind_codes, chain_param=None):
    """Import index information for the given codes into ``wind_index_info``.

    The data is requested through ``invoker.wss``. Upper-case column names are
    renamed to lower case and the frame index becomes the ``wind_code`` column
    before the upsert. Afterwards ``update_from_info_table`` is called for the
    table. If no data comes back the function returns early and skips that
    final step.

    :param wind_codes: codes passed to ``invoker.wss``
    :param chain_param: not read by this function
    :return: None
    """
    table_name = 'wind_index_info'
    has_table = engine_md.has_table(table_name)
    # (upper-case field name, SQL type)
    col_name_param_list = [
        ('LAUNCHDATE', Date),
        ('BASEDATE', Date),
        ('BASEVALUE', DOUBLE),
        ('COUNTRY', String(20)),
        ('CRM_ISSUER', String(20)),
        ('SEC_NAME', String(20)),
    ]
    lower_names = [field.lower() for field, _ in col_name_param_list]
    col_name_param = ",".join(lower_names)
    upper_to_lower = {
        field.upper(): field.lower() for field, _ in col_name_param_list
    }
    # Column name -> SQL type
    dtype = {field.lower(): sql_type for field, sql_type in col_name_param_list}
    dtype['wind_code'] = String(20)

    info_df = invoker.wss(wind_codes, col_name_param)
    if info_df is None or info_df.shape[0] == 0:
        logger.warning("没有数据可导入")
        return
    info_df.rename(columns=upper_to_lower, inplace=True)
    info_df.index.rename("wind_code", inplace=True)
    info_df.reset_index(inplace=True)
    bunch_insert_on_duplicate_update(info_df, table_name, engine_md, dtype=dtype)
    logger.info('%d 条指数信息导入成功\n%s', info_df.shape[0], info_df)
    table_created = not has_table and engine_md.has_table(table_name)
    if table_created:
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    # Refresh the code_mapping table
    update_from_info_table(table_name)
Ejemplo n.º 12
0
def import_tushare_namechange(chain_param=None):
    """Import stock name-change records into ``tushare_stock_namechange``.

    The start of the request window is the latest ``start_date`` already
    stored, or the earliest ``list_date`` in ``tushare_stock_info`` when the
    target table does not exist yet. The window ends today. A table created by
    this call is passed to ``alter_table_2_myisam`` and gets a primary key on
    (``ts_code``, ``start_date``), even if the import itself failed.

    :param chain_param: not read by this function
    :return: None
    """
    table_name = 'tushare_stock_namechange'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # NOTE(review): both queries hard-code the md_integration schema.
    sql_str = (
        """select max(start_date) start_date   FROM md_integration.tushare_stock_namechange"""
        if has_table else
        """select min(list_date) start_date   FROM md_integration.tushare_stock_info"""
    )

    with with_db_session(engine_md) as session:
        # The first column of the first row is the window start
        table = session.execute(sql_str)
        first_column = [row[0] for row in table.fetchall()]
        start_date = datetime_2_str(first_column[0], STR_FORMAT_DATE_TS)
        end_date = datetime_2_str(date.today(), STR_FORMAT_DATE_TS)

    try:
        data_df = pro.namechange(
            start_date=start_date,
            end_date=end_date,
            fields='ts_code,name,start_date,end_date,change_reason')
        if len(data_df) == 0:
            logging.info("无数据信息可被更新")
        else:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_NAMECHANGE)
            logging.info("更新 %s 结束 %d 条上市公司更名信息被更新", table_name, data_count)
    finally:
        table_created = not has_table and engine_md.has_table(table_name)
        if table_created:
            alter_table_2_myisam(engine_md, [table_name])
            pk_sql_template = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `start_date` `start_date` DATE NOT NULL AFTER `ts_code`,
                ADD PRIMARY KEY (`ts_code`, `start_date`)"""
            create_pk_str = pk_sql_template.format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `ts_code`, `start_date` 主键设置完成', table_name)
Ejemplo n.º 13
0
def import_tushare_stock_index_weight(chain_param=None, ts_code_set=None):
    """Import index constituent weights into ``tushare_stock_index_weight``.

    Every (index code, trade date) pair found in
    ``tushare_stock_index_daily_md`` is requested through
    ``invoke_index_weight`` and upserted as soon as it arrives. For one index,
    the first date that yields no rows ends the scan of that index. When
    ``DEBUG`` is set, only the first 10 indexes are processed. A table created
    by this call is passed to ``alter_table_2_myisam`` and
    ``build_primary_key``, even if the loop failed.

    :param chain_param: not read by this function
    :param ts_code_set: not read by this function
    :return: None
    """
    table_name = 'tushare_stock_index_weight'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)

    sql_str = """SELECT ts_code index_code,trade_date trade_date_list FROM md_integration.tushare_stock_index_daily_md """
    with with_db_session(engine_md) as session:
        # Group the available trade dates by index code
        table = session.execute(sql_str)
        code_date_range_dic = {}
        for index_code, trade_date in table.fetchall():
            code_date_range_dic.setdefault(index_code, []).append(trade_date)

    data_len = len(code_date_range_dic)
    logger.info(
        '%d index weight will been import into tushare_stock_index_weight table',
        data_len)
    try:
        for num, (index_code, trade_date_list) in enumerate(
                code_date_range_dic.items(), start=1):
            trade_date_list_len = len(trade_date_list)
            for i, trade_date in enumerate(trade_date_list):
                logger.debug('%d/%d) %d/%d) %s [%s]', num, data_len, i,
                             trade_date_list_len, index_code, trade_date)
                data_df = invoke_index_weight(index_code=index_code,
                                              trade_date=trade_date)
                if len(data_df) == 0:
                    # Stop scanning this index at the first date without data
                    break
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md,
                    DTYPE_TUSHARE_STOCK_INDEX_WEIGHT)
                logging.info("%s 更新 %s  %d 条信息被更新", trade_date, table_name,
                             data_count)
            # In debug mode only the first 10 indexes are processed
            if DEBUG and num >= 10:
                break
    finally:
        # Every frame was already written inside the loop, so nothing is
        # re-inserted here. The cleanup must not read loop variables, which
        # are unbound if no request was ever made.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_tushare_hsgt_top10(chain_param=None):
    """Import the daily HSGT top-10 data into ``tushare_hsgt_top10``.

    The dates to load are open SSE days from ``tushare_trade_date`` up to
    today (yesterday when the database clock is before 16:00). When the target
    table already exists, only dates after its latest ``trade_date`` are
    loaded; otherwise loading starts at 2014-11-17. For each date the market
    types ``'1'`` and ``'3'`` are requested through ``invoke_hsgt_top10``; an
    empty result ends the processing of that date. A table created by this
    call is passed to ``alter_table_2_myisam`` and gets a composite primary
    key, even if the loop failed.

    :param chain_param: not read by this function
    :return: None
    """
    table_name = 'tushare_hsgt_top10'
    logging.info("更新 %s 开始", table_name)
    # Column name -> SQL type
    dtype = {
        'trade_date': Date,
        'ts_code': String(20),
        'name': String(20),
        'close': DOUBLE,
        'change': DOUBLE,
        'rank': Integer,
        'market_type': String(20),
        'amount': DOUBLE,
        'net_amount': DOUBLE,
        'buy': DOUBLE,
        'sell': DOUBLE,
    }

    has_table = engine_md.has_table(table_name)

    if has_table:
        sql_str = """
                  select cal_date            
                  FROM
                   (
                    select * from tushare_trade_date trddate 
                    where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
                  )tt
                  where (is_open=1 
                         and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                         and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
                  SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
               AND exchange='SSE'  AND cal_date>='2014-11-17') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Fetch the trading dates to load
        table = session.execute(sql_str)
        trddate = [row[0] for row in table.fetchall()]

    try:
        for cal_date in trddate:
            trade_date = datetime_2_str(cal_date, STR_FORMAT_DATE_TS)
            for market_type in ('1', '3'):
                data_df = invoke_hsgt_top10(trade_date=trade_date, market_type=market_type)
                if len(data_df) > 0:
                    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
                    logging.info("%s更新 %s 结束 %d 条信息被更新", trade_date, table_name, data_count)
                else:
                    logging.info("无数据信息可被更新")
                    break
    finally:
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # The table holds several rows per trade date, so `trade_date`
            # alone cannot be the primary key (the ALTER would fail on
            # duplicate entries). The column also keeps its DATE type, as
            # declared in dtype.
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `trade_date` `trade_date` DATE NOT NULL AFTER `ts_code`,
                CHANGE COLUMN `market_type` `market_type` VARCHAR(20) NOT NULL AFTER `trade_date`,
                ADD PRIMARY KEY (`ts_code`, `trade_date`, `market_type`)""".format(
                table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `ts_code`, `trade_date`, `market_type` 主键设置完成', table_name)
Ejemplo n.º 15
0
def import_coin_latest(chain_param=None, ):
    """Download the latest CoinMarketCap listings and upsert them into ``cmc_coin_pro_latest``.

    Each listing's ``quote['USD']`` entries are flattened into the top-level
    record. ``date_added`` and ``last_updated`` are parsed with
    ``str_2_datetime``. On a non-200 response the error is logged and the
    function returns without writing anything. A table created by this call is
    passed to ``alter_table_2_myisam`` and gets a primary key on
    (``id``, ``last_updated``).

    :param chain_param: not read by this function
    :return: None
    """
    table_name = 'cmc_coin_pro_latest'
    has_table = engine_md.has_table(table_name)
    # Column name -> SQL type
    # NOTE(review): `id` is Integer here but the ALTER below retypes it to
    # VARCHAR(60) -- confirm which one is intended.
    dtype = {
        'id': Integer,
        'name': String(60),
        'slug': String(60),
        'symbol': String(20),
        'date_added': DATETIME,
        'last_updated': DATETIME,
        'market_cap': DOUBLE,
        'circulating_supply': DOUBLE,
        'max_supply': DOUBLE,
        'num_market_pairs': DOUBLE,
        'percent_change_1h': DOUBLE,
        'percent_change_24h': DOUBLE,
        'percent_change_7d': DOUBLE,
        'price': DOUBLE,
        'total_supply': DOUBLE,
        'volume_24h': DOUBLE,
        'cmc_rank': DOUBLE,
    }

    header = {
        'Content-Type': 'application/json',
        'X-CMC_PRO_API_KEY': config.CMC_PRO_API_KEY
    }
    params = {'limit': 5000, 'start': 1}
    url = "https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest"
    rsp = requests.get(url=url, params=params, headers=header)
    if rsp.status_code != 200:
        logger.error('获取数据异常[%d] %s', rsp.status_code, rsp.content)
        return
    listings = rsp.json()['data']

    records = []
    for listing in listings:
        record = {}
        for key, val in listing.items():
            if key == 'quote':
                # Lift the USD quote fields up into the record itself
                record.update(val['USD'])
            else:
                record[key] = val
        records.append(record)

    data_df = pd.DataFrame(records)
    # Normalise the two timestamp columns
    for column in ('date_added', 'last_updated'):
        data_df[column] = data_df[column].apply(
            lambda x: str_2_datetime(x, DATETIME_FORMAT_STR))
    data_count = bunch_insert_on_duplicate_update(
        data_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)

    # Only a table created by this call needs further setup.
    if has_table or not engine_md.has_table(table_name):
        return

    alter_table_2_myisam(engine_md, [table_name])
    pk_sql_template = """ALTER TABLE {table_name}
        CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST ,
        CHANGE COLUMN `last_updated` `last_updated` DATETIME NOT NULL AFTER `id`,
        ADD PRIMARY KEY (`id`, `last_updated`)"""
    create_pk_str = pk_sql_template.format(table_name=table_name)
    execute_sql(engine_md, create_pk_str)
Ejemplo n.º 16
0
def merge_stock_info():
    """Merge ``ifind_stock_info`` and ``wind_stock_info`` into ``stock_info``.

    Both source tables are read in full and outer-joined on
    ``ths_code`` == ``wind_code``. Each target column is then produced by the
    merge rule configured below, and the result is upserted. A table created
    by this call is passed to ``alter_table_2_myisam`` and
    ``build_primary_key``.

    :return: the merged DataFrame
    """
    table_name = 'stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    select_all = "select * from {table_name}"
    ifind_sql_str = select_all.format(table_name=ifind_table_name)
    wind_sql_str = select_all.format(table_name=wind_table_name)
    ifind_df = pd.read_sql(ifind_sql_str, engine_md)
    wind_df = pd.read_sql(wind_sql_str, engine_md)
    joined_df = pd.merge(
        ifind_df, wind_df, how='outer',
        left_on='ths_code', right_on='wind_code',
        indicator='indicator_column')

    # Target column -> (SQL type, merge function, keyword arguments for it)
    # NOTE(review): `pre_name` uses the same left key as `en_name`
    # ('ths_corp_name_en_stock'); this looks like a copy-paste -- confirm.
    col_merge_dic = {
        'unique_code': (String(20), prefer_left,
                        {'left_key': 'ths_code', 'right_key': 'wind_code'}),
        'sec_name': (String(20), prefer_left,
                     {'left_key': 'ths_stock_short_name_stock', 'right_key': 'sec_name'}),
        'cn_name': (String(100), get_value, {'key': 'ths_corp_cn_name_stock'}),
        'en_name': (String(100), get_value, {'key': 'ths_corp_name_en_stock'}),
        'delist_date': (Date, prefer_left,
                        {'left_key': 'ths_delist_date_stock', 'right_key': 'delist_date'}),
        'ipo_date': (Date, prefer_left,
                     {'left_key': 'ths_ipo_date_stock', 'right_key': 'ipo_date'}),
        'pre_name': (Text, prefer_left,
                     {'left_key': 'ths_corp_name_en_stock', 'right_key': 'prename'}),
        'established_date': (Date, get_value, {'key': 'ths_established_date_stock'}),
        'exch_city': (String(20), get_value, {'key': 'exch_city'}),
        'exch_cn': (String(20), get_value, {'key': 'ths_listing_exchange_stock'}),
        'exch_eng': (String(20), get_value, {'key': 'exch_eng'}),
        'stock_code': (String(20), prefer_left,
                       {'left_key': 'ths_stock_code_stock', 'right_key': 'trade_code'}),
        'mkt': (String(20), get_value, {'key': 'mkt'}),
    }

    col_merge_rule_dic = {
        column: (merge_func, merge_kwargs)
        for column, (_, merge_func, merge_kwargs) in col_merge_dic.items()
    }
    dtype = {column: sql_type for column, (sql_type, _, _) in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(
        data_df, table_name, engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    table_created = not has_table and engine_md.has_table(table_name)
    if table_created:
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    return data_df
Ejemplo n.º 17
0
def add_data_2_ckdvp(json_indicator,
                     json_param,
                     ths_code_set: set = None,
                     begin_time=None):
    """
    Incrementally save one indicator's daily series into ``ifind_ckdvp_index``.

    The table is a generic store made of five columns (code, key, date, value,
    param); ``value`` is written as VARCHAR(80). It is used to hold the values
    of newly added indicator fields.
    Example of the underlying query:
    THS_DateSerial('600007.SH,600009.SH','ths_pe_ttm_stock','101','Days:Tradedays,Fill:Previous,Interval:D','2018-07-31','2018-07-31')

    :param json_indicator: indicator name; also stored in the ``key`` column
    :param json_param: indicator parameter string; also stored in the ``param`` column
    :param ths_code_set: optional; when given, only codes in this set are fetched
    :param begin_time: optional; when given, each code's start date becomes
        ``min(computed start date, begin_time)``
    :return: True only when the loop over every code ran to completion
        (including the case where there was nothing to fetch); False when the
        loop was left early (e.g. the DEBUG break)
    """
    all_finished = False
    table_name = 'ifind_ckdvp_index'
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume each code from the day after its latest stored row for this
        # (key, param) pair; fall back to the index base period otherwise.
        # NOTE(review): json_indicator / json_param are interpolated straight
        # into the SQL text - only pass trusted values.
        sql_str = """
            select ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                select info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                from 
                    ifind_index_info info 
                left outer join
                    (select ths_code, adddate(max(time),1) trade_date_max_1 from {table_name} 
                        where {table_name}.key='{0}' and param='{1}' group by ths_code
                    ) daily
                on info.ths_code = daily.ths_code
            ) tt
            where date_frm <= if(NULL<end_date, NULL, end_date) 
            order by ths_code""".format(json_indicator,
                                        json_param,
                                        table_name=table_name)
    else:
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围', table_name)
        sql_str = """
            SELECT ths_code, date_frm, 
                if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info 
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code"""

    # Work out, per code, the date range that still has to be downloaded.
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    # Column types used when the table is created / written.
    dtype = {
        'ths_code': String(20),
        'key': String(80),
        'time': Date,
        'value': String(80),
        'param': String(80),
    }
    data_df_list, data_count, tot_data_count = [], 0, 0
    code_count = len(code_date_range_dic)
    try:
        # Loop-local names are used so the ``begin_time`` parameter is not
        # shadowed by the per-code range.
        for num, (ths_code, (date_from, date_to)) in enumerate(
                code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         date_from, date_to)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:D', date_from,
                date_to)
            if data_df is not None and data_df.shape[0] > 0:
                data_df['key'] = json_indicator
                data_df['param'] = json_param
                # The indicator column becomes the generic ``value`` column.
                data_df.rename(columns={json_indicator: 'value'}, inplace=True)
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # Flush the buffer once enough rows have accumulated.
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, dtype)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

            # Debugging aid only.
            if DEBUG and len(data_df_list) > 4:
                break
        else:
            # Reached only when every code was processed without a break.
            all_finished = True
    finally:
        # Always persist whatever is still buffered, even on failure.
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, dtype)
            tot_data_count += data_count

        # Only post-process a table that this call actually created; without
        # the second check the ALTER would fail when no rows were written.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ths_code` `ths_code` VARCHAR(20) NOT NULL ,
                CHANGE COLUMN `time` `time` DATE NOT NULL ,
                CHANGE COLUMN `key` `key` VARCHAR(80) NOT NULL ,
                CHANGE COLUMN `param` `param` VARCHAR(80) NOT NULL ,
                ADD PRIMARY KEY (`ths_code`, `time`, `key`, `param`)""".format(
                table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)

    return all_finished
Ejemplo n.º 18
0
def import_index_daily_ds(chain_param=None,
                          ths_code_set: set = None,
                          begin_time=None):
    """
    Download index history through the date-series interface and store it in
    ``ifind_index_daily_ds``.

    Per the original author's note, this data supplements the History data
    (e.g. adjustment factor, limit-up/down flag, suspension status and reason).

    :param chain_param: only used to hand values along when run in a task.chain
    :param ths_code_set: optional; when given, only codes in this set are fetched
    :param begin_time: optional; when given, each code's start date becomes
        ``min(computed start date, begin_time)``
    :return: None
    """
    table_name = 'ifind_index_daily_ds'
    has_table = engine_md.has_table(table_name)
    indicator_pairs = [(key, val)
                       for key, val, _ in INDICATOR_PARAM_LIST_INDEX_DAILY_DS]
    json_indicator, json_param = unzip_join(indicator_pairs, sep=';')

    # Build the query that yields (code, start date, end date) per index.
    if not has_table:
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info 
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code;"""
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围' % table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                    ifind_index_info info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code""".format(table_name=table_name)

    # Date range that still needs downloading, keyed by code.
    with with_db_session(engine_md) as session:
        rows = session.execute(sql_str).fetchall()
        code_date_range_dic = {}
        for code, range_start, range_end in rows:
            if ths_code_set is not None and code not in ths_code_set:
                continue
            if begin_time is not None:
                range_start = min([range_start, begin_time])
            code_date_range_dic[code] = (range_start, range_end)

    if TRIAL:
        # Trial accounts can only retrieve roughly the last five years.
        date_from_min = date.today() - timedelta(days=(365 * 5))
        trimmed_dic = {}
        for code, (range_start, range_end) in code_date_range_dic.items():
            if range_end is None or date_from_min > range_end:
                continue
            trimmed_dic[code] = (max([range_start, date_from_min]), range_end)
        code_date_range_dic = trimmed_dic

    def _flush(frames):
        """Write the buffered frames to the table and return the insert count."""
        return bunch_insert_on_duplicate_update(
            pd.concat(frames), table_name, engine_md, DTYPE_INDEX_DAILY_DS)

    pending_frames, pending_rows, tot_data_count = [], 0, 0
    code_count = len(code_date_range_dic)
    try:
        for num, (code, date_range) in enumerate(code_date_range_dic.items(),
                                                 start=1):
            range_start, range_end = date_range
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, code,
                         range_start, range_end)
            # The end date is passed to the interface as a string.
            range_end = date_2_str(range_end)
            data_df = invoker.THS_DateSerial(
                code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:D', range_start,
                range_end)
            if data_df is not None and data_df.shape[0] > 0:
                pending_rows += data_df.shape[0]
                pending_frames.append(data_df)

            # Flush once the buffer passes the threshold.
            if pending_rows >= 10000:
                pending_rows = _flush(pending_frames)
                tot_data_count += pending_rows
                pending_frames, pending_rows = [], 0

            # Debugging aid only.
            if DEBUG and len(pending_frames) > 1:
                break
    finally:
        # Persist the remainder even if the loop failed part-way.
        if pending_rows > 0:
            pending_rows = _flush(pending_frames)
            tot_data_count += pending_rows

        # A table created by this run still needs its engine / primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
Ejemplo n.º 19
0
def import_index_daily_his(chain_param=None,
                           ths_code_set: set = None,
                           begin_time=None):
    """
    Download index daily quotes through the history interface and store them
    in ``ifind_index_daily_his``.

    :param chain_param: only used to hand values along when run in a task.chain
    :param ths_code_set: optional; when given, only codes in this set are fetched
    :param begin_time: defaults to None; when given (date or date string), no
        code's start date will be later than this date
    :return: None
    """
    table_name = 'ifind_index_daily_his'
    # A string begin_time must become a date before it is compared with the
    # dates coming back from the database. (The original tested for ``date``
    # here, so strings were never converted.)
    if begin_time is not None and isinstance(begin_time, str):
        begin_time = str_2_date(begin_time)
    # THS_HistoryQuotes('600006.SH,600010.SH',
    # 'preClose,open,high,low,close,avgPrice,changeRatio,volume,amount,turnoverRatio,transactionAmount,totalShares,totalCapital,floatSharesOfAShares,floatSharesOfBShares,floatCapitalOfAShares,floatCapitalOfBShares,pe_ttm,pe,pb,ps,pcf',
    # 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
    # '2018-06-30','2018-07-30')
    json_indicator, _ = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_INDEX_DAILY_HIS],
        sep=';')
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume each code from the day after its latest stored row.
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                    ifind_index_info info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM ifind_index_daily_his GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code;"""
    else:
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围', table_name)
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info 
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code"""

    # Work out, per code, the date range that still has to be downloaded.
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))
        # Trial accounts can only retrieve roughly the last five years.
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_to is not None and date_from_min <= date_to
        }

    data_df_list, data_count, tot_data_count = [], 0, 0
    code_count = len(code_date_range_dic)
    try:
        # Loop-local names are used so the ``begin_time`` parameter is not
        # shadowed by the per-code range.
        for num, (ths_code, (date_from, date_to)) in enumerate(
                code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         date_from, date_to)
            data_df = invoker.THS_HistoryQuotes(
                ths_code, json_indicator,
                'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                date_from, date_to)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # Flush once the buffer passes the threshold. The whole buffer is
            # written; writing only the latest frame would drop every frame
            # collected before it.
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_HIS)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

            # Debugging aid only.
            if DEBUG and len(data_df_list) > 5:
                break
    finally:
        # Persist the remainder even if the loop failed part-way.
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_HIS)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        # A table created by this run still needs its engine / primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
Ejemplo n.º 20
0
def import_stock_hk_info(chain_param=None, ths_code=None, refresh=False):
    """
    Import Hong Kong stock basic information into ``ifind_stock_hk_info``.

    Existing rows for the requested codes are deleted before the freshly
    downloaded rows are written.

    :param chain_param: only used to hand values along when run in a task.chain
    :param ths_code: optional comma-separated code string; when None the code
        list is collected via ``get_stock_hk_code_set``
    :param refresh: when True (and ``ths_code`` is None) the code list is
        collected in yearly steps starting from 1991-02-01 instead of only
        for today
    :return: None
    """
    table_name = 'ifind_stock_hk_info'
    logging.info("更新 %s 开始", table_name)
    if ths_code is None:
        # Collect the codes of the whole HK market.
        if refresh:
            date_fetch = datetime.strptime('1991-02-01',
                                           STR_FORMAT_DATE).date()
        else:
            date_fetch = date.today()

        date_end = date.today()
        stock_hk_code_set = set()
        while date_fetch < date_end:
            stock_hk_code_set_sub = get_stock_hk_code_set(date_fetch)
            if stock_hk_code_set_sub is not None:
                stock_hk_code_set |= stock_hk_code_set_sub
            date_fetch += timedelta(days=365)

        # Always include the snapshot for the end date itself.
        stock_hk_code_set_sub = get_stock_hk_code_set(date_end)
        if stock_hk_code_set_sub is not None:
            stock_hk_code_set |= stock_hk_code_set_sub

        if DEBUG:
            # Debugging aid: keep the request small.
            stock_hk_code_set = list(stock_hk_code_set)[:10]

        ths_code = ','.join(stock_hk_code_set)
    else:
        # The DELETE below needs the individual codes; without this branch
        # ``stock_hk_code_set`` was unbound whenever a caller supplied
        # ``ths_code`` and the table already existed.
        stock_hk_code_set = [
            code.strip() for code in ths_code.split(',') if code.strip()
        ]

    indicator_param_list = [
        ('ths_stock_short_name_hks', '', String(40)),
        ('ths_stock_code_hks', '', String(20)),
        ('ths_isin_code_hks', '', String(40)),
        ('ths_corp_ashare_short_name_hks', '', String(10)),
        ('ths_corp_ashare_code_hks', '', String(60)),
        ('ths_stock_varieties_hks', '', String(40)),
        ('ths_ipo_date_hks', '', Date),
        ('ths_listed_exchange_hks', '', String(60)),
        ('ths_stop_listing_date_hks', '', Date),
        ('ths_corp_cn_name_hks', '', String(120)),
        ('ths_corp_name_en_hks', '', String(120)),
        ('ths_established_date_hks', '', Date),
        ('ths_accounting_date_hks', '', String(20)),
        ('ths_general_manager_hks', '', String(40)),
        ('ths_secretary_hks', '', String(40)),
        ('ths_operating_scope_hks', '', Text),
        ('ths_mo_product_name_hks', '', String(200)),
        ('ths_district_hks', '', String(60)),
        ('ths_reg_address_hks', '', String(200)),
        ('ths_office_address_hks', '', String(200)),
        ('ths_corp_tel_hks', '', String(200)),
        ('ths_corp_fax_hks', '', String(200)),
        ('ths_corp_website_hks', '', String(200)),
        ('ths_auditor_hks', '', String(60)),
        ('ths_legal_counsel_hks', '', String(300)),
        ('ths_hs_industry_hks', '', String(40)),
    ]
    # jsonIndicator='ths_stock_short_name_hks;ths_stock_code_hks;ths_thscode_hks;ths_isin_code_hks;ths_corp_ashare_short_name_hks;ths_corp_ashare_code_hks;ths_stock_varieties_hks;ths_ipo_date_hks;ths_listed_exchange_hks;ths_stop_listing_date_hks;ths_corp_cn_name_hks;ths_corp_name_en_hks;ths_established_date_hks;ths_accounting_date_hks;ths_general_manager_hks;ths_secretary_hks;ths_operating_scope_hks;ths_mo_product_name_hks;ths_district_hks;ths_reg_address_hks;ths_office_address_hks;ths_corp_tel_hks;ths_corp_fax_hks;ths_corp_website_hks;ths_auditor_hks;ths_legal_counsel_hks;ths_hs_industry_hks'
    # jsonparam=';;;;;;;;;;;'
    indicator, param = unzip_join([(key, val)
                                   for key, val, _ in indicator_param_list],
                                  sep=';')
    # NOTE(review): appends '100' to the joined parameter string, which
    # presumably sets the parameter of the last indicator - confirm.
    param += '100'
    data_df = invoker.THS_BasicData(ths_code, indicator, param)
    if data_df is None or data_df.shape[0] == 0:
        logging.info("没有可用的 stock_hk info 可以更新")
        return
    # Drop the stale rows for the requested codes before writing new ones.
    has_table = engine_md.has_table(table_name)
    # An empty code list would render as "IN ()", which is invalid SQL.
    if has_table and stock_hk_code_set:
        with with_db_session(engine_md) as session:
            session.execute(
                "DELETE FROM {table_name} WHERE ths_code IN (".format(
                    table_name=table_name) + ','.join(
                        [':code%d' % n
                         for n in range(len(stock_hk_code_set))]) + ")",
                params={
                    'code%d' % n: val
                    for n, val in enumerate(stock_hk_code_set)
                })
            session.commit()
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name,
                                                  engine_md, dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    # A table created by this run still needs its engine / primary key.
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    # Refresh the code_mapping table from this info table.
    update_from_info_table(table_name)
Ejemplo n.º 21
0
def merge_stock_daily(date_from=None):
    """
    Merge the wind and ifind daily stock data into the ``stock_daily`` table.

    :param date_from: optional lower bound for the rows to merge; when None
        and the target table already exists, it is taken as the day after the
        latest ``trade_date`` stored there
    :return: the merged DataFrame that was written to the table
    """
    table_name = 'stock_daily'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        # Continue from the day after the newest merged row.
        sql_str = "select adddate(max(trade_date),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            newest = session.execute(sql_str).scalar()
            date_from = date_2_str(newest)

    # Source tables. The first ifind frame is read from the *_ds table and
    # the second from the *_his table; that read/merge order is kept as is.
    ifind_left_table = 'ifind_{table_name}_ds'.format(table_name=table_name)
    ifind_right_table = 'ifind_{table_name}_his'.format(table_name=table_name)
    wind_table = 'wind_{table_name}'.format(table_name=table_name)

    if date_from is None:
        # Full load of all three sources.
        sql_list = [
            "select * from {table_name}".format(table_name=ifind_left_table),
            "select * from {table_name}".format(table_name=ifind_right_table),
            "select * from {table_name}".format(table_name=wind_table),
        ]
    else:
        # Incremental load from date_from onwards.
        sql_list = [
            "select * from {table_name} where time >= %s".format(
                table_name=ifind_left_table),
            "select * from {table_name} where time >= %s".format(
                table_name=ifind_right_table),
            "select * from {table_name} where trade_date >= %s".format(
                table_name=wind_table),
        ]
    frames = [
        pd.read_sql(sql, engine_md) if date_from is None else pd.read_sql(
            sql, engine_md, params=[date_from]) for sql in sql_list
    ]
    ifind_left_df, ifind_right_df, wind_df = frames

    # Join the two ifind sources first, then join the result with wind.
    ifind_df = pd.merge(ifind_left_df, ifind_right_df,
                        how='outer', on=['ths_code', 'time'])
    joined_df = pd.merge(ifind_df, wind_df, how='outer',
                         left_on=['ths_code', 'time'],
                         right_on=['wind_code', 'trade_date'],
                         indicator='indicator_column')

    # Target column -> (sql type, merge function, merge function kwargs).
    pk = ('ths_code', 'time')
    col_merge_dic = {
        'unique_code': (String(20), prefer_left,
                        {'left_key': 'ths_code', 'right_key': 'wind_code'}),
        'trade_date': (Date, prefer_left,
                       {'left_key': 'time', 'right_key': 'trade_date'}),
        'open': (DOUBLE, mean_value,
                 {'left_key': 'open_x', 'right_key': 'open_y',
                  'warning_accuracy': 0.01, 'primary_keys': pk}),
        'high': (DOUBLE, mean_value,
                 {'left_key': 'high_x', 'right_key': 'high_y',
                  'warning_accuracy': 0.01, 'primary_keys': pk}),
        'low': (DOUBLE, mean_value,
                {'left_key': 'low_x', 'right_key': 'low_y',
                 'warning_accuracy': 0.01, 'primary_keys': pk}),
        # TODO: for unknown reasons some close values from wind are inaccurate
        'close': (DOUBLE, prefer_left,
                  {'left_key': 'close_x', 'right_key': 'close_y',
                   'warning_accuracy': 0.01, 'primary_keys': pk}),
        'volume': (DOUBLE, mean_value,
                   {'left_key': 'volume_x', 'right_key': 'volume_y',
                    'warning_accuracy': 1, 'primary_keys': pk}),
        'amount': (DOUBLE, mean_value,
                   {'left_key': 'amount', 'right_key': 'amt',
                    'warning_accuracy': 1, 'primary_keys': pk}),
        # Total shares: ifind totalShares follows the change date while wind
        # total_shares follows the announcement date, so wind wins a conflict.
        'total_shares': (DOUBLE, prefer_right,
                         {'left_key': 'totalShares',
                          'right_key': 'total_shares'}),
        'max_up_or_down': (Integer, max_up_or_down,
                           {'ths_key': 'ths_up_and_down_status_stock',
                            'wind_key': 'maxupordown', 'primary_keys': pk}),
        'total_capital': (DOUBLE, get_value, {'key': 'totalCapital'}),
        'float_capital': (DOUBLE, get_value,
                          {'key': 'floatCapitalOfAShares'}),
        'pct_chg': (DOUBLE, mean_value,
                    {'left_key': 'changeRatio', 'right_key': 'pct_chg',
                     'warning_accuracy': 0.01, 'primary_keys': pk}),
        # Corresponds to wind float_a_shares.
        'float_a_shares': (DOUBLE, get_value,
                           {'key': 'floatSharesOfAShares'}),
        # Corresponds to ifind ths_free_float_shares_stock.
        'free_float_shares': (DOUBLE, get_value,
                              {'key': 'free_float_shares'}),
        # PE_TTM: per the original note the two vendors use different
        # reference dates, so wind is preferred when they disagree.
        'pe_ttm': (DOUBLE, prefer_right,
                   {'left_key': 'ths_pe_ttm_stock', 'right_key': 'pe_ttm',
                    'warning_accuracy': 0.01, 'primary_keys': pk}),
        'pe': (DOUBLE, get_value, {'key': 'pe'}),
        'pb': (DOUBLE, get_value, {'key': 'pb'}),
        'ps': (DOUBLE, get_value, {'key': 'ps'}),
        'pcf': (DOUBLE, get_value, {'key': 'pcf'}),
    }

    # Split the spec into the merge rules and the column types.
    col_merge_rule_dic, dtype = {}, {}
    for col_name, (col_type, merge_func, merge_kwargs) in col_merge_dic.items():
        col_merge_rule_dic[col_name] = (merge_func, merge_kwargs)
    for col_name, (col_type, merge_func, merge_kwargs) in col_merge_dic.items():
        dtype[col_name] = col_type

    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name,
                                                  engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    # A table created by this run still needs its engine / primary key.
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    return data_df
Ejemplo n.º 22
0
def import_stock_hk_daily_ds(chain_param=None,
                             ths_code_set: set = None,
                             begin_time=None):
    """
    Download Hong Kong stock history through the date-series interface and
    store it in ``ifind_stock_hk_daily_ds``.

    Per the original author's note, this data supplements the History data
    (e.g. adjustment factor, limit-up/down flag, suspension status and reason).

    :param chain_param: only used to hand values along when run in a task.chain
    :param ths_code_set: optional; when given, only codes in this set are fetched
    :param begin_time: optional; when given, each code's start date becomes
        ``min(computed start date, begin_time)``
    :return: None
    """
    table_name = 'ifind_stock_hk_daily_ds'
    info_table_name = 'ifind_stock_hk_info'
    # jsonIndicator='ths_pre_close_stock;ths_open_price_stock;ths_high_price_stock;ths_low_stock;ths_close_price_stock;ths_chg_ratio_stock;ths_chg_stock;ths_vol_stock;ths_trans_num_stock;ths_amt_stock;ths_turnover_ratio_stock;ths_vaild_turnover_stock;ths_af_stock;ths_up_and_down_status_stock;ths_trading_status_stock;ths_suspen_reason_stock;ths_last_td_date_stock'
    # jsonparam='100;100;100;100;100;;100;100;;;;;;;;;'
    indicator_pairs = [
        (key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_DAILY_DS
    ]
    json_indicator, json_param = unzip_join(indicator_pairs, sep=';')
    has_table = engine_md.has_table(table_name)

    # Build the query that yields (code, start date, end date) per stock.
    if not has_table:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info 
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM 
                    {info_table_name} info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)

    # Date range that still needs downloading, keyed by code.
    with with_db_session(engine_md) as session:
        rows = session.execute(sql_str).fetchall()
        code_date_range_dic = {}
        for code, range_start, range_end in rows:
            if ths_code_set is not None and code not in ths_code_set:
                continue
            if begin_time is not None:
                range_start = min([range_start, begin_time])
            code_date_range_dic[code] = (range_start, range_end)

    if TRIAL:
        # Trial accounts can only retrieve roughly the last five years.
        date_from_min = date.today() - timedelta(days=(365 * 5))
        trimmed_dic = {}
        for code, (range_start, range_end) in code_date_range_dic.items():
            if date_from_min <= range_end:
                trimmed_dic[code] = (max([range_start, date_from_min]),
                                     range_end)
        code_date_range_dic = trimmed_dic

    def _flush(frames):
        """Write the buffered frames to the target table."""
        bunch_insert_on_duplicate_update(pd.concat(frames), table_name,
                                         engine_md, DTYPE_STOCK_HK_DAILY_DS)

    pending_frames, pending_rows, tot_data_count = [], 0, 0
    code_count = len(code_date_range_dic)
    try:
        for num, (code, date_range) in enumerate(code_date_range_dic.items(),
                                                 start=1):
            range_start, range_end = date_range
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, code,
                         range_start, range_end)
            data_df = invoker.THS_DateSerial(
                code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:D', range_start,
                range_end)
            if data_df is not None and data_df.shape[0] > 0:
                pending_rows += data_df.shape[0]
                pending_frames.append(data_df)

            # Debugging aid only: stop after the first non-empty result.
            if DEBUG and len(pending_frames) > 0:
                break

            # Flush once the buffer passes the threshold; the total counts
            # the rows that were fetched.
            if pending_rows >= 2000:
                _flush(pending_frames)
                tot_data_count += pending_rows
                pending_frames, pending_rows = [], 0

    finally:
        # Persist the remainder even if the loop failed part-way.
        if pending_rows > 0:
            _flush(pending_frames)
            tot_data_count += pending_rows

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)

        # A table created by this run still needs its engine / primary key.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
Ejemplo n.º 23
0
def merge_ifind_stock_daily(ths_code_set: set = None, date_from=None):
    """Merge the ds, his and financial tables into ``ifind_stock_daily``.

    Rows of ``ifind_stock_daily_his`` and ``ifind_stock_daily_ds`` are
    outer-joined on (ths_code, time).  For every stock the joined rows are cut
    into segments delimited by consecutive report dates; each segment receives
    the columns of the financial record selected for the report date that
    opens it, plus a ``report_date`` column.  Results are written in batches
    with ``bunch_insert_on_duplicate_update``.

    :param ths_code_set: optional set of codes to process; None means all codes
    :param date_from: value passed to ``get_ifind_daily_df`` for the his / ds
        tables; when None and the target table exists, the day after the
        latest stored ``time`` is used
    :return: None
    """
    table_name = 'ifind_stock_daily'
    logging.info("合成 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(`time`),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    # Load the source tables
    ifind_his_df = get_ifind_daily_df('ifind_stock_daily_his', date_from)
    ifind_ds_df = get_ifind_daily_df('ifind_stock_daily_ds', date_from)
    ifind_report_date_df = get_ifind_report_date_df('ifind_stock_report_date',
                                                    None)
    ifind_fin_df = get_ifind_daily_df('ifind_stock_fin', None)
    ifind_fin_df_g = ifind_fin_df.groupby('ths_code')
    ths_code_set_4_daily = set(ifind_fin_df_g.size().index)
    # Merge ds and his data; the outer join leaves NaN where one side has no row
    ifind_his_ds_df = pd.merge(ifind_his_df,
                               ifind_ds_df,
                               how='outer',
                               on=['ths_code', 'time'])
    ifind_his_ds_df_g = ifind_his_ds_df.groupby('ths_code')
    # Built once so the per-code membership test below does not recompute
    # the group sizes on every iteration
    ths_code_set_4_his_ds = set(ifind_his_ds_df_g.size().index)
    logger.debug("提取数据完成")

    # Map every code to {report_date: financial record}
    report_date_dic_dic = {}
    report_date_g = ifind_report_date_df.groupby(
        ['ths_code', 'ths_regular_report_actual_dd_stock'])
    for (ths_code, report_date), _ in report_date_g:
        if ths_code_set is not None and ths_code not in ths_code_set:
            continue
        if is_nan_or_none(report_date):
            continue
        report_date_dic = report_date_dic_dic.setdefault(ths_code, {})
        if ths_code not in ths_code_set_4_daily:
            logger.error('fin 表中不存在 %s 的財務數據', ths_code)
            continue
        # Check the per-code dict (keyed by report date), not the outer dict
        # (keyed by ths_code)
        if report_date in report_date_dic:
            continue
        ifind_fin_df_temp = ifind_fin_df_g.get_group(ths_code)
        ifind_fin_df_temp = ifind_fin_df_temp[
            ifind_fin_df_temp['time'] <= report_date]
        if ifind_fin_df_temp.shape[0] > 0:
            # NOTE(review): iloc[0] after an ascending sort picks the EARLIEST
            # record at or before report_date - confirm the latest (iloc[-1])
            # was not intended
            report_date_dic[report_date] = ifind_fin_df_temp.sort_values(
                'time').iloc[0]

    # Column types for the target table: union of all source table dtypes
    dtype = {'report_date': Date}
    for dic in (DTYPE_STOCK_DAILY_DS, DTYPE_STOCK_REPORT_DATE,
                DTYPE_STOCK_DAILY_FIN, DTYPE_STOCK_DAILY_HIS):
        dtype.update(dic)

    logger.debug("计算财报日期完成")
    # Assemble the merged rows and write them in batches
    tot_data_count, data_count, data_df_list, for_count = 0, 0, [], len(
        report_date_dic_dic)
    try:
        for num, (ths_code,
                  report_date_dic) in enumerate(report_date_dic_dic.items(),
                                                start=1):
            if ths_code not in ths_code_set_4_his_ds:
                logger.error('his/ds 表中不存在 %s 的日線數據', ths_code)
                continue
            ifind_his_ds_df_cur_ths_code = ifind_his_ds_df_g.get_group(
                ths_code)
            logger.debug('%d/%d) 处理 %s %d 条数据', num, for_count, ths_code,
                         ifind_his_ds_df_cur_ths_code.shape[0])
            report_date_list = sorted(report_date_dic.keys())
            for report_date_from, report_date_to in iter_2_range(
                    report_date_list):
                logger.debug('%d/%d) 处理 %s [%s - %s]', num, for_count,
                             ths_code, date_2_str(report_date_from),
                             date_2_str(report_date_to))
                # Select rows in [report_date_from, report_date_to); a None
                # bound leaves that side open
                time_s = ifind_his_ds_df_cur_ths_code['time']
                if report_date_from is None:
                    is_fit = time_s < report_date_to
                elif report_date_to is None:
                    is_fit = time_s >= report_date_from
                else:
                    is_fit = (time_s < report_date_to) & (
                        time_s >= report_date_from)
                # Copy so the added columns do not touch the grouped frame
                ifind_his_ds_df_segment = ifind_his_ds_df_cur_ths_code[
                    is_fit].copy()
                segment_count = ifind_his_ds_df_segment.shape[0]
                if segment_count == 0:
                    continue
                fin_s = report_date_dic[
                    report_date_from] if report_date_from is not None else None
                for key in DTYPE_STOCK_DAILY_FIN:
                    if key in ('ths_code', 'time'):
                        continue
                    ifind_his_ds_df_segment[key] = fin_s[
                        key] if fin_s is not None and key in fin_s else None
                ifind_his_ds_df_segment['report_date'] = report_date_from
                data_df_list.append(ifind_his_ds_df_segment)
                data_count += segment_count

            # Debug only: stop after a couple of segments
            if DEBUG and len(data_df_list) > 1:
                break

            # Flush to the database once enough rows are pending
            if data_count > 10000:
                data_df = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md, dtype)
                tot_data_count += data_count
                data_count, data_df_list = 0, []

    finally:
        # Flush whatever is still pending, also when the loop was interrupted
        if len(data_df_list) > 0:
            data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, dtype)
            tot_data_count += data_count

        logger.info('%s 新增或更新记录 %d 条', table_name, tot_data_count)
        # The table was created by this run: convert engine and add primary key
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
Ejemplo n.º 24
0
def import_stock_hk_report_date(chain_param=None,
                                ths_code_set: set = None,
                                begin_time=None,
                                interval='Q'):
    """
    Fetch the report-date indicators of HK stocks through ``THS_DateSerial``
    and store them in ``ifind_stock_hk_report_date``.

    :param chain_param: only used to hand values along when run in a task.chain
    :param ths_code_set: optional set of codes to process; None means all codes
    :param begin_time: optional start; when given, each code starts from the
        earlier of this value and the start computed from the database
    :param interval: interval passed to the interface: Q quarter, M month, W week, D day
    :return: None
    """
    table_name = 'ifind_stock_hk_report_date'
    info_table_name = 'ifind_stock_hk_info'
    has_table = engine_md.has_table(table_name)
    # Example of the joined values:
    # jsonIndicator='ths_perf_briefing_fore_dsclsr_date_hks;ths_perf_brief_actual_dd_hks;...'
    # jsonparam=';'
    indicator_pairs = [
        (key, val)
        for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_REPORT_DATE
    ]
    json_indicator, json_param = unzip_join(indicator_pairs, sep=';')
    if not has_table:
        # Target table missing: derive the date range from the info table only
        sql_template = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info 
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code"""
        sql_str = sql_template.format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    else:
        # Resume each code from the day after its latest stored row
        sql_template = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM 
                    {info_table_name} info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code"""
        sql_str = sql_template.format(table_name=table_name,
                                      info_table_name=info_table_name)

    with with_db_session(engine_md) as session:
        # Date range to download for every code
        code_date_range_dic = {}
        for ths_code, date_from, date_to in session.execute(
                sql_str).fetchall():
            if ths_code_set is not None and ths_code not in ths_code_set:
                continue
            if begin_time is not None:
                date_from = min([date_from, begin_time])
            code_date_range_dic[ths_code] = (date_from, date_to)

    if TRIAL:
        # Trial accounts can only fetch the most recent 5 years
        date_from_min = date.today() - timedelta(days=(365 * 5))
        trimmed_range_dic = {}
        for ths_code, (date_from, date_to) in code_date_range_dic.items():
            if date_from_min <= date_to:
                trimmed_range_dic[ths_code] = (max([date_from,
                                                    date_from_min]), date_to)
        code_date_range_dic = trimmed_range_dic

    pending_frames, pending_rows, total_rows = [], 0, 0
    code_count = len(code_date_range_dic)
    try:
        for num, (ths_code, date_range) in enumerate(
                code_date_range_dic.items(), start=1):
            range_begin, range_end = date_range
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         range_begin, range_end)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                "Days:Tradedays,Fill:Previous,Interval:{interval}".format(
                    interval=interval), range_begin, range_end)
            if data_df is not None:
                row_count = data_df.shape[0]
                if row_count > 0:
                    pending_rows += row_count
                    pending_frames.append(data_df)

            # Write a batch once the pending row count reaches the threshold
            if pending_rows >= 10000:
                total_rows += bunch_insert_on_duplicate_update(
                    pd.concat(pending_frames), table_name, engine_md,
                    DTYPE_STOCK_HK_REPORT_DATE)
                pending_frames, pending_rows = [], 0

            # Debug only
            if DEBUG and len(pending_frames) > 1:
                break
    finally:
        # Write the remainder, also when the loop was interrupted
        if pending_rows > 0:
            total_rows += bunch_insert_on_duplicate_update(
                pd.concat(pending_frames), table_name, engine_md,
                DTYPE_STOCK_HK_REPORT_DATE)

        # Table created by this run: convert engine and add the primary key
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, total_rows)
Ejemplo n.º 25
0
def import_coin_daily(chain_param=None, id_set=None, begin_time=None):
    """Import historical coin data into ``cmc_coin_v1_daily`` via the v1 scraper.

    The original note says the v1 interface may expire at the end of
    December 2018.

    :param chain_param: not used in this function body
    :param id_set: optional collection of coin ids to process; None means all
    :param begin_time: optional start; when given, each coin starts from the
        earlier of this value and the start computed from the database.  A
        coin whose computed start is NULL (no target table yet) keeps the
        NULL start, i.e. the scraper is created without ``start_date``
    :return: None
    """
    table_name = "cmc_coin_v1_daily"
    info_table_name = "cmc_coin_v1_info"
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume each coin from the day after its latest stored row
        sql_str = """
           SELECT id, symbol, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.id, symbol, ifnull(trade_date,date('2013-04-28')) date_frm, null delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM 
                   {info_table_name} info 
               LEFT OUTER JOIN
                   (SELECT id, adddate(max(date),1) trade_date FROM {table_name} GROUP BY id) daily
               ON info.id = daily.id
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY id""".format(table_name=table_name,
                                 info_table_name=info_table_name)
    else:
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
        # No target table yet: date_frm is NULL for every coin
        sql_str = """
           SELECT id, symbol, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT id, symbol, null date_frm, null delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM {info_table_name} info 
           ) tt
           ORDER BY id""".format(info_table_name=info_table_name)

    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        # Date range to download for every coin.  date_from may be None (see
        # the no-table query above); min() cannot compare None with a date,
        # so a None start is kept as is.
        stock_date_dic = {
            (coin_id, symbol):
            (date_from if begin_time is None or date_from is None else min(
                [date_from, begin_time]), date_to)
            for coin_id, symbol, date_from, date_to in table.fetchall()
            if id_set is None or coin_id in id_set
        }
    # Column types of the target table
    dtype = {
        'id': String(60),
        'date': Date,
        'open': DOUBLE,
        'high': DOUBLE,
        'low': DOUBLE,
        'close': DOUBLE,
        'volume': DOUBLE,
        'market_cap': DOUBLE,
    }
    col_names = dtype.keys()
    data_df_list = []
    dic_count = len(stock_date_dic)
    data_count = 0
    # Fetch data from the scraper
    logger.info('%d coins will been import into %s', dic_count, table_name)
    try:
        for data_num, ((coin_id, symbol),
                       (date_from,
                        date_to)) in enumerate(stock_date_dic.items(),
                                               start=1):
            logger.debug('%d/%d) %s[%s] [%s - %s]', data_num, dic_count,
                         coin_id, symbol, date_from, date_to)
            date_from_str = None
            try:
                if date_from is None:
                    scraper = CmcScraperV1(symbol, coin_id)
                else:
                    date_from_str = date_2_str(
                        str_2_date(date_from, DATE_FORMAT_STR),
                        DATE_FORMAT_STR_CMC)
                    scraper = CmcScraperV1(symbol,
                                           coin_id,
                                           start_date=date_from_str)
                data_df = scraper.get_dataframe()
            except Exception:
                # Best effort: log the failure and move on to the next coin
                logger.exception("scraper('%s', '%s', start_date='%s')",
                                 symbol, coin_id, date_from_str)
                continue

            if data_df is None or data_df.shape[0] == 0:
                logger.warning('%d/%d) %s has no data during %s %s', data_num,
                               dic_count, coin_id, date_from, date_to)
                continue
            data_df.rename(columns={
                col_name: rename_by_dic(col_name, col_names)
                for col_name in data_df.columns
            },
                           inplace=True)
            data_df.rename(columns={'market cap': 'market_cap'}, inplace=True)
            # String cells in the numeric columns are replaced by 0
            data_df['market_cap'] = data_df['market_cap'].apply(
                lambda x: 0 if isinstance(x, str) else x)
            data_df['volume'] = data_df['volume'].apply(
                lambda x: 0 if isinstance(x, str) else x)
            logger.info('%d/%d) %d data of %s between %s and %s', data_num,
                        dic_count, data_df.shape[0], coin_id,
                        data_df['date'].min(), data_df['date'].max())
            data_df['id'] = coin_id
            data_df_list.append(data_df)
            data_count += data_df.shape[0]
            # Debug only
            if DEBUG and len(data_df_list) > 10:
                break

            # Write a batch once enough rows are pending
            if data_count > 10000:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(data_df_all,
                                                              table_name,
                                                              engine_md,
                                                              dtype=dtype)
                logging.info("%s %d 条信息被更新", table_name, data_count)
                data_df_list, data_count = [], 0

    finally:
        # Write the remainder, also when the loop was interrupted
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df_all,
                                                          table_name,
                                                          engine_md,
                                                          dtype=dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)

        # Table created by this run: convert engine and add the primary key
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST ,
            CHANGE COLUMN `date` `date` DATE NOT NULL AFTER `id`,
            ADD PRIMARY KEY (`id`, `date`)""".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
Ejemplo n.º 26
0
def import_stock_hk_fin_quarterly(chain_param=None,
                                  ths_code_set: set = None,
                                  begin_time=None):
    """
    Fetch HK stock financial indicators through ``THS_DateSerial`` at a
    quarterly interval and store them in ``ifind_stock_hk_fin``.

    :param chain_param: not used in this function body
    :param ths_code_set: optional set of codes to process; None means all codes
    :param begin_time: optional start; when given, each code starts from the
        earlier of this value and the start computed from the database
    :return: None
    """
    table_name = 'ifind_stock_hk_fin'
    info_table_name = 'ifind_stock_hk_info'
    # Example of the joined values:
    # ths_cce_hks;ths_total_liab_hks;ths_ebit_ttm_hks
    # jsonparam='2013,100,OC;2013,100,OC;OC,101'
    indicator_pairs = [
        (key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_FIN
    ]
    json_indicator, json_param = unzip_join(indicator_pairs, sep=';')
    has_table = engine_md.has_table(table_name)
    if not has_table:
        # Target table missing: derive the date range from the info table only
        sql_template = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info 
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code"""
        sql_str = sql_template.format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    else:
        # Resume each code from the day after its latest stored row
        sql_template = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM 
                    {info_table_name} info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code"""
        sql_str = sql_template.format(table_name=table_name,
                                      info_table_name=info_table_name)

    with with_db_session(engine_md) as session:
        # Date range to download for every code
        code_date_range_dic = {}
        for ths_code, date_from, date_to in session.execute(
                sql_str).fetchall():
            if ths_code_set is not None and ths_code not in ths_code_set:
                continue
            if begin_time is not None:
                date_from = min([date_from, begin_time])
            code_date_range_dic[ths_code] = (date_from, date_to)

    if TRIAL:
        # Trial accounts can only fetch the most recent 5 years
        date_from_min = date.today() - timedelta(days=(365 * 5))
        trimmed_range_dic = {}
        for ths_code, (date_from, date_to) in code_date_range_dic.items():
            if date_from_min <= date_to:
                trimmed_range_dic[ths_code] = (max([date_from,
                                                    date_from_min]), date_to)
        code_date_range_dic = trimmed_range_dic

    pending_frames, pending_rows, total_rows = [], 0, 0
    code_count = len(code_date_range_dic)
    try:
        for num, (ths_code, date_range) in enumerate(
                code_date_range_dic.items(), start=1):
            range_begin, range_end = date_range
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         range_begin, range_end)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:Q', range_begin,
                range_end)
            if data_df is not None:
                row_count = data_df.shape[0]
                if row_count > 0:
                    pending_rows += row_count
                    pending_frames.append(data_df)

            # Debug only: stop as soon as one frame has been collected
            if DEBUG and len(pending_frames) > 0:
                break

            # Write a batch once the pending row count reaches the threshold
            if pending_rows >= 2000:
                bunch_insert_on_duplicate_update(pd.concat(pending_frames),
                                                 table_name, engine_md,
                                                 DTYPE_STOCK_HK_FIN)
                total_rows += pending_rows
                pending_frames, pending_rows = [], 0

    finally:
        # Write the remainder, also when the loop was interrupted
        if pending_rows > 0:
            bunch_insert_on_duplicate_update(pd.concat(pending_frames),
                                             table_name, engine_md,
                                             DTYPE_STOCK_HK_FIN)
            total_rows += pending_rows

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, total_rows)

        # Table created by this run: convert engine and add the primary key
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
Ejemplo n.º 27
0
def import_tushare_stock_index_daily(chain_param=None, ts_code_set=None):
    """Import daily index data into ``tushare_stock_index_daily_md``.

    The date range of every index is computed in SQL: it starts the day after
    the latest stored ``trade_date`` (or at ``base_date``) and ends at the
    earlier of ``exp_date`` and the end date, where the end date is yesterday
    before 16:00 database time and today afterwards.

    :param chain_param: not used in this function body
    :param ts_code_set: optional collection of codes to process; None means all
    :return: None
    """
    table_name = 'tushare_stock_index_daily_md'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume each index from the day after its latest stored row
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
            (
            SELECT info.ts_code, ifnull(trade_date, base_date) date_frm, exp_date,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM 
                tushare_stock_index_basic info 
            LEFT OUTER JOIN
                (SELECT ts_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY ts_code) daily
            ON info.ts_code = daily.ts_code
            ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date) 
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
              (
                SELECT info.ts_code, base_date date_frm, exp_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_index_basic info 
              ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date) 
            ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_index_basic 表进行计算日期范围',
                       table_name)

    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        # Date range to download for every index
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(
        code_date_range_dic)
    logger.info('%d stocks will been import into tushare_stock_index_daily_md',
                data_len)
    try:
        for num, (ts_code, (date_from,
                            date_to)) in enumerate(code_date_range_dic.items(),
                                                   start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code,
                         date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            data_df = invoke_index_daily(
                ts_code=ts_code,
                start_date=start_date_str,
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            if data_df is None or len(data_df) == 0:
                logger.warning('%d/%d) %s has no data during %s %s', num,
                               data_len, ts_code, date_from, date_to)
                continue

            # While the last row is still later than date_from, request the
            # remaining earlier range and append it (presumably the interface
            # limits the rows returned per call - TODO confirm)
            while True:
                last_date_in_df = try_2_date(data_df['trade_date'].iloc[-1])
                if last_date_in_df <= date_from:
                    break
                df2 = invoke_index_daily(
                    ts_code=ts_code,
                    start_date=start_date_str,
                    end_date=datetime_2_str(
                        last_date_in_df - timedelta(days=1),
                        STR_FORMAT_DATE_TS))
                if df2 is None or len(df2) == 0:
                    break
                # Stop when the new page does not reach further back,
                # otherwise the same request would repeat forever
                if try_2_date(df2['trade_date'].iloc[-1]) >= last_date_in_df:
                    break
                data_df = pd.concat([data_df, df2])
                logger.info('%d/%d) %d data of %s between %s and %s', num,
                            data_len, data_df.shape[0], ts_code, date_from,
                            date_to)

            # Collect the frame for the next batch
            data_count += data_df.shape[0]
            data_df_list.append(data_df)

            # Debug only
            if DEBUG and len(data_df_list) > 5:
                break

            # Write a batch once the pending row count reaches the threshold
            if data_count >= 500:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md,
                    DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD)
                all_data_count += data_count
                data_df_list, data_count = [], 0

    finally:
        # Write the remainder, also when the loop was interrupted
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md,
                DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD)
            all_data_count += data_count

        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
        # Must run even when the last batch was already flushed in the loop,
        # otherwise a freshly created table never gets its primary key
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
Ejemplo n.º 28
0
def import_stock_hk_fin_by_report_date_weekly(chain_param=None,
                                              ths_code_set: set = None,
                                              begin_time=None,
                                              refresh=False):
    """
    Import HK stock financial data into ``ifind_stock_hk_fin`` through the
    THS_DateSerial interface, sampled weekly around report publication dates.

    For every report date found in ``ifind_stock_hk_report_date`` the window
    [report date - 14 days, report date] is queried. Windows of the same code
    that overlap are merged before the remote calls are made.

    :param chain_param: only used to hand values along when run in a task.chain
    :param ths_code_set: optional set of codes to restrict the import to;
        None means every code
    :param begin_time: optional date; when given, the start of a code's
        incremental range becomes ``min(start, begin_time)``
    :param refresh: when True the per-code incremental range is ignored and
        every report window is fetched again
    :return: None
    """
    table_name = 'ifind_stock_hk_fin'
    info_table_name = 'ifind_stock_hk_info'
    # Example indicators: ths_cce_hks;ths_total_liab_hks;ths_ebit_ttm_hks
    # Example jsonparam:  '2013,100,OC;2013,100,OC;OC,101'
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_FIN],
        sep=';')
    has_table = engine_md.has_table(table_name)
    ths_code_report_date_str = """select distinct ths_code, subdate(report_date, 14), report_date from
        (
        select ths_code, ths_perf_brief_actual_dd_hks report_date from ifind_stock_hk_report_date
        union
        select ths_code, ths_perf_report_actual_dd_hks report_date from ifind_stock_hk_report_date
        ) tt
        where report_date is not null
        order by ths_code, report_date"""

    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM 
                    {info_table_name} info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info 
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)

    with with_db_session(engine_md) as session:
        # Collect the (report date - 14 days, report date) windows per code.
        table = session.execute(ths_code_report_date_str)
        raw_ranges_by_code = {}
        for ths_code, date_from, date_to in table.fetchall():
            if ths_code_set is None or ths_code in ths_code_set:
                raw_ranges_by_code.setdefault(ths_code, []).append(
                    (date_from, date_to))

        # Incremental date range still needed for each code.
        if not refresh:
            table = session.execute(sql_str)
            code_date_range_dic = {
                ths_code: (date_from if begin_time is None else min(
                    [date_from, begin_time]), date_to)
                for ths_code, date_from, date_to in table.fetchall()
                if ths_code_set is None or ths_code in ths_code_set
            }

            if TRIAL:
                # Trial accounts can only fetch the most recent 5 years.
                date_from_min = date.today() - timedelta(days=(365 * 5))
                code_date_range_dic = {
                    ths_code: (max([date_from, date_from_min]), date_to)
                    for ths_code, (date_from,
                                   date_to) in code_date_range_dic.items()
                    if date_from_min <= date_to
                }
        else:
            # A full refresh ignores the incremental range restriction.
            code_date_range_dic = {}

    # Merge overlapping windows, keeping only those that intersect the
    # incremental range of the code (when one is known).
    # NOTE(review): a code missing from code_date_range_dic gets no limit at
    # all, so every one of its report windows is fetched again even when
    # refresh is False -- confirm this is intended.
    report_ranges_by_code = {}
    for ths_code, date_range_list in raw_ranges_by_code.items():
        merged_ranges = _merge_report_date_ranges(
            date_range_list, code_date_range_dic.get(ths_code))
        if len(merged_ranges) > 0:
            report_ranges_by_code[ths_code] = merged_ranges

    data_df_list, data_count, tot_data_count = [], 0, 0
    code_count = len(report_ranges_by_code)
    try:
        for num, (ths_code, date_range_list) in enumerate(
                report_ranges_by_code.items(), start=1):
            # Dedicated loop names so the ``begin_time`` parameter is not
            # overwritten while iterating.
            for range_begin, range_end in date_range_list:
                logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                             range_begin, range_end)
                data_df = invoker.THS_DateSerial(
                    ths_code, json_indicator, json_param,
                    'Days:Tradedays,Fill:Previous,Interval:W', range_begin,
                    range_end)
                if data_df is not None and data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)

                # Debug only; this leaves just the inner loop.
                if DEBUG and len(data_df_list) > 0:
                    break

                # Flush once the buffered row count reaches the threshold.
                if data_count >= 2000:
                    tot_data_df = pd.concat(data_df_list)
                    bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                                     engine_md,
                                                     DTYPE_STOCK_HK_FIN)
                    tot_data_count += data_count
                    data_df_list, data_count = [], 0

    finally:
        # Persist whatever is still buffered, also after an exception.
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                             engine_md, DTYPE_STOCK_HK_FIN)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)

        # Table was created by this run: finish its one-time setup.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])


def _merge_report_date_ranges(date_range_list, code_date_range=None):
    """
    Merge overlapping ``(date_from, date_to)`` ranges into a new list.

    :param date_range_list: ranges ordered by start date
    :param code_date_range: optional ``(date_from, date_to)`` limit; ranges
        lying completely outside of it are dropped. None disables the filter
    :return: list of merged ``(date_from, date_to)`` tuples
    """
    merged_ranges, date_from_last, date_to_last = [], None, None
    for date_from, date_to in date_range_list:
        if code_date_range is not None and (
                date_to < code_date_range[0]
                or code_date_range[1] < date_from):
            continue

        if date_from_last is None:
            # First accepted range opens the current merged range.
            date_from_last = date_from
        elif not date_from < date_to_last:
            # No overlap with the current merged range: store it, open a new one.
            merged_ranges.append((date_from_last, date_to_last))
            date_from_last = date_from

        # The current merged range always ends at the latest range seen.
        date_to_last = date_to

    if date_from_last is not None and date_to_last is not None:
        merged_ranges.append((date_from_last, date_to_last))

    return merged_ranges
Ejemplo n.º 29
0
def import_future_daily_his(chain_param=None,
                            ths_code_set: set = None,
                            begin_time=None):
    """
    Update daily quote data of future contracts in ``ifind_future_daily``.

    The date range to fetch for every contract is computed in SQL from
    ``ifind_future_info`` (and from the rows already stored in
    ``ifind_future_daily`` when that table exists). Data is requested through
    ``invoker.THS_HistoryQuotes`` and written in batches.

    :param chain_param: only used to hand values along when run in a task.chain
    :param ths_code_set: optional set of codes to restrict the import to;
        None means every code
    :param begin_time: optional date; when given, the start of a code's range
        becomes ``min(start, begin_time)``
    :return: None
    """
    table_name = 'ifind_future_daily'
    info_table_name = 'ifind_future_info'
    logger.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # NOTE(review): preClose is the only indicator typed String(20) while all
    # others are DOUBLE -- confirm this is intended.
    indicator_param_list = [
        ('preClose', String(20)),
        ('open', DOUBLE),
        ('high', DOUBLE),
        ('low', DOUBLE),
        ('close', DOUBLE),
        ('volume', DOUBLE),
        ('amount', DOUBLE),
        ('avgPrice', DOUBLE),
        ('change', DOUBLE),
        ('changeRatio', DOUBLE),
        ('preSettlement', DOUBLE),
        ('settlement', DOUBLE),
        ('change_settlement', DOUBLE),
        ('chg_settlement', DOUBLE),
        ('openInterest', DOUBLE),
        ('positionChange', DOUBLE),
        ('amplitude', DOUBLE),
    ]
    json_indicator = ','.join([key for key, _ in indicator_param_list])
    if has_table:
        # From 16:00 on today's data is requested, before that only up to
        # yesterday.
        # Contracts whose date_to lies about a year or more in the past
        # (360 days) are skipped: some very old data can no longer be
        # completed, e.g. AL0202.SHF AL9902.SHF CU0202.SHF
        # TODO: replacing the ths_ksjyr_future field by
        #  ths_contract_listed_date_future would be more reasonable
        sql_str = """SELECT ths_code, date_frm, 
                if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
            SELECT fi.ths_code, ifnull(trade_date_max_1, ths_start_trade_date_future) date_frm, 
                ths_last_td_date_future lasttrade_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi LEFT OUTER JOIN
                (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) wfd
                ON fi.ths_code = wfd.ths_code
            ) tt
            WHERE date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date) 
            AND subdate(curdate(), 360) < if(lasttrade_date<end_date, lasttrade_date, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM 
            (
            SELECT fi.ths_code, ths_start_trade_date_future date_frm, 
                ths_last_td_date_future lasttrade_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi
            ) tt""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)

    with with_db_session(engine_md) as session:
        # Date range that has to be fetched for every contract.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        # Trial accounts can only fetch the most recent 5 years.
        date_from_min = date.today() - timedelta(days=(365 * 5))
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to
        }

    # Column types handed to the insert helper.
    dtype = {key: val for key, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    dtype['time'] = Date

    data_df_list, data_count, tot_data_count = [], 0, 0
    code_count = len(code_date_range_dic)
    try:
        logger.info("%d future instrument will be handled", code_count)
        # Dedicated loop names so the ``begin_time`` parameter is not
        # overwritten while iterating.
        for num, (ths_code, (range_begin, range_end)) in enumerate(
                code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         range_begin, range_end)
            data_df = invoker.THS_HistoryQuotes(
                ths_code, json_indicator,
                'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                range_begin, range_end)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # Flush once the buffered row count reaches the threshold.
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                inserted_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, dtype)
                tot_data_count += inserted_count
                # Log before the counters are reset; logging afterwards would
                # always report 0 rows.
                logging.info("%s 新增数据 %d 条", table_name, inserted_count)
                data_df_list, data_count = [], 0

            # Debug only
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # Persist whatever is still buffered, also after an exception.
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            inserted_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, dtype)
            tot_data_count += inserted_count
            logging.info("%s 新增数据 %d 条", table_name, inserted_count)

        # Table was created by this run: finish its one-time setup.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def import_stock_quertarly(chain_param=None, wind_code_set=None):
    """
    Import quarterly financial indicators of stocks into ``wind_fina_indicator``.

    The date range to fetch for every stock is computed in SQL from
    ``wind_stock_info`` (and from the rows already stored in
    ``wind_fina_indicator`` when that table exists). Data is requested through
    ``invoker.wsd`` with ``Period=Q`` and written in one batch at the end.

    :param chain_param: in celery, carries the result of the previous task
        over to the next one
    :param wind_code_set: optional collection of codes to restrict the import
        to; None means every code
    :return: None
    """
    logging.info("更新 wind_fina_indicator 开始")
    table_name = 'wind_fina_indicator'
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """
           SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM 
                   wind_stock_info info 
               LEFT OUTER JOIN
                   (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) quertarly
               ON info.wind_code = quertarly.wind_code
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY wind_code;""".format(table_name=table_name)
    else:
        logger.warning('wind_fina_indicator 不存在,仅使用 wind_stock_info 表进行计算日期范围')
        sql_str = """
           SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.wind_code, ipo_date date_frm, delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM wind_stock_info info 
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY wind_code"""
    with with_db_session(engine_md) as session:
        # Date range that has to be fetched for every stock:
        # wind_code -> (date_from, date_to)
        table = session.execute(sql_str)
        stock_date_dic = {
            wind_code: (date_from, date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    # Indicators to request together with their column types.
    # NOTE(review): stm_issuingdate is typed DOUBLE although the name suggests
    # a date -- confirm against the data returned by wsd.
    param_list = [
        ('roic_ttm', DOUBLE),
        ('yoyprofit', DOUBLE),
        ('ebit', DOUBLE),
        ('ebit2', DOUBLE),
        ('ebit2_ttm', DOUBLE),
        ('surpluscapitalps', DOUBLE),
        ('undistributedps', DOUBLE),
        ('stm_issuingdate', DOUBLE),
    ]

    wind_indictor_str = ",".join(key for key, _ in param_list)
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date

    data_df_list = []
    logger.info('%d stocks will been import into %s',
                len(stock_date_dic), table_name)

    try:
        for stock_num, (wind_code,
                        (date_from,
                         date_to)) in enumerate(stock_date_dic.items()):
            # Example call:
            # w.wsd("002122.SZ", "roic_ttm,yoyprofit,ebit,ebit2,ebit2_ttm,surpluscapitalps,undistributedps,stm_issuingdate", "2012-12-31", "2017-12-06", "unit=1;rptType=1;Period=Q")
            data_df = invoker.wsd(wind_code, wind_indictor_str, date_from,
                                  date_to, "unit=1;rptType=1;Period=Q")
            if data_df is None:
                logger.warning('%d) %s has no data during %s %s', stock_num,
                               wind_code, date_from, date_to)
                continue
            data_df.rename(
                columns={c: str(c).lower()
                         for c in data_df.columns},
                inplace=True)
            # Drop rows that hold nothing but None; only the first 10 index
            # entries are inspected.
            for trade_date in list(data_df.index[:10]):
                is_all_none = data_df.loc[trade_date].apply(
                    lambda x: x is None).all()
                if is_all_none:
                    logger.warning("%s %s 数据全部为空", wind_code, trade_date)
                    data_df.drop(trade_date, inplace=True)
            logger.info('%d) %d data of %s between %s and %s', stock_num,
                        data_df.shape[0], wind_code, date_from, date_to)
            # Turn the index into a regular trade_date column and tag the
            # rows with their code.
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df_list.append(data_df)
            # Debug only
            if DEBUG and len(data_df_list) > 10:
                break
    finally:
        # Persist everything collected so far, also after an exception.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            bunch_insert_on_duplicate_update(data_df_all,
                                             table_name,
                                             engine_md,
                                             dtype=dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name,
                         data_df_all.shape[0])
            # Table was created by this run: finish its one-time setup.
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])