Example #1
0
def import_trade_date():
    """
    Incrementally import trade dates into the wind_trade_date table; by
    default dates up to ~310 days into the future are fetched per exchange.
    2018-01-17: HK trade dates were added; to limit the impact on existing
    code a separate wind_trade_date_hk table was created, to be merged later.
    :return: None
    """
    table_name = 'wind_trade_date'
    has_table = engine_md.has_table(table_name)
    # Latest imported trade date per exchange code; empty dict -> full history.
    exch_code_trade_date_dic = {}
    if has_table:
        with with_db_session(engine_md) as session:
            try:
                table = session.execute('SELECT exch_code,max(trade_date) FROM {table_name} GROUP BY exch_code'.format(
                    table_name=table_name))
                exch_code_trade_date_dic = {exch_code: trade_date for exch_code, trade_date in table.fetchall()}
            except Exception:
                # Fix: a failed query previously left exch_code_trade_date_dic
                # undefined, raising NameError further down.
                logger.exception("交易日获取异常")

    exchange_code_dict = {
        "HKEX": "香港",
        "NYSE": "纽约",
        "SZSE": "深圳",
        "TWSE": "台湾",
        "NASDAQ": "纳斯达克",
        "AMEX": "美国证券交易所",
        "TSE": "东京",
        "LSE": "伦敦",
        "SGX": "新加坡"
    }
    for exchange_code in exchange_code_dict:
        # Resume one day after the last stored date, else load from 1980.
        if exchange_code in exch_code_trade_date_dic:
            trade_date_max = exch_code_trade_date_dic[exchange_code]
            trade_date_start = (trade_date_max + timedelta(days=1)).strftime(STR_FORMAT_DATE)
        else:
            trade_date_start = '1980-01-01'
        end_date_str = (date.today() + timedelta(days=310)).strftime(STR_FORMAT_DATE)
        if exchange_code is None or exchange_code == "":
            trade_date_list = invoker.tdays(trade_date_start, end_date_str)
        else:
            trade_date_list = invoker.tdays(trade_date_start, end_date_str, "TradingCalendar=%s" % exchange_code)
        if trade_date_list is None:
            logger.warning("没有查询到交易日期")
            # Fix: len(None) below raised TypeError; skip this exchange.
            continue
        date_count = len(trade_date_list)
        if date_count > 0:
            logger.info("%d 条交易日数据将被导入", date_count)
            trade_date_df = pd.DataFrame({'trade_date': trade_date_list})
            trade_date_df['exch_code'] = exchange_code
            bunch_insert_on_duplicate_update(trade_date_df, table_name, engine_md,
                                             dtype={'trade_date': Date, 'exch_code': String(20)},
                                             myisam_if_create_table=True,
                                             primary_keys=['trade_date', 'exch_code'], schema=config.DB_SCHEMA_MD)
            logger.info('%s %d 条交易日数据导入 %s 完成', exchange_code, date_count, table_name)
def import_tushare_stock_info(chain_param=None, refresh=False):
    """Fetch the full market list of stock codes and names into tushare_stock_info.

    :param chain_param: task-chain passthrough parameter, unused here
    :param refresh: unused; kept for interface compatibility
    """
    table_name = 'tushare_stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    wind_indicator_param_list = [
        ('ts_code', String(20)),
        # Fix: 'symbol' is a stock code such as '000001'; storing it as DOUBLE
        # would drop leading zeros.  Use String(20), consistent with the other
        # import_tushare_stock_info definition in this module.
        ('symbol', String(20)),
        ('list_date', Date),
        ('delist_date', Date),
        ('name', String(30)),
        ('fullname', String(100)),
        ('enname', String(200)),
        ('exchange_id', String(30)),
        ('list_status', String(10)),
        ('is_hs', String(10)),
    ]
    # Column dtype mapping for bunch_insert_on_duplicate_update.
    dtype = {key: val for key, val in wind_indicator_param_list}

    # Fetch the full stock list from tushare.
    stock_info_all_df = pro.stock_basic(
        exchange_id='',
        fields=
        'ts_code,symbol,name,fullname,enname,exchange_id,curr_type,list_date,list_status,delist_date,is_hs'
    )

    logging.info('%s stock data will be import', stock_info_all_df.shape[0])
    data_count = bunch_insert_on_duplicate_update(stock_info_all_df,
                                                  table_name,
                                                  engine_md,
                                                  dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    # On first creation: switch the engine to MyISAM and build the primary key.
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
Example #3
0
    def insert_db(df: pd.DataFrame):
        nonlocal has_table
        insert_data_count = bunch_insert_on_duplicate_update(df,
                                                             table_name,
                                                             engine_md,
                                                             dtype=dtype)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
            has_table = True

        return insert_data_count
Example #4
0
def import_tushare_suspend(chain_param=None):
    """Import daily stock-suspension data up to the most recent workday - 1.

    If past 16:00, today's data is fetched as well.
    :param chain_param: task-chain passthrough parameter, unused here
    :return: None
    """
    table_name = 'tushare_stock_daily_suspend'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # Be careful to reference the correct source table below; mixing up table
    # names would corrupt the incremental date-range logic.
    if has_table:
        # Only trade dates after the latest suspend_date already stored.
        sql_str = """
                  select cal_date            
                  FROM
                   (
                    select * from tushare_trade_date trddate 
                    where( cal_date>(SELECT max(suspend_date) FROM {table_name} ))
                  )tt
                  where (is_open=1 
                         and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                         and exchange='SSE') """.format(table_name=table_name)
    else:
        # Full range of open trade dates up to the cutoff.
        sql_str = """
                  SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
               AND exchange='SSE') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Trade dates still missing from the target table.
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())

    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = pro.suspend(ts_code='', suspend_date=trade_date, resume_date='', fields='')
            if len(data_df) > 0:
                data_count = bunch_insert(data_df, table_name=table_name, dtype=DTYPE_TUSHARE_SUSPEND,
                                          primary_keys=['ts_code', 'suspend_date'])
                logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新",
                             num, trade_date_list_len, trade_date, table_name, data_count)
            else:
                # Fix: the date (not the list length) is what this message reports.
                logging.info("%s 当日无停牌股票", trade_date)

    except Exception:
        # Fix: the bare except also swallowed SystemExit/KeyboardInterrupt.
        logger.exception('更新 %s 表异常', table_name)
Example #5
0
def import_tushare_daily_basic(chain_param=None):
    """Import daily-basic indicator data up to the most recent workday - 1.

    If past 16:00, today's data is fetched as well.
    :param chain_param: task-chain passthrough parameter, unused here
    :return: None
    """
    table_name = 'tushare_stock_daily_basic'
    primary_keys = ["ts_code", "trade_date"]
    logging.info("更新 %s 开始", table_name)
    check_sqlite_db_primary_keys(table_name, primary_keys)
    has_table = engine_md.has_table(table_name)
    # Be careful to reference the correct source table below
    # (this table is tushare_daily_basic); a wrong reference corrupts the
    # incremental date-range logic.
    if has_table:
        # Only trade dates after the latest trade_date already stored.
        sql_str = """
               select cal_date            
               FROM
                (
                 select * from tushare_trade_date trddate 
                 where( cal_date>(SELECT max(trade_date) FROM {table_name} ))
               )tt
               where (is_open=1 
                      and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                      and exchange='SSE') """.format(table_name=table_name)
    else:
        # Full range of open trade dates up to the cutoff.
        sql_str = """
               SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
            AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
            AND exchange='SSE') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Trade dates still missing from the target table.
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())

    try:
        for_count = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_daily_basic(ts_code='', trade_date=trade_date)
            if data_df is not None and data_df.shape[0] > 0:
                data_count = bunch_insert(
                    data_df, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_DAILY_BASIC,
                    primary_keys=primary_keys)

                logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新", num, for_count, trade_date, table_name, data_count)
            else:
                logging.info("%d/%d) %s 无数据信息可被更新",  num, for_count, trade_date)
    except Exception:
        # Fix: the bare except also swallowed SystemExit/KeyboardInterrupt.
        logger.exception("更新 %s 表异常", table_name)
def import_trade_date(chain_param=None):
    """
    Incrementally import trade-calendar data into the trade-date table;
    dates up to ~310 days into the future are fetched by default.
    2018-01-17: HK trade dates were added; to limit the impact on existing
    code a separate HK table was created, to be merged later.
    :param chain_param: task-chain passthrough parameter, unused here
    :return: None
    """
    table_name = TABLE_NAME
    # Latest imported cal_date per exchange; empty dict -> full history load.
    exch_code_trade_date_dic = {}
    has_table = engine_md.has_table(table_name)
    if has_table:
        with with_db_session(engine_md) as session:
            try:
                table = session.execute('SELECT exchange,max(cal_date) FROM {table_name} GROUP BY exchange'.format(
                    table_name=table_name
                ))
                exch_code_trade_date_dic = {exch_code: trade_date for exch_code, trade_date in table.fetchall()}
            except Exception:
                logger.exception("交易日获取异常")

    exchange_code_dict = {
        "HKEX": "香港联合交易所",
        "SZSE": "深圳证券交易所",
        "SSE": "上海证券交易所",
    }
    for exchange_code in exchange_code_dict:
        # Resume from the day after the latest stored date, else full history.
        if exchange_code in exch_code_trade_date_dic:
            trade_date_max = exch_code_trade_date_dic[exchange_code]
            start_date_str = (trade_date_max + timedelta(days=1)).strftime(STR_FORMAT_DATE_TS)
        else:
            start_date_str = '19900101'

        end_date_str = (date.today() + timedelta(days=310)).strftime(STR_FORMAT_DATE_TS)
        # NOTE(review): exchange_code is never passed to trade_cal
        # (exchange_id is always ''), so every loop iteration appears to fetch
        # the same calendar — confirm whether an `exchange` argument should be
        # supplied here.
        trade_date_df = pro.trade_cal(exchange_id='', start_date=start_date_str, end_date=end_date_str)
        if trade_date_df is None or trade_date_df.shape[0] == 0:
            logger.warning('%s[%s] [%s - %s] 没有查询到交易日期',
                           exchange_code_dict[exchange_code], exchange_code, start_date_str, end_date_str)
            continue
        date_count = trade_date_df.shape[0]
        logger.info("%s[%s] %d 条交易日数据将被导入 %s",
                    exchange_code_dict[exchange_code], exchange_code, date_count, table_name)
        date_count = bunch_insert_on_duplicate_update(trade_date_df, table_name, engine_md, dtype={
            'exchange': String(10),
            'cal_date': Date,
            'is_open': DOUBLE,
        }, myisam_if_create_table=True, primary_keys=['exchange', 'cal_date'], schema=config.DB_SCHEMA_MD)
        logger.info('%s[%s] %d 条交易日数据导入 %s 完成',
                    exchange_code_dict[exchange_code], exchange_code, date_count, table_name)
Example #7
0
def import_tushare_fund_portfolio(chain_param=None):
    """Fetch portfolio data for every fund in tushare_fund_basic and upsert
    it into tushare_fund_portfolio in batches.

    :param chain_param: task-chain passthrough parameter, unused here
    :return: None
    """
    table_name = 'tushare_fund_portfolio'
    logging.info("更新 %s 开始", table_name)
    sql_str = """SELECT ts_code FROM md_integration.tushare_fund_basic"""

    with with_db_session(engine_md) as session:
        # Fund codes to iterate over.
        table = session.execute(sql_str)
        code_list = list(row[0] for row in table.fetchall())
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(
        code_list)
    try:
        for num, ts_code in enumerate(code_list, start=1):
            data_df = invoke_fund_portfolio(ts_code=ts_code)
            # Fix: data_df may be None; len(None) in the log call below raised
            # TypeError before the None-check was ever reached.
            df_len = 0 if data_df is None else data_df.shape[0]
            logging.info(" 提取%s 基金行情信息%d 条,整体完成%d/%d", ts_code, df_len,
                         num, data_len)
            # Buffer the rows.
            if df_len > 0:
                data_count += df_len
                data_df_list.append(data_df)
            # Flush once the buffer exceeds the threshold.
            if data_count >= 20000:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(data_df_all, table_name,
                                                 engine_md,
                                                 DTYPE_TUSHARE_FUND_PORTFOLIO)
                logging.info(" %s 表 %d 条净值信息被更新", table_name, data_count)
                all_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # Insert whatever is left in the buffer.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md,
                DTYPE_TUSHARE_FUND_PORTFOLIO)
            all_data_count = all_data_count + data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
Example #8
0
def merge_tick_2_bar():
    """Aggregate tick data into 1-minute bars.

    Creates the pytdx_stock_min1 table on first run, then merges rows from
    pytdx_tick_temp grouped by (ts_code, date, time): high/low/vol/amount come
    from the group aggregate; open/close come from the ticks with the minimum
    and maximum `index` within each minute (joined back via index_min/index_max).
    :return: None
    """
    table_name = 'pytdx_stock_min1'
    tick_table_name = 'pytdx_tick_temp'
    has_table = engine_md.has_table(table_name)
    # DDL for the minute-bar table; PK (ts_code, date, time) makes the
    # "insert ignore" below idempotent for minutes already merged.
    sql_create_table = f"""CREATE TABLE `{table_name}` (
          `ts_code` varchar(12) NOT NULL,
          `trade_date` datetime DEFAULT NULL,
          `date` date NOT NULL,
          `time` time  NOT NULL,
          `open` DOUBLE DEFAULT NULL,
          `high` DOUBLE DEFAULT NULL,
          `low` DOUBLE DEFAULT NULL,
          `close` DOUBLE DEFAULT NULL,
          `vol` int(11) DEFAULT NULL,
          `amount` DOUBLE DEFAULT NULL,
          PRIMARY KEY (`ts_code`,`date`,`time`)
        ) ENGINE=MyISAM DEFAULT CHARSET=utf8"""
    # Aggregate subquery "bar" computes per-minute stats plus the first/last
    # tick index; the two self-joins fetch the open and close prices.
    sql_str = f"""insert ignore {table_name}
        select bar.ts_code, bar.trade_date, bar.date, bar.time, 
            tick_open.price open, bar.high, bar.low, tick_close.price close, bar.vol, bar.amount
        from 
        (
            select ts_code, trade_date, time, date, min(`index`) index_min, max(`index`) index_max, 
                max(price) high, min(price) low, sum(vol) vol, sum(price*vol) amount 
            from {tick_table_name} group by ts_code, date, time
        ) bar
        left join {tick_table_name} tick_open
            on bar.ts_code = tick_open.ts_code
            and bar.date = tick_open.date
            and bar.index_min = tick_open.index
        left join {tick_table_name} tick_close
            on bar.ts_code = tick_close.ts_code
            and bar.date = tick_close.date
            and bar.index_max = tick_close.index"""
    with with_db_session(engine_md) as session:
        # Create the target table on first run.
        if not has_table:
            session.execute(sql_create_table)
            logger.info('创建表 %s ', table_name)
        # Merge tick data into 1-minute bars.
        rslt = session.execute(sql_str)
        insert_count = rslt.rowcount
        logger.info('合并分钟数据 %d 条', insert_count)
    def save(self):
        """Fetch fundamentals for each pending trade date and upsert them
        into ``self.table_name``.

        Pending trade dates come from jq_trade_date: dates after max(day)
        already stored when the table exists, otherwise all dates from
        ``self.BASE_DATE`` on.
        """
        self.logger.info("更新 %s 开始", self.table_name)
        has_table = engine_md.has_table(self.table_name)
        # Determine which trade dates still need to be fetched.
        if has_table:
            with with_db_session(engine_md) as session:
                sql_str = f"""select trade_date from jq_trade_date where trade_date>(select max(day) from {self.table_name}) order by trade_date"""
                # NOTE(review): this sql has no :trade_date placeholder; the
                # params dict looks like a leftover from the else-branch —
                # confirm it is harmless with the SQLAlchemy version in use.
                table = session.execute(sql_str,
                                        params={"trade_date": self.BASE_DATE})
                trade_date_list = [_[0] for _ in table.fetchall()]
            # Re-runs the query just to log the first pending trade date.
            date_start = execute_scalar(sql_str, engine_md)
            self.logger.info('查询 %s 数据使用起始日期 %s', self.table_name,
                             date_2_str(date_start))
        else:
            with with_db_session(engine_md) as session:
                sql_str = "select trade_date from jq_trade_date where trade_date>=:trade_date order by trade_date"
                table = session.execute(sql_str,
                                        params={"trade_date": self.BASE_DATE})
                trade_date_list = [_[0] for _ in table.fetchall()]
            self.logger.warning('%s 不存在,使用基础日期 %s', self.table_name,
                                self.BASE_DATE)

        trade_date_list.sort()
        data_count_tot, for_count = 0, len(trade_date_list)
        try:
            # Fix: start=1 so the progress log reads 1/n..n/n (was 0-based).
            for num, trade_date in enumerate(trade_date_list, start=1):
                q = query(self.statement)
                df = get_fundamentals(q, date=date_2_str(trade_date))
                if df is None or df.shape[0] == 0:
                    continue
                # Fix: use the instance logger consistently (was the module logger).
                self.logger.debug('%d/%d) %s 包含 %d 条数据', num, for_count,
                                  trade_date, df.shape[0])
                data_count = bunch_insert_on_duplicate_update(
                    df,
                    self.table_name,
                    engine_md,
                    dtype=self.dtype,
                    myisam_if_create_table=True,
                    primary_keys=['id'],
                    schema=config.DB_SCHEMA_MD)
                data_count_tot += data_count
        except Exception:
            # Fix: the bare except also swallowed SystemExit/KeyboardInterrupt.
            self.logger.exception("更新 %s 异常", self.table_name)
        finally:
            self.logger.info("更新 %s 结束 %d 条信息被更新", self.table_name, data_count_tot)
def merge_tushare_daily(date_from=None):
    """Merge adj_factor, daily_basic and daily md data into tushare_stock_daily.

    :param date_from: start date; when None and the target table exists,
        resume from the day after the latest stored trade_date
    :return: the merged DataFrame
    """
    table_name = 'tushare_stock_daily'
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(trade_date),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    tushare_adj_factor_df = get_tushare_daily_df(
        'tushare_stock_daily_adj_factor', date_from)
    tushare_daily_basic_df = get_tushare_daily_df('tushare_stock_daily_basic',
                                                  date_from)
    tushare_stock_daily_md_df = get_tushare_daily_df('tushare_stock_daily_md',
                                                     date_from)
    # Outer-join the three source tables on (ts_code, trade_date).
    tushare_daily_two_form_df = pd.merge(tushare_adj_factor_df,
                                         tushare_daily_basic_df,
                                         how='outer',
                                         on=['ts_code', 'trade_date'])
    tushare_daily_df = pd.merge(tushare_daily_two_form_df,
                                tushare_stock_daily_md_df,
                                how='outer',
                                on=['ts_code', 'trade_date'])
    # Combined column dtypes; later dicts override earlier on key collision
    # (dict.update replaces the former manual nested loop).
    dtype = {}
    for dic in [
            DTYPE_TUSHARE_SUSPEND, DTYPE_TUSHARE_STOCK_DAILY_BASIC,
            DTYPE_TUSHARE_STOCK_DAILY_MD
    ]:
        dtype.update(dic)
    # Reconcile the close_x/close_y columns produced by the merges into one.
    tushare_daily_df["close"] = tushare_daily_df.apply(get_merge, axis=1)
    tushare_daily_df.drop(['close_x', 'close_y'], axis=1, inplace=True)
    data_count = bunch_insert_on_duplicate_update(tushare_daily_df, table_name,
                                                  engine_md, dtype)
    logging.info('%s  更新 %d', table_name, data_count)
    return tushare_daily_df
Example #11
0
def import_tushare_stock_info(chain_param=None, refresh=False):
    """Fetch the full market list of stock codes and basic info into tushare_stock_info.

    :param chain_param: task-chain passthrough parameter, unused here
    :param refresh: unused; kept for interface compatibility
    """
    table_name = 'tushare_stock_info'
    logging.info("更新 %s 开始", table_name)
    tushare_indicator_param_list = [
        ('ts_code', String(20)),
        ('symbol', String(20)),
        ('name', String(40)),
        ('area', String(100)),
        ('industry', String(200)),
        ('fullname', String(100)),
        ('enname', String(200)),
        ('market', String(100)),
        ('exchange', String(20)),
        ('curr_type', String(20)),
        ('list_status', String(20)),
        ('list_date', Date),
        ('delist_date', Date),
        ('is_hs', String(20)),
    ]
    # Column dtype mapping for bunch_insert_on_duplicate_update.
    dtype = {key: val for key, val in tushare_indicator_param_list}

    # Fetch the full stock list from tushare.
    # Fix: the fields string previously listed 'is_hs' three times.
    stock_info_all_df = pro.stock_basic(
        exchange='',
        fields=
        'ts_code,symbol,name,area,industry,fullname,enname,market,exchange,curr_type,list_status,list_date,delist_date,is_hs'
    )

    logging.info('%s stock data will be import', stock_info_all_df.shape[0])
    data_count = bunch_insert_on_duplicate_update(stock_info_all_df,
                                                  table_name,
                                                  engine_md,
                                                  dtype=dtype,
                                                  myisam_if_create_table=True,
                                                  primary_keys=['ts_code'],
                                                  schema=config.DB_SCHEMA_MD)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
Example #12
0
def import_tushare_fund_basic(chain_param=None):
    """Fetch fund basic data for both markets into tushare_fund_basic.

    'E' = exchange-traded funds, 'O' = OTC funds.
    :param chain_param: task-chain passthrough parameter, unused here
    :return: None
    """
    table_name = 'tushare_fund_basic'
    logging.info("更新 %s 开始", table_name)

    for market in ('E', 'O'):
        data_df = pro.fund_basic(market=market)
        if len(data_df) > 0:
            # NOTE(review): DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR looks like the
            # wrong dtype mapping for a fund-basic table — confirm.
            data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR)
            logging.info(" %s 表 %d 条基金信息被更新", table_name,  data_count)
        else:
            logging.info("无数据信息可被更新")
Example #13
0
def get_latest_constituent_df(sector_code):
    """Return (latest trade date, constituent DataFrame) for *sector_code*.

    Returns (None, None) when the table does not exist or holds no rows
    for the given sector code.
    :return: tuple of (date, DataFrame) or (None, None)
    """
    table_name = "wind_sectorconstituent"
    if not engine_md.has_table(table_name):
        return None, None

    # Most recent trade date recorded for this sector.
    sql_str = 'SELECT max(trade_date) FROM wind_sectorconstituent WHERE sector_code = :sector_code'
    with with_db_session(engine_md) as session:
        date_latest = session.execute(sql_str, params={"sector_code": sector_code}).scalar()
    if date_latest is None:
        return None, None

    # All constituents on that date.
    sql_str = "SELECT * FROM wind_sectorconstituent WHERE sector_code = %s AND trade_date = %s"
    sec_df = pd.read_sql(sql_str, engine_md, params=[sector_code, date_latest])
    return date_latest, sec_df
Example #14
0
def import_tushare_stock_company(chain_param=None):
    """Fetch listed-company profile data into tushare_stock_company.

    setup_date values shorter than 8 characters (i.e. not YYYYMMDD) are
    replaced with NaN before insertion.
    :param chain_param: task-chain passthrough parameter, unused here
    :return: None
    """
    table_name = 'tushare_stock_company'
    logging.info("更新 %s 开始", table_name)

    data_df = pro.stock_company()
    # Fix: the original per-row chained assignment (data_df['setup_date'][i] = ...)
    # triggers pandas SettingWithCopy behavior and is O(n) Python-level work;
    # a vectorized .loc update is both safe and faster.
    invalid = data_df['setup_date'].notnull() & (data_df['setup_date'].str.len() < 8)
    data_df.loc[invalid, 'setup_date'] = np.nan
    if len(data_df) > 0:
        data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_COMPANY)
        logging.info(" %s 表 %d 条上市公司基本信息被更新", table_name,  data_count)
    else:
        logging.info("无数据信息可被更新")
def get_latest_constituent_df(index_code):
    """Return (latest trade date, constituent DataFrame) for *index_code*.

    Returns (None, None) when the table does not exist or holds no rows
    for the given index code.
    :return: tuple of (date, DataFrame) or (None, None)
    """
    table_name = "wind_index_constituent"
    if not engine_md.has_table(table_name):
        return None, None

    # Most recent trade date recorded for this index.
    with with_db_session(engine_md) as session:
        date_latest = session.execute(
            "select max(trade_date) from wind_index_constituent where index_code = :index_code",
            params={"index_code": index_code}).scalar()
    if date_latest is None:
        return None, None

    # All constituents on that date.
    sql_str = "select * from wind_index_constituent where index_code = %s and trade_date = %s"
    sec_df = pd.read_sql(sql_str, engine_md, params=[index_code, date_latest])
    return date_latest, sec_df
def import_wind_fund_nav_to_nav():
    """
    Copy wind_fund_nav_daily rows into the fund_nav table, inserting only
    (wind_code, nav_date) pairs that are not already present.
    :return: None
    """
    table_name = 'fund_nav'
    has_table = engine_md.has_table(table_name)
    logger.info("开始将 wind_fund_nav_daily 数据导入到 fund_nav_tmp_df")

    # DDL for fund_nav; primary key (wind_code, nav_date).
    create_sql_str = """CREATE TABLE {table_name} (
          `wind_code` varchar(20) NOT NULL COMMENT '基金代码',
          `nav_date` date NOT NULL COMMENT '净值日期',
          `nav` double DEFAULT NULL COMMENT '净值',
          `nav_acc` double DEFAULT NULL COMMENT '累计净值',
          PRIMARY KEY (`wind_code`,`nav_date`)
        ) ENGINE=MyISAM DEFAULT CHARSET=utf8""".format(table_name=table_name)

    # TODO: the GROUP BY clause needs to be revisited
    # De-duplicates daily rows via max(), then left-joins against fund_nav and
    # inserts only rows whose (wind_code, nav_date) pair is not yet present.
    sql_str = """INSERT INTO fund_nav(wind_code, nav_date, nav, nav_acc)
    SELECT wfn.wind_code, wfn.nav_date, wfn.nav, wfn.nav_acc
    FROM
    (
        SELECT wind_code, nav_date, max(nav) nav, max(nav_acc) nav_acc
        FROM wind_fund_nav_daily
        GROUP BY wind_code, nav_date
    ) AS wfn
    LEFT OUTER JOIN
        fund_nav fn
    ON 
        wfn.wind_code = fn.wind_code AND 
        wfn.nav_date = fn.nav_date
    WHERE fn.nav_date IS NULL"""
    with with_db_session(engine_md) as session:
        # Create the target table on first run.
        if not has_table:
            session.execute(create_sql_str)
            logger.info("创建 %s 表", table_name)

        session.execute(sql_str)
        logger.info('导入结束')
Example #17
0
def fresh_tushare_stock_fund_holdings(year, quarter):
    """Refresh fund shareholding data for the given year/quarter into
    tushare_stock_fund_holdings.

    :param year: calendar year, e.g. 2018
    :param quarter: quarter number 1-4
    :return: None
    """
    table_name = 'tushare_stock_fund_holdings'
    logging.info("更新 %s 表%s年%s季度基金持股信息开始", table_name, year, quarter)
    tushare_fund_holdings_indicator_param_list = [
        ('ts_code', String(20)),
        ('sec_name', String(20)),
        ('end_date', Date),
        ('nums', DOUBLE),
        ('nlast', DOUBLE),
        ('count', DOUBLE),
        ('clast', DOUBLE),
        ('amount', DOUBLE),
        ('ratio', DOUBLE),
    ]
    tushare_fund_holdings_dtype = {
        key: val
        for key, val in tushare_fund_holdings_indicator_param_list
    }
    data_df = invoke_fund_holdings(year, quarter)
    # Normalize 6-digit codes into ts_code form: 6xxxxx -> .SH, others -> .SZ.
    data_df.code = [
        code + '.SH' if code[0] == '6' else code + '.SZ'
        for code in data_df.code
    ]
    data_df = data_df.rename(columns={
        'code': 'ts_code',
        'name': 'sec_name',
        'date': 'end_date'
    })
    # Fix: the insert count was never captured before, so the final log
    # always reported 0 records.
    all_data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md,
                                                      tushare_fund_holdings_dtype)
    logging.info("%s年%s季度 %s 更新 %d 条基金持股信息", year, quarter, table_name,
                 all_data_count)
Example #18
0
def import_jq_stock_income(chain_param=None, ts_code_set=None):
    """Incrementally import income-statement data into TABLE_NAME by pub_date.

    Resumes from max(pub_date) in the existing table, otherwise from BASE_DATE.
    :param chain_param: task-chain passthrough parameter, unused here
    :param ts_code_set: unused; kept for interface compatibility
    :return: None
    """
    logger.info("更新 %s 开始", TABLE_NAME)
    has_table = engine_md.has_table(TABLE_NAME)
    # Determine the resume point.
    if has_table:
        sql_str = f"""select max(pub_date) from {TABLE_NAME}"""
        date_start = execute_scalar(sql_str, engine_md)
        logger.info('查询 %s 数据使用起始日期 %s', TABLE_NAME, date_2_str(date_start))
    else:
        date_start = BASE_DATE
        logger.warning('%s 不存在,使用基础日期 %s', TABLE_NAME, date_2_str(date_start))

    # Nothing to do when already up to date.
    date_end = datetime.date.today()
    if date_start >= date_end:
        logger.info('%s 已经是最新数据,无需进一步获取', date_start)
        return
    data_count_tot = 0
    try:
        for num, (df, date_from, date_to) in enumerate(
                get_df_iter(date_start, date_end, LOOP_STEP)):
            # logger.debug('%d) [%s ~ %s] 包含 %d 条数据', num, date_from, date_to, df.shape[0])
            data_count = bunch_insert_on_duplicate_update(
                df,
                TABLE_NAME,
                engine_md,
                dtype=DTYPE,
                myisam_if_create_table=True,
                primary_keys=['id'],
                schema=config.DB_SCHEMA_MD)
            data_count_tot += data_count
    finally:
        # Fix: use the module logger consistently (was logging.info).
        logger.info("更新 %s 结束 %d 条信息被更新", TABLE_NAME, data_count_tot)
Example #19
0
    def save(self):
        """Incrementally import data into ``self.table_name`` by pub_date.

        Resumes from max(pub_date) in the existing table, otherwise from
        ``self.BASE_DATE``; iterates date windows of ``self.loop_step``.
        """
        self.logger.info("更新 %s 开始", self.table_name)
        has_table = engine_md.has_table(self.table_name)
        # Determine the resume point.
        if has_table:
            sql_str = f"""select max(pub_date) from {self.table_name}"""
            date_start = execute_scalar(sql_str, engine_md)
            self.logger.info('查询 %s 数据使用起始日期 %s', self.table_name,
                             date_2_str(date_start))
        else:
            date_start = self.BASE_DATE
            self.logger.warning('%s 不存在,使用基础日期 %s', self.table_name,
                                date_2_str(date_start))

        # Nothing to do when already up to date.
        date_end = datetime.date.today()
        if date_start >= date_end:
            self.logger.info('%s %s 已经是最新数据,无需进一步获取', self.table_name,
                             date_start)
            return
        data_count_tot = 0
        try:
            for num, (df, date_from, date_to) in enumerate(
                    self.get_df_iter(date_start, date_end, self.loop_step)):
                # logger.debug('%d) [%s ~ %s] 包含 %d 条数据', num, date_from, date_to, df.shape[0])
                if df is not None and df.shape[0] > 0:
                    data_count = bunch_insert_on_duplicate_update(
                        df,
                        self.table_name,
                        engine_md,
                        dtype=self.dtype,
                        myisam_if_create_table=True,
                        primary_keys=['id'],
                        schema=config.DB_SCHEMA_MD)
                    data_count_tot += data_count
        finally:
            # Fix: use the instance logger consistently (was logging.info).
            self.logger.info("更新 %s 结束 %d 条信息被更新", self.table_name, data_count_tot)
def import_tushare_fund_company(chain_param=None):
    """Fetch public fund-company data into tushare_fund_company.

    setup_date / end_date values whose length is not 8 characters (YYYYMMDD)
    are replaced with NaN before insertion.
    :param chain_param: task-chain passthrough parameter, unused here
    :return: None
    """
    table_name = 'tushare_fund_company'
    logging.info("更新 %s 开始", table_name)
    data_df = pro.fund_company()
    # Fix: the original per-row chained assignment (data_df.setup_date[i] = ...)
    # triggers pandas SettingWithCopy behavior; use vectorized .loc updates.
    for col in ('setup_date', 'end_date'):
        invalid = data_df[col].notnull() & (data_df[col].str.len() != 8)
        data_df.loc[invalid, col] = np.nan
    if len(data_df) > 0:
        # NOTE(review): DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR looks like the
        # wrong dtype mapping for a fund-company table — confirm.
        data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR)
        logging.info(" %s 表 %d 条公募基金公司信息被更新", table_name,  data_count)
    else:
        logging.info("无数据信息可被更新")
Example #21
0
def import_tushare_stock_index_daily(chain_param=None, ts_code_set=None):
    """
    Import daily index bars into ``tushare_stock_index_daily_md`` up to the most
    recently completed trading day (yesterday before 16:00, today afterwards).

    :param chain_param: only used to pass parameters along a task.chain call
    :param ts_code_set: optional set of ts_code values; when given, only those
        indices are imported
    :return:
    """
    table_name = 'tushare_stock_index_daily_md'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # Build the per-index date range to fetch: incremental (from the day after
    # the last stored trade_date) when the table exists, otherwise from each
    # index's base_date.
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
            (
            SELECT info.ts_code, ifnull(trade_date, base_date) date_frm, exp_date,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM 
                tushare_stock_index_basic info 
            LEFT OUTER JOIN
                (SELECT ts_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY ts_code) daily
            ON info.ts_code = daily.ts_code
            ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date) 
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
              (
                SELECT info.ts_code, base_date date_frm, exp_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_index_basic info 
              ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date) 
            ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Resolve the (date_from, date_to) fetch window for every index
        table = session.execute(sql_str)
        # NOTE(review): begin_time is always None here, so the min() branch
        # below is inert — kept for parity with sibling importers
        begin_time = None
        code_date_range_dic = {
            ts_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(
        code_date_range_dic)
    logger.info('%d stocks will been import into tushare_stock_index_daily_md',
                data_len)
    # Accumulate per-index frames in data_df_list and bulk-insert in batches
    try:
        for num, (ts_code, (date_from,
                            date_to)) in enumerate(code_date_range_dic.items(),
                                                   start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code,
                         date_from, date_to)
            data_df = invoke_index_daily(
                ts_code=ts_code,
                start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            if len(data_df) > 0:
                # The API returns a capped number of rows per call; keep paging
                # backwards from the earliest returned date until the whole
                # [date_from, date_to] window is covered.
                while try_2_date(data_df['trade_date'].iloc[-1]) > date_from:
                    last_date_in_df_last, last_date_in_df_cur = try_2_date(
                        data_df['trade_date'].iloc[-1]), None
                    df2 = invoke_index_daily(
                        ts_code=ts_code,
                        start_date=datetime_2_str(date_from,
                                                  STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(
                            try_2_date(data_df['trade_date'].iloc[-1]) -
                            timedelta(days=1), STR_FORMAT_DATE_TS))
                    # fix: was `if len(df2 > 0):` — len() of the boolean frame
                    # coincidentally equals len(df2); the intent is a row check
                    if len(df2) > 0:
                        last_date_in_df_cur = try_2_date(
                            df2['trade_date'].iloc[-1])
                        if last_date_in_df_cur < last_date_in_df_last:
                            data_df = pd.concat([data_df, df2])
                        elif last_date_in_df_cur == last_date_in_df_last:
                            # no progress: the API returned nothing earlier
                            break
                        # NOTE(review): data_df was just produced by pd.concat,
                        # so this None branch appears unreachable — kept as-is
                        if data_df is None:
                            logger.warning(
                                '%d/%d) %s has no data during %s %s', num,
                                data_len, ts_code, date_from, date_to)
                            continue
                        logger.info('%d/%d) %d data of %s between %s and %s',
                                    num, data_len, data_df.shape[0], ts_code,
                                    date_from, date_to)
                    else:
                        break

            # Stash the frame for batched insertion
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # Debugging aid only: stop early after a few frames
            if DEBUG and len(data_df_list) > 5:
                break

            # Flush to the database once the batch threshold is reached
            if data_count >= 500:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md,
                    DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD)
                all_data_count += data_count
                data_df_list, data_count = [], 0

    finally:
        # Flush whatever is left, even if the loop raised
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md,
                DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
        # fix: table conversion was nested under the final-flush branch and was
        # skipped whenever the last batch happened to be empty (all data flushed
        # at the 500-row threshold); run it whenever the table was newly created
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_tushare_block_trade(chain_param=None):
    """
    Import block-trade (大宗交易) records day by day into ``tushare_block_trade``,
    up to the most recently completed trading day (yesterday before 16:00,
    today afterwards).

    :param chain_param: only used to pass parameters along a task.chain call
    :return:
    """
    table_name = 'tushare_block_trade'
    logging.info("更新 %s 开始", table_name)
    # Column name -> SQL type mapping for the target table
    param_list = [
        ('trade_date', Date),
        ('ts_code', String(20)),
        ('price', DOUBLE),
        ('vol', DOUBLE),
        ('amount', DOUBLE),
        ('buyer', String(100)),
        ('seller', String(100)),
    ]

    has_table = engine_md.has_table(table_name)
    # When the table already exists, fetch only trading days after its newest
    # trade_date; otherwise start from the day block trading began.
    if has_table:
        sql_str = f"""select cal_date            
                 FROM
                  (
                   select * from tushare_trade_date trddate 
                   where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
                 )tt
                 where (is_open=1 
                        and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                        and exchange='SSE') """
    else:
        # 2003-08-02: the block-trade mechanism came into effect
        sql_str = """SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
                  AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                  AND exchange='SSE'  AND cal_date>='2003-08-02') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # List of trading dates still to be imported
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())
    dtype = dict(param_list)

    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_block_trade(trade_date=trade_date)
            if len(data_df) > 0:
                # This table deliberately has no primary key: genuinely
                # duplicated records exist (e.g. 002325.SZ 2014-11-17 had two
                # identical block trades), so bunch_insert is used instead of
                # an upsert.
                data_count = bunch_insert(
                    data_df, table_name=table_name, dtype=dtype)
                logging.info("%d/%d) %s更新 %s 结束 %d 条信息被更新",
                             num, trade_date_list_len, trade_date, table_name, data_count)
            else:
                logging.info("%d/%d) %s 无数据信息可被更新", num, trade_date_list_len, trade_date)
    except Exception:  # fix: bare `except:` also swallowed SystemExit/KeyboardInterrupt
        logger.exception('更新 %s 表异常', table_name)
Example #23
0
def merge_latest(chain_param=None, ):
    """
    Rebuild ``cmc_coin_merged_latest`` by combining the historical rows of
    ``cmc_coin_v1_daily`` with the most recent quotes from
    ``cmc_coin_pro_latest``.

    :param chain_param: only used to pass parameters along a task.chain call
    :return:
    """
    table_name = 'cmc_coin_merged_latest'
    logger.info("开始合并数据到 %s 表", table_name)
    has_table = engine_md.has_table(table_name)
    ddl_sql = """CREATE TABLE {table_name} (
      `id` VARCHAR(60) NOT NULL,
      `date` DATE NOT NULL,
      `datetime` DATETIME NULL,
      `name` VARCHAR(60) NULL,
      `symbol` VARCHAR(20) NULL,
      `close` DOUBLE NULL,
      `volume` DOUBLE NULL,
      `market_cap` DOUBLE NULL,
      PRIMARY KEY (`id`, `date`))
    ENGINE = MyISAM""".format(table_name=table_name)
    with with_db_session(engine_md) as session:
        # First run: create the merged table before filling it
        if not has_table:
            session.execute(ddl_sql)
            logger.info("创建 %s 表", table_name)
        # The merge is a full rebuild: wipe everything, then re-insert
        session.execute(
            'truncate table {table_name}'.format(table_name=table_name))
        # Step 1: copy the full daily history, joining in coin metadata
        history_sql = """INSERT INTO `{table_name}` 
            (`id`, `date`, `datetime`, `name`, `symbol`, `close`, `volume`, `market_cap`) 
            select daily.id, `date`, `date`, `name`, `symbol`, `close`, `volume`, `market_cap` 
            from cmc_coin_v1_daily daily
            left join cmc_coin_v1_info info
            on daily.id = info.id""".format(table_name=table_name)
        session.execute(history_sql)
        session.commit()
        # Step 2: layer the newest quote per (name, symbol) on top, updating
        # any row that collides with a historical one
        latest_sql = """INSERT INTO `{table_name}` 
            (`id`, `date`, `datetime`, `name`, `symbol`, `close`, `volume`, `market_cap`) 
            select info.id, date(latest.last_updated), latest.last_updated, 
                latest.name, latest.symbol, price, volume_24h, market_cap
            from cmc_coin_pro_latest latest
            left join
            (
                select latest.name, latest.symbol, max(latest.last_updated) last_updated
                from cmc_coin_pro_latest latest
                group by latest.name, latest.symbol
            ) g
            on latest.name = g.name
            and latest.symbol = g.symbol
            and latest.last_updated = g.last_updated
            left outer join cmc_coin_v1_info info
            on latest.name = info.name
            and latest.symbol = info.symbol
            on duplicate key update
                `datetime`=values(`datetime`), 
                `name`=values(`name`), 
                `symbol`=values(`symbol`), 
                `close`=values(`close`), 
                `volume`=values(`volume`), 
                `market_cap`=values(`market_cap`)""".format(
            table_name=table_name)
        session.execute(latest_sql)
        session.commit()
        # Report how many rows the rebuilt table now holds
        merged_count = session.execute(
            "select count(*) from {table_name}".format(
                table_name=table_name)).scalar()
        logger.info("%d 条记录插入到 %s", merged_count, table_name)
Example #24
0
def import_coin_latest(chain_param=None, ):
    """Fetch the latest coin quotes from the CoinMarketCap pro API and upsert
    them into ``cmc_coin_pro_latest``.

    :param chain_param: only used to pass parameters along a task.chain call
    """
    table_name = 'cmc_coin_pro_latest'
    has_table = engine_md.has_table(table_name)
    # SQL column types for the upsert
    dtype = {
        'id': Integer,
        'name': String(60),
        'slug': String(60),
        'symbol': String(20),
        'date_added': DATETIME,
        'last_updated': DATETIME,
        'market_cap': DOUBLE,
        'circulating_supply': DOUBLE,
        'max_supply': DOUBLE,
        'num_market_pairs': DOUBLE,
        'percent_change_1h': DOUBLE,
        'percent_change_24h': DOUBLE,
        'percent_change_7d': DOUBLE,
        'price': DOUBLE,
        'total_supply': DOUBLE,
        'volume_24h': DOUBLE,
        'cmc_rank': DOUBLE,
    }

    header = {
        'Content-Type': 'application/json',
        'X-CMC_PRO_API_KEY': config.CMC_PRO_API_KEY
    }
    params = {
        'limit': 5000,
        'start': 1
    }
    url = "https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest"
    rsp = requests.get(url=url, params=params, headers=header)
    if rsp.status_code != 200:
        logger.error('获取数据异常[%d] %s', rsp.status_code, rsp.content)
        return
    data_list = rsp.json()['data']

    # Flatten each record: lift the USD quote sub-dict up to the top level,
    # keeping every other field as-is (insertion order preserved)
    data_dic_list = []
    for record in data_list:
        flattened = {}
        for field, value in record.items():
            if field == 'quote':
                flattened.update(value['USD'])
            else:
                flattened[field] = value
        data_dic_list.append(flattened)

    data_df = pd.DataFrame(data_dic_list)
    # Parse the ISO timestamp columns into datetime objects
    for col in ('date_added', 'last_updated'):
        data_df[col] = data_df[col].apply(
            lambda x: str_2_datetime(x, DATETIME_FORMAT_STR))
    data_count = bunch_insert_on_duplicate_update(data_df,
                                                  table_name,
                                                  engine_md,
                                                  dtype=dtype)
    logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
    # On first creation, convert to MyISAM and install the composite key
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        create_pk_str = """ALTER TABLE {table_name}
        CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST ,
        CHANGE COLUMN `last_updated` `last_updated` DATETIME NOT NULL AFTER `id`,
        ADD PRIMARY KEY (`id`, `last_updated`)""".format(table_name=table_name)
        execute_sql(engine_md, create_pk_str)
Example #25
0
def import_coin_daily(chain_param=None, id_set=None, begin_time=None):
    """Backfill daily history into ``cmc_coin_v1_daily`` via the CMC v1 scraper
    (the v1 interface was expected to expire around the end of 2018).

    :param chain_param: only used to pass parameters along a task.chain call
    :param id_set: optional set of coin ids; when given, only those coins are imported
    :param begin_time: optional start bound; when set, the earlier of it and the
        per-coin date_from is used as the fetch start
    """
    table_name = "cmc_coin_v1_daily"
    info_table_name = "cmc_coin_v1_info"
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Incremental range when the daily table exists (start the day after the
    # last stored date, defaulting to 2013-04-28); otherwise a null date_frm
    # makes the scraper fetch each coin's full history.
    if has_table:
        sql_str = """
           SELECT id, symbol, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.id, symbol, ifnull(trade_date,date('2013-04-28')) date_frm, null delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM 
                   {info_table_name} info 
               LEFT OUTER JOIN
                   (SELECT id, adddate(max(date),1) trade_date FROM {table_name} GROUP BY id) daily
               ON info.id = daily.id
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY id""".format(table_name=table_name,
                                 info_table_name=info_table_name)
    else:
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
        sql_str = """
           SELECT id, symbol, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT id, symbol, null date_frm, null delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM {info_table_name} info 
           ) tt
           ORDER BY id""".format(info_table_name=info_table_name)

    with with_db_session(engine_md) as session:
        # Resolve the (date_from, date_to) fetch window for every coin
        table = session.execute(sql_str)
        # Keyed by (coin_id, symbol); begin_time (when given) can pull the
        # start earlier than the incremental date_from
        stock_date_dic = {
            (coin_id, symbol):
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for coin_id, symbol, date_from, date_to in table.fetchall()
            if id_set is None or coin_id in id_set
        }
    # SQL column types for the bulk upsert
    dtype = {
        'id': String(60),
        'date': Date,
        'open': DOUBLE,
        'high': DOUBLE,
        'low': DOUBLE,
        'close': DOUBLE,
        'volume': DOUBLE,
        'market_cap': DOUBLE,
    }
    col_names = dtype.keys()
    data_df_list = []
    dic_count = len(stock_date_dic)
    data_count = 0
    # Scrape each coin's history and batch the frames for insertion
    logger.info('%d coins will been import into %s', dic_count, table_name)
    try:
        for data_num, ((coin_id, symbol),
                       (date_from,
                        date_to)) in enumerate(stock_date_dic.items(),
                                               start=1):
            logger.debug('%d/%d) %s[%s] [%s - %s]', data_num, dic_count,
                         coin_id, symbol, date_from, date_to)
            date_from_str = None
            try:
                # No start bound -> let the scraper fetch the full history
                if date_from is None:
                    scraper = CmcScraperV1(symbol, coin_id)
                else:
                    date_from_str = date_2_str(
                        str_2_date(date_from, DATE_FORMAT_STR),
                        DATE_FORMAT_STR_CMC)
                    scraper = CmcScraperV1(symbol,
                                           coin_id,
                                           start_date=date_from_str)
                data_df = scraper.get_dataframe()
            except Exception as exp:
                # Best-effort: log the failing coin and carry on with the rest
                logger.exception("scraper('%s', '%s', start_date='%s')",
                                 symbol, coin_id, date_from_str)
                continue

            if data_df is None or data_df.shape[0] == 0:
                logger.warning('%d/%d) %s has no data during %s %s', data_num,
                               dic_count, coin_id, date_from, date_to)
                continue
            # Normalize scraped column names to the dtype keys
            data_df.rename(columns={
                col_name: rename_by_dic(col_name, col_names)
                for col_name in data_df.columns
            },
                           inplace=True)
            data_df.rename(columns={'market cap': 'market_cap'}, inplace=True)
            # The scraper may return placeholder strings (e.g. '-') in numeric
            # columns; coerce them to 0
            data_df['market_cap'] = data_df['market_cap'].apply(
                lambda x: 0 if isinstance(x, str) else x)
            data_df['volume'] = data_df['volume'].apply(
                lambda x: 0 if isinstance(x, str) else x)
            logger.info('%d/%d) %d data of %s between %s and %s', data_num,
                        dic_count, data_df.shape[0], coin_id,
                        data_df['date'].min(), data_df['date'].max())
            data_df['id'] = coin_id
            data_df_list.append(data_df)
            data_count += data_df.shape[0]
            # Debugging aid only: stop early after a few frames
            if DEBUG and len(data_df_list) > 10:
                break

            # Flush to the database once the batch threshold is reached
            if data_count > 10000:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(data_df_all,
                                                              table_name,
                                                              engine_md,
                                                              dtype=dtype)
                logging.info("%s %d 条信息被更新", table_name, data_count)
                data_df_list, data_count = [], 0

    finally:
        # Flush whatever is left, even if the loop raised
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df_all,
                                                          table_name,
                                                          engine_md,
                                                          dtype=dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)

        # On first creation, convert to MyISAM and install the composite key
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # build_primary_key([table_name])
            create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST ,
            CHANGE COLUMN `date` `date` DATE NOT NULL AFTER `id`,
            ADD PRIMARY KEY (`id`, `date`)""".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
Example #26
0
def import_tushare_hsgt_top10(chain_param=None):
    """
    Import HSGT (沪深港通) top-10 traded-stock records per trading day into
    ``tushare_hsgt_top10``, up to the most recently completed trading day
    (yesterday before 16:00, today afterwards).

    NOTE(review): a later function of the same name exists in this module and
    shadows this definition — confirm which implementation is intended.

    :param chain_param: only used to pass parameters along a task.chain call
    :return:
    """
    table_name = 'tushare_hsgt_top10'
    logging.info("更新 %s 开始", table_name)
    # Column name -> SQL type mapping for the target table
    param_list = [
        ('trade_date', Date),
        ('ts_code', String(20)),
        ('name', String(20)),
        ('close', DOUBLE),
        ('change', DOUBLE),
        ('rank', Integer),
        ('market_type', String(20)),
        ('amount', DOUBLE),
        ('net_amount', DOUBLE),
        ('buy', DOUBLE),
        ('sell', DOUBLE),
    ]

    has_table = engine_md.has_table(table_name)
    # When the table already exists, fetch only trading days after its newest
    # trade_date; otherwise start from 2014-11-17 (HSGT launch).
    if has_table:
        sql_str = """
                  select cal_date            
                  FROM
                   (
                    select * from tushare_trade_date trddate 
                    where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
                  )tt
                  where (is_open=1 
                         and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                         and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
                  SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
               AND exchange='SSE'  AND cal_date>='2014-11-17') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # List of trading dates still to be imported
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())
    dtype = dict(param_list)

    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            # market_type: '1' = SH-HK connect, '3' = SZ-HK connect
            # (TODO confirm against the tushare hsgt_top10 docs)
            for market_type in list(['1', '3']):
                data_df = invoke_hsgt_top10(trade_date=trade_date,
                                            market_type=market_type)
                if len(data_df) > 0:
                    data_count = bunch_insert_p(
                        data_df,
                        table_name=table_name,
                        dtype=dtype,
                        primary_keys=['ts_code', 'trade_date'])
                    logging.info("%d/%d) %s更新 %s 结束 %d 条信息被更新", num,
                                 trade_date_list_len, trade_date, table_name,
                                 data_count)
                else:
                    logging.info("无数据信息可被更新")
                    break
    except Exception:  # fix: bare `except:` also swallowed SystemExit/KeyboardInterrupt
        logger.exception('更新 %s 表异常', table_name)
def tushare_tmt_twincome_info():
    """
    Upsert the static Taiwan TMT sector reference list (code, Chinese name,
    series start date) into table ``tushare_tmt_twincome_info`` and, on first
    creation, convert the table to MyISAM with ``ts_code`` as primary key.
    """
    table_name = 'tushare_tmt_twincome_info'
    has_table = engine_md.has_table(table_name)
    # Rows are [ts_code, cn_name, start_date]; every series starts 2011-01-28
    indicators_dic = [
        ['1', 'PC', '20110128'],
        ['2', 'NB', '20110128'],
        ['3', '主机板', '20110128'],
        ['4', '印刷电路板', '20110128'],
        ['5', 'IC载板', '20110128'],
        ['6', 'PCB组装', '20110128'],
        ['7', '软板', '20110128'],
        ['8', 'PCB', '20110128'],
        ['9', 'PCB原料', '20110128'],
        ['10', '铜箔基板', '20110128'],
        ['11', '玻纤纱布', '20110128'],
        ['12', 'FCCL', '20110128'],
        ['13', '显示卡', '20110128'],
        ['14', '绘图卡', '20110128'],
        ['15', '电视卡', '20110128'],
        ['16', '泛工业电脑', '20110128'],
        ['17', 'POS', '20110128'],
        ['18', '工业电脑', '20110128'],
        ['19', '光电IO', '20110128'],
        ['20', '监视器', '20110128'],
        ['21', '扫描器', '20110128'],
        ['22', 'PC周边', '20110128'],
        ['23', '储存媒体', '20110128'],
        ['24', '光碟', '20110128'],
        ['25', '硬盘磁盘', '20110128'],
        ['26', '发光二极体', '20110128'],
        ['27', '太阳能', '20110128'],
        ['28', 'LCD面板', '20110128'],
        ['29', '背光模组', '20110128'],
        ['30', 'LCD原料', '20110128'],
        ['31', 'LCD其它', '20110128'],
        ['32', '触控面板', '20110128'],
        ['33', '监控系统', '20110128'],
        ['34', '其它光电', '20110128'],
        ['35', '电子零组件', '20110128'],
        ['36', '二极体整流', '20110128'],
        ['37', '连接器', '20110128'],
        ['38', '电源供应器', '20110128'],
        ['39', '机壳', '20110128'],
        ['40', '被动元件', '20110128'],
        ['41', '石英元件', '20110128'],
        ['42', '3C二次电源', '20110128'],
        ['43', '网路设备', '20110128'],
        ['44', '数据机', '20110128'],
        ['45', '网路卡', '20110128'],
        ['46', '半导体', '20110128'],
        ['47', '晶圆制造', '20110128'],
        ['48', 'IC封测', '20110128'],
        ['49', '特用IC', '20110128'],
        ['50', '记忆体模组', '20110128'],
        ['51', '晶圆材料', '20110128'],
        ['52', 'IC设计', '20110128'],
        ['53', 'IC光罩', '20110128'],
        ['54', '电子设备', '20110128'],
        ['55', '手机', '20110128'],
        ['56', '通讯设备', '20110128'],
        ['57', '电信业', '20110128'],
        ['58', '网路服务', '20110128'],
        ['59', '卫星通讯', '20110128'],
        ['60', '光纤通讯', '20110128'],
        ['61', '3C通路', '20110128'],
        ['62', '消费性电子', '20110128'],
        ['63', '照相机', '20110128'],
        ['64', '软件服务', '20110128'],
        ['65', '系统整合', '20110128'],
    ]
    # SQL column types for the upsert
    dtype = {
        'ts_code': String(20),
        'cn_name': String(120),
        'start_date': Date,
    }
    name_list = ['ts_code', 'cn_name', 'start_date']
    info_df = pd.DataFrame(data=indicators_dic, columns=name_list)
    data_count = bunch_insert_on_duplicate_update(info_df, table_name, engine_md, dtype)
    logger.info('%d 条记录被更新', data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                ADD PRIMARY KEY (`ts_code`)""".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)
        # fix: message previously referred to `wind_code`, but the key set above is ts_code
        logger.info('%s 表 `ts_code` 主键设置完成', table_name)
def import_tushare_hsgt_top10(chain_param=None):
    """
    Import HSGT (沪深港通) top-10 traded-stock records per trading day into
    ``tushare_hsgt_top10``, up to the most recently completed trading day
    (yesterday before 16:00, today afterwards).

    NOTE(review): this re-definition shadows an earlier function of the same
    name in this module — confirm which implementation is intended.

    :param chain_param: only used to pass parameters along a task.chain call
    :return:
    """
    table_name = 'tushare_hsgt_top10'
    logging.info("更新 %s 开始", table_name)
    # Column name -> SQL type mapping for the target table
    param_list = [
        ('trade_date', Date),
        ('ts_code', String(20)),
        ('name', String(20)),
        ('close', DOUBLE),
        ('change', DOUBLE),
        ('rank', Integer),
        ('market_type', String(20)),
        ('amount', DOUBLE),
        ('net_amount', DOUBLE),
        ('buy', DOUBLE),
        ('sell', DOUBLE),
    ]

    has_table = engine_md.has_table(table_name)
    # When the table already exists, fetch only trading days after its newest
    # trade_date; otherwise start from 2014-11-17 (HSGT launch).
    if has_table:
        sql_str = """
                  select cal_date            
                  FROM
                   (
                    select * from tushare_trade_date trddate 
                    where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
                  )tt
                  where (is_open=1 
                         and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                         and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
                  SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
               AND exchange='SSE'  AND cal_date>='2014-11-17') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # List of trading dates still to be imported
        table = session.execute(sql_str)
        trddate = list(row[0] for row in table.fetchall())
    dtype = dict(param_list)

    try:
        # fix: iterate the dates directly instead of indexing via range(len(...))
        for cal_date in trddate:
            trade_date = datetime_2_str(cal_date, STR_FORMAT_DATE_TS)
            # market_type: '1' = SH-HK connect, '3' = SZ-HK connect
            # (TODO confirm against the tushare hsgt_top10 docs)
            for market_type in ('1', '3'):
                data_df = invoke_hsgt_top10(trade_date=trade_date, market_type=market_type)
                if len(data_df) > 0:
                    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
                    logging.info("%s更新 %s 结束 %d 条信息被更新", trade_date, table_name, data_count)
                else:
                    logging.info("无数据信息可被更新")
                    break
    finally:
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # NOTE(review): a primary key on trade_date alone cannot hold the
            # multiple rows this loop inserts per day — verify intended schema
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `trade_date` `trade_date` VARCHAR(20) NOT NULL FIRST,
                ADD PRIMARY KEY (`trade_date`)""".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表  `trade_date` 主键设置完成', table_name)
Example #29
0
def import_future_daily_his(chain_param=None,
                            ths_code_set: set = None,
                            begin_time=None):
    """
    Update daily-level quotes of future contracts into `ifind_future_daily`.

    Determines, per ths_code, the date range still missing (or the full listed
    range when the target table does not exist yet), downloads the quotes via
    iFinD `THS_HistoryQuotes`, and bulk-upserts them in batches.

    :param chain_param: only used when chained via task.chain to pass
        parameters between serial tasks; unused inside this function
    :param ths_code_set: optional whitelist of ths_code values; None means
        every instrument found in `ifind_future_info`
    :param begin_time: optional earliest download date; the effective start
        for each code is min(per-code start, begin_time)
    :return: None
    """
    table_name = 'ifind_future_daily'
    info_table_name = 'ifind_future_info'
    logger.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # (column name, SQL dtype) pairs for the indicators requested from iFinD;
    # also reused below to build the dtype mapping for the bulk insert.
    indicator_param_list = [
        ('preClose', String(20)),
        ('open', DOUBLE),
        ('high', DOUBLE),
        ('low', DOUBLE),
        ('close', DOUBLE),
        ('volume', DOUBLE),
        ('amount', DOUBLE),
        ('avgPrice', DOUBLE),
        ('change', DOUBLE),
        ('changeRatio', DOUBLE),
        ('preSettlement', DOUBLE),
        ('settlement', DOUBLE),
        ('change_settlement', DOUBLE),
        ('chg_settlement', DOUBLE),
        ('openInterest', DOUBLE),
        ('positionChange', DOUBLE),
        ('amplitude', DOUBLE),
    ]
    json_indicator = ','.join(key for key, _ in indicator_param_list)
    if has_table:
        # After 16:00 download today's closing data; before 16:00 only up to
        # yesterday. Ranges whose end date is more than ~1 year old are skipped:
        # some very old histories can no longer be completed,
        # e.g. AL0202.SHF AL9902.SHF CU0202.SHF
        # TODO: ths_ksjyr_future 字段需要替换为 ths_contract_listed_date_future 更加合理
        sql_str = """SELECT ths_code, date_frm, 
                if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
            SELECT fi.ths_code, ifnull(trade_date_max_1, ths_start_trade_date_future) date_frm, 
                ths_last_td_date_future lasttrade_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi LEFT OUTER JOIN
                (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) wfd
                ON fi.ths_code = wfd.ths_code
            ) tt
            WHERE date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date) 
            AND subdate(curdate(), 360) < if(lasttrade_date<end_date, lasttrade_date, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM 
            (
            SELECT fi.ths_code, ths_start_trade_date_future date_frm, 
                ths_last_td_date_future lasttrade_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi
            ) tt""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)

    with with_db_session(engine_md) as session:
        # Per-instrument date range still needing daily data, filtered by the
        # optional whitelist; begin_time (if given) can only move starts earlier.
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        # Trial accounts can only fetch the most recent 5 years of data.
        date_from_min = date.today() - timedelta(days=(365 * 5))
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to
        }

    # dtype mapping for bunch_insert_on_duplicate_update
    dtype = {key: val for key, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    dtype['time'] = Date

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        logger.info("%d future instrument will be handled", code_count)
        # NOTE: loop variables renamed so they no longer shadow the
        # begin_time parameter (it was re-bound by the original unpacking).
        for num, (ths_code,
                  (date_frm,
                   date_to)) in enumerate(code_date_range_dic.items(),
                                          start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         date_frm, date_to)
            data_df = invoker.THS_HistoryQuotes(
                ths_code, json_indicator,
                'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                date_frm, date_to)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Flush the buffer to the DB once it exceeds the threshold.
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                insert_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, dtype)
                tot_data_count += insert_count
                # Log BEFORE resetting the counters: the original logged
                # data_count after zeroing it, so it always reported 0 rows.
                logger.info("%s 新增数据 %d 条", table_name, insert_count)
                data_df_list, data_count = [], 0

            # Debugging only: stop early after a couple of batches.
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # Flush whatever is still buffered, even if the loop raised.
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            insert_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, dtype)
            tot_data_count += insert_count
            logger.info("%s 新增数据 %d 条", table_name, insert_count)

        if not has_table and engine_md.has_table(table_name):
            # The table was implicitly created by the first insert:
            # convert the engine and add the primary key exactly once.
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])

        logger.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def import_tushare_tmt_twincome(chain_param=None, ts_code_set=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :return:
    """
    table_name = 'tushare_tmt_twincome'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # 进行表格判断,确定是否含有tushare_stock_daily
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm start_date, end_date
            FROM
            (
            SELECT info.ts_code, ifnull(date, start_date) date_frm, 
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM 
                tushare_tmt_twincome_info info 
            LEFT OUTER JOIN
                (SELECT item, adddate(max(date),1) date FROM {table_name} GROUP BY item ) income
            ON info.ts_code = income.item
            ) tt
            order by ts_code""".format(table_name=table_name)
    else:
        sql_str = """SELECT ts_code, start_date ,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date 
            FROM tushare_tmt_twincome_info info """
        logger.warning('%s 不存在,仅使用 tushare_tmt_twincome_info 表进行计算日期范围', table_name)


    # ts_code_set = None
    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 计算每只股票需要获取日线数据的日期区间
        begin_time,ts_code_set = None,None
        # 获取date_from,date_to,将date_from,date_to做为value值
        code_date_range_dic = {
            ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ts_code, date_from, date_to in table.fetchall() if
            ts_code_set is None or ts_code in ts_code_set}

    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(code_date_range_dic)
    logger.info('%d Taiwan TMT information will been import into tushare_tmt_twincome', data_len)
    # 将data_df数据,添加到data_df_list

    Cycles = 1
    try:
        for num, (ts_code, (start_date, end_date)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, start_date, end_date)
            data_df = invoke_tmt_twincome(item=ts_code, start_date=datetime_2_str(start_date, STR_FORMAT_DATE_TS),
                                     end_date=datetime_2_str(end_date, STR_FORMAT_DATE_TS))
            # logger.info(' %d data of %s between %s and %s', df.shape[0], ts_code, start_date, date_to)
            if len(data_df) > 0 and data_df['date'] is not None:
                while try_2_date(data_df['date'].iloc[-1]) > try_2_date(start_date):
                    last_date_in_df_last, last_date_in_df_cur = try_2_date(data_df['date'].iloc[-1]), None
                    df2 = invoke_tmt_twincome(item=ts_code,
                                              start_date=datetime_2_str(start_date, STR_FORMAT_DATE_TS),
                                              end_date=datetime_2_str(try_2_date(data_df['date'].iloc[-1]) - timedelta(days=1),STR_FORMAT_DATE_TS))
                    if len(df2) > 0 and df2['date'] is not None:
                        last_date_in_df_cur = try_2_date(df2['date'].iloc[-1])
                        if last_date_in_df_cur < last_date_in_df_last:
                            data_df = pd.concat([data_df, df2])

                        elif last_date_in_df_cur == last_date_in_df_last:
                            break
                        if data_df is None:
                            logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, start_date,
                                           end_date)
                            continue
                        logger.info('%d/%d) %d data of %s between %s and %s', num, data_len, data_df.shape[0], ts_code,
                                    start_date, end_date)
                    elif len(df2) <= 0:
                        break
            # 把数据攒起来
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # 大于阀值有开始插入
            if data_count >= 1000:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, DTYPE_TUSHARE_TMT_TWINCOME)
                all_data_count += data_count
                data_df_list, data_count = [], 0


    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, DTYPE_TUSHARE_TMT_TWINCOME)
            all_data_count = all_data_count + data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)