def import_trade_date():
    """Incrementally import trading-calendar data into wind_trade_date (~300 days ahead).

    2018-01-17 (original note): HK trading days were added; a separate
    wind_trade_date_hk table was introduced to limit impact on existing code,
    and the two tables may be merged later.
    :return: None
    """
    table_name = 'wind_trade_date'
    has_table = engine_md.has_table(table_name)
    # fix: initialize up front so a failed max-date query no longer leaves the
    # name undefined (the original only assigned it inside try / else branches)
    exch_code_trade_date_dic = {}
    if has_table:
        with with_db_session(engine_md) as session:
            try:
                table = session.execute(
                    'SELECT exch_code,max(trade_date) FROM {table_name} GROUP BY exch_code'.format(
                        table_name=table_name))
                exch_code_trade_date_dic = {exch_code: trade_date
                                            for exch_code, trade_date in table.fetchall()}
            except Exception:
                logger.exception("交易日获取异常")
    exchange_code_dict = {
        "HKEX": "香港",
        "NYSE": "纽约",
        "SZSE": "深圳",
        "TWSE": "台湾",
        "NASDAQ": "纳斯达克",
        "AMEX": "美国证券交易所",
        "TSE": "东京",
        "LSE": "伦敦",
        "SGX": "新加坡"
    }
    exchange_code_list = list(exchange_code_dict.keys())
    for exchange_code in exchange_code_list:
        # start the incremental window the day after the latest stored date,
        # or from 1980-01-01 when this exchange has no data yet
        if exchange_code in exch_code_trade_date_dic:
            trade_date_max = exch_code_trade_date_dic[exchange_code]
            trade_date_start = (trade_date_max + timedelta(days=1)).strftime(STR_FORMAT_DATE)
        else:
            trade_date_start = '1980-01-01'
        end_date_str = (date.today() + timedelta(days=310)).strftime(STR_FORMAT_DATE)
        if exchange_code is None or exchange_code == "":
            trade_date_list = invoker.tdays(trade_date_start, end_date_str)
        else:
            trade_date_list = invoker.tdays(trade_date_start, end_date_str,
                                            "TradingCalendar=%s" % exchange_code)
        if trade_date_list is None:
            logger.warning("没有查询到交易日期")
            # fix: the original fell through and crashed on len(None)
            continue
        date_count = len(trade_date_list)
        if date_count > 0:
            logger.info("%d 条交易日数据将被导入", date_count)
            trade_date_df = pd.DataFrame({'trade_date': trade_date_list})
            trade_date_df['exch_code'] = exchange_code
            bunch_insert_on_duplicate_update(trade_date_df, table_name, engine_md,
                                             dtype={'trade_date': Date, 'exch_code': String(20)},
                                             myisam_if_create_table=True,
                                             primary_keys=['trade_date', 'exch_code'],
                                             schema=config.DB_SCHEMA_MD)
            logger.info('%s %d 条交易日数据导入 %s 完成', exchange_code, date_count, table_name)
def import_tushare_stock_info(chain_param=None, refresh=False):
    """Load the full market stock list (code and name info) into tushare_stock_info.

    :param chain_param: unused here; presumably for task-chain compatibility -- TODO confirm
    :param refresh: unused in this implementation -- TODO confirm intent
    :return: None
    """
    table_name = 'tushare_stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    wind_indicator_param_list = [
        ('ts_code', String(20)),
        # NOTE(review): 'symbol' typed DOUBLE here but String(20) in the other
        # stock_info importer in this file -- confirm which is intended
        ('symbol', DOUBLE),
        ('list_date', Date),
        ('delist_date', Date),
        ('name', String(30)),
        ('fullname', String(100)),
        ('enname', String(200)),
        ('exchange_id', String(30)),
        ('list_status', String(10)),
        ('is_hs', String(10)),
    ]
    # dtype mapping for the bulk upsert
    # fix: removed the unused 'param' local and the redundant re-assignment of
    # dtype['ts_code'] (already String(20) in the list above)
    dtype = {key: val for key, val in wind_indicator_param_list}
    # 数据提取
    stock_info_all_df = pro.stock_basic(
        exchange_id='',
        fields='ts_code,symbol,name,fullname,enname,exchange_id,curr_type,list_date,list_status,delist_date,is_hs')
    logging.info('%s stock data will be import', stock_info_all_df.shape[0])
    data_count = bunch_insert_on_duplicate_update(stock_info_all_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        # first insert created the table: switch engine and build the primary key once
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
def insert_db(df: pd.DataFrame):
    """Upsert *df* into ``table_name`` (closure variable) and, after the insert
    first creates the table, convert it to MyISAM and build its primary key.

    Relies on ``nonlocal has_table`` from the enclosing function so the
    engine/PK fix-up runs at most once.

    :param df: rows to upsert
    :return: number of rows inserted/updated (as reported by the bulk helper)
    """
    nonlocal has_table
    insert_data_count = bunch_insert_on_duplicate_update(df, table_name, engine_md, dtype=dtype)
    if not has_table and engine_md.has_table(table_name):
        # the insert just created the table: switch engine and add the PK once
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
        has_table = True
    return insert_data_count
def import_tushare_suspend(chain_param=None):
    """Incrementally import daily suspension data into tushare_stock_daily_suspend.

    Walks every open SSE trading day after the last stored suspend_date (all
    history when the table does not exist yet; before 16:00 the window ends
    yesterday) and upserts that day's suspensions.
    :param chain_param: unused here; presumably for task-chain compatibility -- TODO confirm
    :return: None
    """
    table_name = 'tushare_stock_daily_suspend'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # NOTE (original author warning): the date-range SQL must reference THIS
    # table; referencing another table here corrupts the incremental window
    if has_table:
        sql_str = """ select cal_date FROM ( select * from tushare_trade_date trddate where( cal_date>(SELECT max(suspend_date) FROM {table_name} )) )tt where (is_open=1 and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """ SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) AND exchange='SSE') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)
    with with_db_session(engine_md) as session:
        # fetch the pending trade dates
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())
    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = pro.suspend(ts_code='', suspend_date=trade_date, resume_date='', fields='')
            if len(data_df) > 0:
                data_count = bunch_insert(data_df, table_name=table_name,
                                          dtype=DTYPE_TUSHARE_SUSPEND,
                                          primary_keys=['ts_code', 'suspend_date'])
                logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新",
                             num, trade_date_list_len, trade_date, table_name, data_count)
            else:
                # fix: log the date being processed; the original passed the loop
                # length (trade_date_list_len) to a %s meant for the date
                logging.info("%s 当日无停牌股票", trade_date)
    except Exception:
        # fix: bare 'except:' also swallowed SystemExit / KeyboardInterrupt
        logger.exception('更新 %s 表异常', table_name)
def import_tushare_daily_basic(chain_param=None):
    """Incrementally import daily basic indicators into tushare_stock_daily_basic.

    Walks every open SSE trading day after the last stored trade_date (all
    history on first run; before 16:00 the window ends yesterday) and upserts
    that day's records.
    :param chain_param: unused here; presumably for task-chain compatibility -- TODO confirm
    :return: None
    """
    table_name = 'tushare_stock_daily_basic'
    primary_keys = ["ts_code", "trade_date"]
    logging.info("更新 %s 开始", table_name)
    check_sqlite_db_primary_keys(table_name, primary_keys)
    has_table = engine_md.has_table(table_name)
    # NOTE (original author warning): the date-range SQL must reference THIS
    # table; referencing another table here corrupts the incremental window
    if has_table:
        sql_str = """ select cal_date FROM ( select * from tushare_trade_date trddate where( cal_date>(SELECT max(trade_date) FROM {table_name} )) )tt where (is_open=1 and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """ SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) AND exchange='SSE') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)
    with with_db_session(engine_md) as session:
        # fetch the pending trade dates
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())
    try:
        for_count = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_daily_basic(ts_code='', trade_date=trade_date)
            if data_df is not None and data_df.shape[0] > 0:
                data_count = bunch_insert(data_df, table_name=table_name,
                                          dtype=DTYPE_TUSHARE_STOCK_DAILY_BASIC,
                                          primary_keys=primary_keys)
                logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新",
                             num, for_count, trade_date, table_name, data_count)
            else:
                logging.info("%d/%d) %s 无数据信息可被更新", num, for_count, trade_date)
    except Exception:
        # fix: bare 'except:' also swallowed SystemExit / KeyboardInterrupt
        logger.exception("更新 %s 表异常", table_name)
def import_trade_date(chain_param=None):
    """Incrementally import trading-calendar data (~300 days ahead) into TABLE_NAME,
    one pass per configured exchange.

    2018-01-17 (original note): HK trading days were added; table merge may
    happen later.
    :param chain_param: unused here; presumably for task-chain compatibility -- TODO confirm
    :return: None
    """
    table_name = TABLE_NAME
    exch_code_trade_date_dic = {}
    has_table = engine_md.has_table(table_name)
    if has_table:
        with with_db_session(engine_md) as session:
            try:
                table = session.execute(
                    'SELECT exchange,max(cal_date) FROM {table_name} GROUP BY exchange'.format(
                        table_name=table_name))
                exch_code_trade_date_dic = {exch_code: trade_date
                                            for exch_code, trade_date in table.fetchall()}
            except Exception:
                logger.exception("交易日获取异常")
    exchange_code_dict = {
        "HKEX": "香港联合交易所",
        "SZSE": "深圳证券交易所",
        "SSE": "上海证券交易所",
    }
    exchange_code_list = list(exchange_code_dict.keys())
    for exchange_code in exchange_code_list:
        # start the day after the latest stored date, or 19900101 on first run
        if exchange_code in exch_code_trade_date_dic:
            trade_date_max = exch_code_trade_date_dic[exchange_code]
            start_date_str = (trade_date_max + timedelta(days=1)).strftime(STR_FORMAT_DATE_TS)
        else:
            start_date_str = '19900101'
        end_date_str = (date.today() + timedelta(days=310)).strftime(STR_FORMAT_DATE_TS)
        # fix: the original always passed exchange_id='' so every loop iteration
        # fetched the same (default) calendar regardless of exchange_code
        trade_date_df = pro.trade_cal(exchange_id=exchange_code,
                                      start_date=start_date_str, end_date=end_date_str)
        if trade_date_df is None or trade_date_df.shape[0] == 0:
            logger.warning('%s[%s] [%s - %s] 没有查询到交易日期',
                           exchange_code_dict[exchange_code], exchange_code,
                           start_date_str, end_date_str)
            continue
        date_count = trade_date_df.shape[0]
        logger.info("%s[%s] %d 条交易日数据将被导入 %s",
                    exchange_code_dict[exchange_code], exchange_code, date_count, table_name)
        date_count = bunch_insert_on_duplicate_update(
            trade_date_df, table_name, engine_md,
            dtype={
                'exchange': String(10),
                'cal_date': Date,
                'is_open': DOUBLE,
            },
            myisam_if_create_table=True,
            primary_keys=['exchange', 'cal_date'],
            schema=config.DB_SCHEMA_MD)
        logger.info('%s[%s] %d 条交易日数据导入 %s 完成',
                    exchange_code_dict[exchange_code], exchange_code, date_count, table_name)
def import_tushare_fund_portfolio(chain_param=None):
    """Fetch portfolio records for every fund code in tushare_fund_basic and
    bulk-upsert them into tushare_fund_portfolio, flushing in batches once at
    least 20000 rows have accumulated.

    :param chain_param: unused here; presumably for task-chain compatibility -- TODO confirm
    :return: None
    """
    table_name = 'tushare_fund_portfolio'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    sql_str = """SELECT ts_code FROM md_integration.tushare_fund_basic"""
    with with_db_session(engine_md) as session:
        # collect every fund code to refresh
        code_list = [row[0] for row in session.execute(sql_str).fetchall()]
    data_len = len(code_list)
    pending_dfs, pending_rows, total_rows = [], 0, 0
    try:
        for num, ts_code in enumerate(code_list, start=1):
            data_df = invoke_fund_portfolio(ts_code=ts_code)
            logging.info(" 提取%s 基金行情信息%d 条,整体完成%d/%d", ts_code, len(data_df), num, data_len)
            if data_df is not None and data_df.shape[0] > 0:
                # accumulate until the batch threshold is reached
                pending_rows += data_df.shape[0]
                pending_dfs.append(data_df)
                if pending_rows >= 20000:
                    bunch_insert_on_duplicate_update(pd.concat(pending_dfs), table_name,
                                                     engine_md, DTYPE_TUSHARE_FUND_PORTFOLIO)
                    logging.info(" %s 表 %d 条净值信息被更新", table_name, pending_rows)
                    total_rows += pending_rows
                    pending_dfs, pending_rows = [], 0
    finally:
        # flush whatever is still buffered before reporting the total
        if len(pending_dfs) > 0:
            flushed = bunch_insert_on_duplicate_update(pd.concat(pending_dfs), table_name,
                                                       engine_md, DTYPE_TUSHARE_FUND_PORTFOLIO)
            total_rows = total_rows + flushed
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, total_rows)
def merge_tick_2_bar():
    """Aggregate tick rows (pytdx_tick_temp) into 1-minute bars (pytdx_stock_min1).

    Creates the destination table on first run, then runs a single
    INSERT IGNORE ... SELECT: high/low/vol/amount come from GROUP BY aggregates
    per (ts_code, date, time); open/close are resolved by joining back to the
    tick table on the min/max tick `index` within each minute.
    :return: None
    """
    # stray empty-string literal from the original (no effect) -- left untouched
    ""
    table_name = 'pytdx_stock_min1'
    tick_table_name = 'pytdx_tick_temp'
    has_table = engine_md.has_table(table_name)
    sql_create_table = f"""CREATE TABLE `{table_name}` ( `ts_code` varchar(12) NOT NULL, `trade_date` datetime DEFAULT NULL, `date` date NOT NULL, `time` time NOT NULL, `open` DOUBLE DEFAULT NULL, `high` DOUBLE DEFAULT NULL, `low` DOUBLE DEFAULT NULL, `close` DOUBLE DEFAULT NULL, `vol` int(11) DEFAULT NULL, `amount` DOUBLE DEFAULT NULL, PRIMARY KEY (`ts_code`,`date`,`time`) ) ENGINE=MyISAM DEFAULT CHARSET=utf8"""
    sql_str = f"""insert ignore {table_name} select bar.ts_code, bar.trade_date, bar.date, bar.time, tick_open.price open, bar.high, bar.low, tick_close.price close, bar.vol, bar.amount from ( select ts_code, trade_date, time, date, min(`index`) index_min, max(`index`) index_max, max(price) high, min(price) low, sum(vol) vol, sum(price*vol) amount from {tick_table_name} group by ts_code, date, time ) bar left join {tick_table_name} tick_open on bar.ts_code = tick_open.ts_code and bar.date = tick_open.date and bar.index_min = tick_open.index left join {tick_table_name} tick_close on bar.ts_code = tick_close.ts_code and bar.date = tick_close.date and bar.index_max = tick_close.index"""
    with with_db_session(engine_md) as session:
        # create the destination table if missing
        if not has_table:
            session.execute(sql_create_table)
            logger.info('创建表 %s ', table_name)
        # merge tick data into 1-minute bars
        rslt = session.execute(sql_str)
        insert_count = rslt.rowcount
        logger.info('合并分钟数据 %d 条', insert_count)
def save(self):
    """Fetch fundamentals for every pending trade date and upsert them into
    ``self.table_name``.

    Pending dates come from jq_trade_date: dates after the table's max ``day``
    when the table already exists, otherwise all dates from ``self.BASE_DATE``.
    :return: None
    """
    self.logger.info("更新 %s 开始", self.table_name)
    has_table = engine_md.has_table(self.table_name)  # does the table exist yet?
    if has_table:
        with with_db_session(engine_md) as session:
            sql_str = f"""select trade_date from jq_trade_date
                where trade_date>(select max(day) from {self.table_name})
                order by trade_date"""
            # NOTE(review): params has no matching placeholder in this SQL -- confirm intent
            table = session.execute(sql_str, params={"trade_date": self.BASE_DATE})
            trade_date_list = [_[0] for _ in table.fetchall()]
            # NOTE(review): re-runs the multi-row query only to log its first
            # value as the "start date" -- confirm this is intended
            date_start = execute_scalar(sql_str, engine_md)
            self.logger.info('查询 %s 数据使用起始日期 %s', self.table_name, date_2_str(date_start))
    else:
        with with_db_session(engine_md) as session:
            sql_str = "select trade_date from jq_trade_date where trade_date>=:trade_date order by trade_date"
            table = session.execute(sql_str, params={"trade_date": self.BASE_DATE})
            trade_date_list = [_[0] for _ in table.fetchall()]
            self.logger.warning('%s 不存在,使用基础日期 %s', self.table_name, self.BASE_DATE)
    # process dates in ascending order
    trade_date_list.sort()
    data_count_tot, for_count = 0, len(trade_date_list)
    try:
        # NOTE(review): enumerate starts at 0 while the log prints '%d/%d' -- confirm
        for num, trade_date in enumerate(trade_date_list):
            q = query(self.statement)
            df = get_fundamentals(q, date=date_2_str(trade_date))
            if df is None or df.shape[0] == 0:
                continue
            logger.debug('%d/%d) %s 包含 %d 条数据', num, for_count, trade_date, df.shape[0])
            data_count = bunch_insert_on_duplicate_update(
                df, self.table_name, engine_md, dtype=self.dtype,
                myisam_if_create_table=True, primary_keys=['id'],
                schema=config.DB_SCHEMA_MD)
            data_count_tot += data_count
    except:
        # NOTE(review): bare 'except:' also catches SystemExit/KeyboardInterrupt
        logger.exception("更新 %s 异常", self.table_name)
    finally:
        # summary log (module logger, not self.logger, in the original)
        logging.info("更新 %s 结束 %d 条信息被更新", self.table_name, data_count_tot)
def merge_tushare_daily(date_from=None):
    """Merge the adj_factor / daily_basic / daily_md tables into tushare_stock_daily.

    :param date_from: start date; defaults to the day after the latest
        trade_date already stored in the merged table (when it exists)
    :return: the merged DataFrame
    """
    table_name = 'tushare_stock_daily'
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(trade_date),1) from {table_name}".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    # pull the three source tables for the incremental window
    adj_factor_df = get_tushare_daily_df('tushare_stock_daily_adj_factor', date_from)
    daily_basic_df = get_tushare_daily_df('tushare_stock_daily_basic', date_from)
    daily_md_df = get_tushare_daily_df('tushare_stock_daily_md', date_from)
    tushare_daily_df = pd.merge(
        pd.merge(adj_factor_df, daily_basic_df, how='outer', on=['ts_code', 'trade_date']),
        daily_md_df, how='outer', on=['ts_code', 'trade_date'])
    # column dtypes for the upsert; later dicts win on key collisions
    dtype = {key: val
             for dic in (DTYPE_TUSHARE_SUSPEND,
                         DTYPE_TUSHARE_STOCK_DAILY_BASIC,
                         DTYPE_TUSHARE_STOCK_DAILY_MD)
             for key, val in dic.items()}
    # resolve the duplicated close_x/close_y columns produced by the merges
    tushare_daily_df["close"] = tushare_daily_df.apply(get_merge, axis=1)
    tushare_daily_df.drop(['close_x', 'close_y'], axis=1, inplace=True)
    data_count = bunch_insert_on_duplicate_update(tushare_daily_df, table_name, engine_md, dtype)
    logging.info('%s 更新 %d', table_name, data_count)
    return tushare_daily_df
def import_tushare_stock_info(chain_param=None, refresh=False):
    """Load the full market stock list (code, name, listing info) into tushare_stock_info.

    :param chain_param: unused here; presumably for task-chain compatibility -- TODO confirm
    :param refresh: unused in this implementation -- TODO confirm intent
    :return: None
    """
    table_name = 'tushare_stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    tushare_indicator_param_list = [
        ('ts_code', String(20)),
        ('symbol', String(20)),
        ('name', String(40)),
        ('area', String(100)),
        ('industry', String(200)),
        ('fullname', String(100)),
        ('enname', String(200)),
        ('market', String(100)),
        ('exchange', String(20)),
        ('curr_type', String(20)),
        ('list_status', String(20)),
        ('list_date', Date),
        ('delist_date', Date),
        ('is_hs', String(20)),
    ]
    # dtype mapping for the bulk upsert
    # fix: removed the unused 'param' local and the redundant re-assignment of
    # dtype['ts_code'] (already String(20) in the list above)
    dtype = {key: val for key, val in tushare_indicator_param_list}
    # fix: the original fields string requested 'is_hs' three times
    stock_info_all_df = pro.stock_basic(
        exchange='',
        fields='ts_code,symbol,name,area,industry,fullname,enname,market,exchange,curr_type,list_status,list_date,delist_date,is_hs')
    logging.info('%s stock data will be import', stock_info_all_df.shape[0])
    data_count = bunch_insert_on_duplicate_update(
        stock_info_all_df, table_name, engine_md, dtype=dtype,
        myisam_if_create_table=True, primary_keys=['ts_code'],
        schema=config.DB_SCHEMA_MD)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
def import_tushare_fund_basic(chain_param=None, ):
    """Fetch fund basic data for both markets and upsert into tushare_fund_basic.

    :param chain_param: unused here; presumably for task-chain compatibility -- TODO confirm
    :return: None
    """
    table_name = 'tushare_fund_basic'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # one pass per market code (presumably exchange-traded vs OTC -- confirm)
    for market in ('E', 'O'):
        data_df = pro.fund_basic(market=market)
        if len(data_df) == 0:
            logging.info("无数据信息可被更新")
        else:
            # NOTE(review): dtype constant is the adj-factor one -- confirm intended
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR)
            logging.info(" %s 表 %d 条基金信息被更新", table_name, data_count)
def get_latest_constituent_df(sector_code):
    """Return the latest trade date and its constituent rows for *sector_code*.

    :param sector_code: sector identifier queried against wind_sectorconstituent
    :return: (latest trade date, DataFrame of that day's constituents), or
        (None, None) when the table or any matching data is missing
    """
    table_name = "wind_sectorconstituent"
    if not engine_md.has_table(table_name):
        return None, None
    max_date_sql = 'SELECT max(trade_date) FROM wind_sectorconstituent WHERE sector_code = :sector_code'
    with with_db_session(engine_md) as session:
        date_latest = session.execute(max_date_sql, params={"sector_code": sector_code}).scalar()
    if date_latest is None:
        return None, None
    constituent_sql = "SELECT * FROM wind_sectorconstituent WHERE sector_code = %s AND trade_date = %s"
    sec_df = pd.read_sql(constituent_sql, engine_md, params=[sector_code, date_latest])
    return date_latest, sec_df
def import_tushare_stock_company(chain_param=None):
    """Fetch listed-company profile data and upsert into tushare_stock_company.

    setup_date values shorter than 8 characters (malformed dates) are nulled
    before the insert.
    :param chain_param: unused here; presumably for task-chain compatibility -- TODO confirm
    :return: None
    """
    table_name = 'tushare_stock_company'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    data_df = pro.stock_company()
    # fix: the original used chained indexing (data_df['setup_date'][i] = np.nan),
    # which can silently write to a copy (pandas SettingWithCopy); .loc writes
    # reliably to the frame itself
    for i in data_df.index:
        value = data_df.loc[i, 'setup_date']
        if value is not None and len(value) < 8:
            data_df.loc[i, 'setup_date'] = np.nan
    if len(data_df) > 0:
        data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md,
                                                      DTYPE_TUSHARE_STOCK_COMPANY)
        logging.info(" %s 表 %d 条上市公司基本信息被更新", table_name, data_count)
    else:
        logging.info("无数据信息可被更新")
def get_latest_constituent_df(index_code):
    """Return the latest trade date and its constituent rows for *index_code*.

    :param index_code: index identifier queried against wind_index_constituent
    :return: (latest trade date, DataFrame of that day's constituents), or
        (None, None) when the table or any matching data is missing
    """
    table_name = "wind_index_constituent"
    if not engine_md.has_table(table_name):
        return None, None
    with with_db_session(engine_md) as session:
        date_latest = session.execute(
            "select max(trade_date) from wind_index_constituent where index_code = :index_code",
            params={"index_code": index_code}).scalar()
    if date_latest is None:
        return None, None
    constituent_sql = "select * from wind_index_constituent where index_code = %s and trade_date = %s"
    sec_df = pd.read_sql(constituent_sql, engine_md, params=[index_code, date_latest])
    return date_latest, sec_df
def import_wind_fund_nav_to_nav():
    """Copy deduplicated rows from wind_fund_nav_daily into fund_nav.

    Creates fund_nav on first run, then inserts only (wind_code, nav_date)
    pairs not yet present, collapsing duplicates with max(nav)/max(nav_acc).
    :return: None
    """
    table_name = 'fund_nav'
    has_table = engine_md.has_table(table_name)
    logger.info("开始将 wind_fund_nav_daily 数据导入到 fund_nav_tmp_df")
    create_sql_str = """CREATE TABLE {table_name} ( `wind_code` varchar(20) NOT NULL COMMENT '基金代码', `nav_date` date NOT NULL COMMENT '净值日期', `nav` double DEFAULT NULL COMMENT '净值', `nav_acc` double DEFAULT NULL COMMENT '累计净值', PRIMARY KEY (`wind_code`,`nav_date`) ) ENGINE=MyISAM DEFAULT CHARSET=utf8""".format(table_name=table_name)
    # TODO (translated original note): the GROUP BY clause needs to be revisited
    sql_str = """INSERT INTO fund_nav(wind_code, nav_date, nav, nav_acc) SELECT wfn.wind_code, wfn.nav_date, wfn.nav, wfn.nav_acc FROM ( SELECT wind_code, nav_date, max(nav) nav, max(nav_acc) nav_acc FROM wind_fund_nav_daily GROUP BY wind_code, nav_date ) AS wfn LEFT OUTER JOIN fund_nav fn ON wfn.wind_code = fn.wind_code AND wfn.nav_date = fn.nav_date WHERE fn.nav_date IS NULL"""
    with with_db_session(engine_md) as session:
        # create the destination table on first run
        if not has_table:
            session.execute(create_sql_str)
            logger.info("创建 %s 表", table_name)
        session.execute(sql_str)
        logger.info('导入结束')
def fresh_tushare_stock_fund_holdings(year, quarter):
    """Refresh fund shareholding data for the given year/quarter into
    tushare_stock_fund_holdings.

    Plain 6-digit codes are converted to ts_code form (6xxxxx -> .SH,
    otherwise -> .SZ) and columns are renamed to the table schema before the
    bulk upsert.
    :param year: year to refresh
    :param quarter: quarter to refresh
    :return: None
    """
    table_name = 'tushare_stock_fund_holdings'
    logging.info("更新 %s 表%s年%s季度基金持股信息开始", table_name, year, quarter)
    has_table = engine_md.has_table(table_name)
    tushare_fund_holdings_indicator_param_list = [
        ('ts_code', String(20)),
        ('sec_name', String(20)),
        ('end_date', Date),
        ('nums', DOUBLE),
        ('nlast', DOUBLE),
        ('count', DOUBLE),
        ('clast', DOUBLE),
        ('amount', DOUBLE),
        ('ratio', DOUBLE),
    ]
    tushare_fund_holdings_dtype = {
        key: val for key, val in tushare_fund_holdings_indicator_param_list
    }
    data_df = invoke_fund_holdings(year, quarter)
    # convert plain exchange codes into ts_code form
    data_df.code = [code + '.SH' if code[0] == '6' else code + '.SZ'
                    for code in data_df.code]
    data_df = data_df.rename(columns={
        'code': 'ts_code',
        'name': 'sec_name',
        'date': 'end_date'
    })
    # fix: capture the insert count -- the original logged an 'all_data_count'
    # accumulator that was never updated, so it always reported 0
    all_data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md,
                                                      tushare_fund_holdings_dtype)
    logging.info("%s年%s季度 %s 更新 %d 条基金持股信息", year, quarter, table_name, all_data_count)
def import_jq_stock_income(chain_param=None, ts_code_set=None):
    """Incrementally pull income-statement data from the last stored pub_date
    (or BASE_DATE on first run) up to today and bulk-upsert into TABLE_NAME.

    :param chain_param: unused here; presumably for task-chain compatibility -- TODO confirm
    :param ts_code_set: unused in this implementation -- TODO confirm intent
    :return: None
    """
    logger.info("更新 %s 开始", TABLE_NAME)
    # pick the incremental start date
    if engine_md.has_table(TABLE_NAME):
        sql_str = f"""select max(pub_date) from {TABLE_NAME}"""
        date_start = execute_scalar(sql_str, engine_md)
        logger.info('查询 %s 数据使用起始日期 %s', TABLE_NAME, date_2_str(date_start))
    else:
        date_start = BASE_DATE
        logger.warning('%s 不存在,使用基础日期 %s', TABLE_NAME, date_2_str(date_start))
    date_end = datetime.date.today()
    if date_start >= date_end:
        # nothing newer than what is already stored
        logger.info('%s 已经是最新数据,无需进一步获取', date_start)
        return
    data_count_tot = 0
    try:
        for df, _date_from, _date_to in get_df_iter(date_start, date_end, LOOP_STEP):
            data_count_tot += bunch_insert_on_duplicate_update(
                df, TABLE_NAME, engine_md, dtype=DTYPE,
                myisam_if_create_table=True, primary_keys=['id'],
                schema=config.DB_SCHEMA_MD)
    finally:
        # summary log
        logging.info("更新 %s 结束 %d 条信息被更新", TABLE_NAME, data_count_tot)
def save(self):
    """Incrementally fetch data from the last stored pub_date (or BASE_DATE on
    first run) up to today and bulk-upsert into ``self.table_name``.

    :return: None
    """
    self.logger.info("更新 %s 开始", self.table_name)
    # pick the incremental start date
    if engine_md.has_table(self.table_name):
        sql_str = f"""select max(pub_date) from {self.table_name}"""
        date_start = execute_scalar(sql_str, engine_md)
        self.logger.info('查询 %s 数据使用起始日期 %s', self.table_name, date_2_str(date_start))
    else:
        date_start = self.BASE_DATE
        self.logger.warning('%s 不存在,使用基础日期 %s', self.table_name, date_2_str(date_start))
    date_end = datetime.date.today()
    if date_start >= date_end:
        # nothing newer than what is already stored
        self.logger.info('%s %s 已经是最新数据,无需进一步获取', self.table_name, date_start)
        return
    data_count_tot = 0
    try:
        for df, _date_from, _date_to in self.get_df_iter(date_start, date_end, self.loop_step):
            if df is not None and df.shape[0] > 0:
                data_count_tot += bunch_insert_on_duplicate_update(
                    df, self.table_name, engine_md, dtype=self.dtype,
                    myisam_if_create_table=True, primary_keys=['id'],
                    schema=config.DB_SCHEMA_MD)
    finally:
        # summary log (module logger in the original)
        logging.info("更新 %s 结束 %d 条信息被更新", self.table_name, data_count_tot)
def import_tushare_fund_company(chain_param=None, ):
    """Fetch mutual-fund company data and upsert into tushare_fund_company.

    setup_date/end_date values whose length is not 8 characters (malformed
    dates) are nulled before the insert.
    :param chain_param: unused here; presumably for task-chain compatibility -- TODO confirm
    :return: None
    """
    table_name = 'tushare_fund_company'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    data_df = pro.fund_company()
    # fix: the original used chained indexing (data_df.setup_date[i] = np.nan),
    # which can silently write to a copy (pandas SettingWithCopy); .loc writes
    # reliably to the frame itself
    for col in ('setup_date', 'end_date'):
        for i in data_df.index:
            value = data_df.loc[i, col]
            if value is not None and len(value) != 8:
                data_df.loc[i, col] = np.nan
    if len(data_df) > 0:
        # NOTE(review): dtype constant is the adj-factor one -- confirm intended
        data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md,
                                                      DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR)
        logging.info(" %s 表 %d 条公募基金公司信息被更新", table_name, data_count)
    else:
        logging.info("无数据信息可被更新")
def import_tushare_stock_index_daily(chain_param=None, ts_code_set=None):
    """Incrementally import index daily bars into tushare_stock_index_daily_md.

    For every index in tushare_stock_index_basic, the missing date range is
    computed (last stored trade_date + 1, or the index base_date, up to
    expiry / the latest completed trading day; before 16:00 the end date is
    yesterday). invoke_index_daily is paged backwards until the range is
    covered, and rows are bulk-upserted in batches of ~500.
    :param chain_param: unused here; presumably for task-chain compatibility -- TODO confirm
    :param ts_code_set: optional set restricting which index codes are refreshed
    :return: None
    """
    table_name = 'tushare_stock_index_daily_md'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Build the per-code missing date range (SQL differs when the table exists).
    if has_table:
        sql_str = """ SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to FROM ( SELECT info.ts_code, ifnull(trade_date, base_date) date_frm, exp_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_stock_index_basic info LEFT OUTER JOIN (SELECT ts_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY ts_code) daily ON info.ts_code = daily.ts_code ) tt WHERE date_frm <= if(exp_date<end_date, exp_date, end_date) ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """ SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to FROM ( SELECT info.ts_code, base_date date_frm, exp_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_stock_index_basic info ) tt WHERE date_frm <= if(exp_date<end_date, exp_date, end_date) ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)
    with with_db_session(engine_md) as session:
        # fetch the per-code date ranges
        table = session.execute(sql_str)
        # NOTE(review): begin_time stays None, so the min() branch below never fires -- confirm
        begin_time = None
        code_date_range_dic = {
            ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(
        code_date_range_dic)
    logger.info('%d stocks will been import into tushare_stock_index_daily_md', data_len)
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            data_df = invoke_index_daily(
                ts_code=ts_code,
                start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            if len(data_df) > 0:
                # The API may return only the most recent page; keep requesting
                # earlier windows until date_from is reached or no progress is made.
                while try_2_date(data_df['trade_date'].iloc[-1]) > date_from:
                    last_date_in_df_last, last_date_in_df_cur = try_2_date(
                        data_df['trade_date'].iloc[-1]), None
                    df2 = invoke_index_daily(
                        ts_code=ts_code,
                        start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(
                            try_2_date(data_df['trade_date'].iloc[-1]) - timedelta(days=1),
                            STR_FORMAT_DATE_TS))
                    # NOTE(review): 'len(df2 > 0)' is probably a typo for
                    # 'len(df2) > 0' (it happens to equal len(df2)) -- confirm
                    if len(df2 > 0):
                        last_date_in_df_cur = try_2_date(df2['trade_date'].iloc[-1])
                        if last_date_in_df_cur < last_date_in_df_last:
                            # the new page is older: append it and loop again
                            data_df = pd.concat([data_df, df2])
                        elif last_date_in_df_cur == last_date_in_df_last:
                            # no earlier data returned: stop paging
                            break
                        if data_df is None:
                            logger.warning('%d/%d) %s has no data during %s %s',
                                           num, data_len, ts_code, date_from, date_to)
                            continue
                        logger.info('%d/%d) %d data of %s between %s and %s',
                                    num, data_len, data_df.shape[0], ts_code, date_from, date_to)
                    else:
                        break
            # accumulate the fetched rows for batched insertion
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # debug only: stop early after a few codes
            if DEBUG and len(data_df_list) > 5:
                break
            # flush once the accumulated row count crosses the threshold
            if data_count >= 500:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD)
                all_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # flush whatever is still buffered, then fix up engine/PK if the table
        # was created during this run
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_tushare_block_trade(chain_param=None):
    """Incrementally import block-trade records into tushare_block_trade.

    Walks every open SSE trading day after the last stored trade_date
    (starting 2003-08-02, when the block-trade system launched, on first run;
    before 16:00 the window ends yesterday) and inserts that day's records.
    :param chain_param: unused here; presumably for task-chain compatibility -- TODO confirm
    :return: None
    """
    table_name = 'tushare_block_trade'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('trade_date', Date),
        ('ts_code', String(20)),
        ('price', DOUBLE),
        ('vol', DOUBLE),
        ('amount', DOUBLE),
        ('buyer', String(100)),
        ('seller', String(100)),
    ]
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = f"""select cal_date FROM ( select * from tushare_trade_date trddate where( cal_date>(SELECT max(trade_date) FROM {table_name})) )tt where (is_open=1 and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) and exchange='SSE') """
    else:
        # 2003-08-02: the block-trading system went live
        sql_str = """SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) AND exchange='SSE' AND cal_date>='2003-08-02') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)
    with with_db_session(engine_md) as session:
        # fetch the pending trade dates
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())
    # dtype mapping for the bulk insert
    dtype = {key: val for key, val in param_list}
    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_block_trade(trade_date=trade_date)
            if len(data_df) > 0:
                # no primary key on this table: legitimate duplicate records exist
                # (e.g. 002325.SZ 2014-11-17, two identical block trades)
                data_count = bunch_insert(data_df, table_name=table_name, dtype=dtype)
                logging.info("%d/%d) %s更新 %s 结束 %d 条信息被更新",
                             num, trade_date_list_len, trade_date, table_name, data_count)
            else:
                logging.info("%d/%d) %s 无数据信息可被更新", num, trade_date_list_len, trade_date)
    except Exception:
        # fix: bare 'except:' also swallowed SystemExit / KeyboardInterrupt
        logger.exception('更新 %s 表异常', table_name)
def merge_latest(chain_param=None, ):
    """Merge `cmc_coin_v1_daily` history and `cmc_coin_pro_latest` quotes into `cmc_coin_merged_latest`.

    Rebuilds the merged table from scratch on each run: creates it when missing,
    truncates it, refills it from the daily-history table, then upserts the most
    recent quote per (name, symbol) from the latest-price table.

    :param chain_param: only used to pass parameters along a task.chain; unused here
    :return: None
    """
    table_name = 'cmc_coin_merged_latest'
    logger.info("开始合并数据到 %s 表", table_name)
    has_table = engine_md.has_table(table_name)
    # DDL for the merged table; MyISAM matches the engine used by the other md tables
    create_sql_str = """CREATE TABLE {table_name} (
        `id` VARCHAR(60) NOT NULL,
        `date` DATE NOT NULL,
        `datetime` DATETIME NULL,
        `name` VARCHAR(60) NULL,
        `symbol` VARCHAR(20) NULL,
        `close` DOUBLE NULL,
        `volume` DOUBLE NULL,
        `market_cap` DOUBLE NULL,
        PRIMARY KEY (`id`, `date`))
        ENGINE = MyISAM""".format(table_name=table_name)
    with with_db_session(engine_md) as session:
        if not has_table:
            session.execute(create_sql_str)
            logger.info("创建 %s 表", table_name)
        # Full rebuild: wipe existing contents before re-merging
        session.execute(
            'truncate table {table_name}'.format(table_name=table_name))
        # Step 1: copy the whole daily history, joined with the info table for name/symbol;
        # `date` is reused as the `datetime` column since history rows carry no time part.
        insert_sql_str = """INSERT INTO `{table_name}`
            (`id`, `date`, `datetime`, `name`, `symbol`, `close`, `volume`, `market_cap`)
            select daily.id, `date`, `date`, `name`, `symbol`, `close`, `volume`, `market_cap`
            from cmc_coin_v1_daily daily
            left join cmc_coin_v1_info info
            on daily.id = info.id""".format(table_name=table_name)
        session.execute(insert_sql_str)
        session.commit()
        # Step 2: upsert the newest quote per (name, symbol); subquery `g` selects
        # max(last_updated) per coin, and the join keeps only that row's price fields.
        # NOTE(review): the join to `g` is a LEFT JOIN without a WHERE filter, so
        # non-latest rows are not excluded by it — they appear to be absorbed by the
        # (`id`, `date`) primary key via ON DUPLICATE KEY UPDATE; confirm intended.
        insert_latest_sql_str = """INSERT INTO `{table_name}`
            (`id`, `date`, `datetime`, `name`, `symbol`, `close`, `volume`, `market_cap`)
            select info.id, date(latest.last_updated), latest.last_updated,
                latest.name, latest.symbol, price, volume_24h, market_cap
            from cmc_coin_pro_latest latest
            left join
            (
                select latest.name, latest.symbol, max(latest.last_updated) last_updated
                from cmc_coin_pro_latest latest group by latest.name, latest.symbol
            ) g
            on latest.name = g.name
            and latest.symbol = g.symbol
            and latest.last_updated = g.last_updated
            left outer join cmc_coin_v1_info info
            on latest.name = info.name
            and latest.symbol = info.symbol
            on duplicate key update
            `datetime`=values(`datetime`), `name`=values(`name`), `symbol`=values(`symbol`),
            `close`=values(`close`), `volume`=values(`volume`), `market_cap`=values(`market_cap`)""".format(
            table_name=table_name)
        session.execute(insert_latest_sql_str)
        session.commit()
        # Report the final row count of the rebuilt table
        data_count = session.execute(
            "select count(*) from {table_name}".format(
                table_name=table_name)).scalar()
        logger.info("%d 条记录插入到 %s", data_count, table_name)
def import_coin_latest(chain_param=None, ):
    """Fetch the latest cryptocurrency quotes from the CoinMarketCap pro API and
    upsert them into `cmc_coin_pro_latest`.

    On the table's first creation it is converted to MyISAM and given an
    (`id`, `last_updated`) primary key.

    :param chain_param: only used to pass parameters along a task.chain; unused here
    :return: None
    """
    table_name = 'cmc_coin_pro_latest'
    has_table = engine_md.has_table(table_name)
    # SQL type mapping for the upsert
    # NOTE(review): 'id' is declared Integer here, but the ALTER below retypes the
    # column to VARCHAR(60) — confirm which is intended.
    dtype = {
        'id': Integer,
        'name': String(60),
        'slug': String(60),
        'symbol': String(20),
        'date_added': DATETIME,
        'last_updated': DATETIME,
        'market_cap': DOUBLE,
        'circulating_supply': DOUBLE,
        'max_supply': DOUBLE,
        'num_market_pairs': DOUBLE,
        'percent_change_1h': DOUBLE,
        'percent_change_24h': DOUBLE,
        'percent_change_7d': DOUBLE,
        'price': DOUBLE,
        'total_supply': DOUBLE,
        'volume_24h': DOUBLE,
        'cmc_rank': DOUBLE,
    }
    header = {
        'Content-Type': 'application/json',
        'X-CMC_PRO_API_KEY': config.CMC_PRO_API_KEY
    }
    params = {
        # 'CMC_PRO_API_KEY': config.CMC_PRO_API_KEY,
        'limit': 5000,
        'start': 1
    }
    # https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest?sort=market_cap&start=0&limit=10&cryptocurrency_type=tokens&convert=USD,BTC
    url = "https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest"
    try:
        # fixed: requests.get without a timeout can block forever on a stalled
        # connection; fail fast and log instead, mirroring the non-200 handling.
        rsp = requests.get(url=url, params=params, headers=header, timeout=60)
    except requests.RequestException:
        logger.exception('获取数据异常 %s', url)
        return
    if rsp.status_code != 200:
        logger.error('获取数据异常[%d] %s', rsp.status_code, rsp.content)
        return
    ret_dic = rsp.json()
    data_list = ret_dic['data']
    # Flatten each record: lift the USD quote sub-dict up to the top level
    data_dic_list = []
    for dic in data_list:
        data_dic = {}
        for key, val in dic.items():
            if key == 'quote':
                for sub_key, sub_val in val['USD'].items():
                    data_dic[sub_key] = sub_val
            else:
                data_dic[key] = val
        data_dic_list.append(data_dic)
    data_df = pd.DataFrame(data_dic_list)
    # Parse the API's timestamp strings into datetime objects
    data_df['date_added'] = data_df['date_added'].apply(
        lambda x: str_2_datetime(x, DATETIME_FORMAT_STR))
    data_df['last_updated'] = data_df['last_updated'].apply(
        lambda x: str_2_datetime(x, DATETIME_FORMAT_STR))
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        # build_primary_key([table_name])
        create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST ,
            CHANGE COLUMN `last_updated` `last_updated` DATETIME NOT NULL AFTER `id`,
            ADD PRIMARY KEY (`id`, `last_updated`)""".format(table_name=table_name)
        execute_sql(engine_md, create_pk_str)
def import_coin_daily(chain_param=None, id_set=None, begin_time=None):
    """Import daily coin history into `cmc_coin_v1_daily` via the CMC v1 scraper.

    For each coin listed in `cmc_coin_v1_info`, computes the date range still
    missing in the daily table, scrapes it, normalizes columns, and bulk-upserts
    in batches of > 10000 rows. On first creation the table is converted to
    MyISAM with an (`id`, `date`) primary key.
    (Original note: uses the v1 interface, which was expected to expire around
    the end of 2018.)

    :param chain_param: only used to pass parameters along a task.chain; unused here
    :param id_set: restrict the import to these coin ids; None = all coins
    :param begin_time: earliest date to fetch; overrides the computed start when earlier
    :return: None
    """
    table_name = "cmc_coin_v1_daily"
    info_table_name = "cmc_coin_v1_info"
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume each coin from the day after its max imported date; default start
        # is 2013-04-28. Before 16:00 the current day is treated as not final.
        sql_str = """
            SELECT id, symbol, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.id, symbol, ifnull(trade_date,date('2013-04-28')) date_frm, null delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM
                    {info_table_name} info
                LEFT OUTER JOIN
                    (SELECT id, adddate(max(date),1) trade_date FROM {table_name} GROUP BY id) daily
                ON info.id = daily.id
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY id""".format(table_name=table_name, info_table_name=info_table_name)
    else:
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
        # No daily table yet: fetch full history (date_frm NULL) for every coin
        sql_str = """
            SELECT id, symbol, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT id, symbol, null date_frm, null delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info
            ) tt
            ORDER BY id""".format(info_table_name=info_table_name)
    with with_db_session(engine_md) as session:
        # Compute, per coin, the date interval that still needs daily data
        table = session.execute(sql_str)
        # (coin_id, symbol) -> (date_from, date_to); begin_time overrides when earlier
        stock_date_dic = {
            (coin_id, symbol): (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for coin_id, symbol, date_from, date_to in table.fetchall()
            if id_set is None or coin_id in id_set
        }
    # SQL type mapping for the bulk upsert
    dtype = {
        'id': String(60),
        'date': Date,
        'open': DOUBLE,
        'high': DOUBLE,
        'low': DOUBLE,
        'close': DOUBLE,
        'volume': DOUBLE,
        'market_cap': DOUBLE,
    }
    col_names = dtype.keys()
    data_df_list = []
    dic_count = len(stock_date_dic)
    data_count = 0
    # Fetch data from the scraper interface
    logger.info('%d coins will been import into %s', dic_count, table_name)
    try:
        for data_num, ((coin_id, symbol), (date_from, date_to)) in enumerate(stock_date_dic.items(),
                                                                             start=1):
            logger.debug('%d/%d) %s[%s] [%s - %s]', data_num, dic_count, coin_id, symbol, date_from, date_to)
            date_from_str = None
            try:
                if date_from is None:
                    # No start date known: scrape the coin's full history
                    scraper = CmcScraperV1(symbol, coin_id)
                else:
                    date_from_str = date_2_str(
                        str_2_date(date_from, DATE_FORMAT_STR), DATE_FORMAT_STR_CMC)
                    scraper = CmcScraperV1(symbol, coin_id, start_date=date_from_str)
                data_df = scraper.get_dataframe()
            except Exception as exp:
                # One failing coin must not abort the whole batch
                logger.exception("scraper('%s', '%s', start_date='%s')", symbol, coin_id, date_from_str)
                continue
            if data_df is None or data_df.shape[0] == 0:
                logger.warning('%d/%d) %s has no data during %s %s', data_num, dic_count, coin_id, date_from,
                               date_to)
                continue
            # Normalize scraper column names to the table's column names
            data_df.rename(columns={
                col_name: rename_by_dic(col_name, col_names) for col_name in data_df.columns
            }, inplace=True)
            data_df.rename(columns={'market cap': 'market_cap'}, inplace=True)
            # The scraper returns '-' style strings for missing numbers: coerce to 0
            data_df['market_cap'] = data_df['market_cap'].apply(
                lambda x: 0 if isinstance(x, str) else x)
            data_df['volume'] = data_df['volume'].apply(
                lambda x: 0 if isinstance(x, str) else x)
            logger.info('%d/%d) %d data of %s between %s and %s', data_num, dic_count, data_df.shape[0],
                        coin_id, data_df['date'].min(), data_df['date'].max())
            data_df['id'] = coin_id
            data_df_list.append(data_df)
            data_count += data_df.shape[0]
            # For debugging only: stop early after a few coins
            if DEBUG and len(data_df_list) > 10:
                break
            # Flush the buffer once it exceeds the threshold
            if data_count > 10000:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
                logging.info("%s %d 条信息被更新", table_name, data_count)
                data_df_list, data_count = [], 0
    finally:
        # Flush whatever remains in the buffer, then (on first run) build the table
        # NOTE(review): the closing log reports only the last flushed batch, not a
        # grand total across interim flushes — confirm whether a total was intended.
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # build_primary_key([table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST ,
                CHANGE COLUMN `date` `date` DATE NOT NULL AFTER `id`,
                ADD PRIMARY KEY (`id`, `date`)""".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
def import_tushare_hsgt_top10(chain_param=None):
    """Import HSGT (沪深港通) daily top-10 active stock data into `tushare_hsgt_top10`.

    Computes the missing trading days from `tushare_trade_date` — resuming from the
    day after the max imported `trade_date` when the table exists, otherwise from
    2014-11-17 (沪港通 launch) — then fetches SH (market_type '1') and SZ
    (market_type '3') data day by day and upserts it keyed on (ts_code, trade_date).

    :param chain_param: only used to pass parameters along a task.chain; unused here
    :return: None
    """
    table_name = 'tushare_hsgt_top10'
    logging.info("更新 %s 开始", table_name)
    # (column name, SQL type) pairs used to build the insert dtype below
    param_list = [
        ('trade_date', Date),
        ('ts_code', String(20)),
        ('name', String(20)),
        ('close', DOUBLE),
        ('change', DOUBLE),
        ('rank', Integer),
        ('market_type', String(20)),
        ('amount', DOUBLE),
        ('net_amount', DOUBLE),
        ('buy', DOUBLE),
        ('sell', DOUBLE),
    ]
    has_table = engine_md.has_table(table_name)
    # Decide the date range to fetch depending on whether the target table exists
    if has_table:
        # Before 16:00 the current day's data is not final, so stop at yesterday
        sql_str = """
            select cal_date
            FROM
             (
              select * from tushare_trade_date trddate
              where( cal_date>(SELECT max(trade_date) FROM {table_name}))
            )tt
            where (is_open=1
                   and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                   and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
            SELECT cal_date FROM tushare_trade_date trddate
            WHERE (trddate.is_open=1
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
               AND exchange='SSE'
               AND cal_date>='2014-11-17') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Fetch the list of trading days still to be imported
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())
    # Build the dtype mapping for the bulk insert
    dtype = {key: val for key, val in param_list}
    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            for market_type in list(['1', '3']):
                data_df = invoke_hsgt_top10(trade_date=trade_date, market_type=market_type)
                if len(data_df) > 0:
                    data_count = bunch_insert_p(
                        data_df, table_name=table_name, dtype=dtype,
                        primary_keys=['ts_code', 'trade_date'])
                    logging.info("%d/%d) %s更新 %s 结束 %d 条信息被更新",
                                 num, trade_date_list_len, trade_date, table_name, data_count)
                else:
                    logging.info("无数据信息可被更新")
                    # Empty result for this market: skip the remaining market types for the day
                    break
    except Exception:  # fixed: was a bare `except:` which also swallowed SystemExit/KeyboardInterrupt
        logger.exception('更新 %s 表异常', table_name)
def tushare_tmt_twincome_info():
    """Refresh the Taiwan TMT industry catalogue table `tushare_tmt_twincome_info`.

    Upserts a hard-coded list of (industry code, Chinese name, series start date)
    rows; on the table's first creation it is converted to MyISAM and `ts_code`
    is made the primary key.

    :return: None
    """
    table_name = 'tushare_tmt_twincome_info'
    has_table = engine_md.has_table(table_name)
    # [ts_code, cn_name, start_date]; every series starts on 2011-01-28
    indicators_dic = [
        ['1', 'PC', '20110128'],
        ['2', 'NB', '20110128'],
        ['3', '主机板', '20110128'],
        ['4', '印刷电路板', '20110128'],
        ['5', 'IC载板', '20110128'],
        ['6', 'PCB组装', '20110128'],
        ['7', '软板', '20110128'],
        ['8', 'PCB', '20110128'],
        ['9', 'PCB原料', '20110128'],
        ['10', '铜箔基板', '20110128'],
        ['11', '玻纤纱布', '20110128'],
        ['12', 'FCCL', '20110128'],
        ['13', '显示卡', '20110128'],
        ['14', '绘图卡', '20110128'],
        ['15', '电视卡', '20110128'],
        ['16', '泛工业电脑', '20110128'],
        ['17', 'POS', '20110128'],
        ['18', '工业电脑', '20110128'],
        ['19', '光电IO', '20110128'],
        ['20', '监视器', '20110128'],
        ['21', '扫描器', '20110128'],
        ['22', 'PC周边', '20110128'],
        ['23', '储存媒体', '20110128'],
        ['24', '光碟', '20110128'],
        ['25', '硬盘磁盘', '20110128'],
        ['26', '发光二极体', '20110128'],
        ['27', '太阳能', '20110128'],
        ['28', 'LCD面板', '20110128'],
        ['29', '背光模组', '20110128'],
        ['30', 'LCD原料', '20110128'],
        ['31', 'LCD其它', '20110128'],
        ['32', '触控面板', '20110128'],
        ['33', '监控系统', '20110128'],
        ['34', '其它光电', '20110128'],
        ['35', '电子零组件', '20110128'],
        ['36', '二极体整流', '20110128'],
        ['37', '连接器', '20110128'],
        ['38', '电源供应器', '20110128'],
        ['39', '机壳', '20110128'],
        ['40', '被动元件', '20110128'],
        ['41', '石英元件', '20110128'],
        ['42', '3C二次电源', '20110128'],
        ['43', '网路设备', '20110128'],
        ['44', '数据机', '20110128'],
        ['45', '网路卡', '20110128'],
        ['46', '半导体', '20110128'],
        ['47', '晶圆制造', '20110128'],
        ['48', 'IC封测', '20110128'],
        ['49', '特用IC', '20110128'],
        ['50', '记忆体模组', '20110128'],
        ['51', '晶圆材料', '20110128'],
        ['52', 'IC设计', '20110128'],
        ['53', 'IC光罩', '20110128'],
        ['54', '电子设备', '20110128'],
        ['55', '手机', '20110128'],
        ['56', '通讯设备', '20110128'],
        ['57', '电信业', '20110128'],
        ['58', '网路服务', '20110128'],
        ['59', '卫星通讯', '20110128'],
        ['60', '光纤通讯', '20110128'],
        ['61', '3C通路', '20110128'],
        ['62', '消费性电子', '20110128'],
        ['63', '照相机', '20110128'],
        ['64', '软件服务', '20110128'],
        ['65', '系统整合', '20110128'],
    ]
    # SQL type mapping for the upsert
    dtype = {
        'ts_code': String(20),
        'cn_name': String(120),
        'start_date': Date,
    }
    name_list = ['ts_code', 'cn_name', 'start_date']
    info_df = pd.DataFrame(data=indicators_dic, columns=name_list)
    data_count = bunch_insert_on_duplicate_update(info_df, table_name, engine_md, dtype)
    logger.info('%d 条记录被更新', data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        create_pk_str = """ALTER TABLE {table_name}
            CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
            ADD PRIMARY KEY (`ts_code`)""".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)
        # fixed: the message previously said `wind_code`, but the key column is `ts_code`
        logger.info('%s 表 `ts_code` 主键设置完成', table_name)
def import_tushare_hsgt_top10(chain_param=None):
    """Import HSGT (沪深港通) daily top-10 active stock data into `tushare_hsgt_top10`.

    NOTE(review): this re-definition shadows an earlier function of the same name
    in this module — only this later one is reachable; the two should be merged.

    Computes the missing trading days from `tushare_trade_date` (resuming from the
    max imported date when the table exists, otherwise from 2014-11-17), fetches
    SH ('1') and SZ ('3') market data day by day, upserts it, and on first
    creation converts the table to MyISAM with a (ts_code, trade_date) key.

    :param chain_param: only used to pass parameters along a task.chain; unused here
    :return: None
    """
    table_name = 'tushare_hsgt_top10'
    logging.info("更新 %s 开始", table_name)
    # (column name, SQL type) pairs used to build the insert dtype below
    param_list = [
        ('trade_date', Date),
        ('ts_code', String(20)),
        ('name', String(20)),
        ('close', DOUBLE),
        ('change', DOUBLE),
        ('rank', Integer),
        ('market_type', String(20)),
        ('amount', DOUBLE),
        ('net_amount', DOUBLE),
        ('buy', DOUBLE),
        ('sell', DOUBLE),
    ]
    has_table = engine_md.has_table(table_name)
    # Decide the date range to fetch depending on whether the target table exists
    if has_table:
        # Before 16:00 the current day's data is not final, so stop at yesterday
        sql_str = """
            select cal_date
            FROM
             (
              select * from tushare_trade_date trddate
              where( cal_date>(SELECT max(trade_date) FROM {table_name}))
            )tt
            where (is_open=1
                   and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                   and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
            SELECT cal_date FROM tushare_trade_date trddate
            WHERE (trddate.is_open=1
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
               AND exchange='SSE'
               AND cal_date>='2014-11-17') ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # Fetch the list of trading days still to be imported
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())
    # Build the dtype mapping for the bulk insert
    dtype = {key: val for key, val in param_list}
    try:
        # fixed: iterate the dates directly instead of `for i in range(len(...))`
        for trade_date in trade_date_list:
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            for market_type in list(['1', '3']):
                data_df = invoke_hsgt_top10(trade_date=trade_date, market_type=market_type)
                if len(data_df) > 0:
                    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
                    logging.info("%s更新 %s 结束 %d 条信息被更新", trade_date, table_name, data_count)
                else:
                    logging.info("无数据信息可被更新")
                    # Empty result for this market: skip the remaining market types for the day
                    break
    finally:
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # build_primary_key([table_name])
            # fixed: the primary key was previously `trade_date` alone, which cannot be
            # unique (up to 10 stocks x 2 markets per day would collide) and disagreed
            # with the ['ts_code', 'trade_date'] key used elsewhere for this table.
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `trade_date` `trade_date` DATE NOT NULL AFTER `ts_code`,
                ADD PRIMARY KEY (`ts_code`, `trade_date`)""".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `ts_code`, `trade_date` 主键设置完成', table_name)
def import_future_daily_his(chain_param=None,
                            ths_code_set: set = None,
                            begin_time=None):
    """Update daily futures-contract quotes into `ifind_future_daily`.

    :param chain_param: only used to pass parameters along a task.chain; unused here
    :param ths_code_set: restrict the update to these THS codes; None = all contracts
    :param begin_time: earliest date to fetch; overrides the computed start when earlier
    :return: None
    """
    table_name = 'ifind_future_daily'
    info_table_name = 'ifind_future_info'
    logger.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # (indicator name, SQL type) pairs requested from THS_HistoryQuotes.
    # NOTE(review): `preClose` is typed String(20) while every other quote field
    # is DOUBLE — looks inconsistent; confirm before changing.
    indicator_param_list = [
        ('preClose', String(20)),
        ('open', DOUBLE),
        ('high', DOUBLE),
        ('low', DOUBLE),
        ('close', DOUBLE),
        ('volume', DOUBLE),
        ('amount', DOUBLE),
        ('avgPrice', DOUBLE),
        ('change', DOUBLE),
        ('changeRatio', DOUBLE),
        ('preSettlement', DOUBLE),
        ('settlement', DOUBLE),
        ('change_settlement', DOUBLE),
        ('chg_settlement', DOUBLE),
        ('openInterest', DOUBLE),
        ('positionChange', DOUBLE),
        ('amplitude', DOUBLE),
    ]
    json_indicator = ','.join([key for key, _ in indicator_param_list])
    if has_table:
        # After 16:00 include the current day's close; before 16:00 stop at yesterday.
        # Contracts whose date_to is more than ~1 year in the past are skipped: some
        # very old history can no longer be back-filled, e.g. AL0202.SHF AL9902.SHF CU0202.SHF.
        # TODO: the ths_ksjyr_future field should be replaced by
        # ths_contract_listed_date_future, which is more appropriate.
        sql_str = """SELECT ths_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
                SELECT fi.ths_code, ifnull(trade_date_max_1, ths_start_trade_date_future) date_frm,
                    ths_last_td_date_future lasttrade_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) wfd
                ON fi.ths_code = wfd.ths_code
            ) tt
            WHERE date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date)
                AND subdate(curdate(), 360) < if(lasttrade_date<end_date, lasttrade_date, end_date)
            ORDER BY ths_code""".format(table_name=table_name, info_table_name=info_table_name)
    else:
        # No daily table yet: take each contract's full listed range from the info table
        sql_str = """SELECT ths_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
                SELECT fi.ths_code, ths_start_trade_date_future date_frm,
                    ths_last_td_date_future lasttrade_date,
                    if(hour(now())<16,
                    subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi
            ) tt""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # Compute, per contract, the date interval that still needs daily data
        table = session.execute(sql_str)
        # ths_code -> (date_from, date_to); begin_time overrides when earlier
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall() if
            ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))
        # Trial accounts can only access the most recent 5 years of data
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items() if date_from_min <= date_to
        }

    # SQL type mapping for the bulk upsert
    dtype = {key: val for key, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    dtype['time'] = Date
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        logger.info("%d future instrument will be handled", code_count)
        # NOTE(review): the loop variable reuses the parameter name `begin_time`,
        # clobbering it after the first iteration; harmless here because the
        # parameter is only read when building code_date_range_dic above, but
        # worth renaming.
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            data_df = invoker.THS_HistoryQuotes(
                ths_code, json_indicator,
                'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
                # Flush the buffer once it reaches the threshold
                if data_count >= 10000:
                    data_df_all = pd.concat(data_df_list)
                    # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                    data_count = bunch_insert_on_duplicate_update(
                        data_df_all, table_name, engine_md, dtype)
                    tot_data_count += data_count
                    data_df_list, data_count = [], 0
                    # NOTE(review): data_count was just reset to 0, so this log line
                    # always reports 0 inserted rows — likely should log before the reset.
                    logging.info("%s 新增数据 %d 条", table_name, data_count)
            # For debugging only: stop early after a couple of contracts
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # Flush whatever remains in the buffer, then (on first run) build the table
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, dtype)
            tot_data_count += data_count
            logging.info("%s 新增数据 %d 条", table_name, data_count)

        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def import_tushare_tmt_twincome(chain_param=None, ts_code_set=None):
    """Import Taiwan TMT monthly revenue series into `tushare_tmt_twincome`.

    For each industry code from `tushare_tmt_twincome_info`, computes the date
    range still missing in the target table, then pages backwards through the
    API (re-querying with an earlier end_date) until the range is covered,
    buffering rows and bulk-upserting them in batches of >= 1000.

    NOTE(review): the original docstring ("插入股票日线数据...") was copied from a
    stock-daily importer and did not describe this function.

    :param chain_param: only used to pass parameters along a task.chain; unused here
    :param ts_code_set: immediately overwritten with None below — effectively unused
    :return: None
    """
    table_name = 'tushare_tmt_twincome'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Decide the per-item date ranges depending on whether the target table exists
    if has_table:
        # Resume each item from the day after its max imported date
        sql_str = """
            SELECT ts_code, date_frm start_date, end_date
            FROM
            (
                SELECT info.ts_code, ifnull(date, start_date) date_frm,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_tmt_twincome_info info
                LEFT OUTER JOIN
                    (SELECT item, adddate(max(date),1) date FROM {table_name} GROUP BY item ) income
                ON info.ts_code = income.item
            ) tt
            order by ts_code""".format(table_name=table_name)
    else:
        sql_str = """SELECT ts_code, start_date ,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM tushare_tmt_twincome_info info """
        logger.warning('%s 不存在,仅使用 tushare_tmt_twincome_info 表进行计算日期范围', table_name)
    # ts_code_set = None
    with with_db_session(engine_md) as session:
        # Fetch the (ts_code, start_date, end_date) ranges to import
        table = session.execute(sql_str)
        # NOTE(review): this line discards the ts_code_set argument, so the filter
        # in the comprehension below never excludes anything — confirm intent.
        begin_time, ts_code_set = None, None
        # ts_code -> (date_from, date_to); begin_time would override when earlier
        code_date_range_dic = {
            ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ts_code, date_from, date_to in table.fetchall() if
            ts_code_set is None or ts_code in ts_code_set}
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(code_date_range_dic)
    logger.info('%d Taiwan TMT information will been import into tushare_tmt_twincome', data_len)
    # Accumulate fetched frames into data_df_list and flush in batches
    Cycles = 1  # NOTE(review): unused variable
    try:
        for num, (ts_code, (start_date, end_date)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, start_date, end_date)
            data_df = invoke_tmt_twincome(item=ts_code,
                                          start_date=datetime_2_str(start_date, STR_FORMAT_DATE_TS),
                                          end_date=datetime_2_str(end_date, STR_FORMAT_DATE_TS))
            # logger.info(' %d data of %s between %s and %s', df.shape[0], ts_code, start_date, date_to)
            if len(data_df) > 0 and data_df['date'] is not None:
                # Page backwards: while the oldest row returned is still newer than
                # start_date, re-query with end_date just before that oldest row and
                # append the older page to data_df.
                # NOTE(review): if df2 ever comes back empty, nothing changes and the
                # loop re-queries the same range forever — potential infinite loop.
                while try_2_date(data_df['date'].iloc[-1]) > try_2_date(start_date):
                    last_date_in_df_last, last_date_in_df_cur = try_2_date(data_df['date'].iloc[-1]), None
                    df2 = invoke_tmt_twincome(item=ts_code,
                                              start_date=datetime_2_str(start_date, STR_FORMAT_DATE_TS),
                                              end_date=datetime_2_str(
                                                  try_2_date(data_df['date'].iloc[-1]) - timedelta(days=1),
                                                  STR_FORMAT_DATE_TS))
                    if len(df2) > 0 and df2['date'] is not None:
                        last_date_in_df_cur = try_2_date(df2['date'].iloc[-1])
                        if last_date_in_df_cur < last_date_in_df_last:
                            data_df = pd.concat([data_df, df2])
                        elif last_date_in_df_cur == last_date_in_df_last:
                            # No progress: the API returned the same oldest date again
                            break
                if data_df is None:
                    logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, start_date,
                                   end_date)
                    continue
                logger.info('%d/%d) %d data of %s between %s and %s',
                            num, data_len, data_df.shape[0], ts_code, start_date, end_date)
            elif len(df2) <= 0:
                # NOTE(review): df2 is only assigned inside the while-loop of the branch
                # above, so this raises NameError when the first fetched data_df is
                # empty — confirm the intended control flow before relying on it.
                break
            # Buffer the fetched rows
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
                # Flush the buffer once it reaches the threshold
                if data_count >= 1000:
                    data_df_all = pd.concat(data_df_list)
                    bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md,
                                                     DTYPE_TUSHARE_TMT_TWINCOME)
                    all_data_count += data_count
                    data_df_list, data_count = [], 0
    finally:
        # Flush whatever remains in the buffer
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md,
                                                          DTYPE_TUSHARE_TMT_TWINCOME)
            all_data_count = all_data_count + data_count
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)