def import_jq_trade_date(chain_param=None):
    """Incrementally import jqdata trade-date data into TABLE_NAME.

    When the table already exists and holds data, only dates after the
    current maximum trade_date are fetched (up to roughly one year ahead);
    otherwise the full trade-day calendar is imported.

    Fix: the original crashed with ``TypeError`` when the table existed but
    was empty (``max(trade_date)`` returns NULL → ``None + timedelta``).

    :param chain_param: placeholder so the task can be used in a celery chain
    :return: None
    """
    logger.info("更新 %s 开始", TABLE_NAME)
    # Check whether the target table already exists
    has_table = engine_md.has_table(TABLE_NAME)
    trade_day_max = None
    if has_table:
        trade_day_max = execute_scalar(engine_md, f'SELECT max(trade_date) FROM {TABLE_NAME}')
    if trade_day_max is not None:
        # Incremental load: day after the latest stored date up to ~1 year ahead
        trade_date_list = get_trade_days(
            start_date=(trade_day_max + timedelta(days=1)),
            end_date=(date.today() + timedelta(days=366)))
    else:
        # Table missing, or existing but empty — full reload
        trade_date_list = get_all_trade_days()
    date_count = len(trade_date_list)
    if date_count == 0:
        logger.info("没有更多的交易日数据可被导入")
        return
    logger.info("%d 条交易日数据将被导入", date_count)
    trade_date_df = pd.DataFrame({'trade_date': trade_date_list})
    bunch_insert_on_duplicate_update(
        trade_date_df, TABLE_NAME, engine_md, dtype={'trade_date': Date},
        myisam_if_create_table=True, primary_keys=['trade_date'],
        schema=config.DB_SCHEMA_MD)
    logger.info('%d 条交易日数据导入 %s 完成', date_count, TABLE_NAME)
def fresh_tushare_stock_fund_holdings(year, quarter):
    """Fetch fund shareholding data for one year/quarter and upsert it into
    tushare_stock_fund_holdings.

    Fix: the closing log line reported ``all_data_count`` which was never
    updated (always logged 0); it now reports the actual inserted row count.
    Unused accumulator variables were removed.

    :param year: calendar year, e.g. 2018
    :param quarter: quarter number 1-4
    :return: None
    """
    table_name = 'tushare_stock_fund_holdings'
    logging.info("更新 %s 表%s年%s季度基金持股信息开始", table_name, year, quarter)
    tushare_fund_holdings_dtype = {
        'ts_code': String(20),
        'sec_name': String(20),
        'end_date': Date,
        'nums': DOUBLE,
        'nlast': DOUBLE,
        'count': DOUBLE,
        'clast': DOUBLE,
        'amount': DOUBLE,
        'ratio': DOUBLE,
    }
    data_df = invoke_fund_holdings(year, quarter)
    # Map the bare 6-digit code to a tushare ts_code ('6...' -> SH, else SZ)
    data_df.code = [i + '.SH' if i[0] == '6' else i + '.SZ' for i in data_df.code]
    data_df = data_df.rename(columns={'code': 'ts_code', 'name': 'sec_name', 'date': 'end_date'})
    all_data_count = bunch_insert_on_duplicate_update(
        data_df, table_name, engine_md, tushare_fund_holdings_dtype)
    logging.info("%s年%s季度 %s 更新 %d 条基金持股信息", year, quarter, table_name, all_data_count)
def insert_into_db(data_df_list, engine_md):
    """Concatenate accumulated tick dataframes and upsert them into
    wind_stock_tick; add MyISAM conversion and primary key on first create.

    :param data_df_list: list of tick dataframes indexed by timestamp
    :param engine_md: sqlalchemy engine for the market-data schema
    :return: number of dataframes that were passed in
    """
    table_name = 'wind_stock_tick'
    data_count = len(data_df_list)
    table_existed_before = engine_md.has_table(table_name)
    # Column types for the upsert; wind_code identifies the instrument.
    dtype = {
        'wind_code': String(20),
        'datetime': DateTime,
        'open': DOUBLE,
        'high': DOUBLE,
        'low': DOUBLE,
        'close': DOUBLE,
        'ask1': DOUBLE,
        'bid1': DOUBLE,
        'asize1': DOUBLE,
        'bsize1': DOUBLE,
        'volume': DOUBLE,
        'amount': DOUBLE,
        'preclose': DOUBLE,
    }
    if data_count > 0:
        merged_df = pd.concat(data_df_list)
        merged_df.index.rename('datetime', inplace=True)
        merged_df.reset_index(inplace=True)
        bunch_insert_on_duplicate_update(merged_df, table_name, engine_md, dtype=dtype)
        logger.info('%d data imported', merged_df.shape[0])
    # If this call created the table, convert it and build the primary key.
    if not table_existed_before and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    return data_count
def import_tushare_stock_pledge_stat(chain_param=None, ts_code_set=None):
    """Import stock pledge statistics into tushare_stock_pledge_stat.

    Iterates every ts_code found in tushare_stock_info, accumulates the
    per-code dataframes and flushes them in batches of >= 10000 rows; the
    remainder is flushed in the ``finally`` block so partial progress
    survives an exception.

    :param chain_param: placeholder for celery task chaining (unused here)
    :param ts_code_set: unused in the current implementation
    :return: None
    """
    table_name = 'tushare_stock_pledge_stat'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Determine the code universe from tushare_stock_info
    sql_str = """SELECT ts_code FROM tushare_stock_info """
    logger.warning('使用 tushare_stock_info 表确认需要提取股票质押数据的范围')
    with with_db_session(engine_md) as session:
        # Fetch the list of stock codes to process
        table = session.execute(sql_str)
        ts_code_list = list(row[0] for row in table.fetchall())
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(ts_code_list)
    logger.info('%d 只股票的质押信息将被插入 tushare_stock_pledge_stat 表', data_len)
    # Accumulate data_df batches into data_df_list before bulk insert
    Cycles = 1
    try:
        for ts_code in ts_code_list:
            data_df = invoke_pledge_stat(ts_code=ts_code)
            logger.warning('提取 %s 质押信息 %d 条', ts_code, len(data_df))
            # Buffer the rows
            if data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Flush once the buffered row count passes the threshold
            if data_count >= 10000 and len(data_df_list) > 0:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_PLEDGE_STAT)
                logger.warning('更新股票质押信息 %d 条', data_count)
                all_data_count += data_count
                data_df_list, data_count = [], 0
            # Debug-only early exit after a couple of iterations
            Cycles = Cycles + 1
            if DEBUG and Cycles > 2:
                break
    finally:
        # Flush whatever is still buffered, even if the loop raised
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_PLEDGE_STAT)
            all_data_count = all_data_count + data_count
        logging.info("更新 %s 结束,总共 %d 条信息被更新", table_name, all_data_count)
def import_trade_date():
    """Incrementally import trading-calendar dates into wind_trade_date.

    Loads roughly 300 future calendar days per exchange.  For each exchange
    the start date is the day after the latest stored trade_date, falling
    back to 1980-01-01 when no rows exist for that exchange.

    Fixes vs. the original:
    - ``exch_code_trade_date_dic`` is initialised before the query, so a
      query failure no longer causes a ``NameError`` further down.
    - a ``None`` result from ``invoker.tdays`` is skipped with ``continue``
      instead of crashing on ``len(None)``.

    :return: None
    """
    table_name = 'wind_trade_date'
    has_table = engine_md.has_table(table_name)
    exch_code_trade_date_dic = {}
    if has_table:
        with with_db_session(engine_md) as session:
            try:
                # Latest stored calendar date per exchange
                table = session.execute(
                    'SELECT exch_code,max(trade_date) FROM {table_name} GROUP BY exch_code'.format(
                        table_name=table_name))
                exch_code_trade_date_dic = {exch_code: trade_date
                                            for exch_code, trade_date in table.fetchall()}
            except Exception:
                # Fall back to a full reload for every exchange
                logger.exception("交易日获取异常")
    exchange_code_dict = {
        "HKEX": "香港",
        "NYSE": "纽约",
        "SZSE": "深圳",
        "TWSE": "台湾",
        "NASDAQ": "纳斯达克",
        "AMEX": "美国证券交易所",
        "TSE": "东京",
        "LSE": "伦敦",
        "SGX": "新加坡",
    }
    for exchange_code in exchange_code_dict:
        if exchange_code in exch_code_trade_date_dic:
            trade_date_max = exch_code_trade_date_dic[exchange_code]
            trade_date_start = (trade_date_max + timedelta(days=1)).strftime(STR_FORMAT_DATE)
        else:
            trade_date_start = '1980-01-01'
        end_date_str = (date.today() + timedelta(days=310)).strftime(STR_FORMAT_DATE)
        if exchange_code is None or exchange_code == "":
            trade_date_list = invoker.tdays(trade_date_start, end_date_str)
        else:
            trade_date_list = invoker.tdays(trade_date_start, end_date_str,
                                            "TradingCalendar=%s" % exchange_code)
        if trade_date_list is None:
            logger.warning("没有查询到交易日期")
            continue  # len(None) would raise TypeError
        date_count = len(trade_date_list)
        if date_count > 0:
            logger.info("%d 条交易日数据将被导入", date_count)
            trade_date_df = pd.DataFrame({'trade_date': trade_date_list})
            trade_date_df['exch_code'] = exchange_code
            bunch_insert_on_duplicate_update(
                trade_date_df, table_name, engine_md,
                dtype={'trade_date': Date, 'exch_code': String(20)},
                myisam_if_create_table=True,
                primary_keys=['trade_date', 'exch_code'],
                schema=config.DB_SCHEMA_MD)
            logger.info('%s %d 条交易日数据导入 %s 完成', exchange_code, date_count, table_name)
def import_tushare_stock_fund_holdings():
    """Batch-import fund shareholding data for years 2013-2018, all four
    quarters, into tushare_stock_fund_holdings.

    Improvements: the bare ``print`` progress output is replaced with
    logging, and the ts_code conversion loop is a comprehension.  Batches
    are flushed every >= 50 buffered rows; the remainder is flushed in
    ``finally`` so partial progress survives an exception.

    :return: None
    """
    table_name = 'tushare_stock_fund_holdings'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    tushare_fund_holdings_dtype = {
        'ts_code': String(20),
        'sec_name': String(20),
        'end_date': Date,
        'nums': DOUBLE,
        'nlast': DOUBLE,
        'count': DOUBLE,
        'clast': DOUBLE,
        'amount': DOUBLE,
        'ratio': DOUBLE,
    }
    data_df_list, data_count, all_data_count = [], 0, 0
    years = list(range(2013, 2019))
    try:
        for year in years:
            for quarter in (1, 2, 3, 4):
                # progress via logging instead of the original bare print()
                logging.info('提取 %d 年 %d 季度基金持股数据', year, quarter)
                data_df = invoke_fund_holdings(year, quarter)
                # Map the bare 6-digit code to a tushare ts_code ('6...' -> SH, else SZ)
                data_df.code = [i + '.SH' if i[0] == '6' else i + '.SZ' for i in data_df.code]
                data_df = data_df.rename(
                    columns={'code': 'ts_code', 'name': 'sec_name', 'date': 'end_date'})
                # Buffer the rows
                if data_df is not None and data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)
                # Flush once past the threshold
                if data_count >= 50:
                    data_df_all = pd.concat(data_df_list)
                    bunch_insert_on_duplicate_update(
                        data_df_all, table_name, engine_md, tushare_fund_holdings_dtype)
                    all_data_count += data_count
                    data_df_list, data_count = [], 0
    finally:
        # Flush the remainder even on error
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, tushare_fund_holdings_dtype)
            all_data_count = all_data_count + data_count
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_tushare_fund_portfolio(chain_param=None):
    """Import fund portfolio holdings into tushare_fund_portfolio.

    Iterates every ts_code in md_integration.tushare_fund_basic, buffers
    the per-fund dataframes and flushes them in batches of >= 20000 rows;
    the remainder is flushed in ``finally``.

    :param chain_param: placeholder for celery task chaining (unused here)
    :return: None
    """
    table_name = 'tushare_fund_portfolio'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Determine the fund universe; incremental filtering not implemented yet
    # if has_table is True:
    #     sql_str=""""""
    # else:
    sql_str = """SELECT ts_code FROM md_integration.tushare_fund_basic"""
    with with_db_session(engine_md) as session:
        # Fetch the fund code list
        table = session.execute(sql_str)
        code_list = list(row[0] for row in table.fetchall())
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(code_list)
    try:
        for num, (ts_code) in enumerate(code_list, start=1):
            data_df = invoke_fund_portfolio(ts_code=ts_code)
            logging.info(" 提取%s 基金行情信息%d 条,整体完成%d/%d", ts_code, len(data_df), num, data_len)
            # Buffer the rows
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # Flush once the buffered row count passes the threshold
            if data_count >= 20000:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md,
                                                 DTYPE_TUSHARE_FUND_PORTFOLIO)
                logging.info(" %s 表 %d 条净值信息被更新", table_name, data_count)
                all_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # Flush whatever is still buffered, even if the loop raised
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_FUND_PORTFOLIO)
            all_data_count = all_data_count + data_count
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
def import_coin_info():
    """获取全球交易币基本信息

    Fetches the coin list from tushare and upserts it into
    tushare_coin_info; on first creation the table is converted to MyISAM
    and gets a (coin, en_name) primary key.
    """
    table_name = 'tushare_coin_info'
    table_existed_before = engine_md.has_table(table_name)
    # Column types for the upsert
    dtype = {
        'coin': String(60),
        'en_name': String(60),
        'cn_name': String(60),
        'issue_date': Date,
        'amount': DOUBLE,
    }
    end_date_str = date_2_str(date.today(), DATE_FORMAT_STR)
    coinlist_df = pro.coinlist(start_date='20170101', end_date=end_date_str)
    data_count = bunch_insert_on_duplicate_update(coinlist_df, table_name, engine_md, dtype)
    logging.info("更新 %s 完成 新增数据 %d 条", table_name, data_count)
    # First creation: storage engine conversion + composite primary key
    if not table_existed_before and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        create_pk_str = """ALTER TABLE {table_name}
        CHANGE COLUMN `coin` `coin` VARCHAR(60) NOT NULL FIRST,
        CHANGE COLUMN `en_name` `en_name` VARCHAR(60) NOT NULL AFTER `coin`,
        ADD PRIMARY KEY (`coin`, `en_name`)""".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)
def import_tushare_fund_company(chain_param=None, ):
    """Import public-fund management company info into tushare_fund_company.

    Fix: the original blanked malformed dates via chained assignment
    (``data_df.setup_date[i] = np.nan``), which triggers
    SettingWithCopyWarning and mixes positional/label indexing; replaced
    with a boolean mask + ``.loc``.

    :param chain_param: placeholder for celery task chaining (unused here)
    :return: None
    """
    table_name = 'tushare_fund_company'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    data_df = pro.fund_company()
    # Blank out malformed dates: tushare returns YYYYMMDD strings, so any
    # non-None value of a different length is unusable.
    for col in ('setup_date', 'end_date'):
        invalid_mask = data_df[col].apply(lambda x: x is not None and len(x) != 8)
        data_df.loc[invalid_mask, col] = np.nan
    # data_df=data_df[data_df['shortname'].apply(lambda x:x is not None)]
    if len(data_df) > 0:
        # NOTE(review): dtype constant DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR looks
        # copied from another importer — confirm it matches this table's schema.
        data_count = bunch_insert_on_duplicate_update(
            data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR)
        logging.info(" %s 表 %d 条公募基金公司信息被更新", table_name, data_count)
    else:
        logging.info("无数据信息可被更新")
def import_tushare_stock_info(chain_param=None, refresh=False):
    """ 获取全市场股票代码及名称

    Fetches the full-market stock list via ``pro.stock_basic`` and upserts
    it into tushare_stock_info.

    Fixes vs. the original:
    - ``symbol`` was declared ``DOUBLE`` although it is a 6-digit code with
      leading zeros — now ``String(20)``.
    - ``curr_type`` is requested in ``fields`` but was missing from the
      dtype mapping — added.
    - unused ``param`` variable removed.

    :param chain_param: placeholder for celery task chaining (unused here)
    :param refresh: unused in the current implementation
    :return: None
    """
    table_name = 'tushare_stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    wind_indicator_param_list = [
        ('ts_code', String(20)),
        ('symbol', String(20)),  # leading-zero codes; DOUBLE would mangle them
        ('list_date', Date),
        ('delist_date', Date),
        ('name', String(30)),
        ('fullname', String(100)),
        ('enname', String(200)),
        ('exchange_id', String(30)),
        ('list_status', String(10)),
        ('is_hs', String(10)),
        ('curr_type', String(20)),  # requested below but absent from the original dtype
    ]
    # 设置 dtype
    dtype = {key: val for key, val in wind_indicator_param_list}
    # 数据提取
    stock_info_all_df = pro.stock_basic(
        exchange_id='',
        fields='ts_code,symbol,name,fullname,enname,exchange_id,curr_type,list_date,list_status,delist_date,is_hs')
    logging.info('%s stock data will be import', stock_info_all_df.shape[0])
    data_count = bunch_insert_on_duplicate_update(stock_info_all_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
def save(self):
    """Incrementally fetch and persist data into ``self.table_name``.

    The start date is the latest stored pub_date (or ``self.BASE_DATE``
    when the table does not exist); data is pulled in loop_step-sized
    windows via ``self.get_df_iter`` and upserted batch by batch.
    """
    self.logger.info("更新 %s 开始", self.table_name)
    # Check whether the target table already exists
    has_table = engine_md.has_table(self.table_name)
    if has_table:
        sql_str = f"""select max(pub_date) from {self.table_name}"""
        date_start = execute_scalar(engine_md, sql_str)
        self.logger.info('查询 %s 数据使用起始日期 %s', self.table_name, date_2_str(date_start))
    else:
        date_start = self.BASE_DATE
        self.logger.warning('%s 不存在,使用基础日期 %s', self.table_name, date_2_str(date_start))
    # Latest pub_date to fetch up to (today)
    date_end = datetime.date.today()
    if date_start >= date_end:
        self.logger.info('%s %s 已经是最新数据,无需进一步获取', self.table_name, date_start)
        return
    data_count_tot = 0
    try:
        for num, (df, date_from, date_to) in enumerate(
                self.get_df_iter(date_start, date_end, self.loop_step)):
            # logger.debug('%d) [%s ~ %s] 包含 %d 条数据', num, date_from, date_to, df.shape[0])
            # Skip empty/None windows; upsert the rest batch by batch
            if df is not None and df.shape[0] > 0:
                data_count = bunch_insert_on_duplicate_update(
                    df, self.table_name, engine_md, dtype=self.dtype,
                    myisam_if_create_table=True, primary_keys=['id'],
                    schema=config.DB_SCHEMA_MD)
                data_count_tot += data_count
    finally:
        # Always log how much actually made it into the database
        logging.info("更新 %s 结束 %d 条信息被更新", self.table_name, data_count_tot)
def merge_tushare_daily(date_from=None):
    """Merge adj_factor / daily_basic / daily_md data into tushare_stock_daily.

    :param date_from: start date; when None and the target table exists,
        the day after its latest trade_date is used
    :return: the merged dataframe that was written
    """
    table_name = 'tushare_stock_daily'
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        # Resume from the day after the latest stored trade_date
        sql_str = "select adddate(max(trade_date),1) from {table_name}".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    tushare_adj_factor_df = get_tushare_daily_df('tushare_stock_daily_adj_factor', date_from)
    tushare_daily_basic_df = get_tushare_daily_df('tushare_stock_daily_basic', date_from)
    tushare_stock_daily_md_df = get_tushare_daily_df('tushare_stock_daily_md', date_from)
    # tushare_stock_daily_suspend_df = get_tushare_daily_df('tushare_stock_daily_suspend', date_from)
    # Outer-join the three source tables on (ts_code, trade_date)
    tushare_daily_two_form_df = pd.merge(tushare_adj_factor_df, tushare_daily_basic_df,
                                         how='outer', on=['ts_code', 'trade_date'])
    tushare_daily_df = pd.merge(tushare_daily_two_form_df, tushare_stock_daily_md_df,
                                how='outer', on=['ts_code', 'trade_date'])
    # tushare_daily_df = pd.merge(tushare_daily_three_form_df, tushare_stock_daily_suspend_df, how='outer',
    #                             on=['ts_code'])
    # Combined dtype mapping from the three source dtype dictionaries
    dtype = {}
    for dic in [DTYPE_TUSHARE_SUSPEND, DTYPE_TUSHARE_DAILY_BASIC, DTYPE_TUSHARE_DAILY]:
        for key, val in dic.items():
            dtype[key] = val
    # Resolve the duplicated close columns (close_x/close_y from the merge)
    # into a single 'close' via get_merge, then drop the suffixed ones.
    tushare_daily_df["close"] = tushare_daily_df.apply(get_merge, axis=1)
    tushare_daily_df.drop(['close_x', 'close_y'], axis=1, inplace=True)
    data_count = bunch_insert_on_duplicate_update(tushare_daily_df, table_name, engine_md, dtype)
    logging.info('%s 更新 %d', table_name, data_count)
    # if not has_table and engine_md.has_table(table_name):
    #     alter_table_2_myisam(engine_md, table_name)
    #     build_primary_key([table_name])
    return tushare_daily_df
def import_jq_stock_income(chain_param=None, ts_code_set=None):
    """Incrementally import jqdata income-statement data into TABLE_NAME.

    Start date is the latest stored pub_date (or BASE_DATE on first run);
    data is pulled in LOOP_STEP windows and upserted batch by batch.

    Fix: empty or ``None`` batches from ``get_df_iter`` are now skipped
    before insertion, matching the sibling ``save`` implementation.

    :param chain_param: placeholder for celery task chaining (unused here)
    :param ts_code_set: unused in the current implementation
    :return: None
    """
    logger.info("更新 %s 开始", TABLE_NAME)
    # Check whether the target table already exists
    has_table = engine_md.has_table(TABLE_NAME)
    if has_table:
        sql_str = f"""select max(pub_date) from {TABLE_NAME}"""
        date_start = execute_scalar(engine_md, sql_str)
        logger.info('查询 %s 数据使用起始日期 %s', TABLE_NAME, date_2_str(date_start))
    else:
        date_start = BASE_DATE
        logger.warning('%s 不存在,使用基础日期 %s', TABLE_NAME, date_2_str(date_start))
    # Latest pub_date to fetch up to (today)
    date_end = datetime.date.today()
    if date_start >= date_end:
        logger.info('%s 已经是最新数据,无需进一步获取', date_start)
        return
    data_count_tot = 0
    try:
        for num, (df, date_from, date_to) in enumerate(get_df_iter(date_start, date_end, LOOP_STEP)):
            # logger.debug('%d) [%s ~ %s] 包含 %d 条数据', num, date_from, date_to, df.shape[0])
            # Guard against empty/None windows (sibling `save` already does this)
            if df is None or df.shape[0] == 0:
                continue
            data_count = bunch_insert_on_duplicate_update(
                df, TABLE_NAME, engine_md, dtype=DTYPE,
                myisam_if_create_table=True, primary_keys=['id'],
                schema=config.DB_SCHEMA_MD)
            data_count_tot += data_count
    finally:
        # Always log how much actually made it into the database
        logging.info("更新 %s 结束 %d 条信息被更新", TABLE_NAME, data_count_tot)
def import_macroeconomy_info(chain_param=None):
    """Upsert the macro-economy indicator catalogue into wind_macroeconomy_info.

    :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務
    :return: None
    """
    table_name = 'wind_macroeconomy_info'
    table_existed_before = engine_md.has_table(table_name)
    # Indicator catalogue rows: key, english name, chinese name, begin, end, source
    indicators_dic = [
        # 人民币汇率
        ["M0067855", "us2rmb", "美元兑人民币即期汇率", "1994-01-04", None, '中国货币网'],
    ]
    name_list = ['key', 'en_name', 'cn_name', 'begin_date', 'end_date', 'remark']
    dtype = {
        'key': String(20),
        'en_name': String(120),
        'cn_name': String(120),
        'begin_date': Date,
        'end_date': Date,
        'remark': Text,
    }
    info_df = pd.DataFrame(data=indicators_dic, columns=name_list)
    data_count = bunch_insert_on_duplicate_update(info_df, table_name, engine_md, dtype)
    logger.info('%d 条记录被更新', data_count)
    # First creation: storage engine conversion + primary key on `key`
    if not table_existed_before and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        create_pk_str = """ALTER TABLE {table_name}
        CHANGE COLUMN `key` `key` VARCHAR(20) NOT NULL FIRST,
        ADD PRIMARY KEY (`key`)""".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)
        logger.info('%s 表 `key` 主键设置完成', table_name)
def import_fut_basic(chain_param=None):
    """Import futures-contract basic info for each exchange into
    tushare_future_basic.

    Improvements: iterates the exchange list directly instead of
    ``range(len(...))`` indexing; removed an unused local.

    :param chain_param: placeholder for celery task chaining (unused here)
    :return: None
    """
    table_name = 'tushare_future_basic'
    logging.info("更新 %s 开始", table_name)
    exchange_list = ['DCE', 'CZCE', 'SHFE', 'CFFEX', 'INE']
    try:
        for exchange_name in exchange_list:
            data_df = invoke_fut_basic(exchange=exchange_name)
            if len(data_df) > 0:
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md, DTYPE_TUSHARE_FUTURE_BASIC,
                    myisam_if_create_table=True)
                logging.info("更新 %s 期货合约基础信息结束, %d 条信息被更新", exchange_name, data_count)
            else:
                logging.info("无数据信息可被更新")
    finally:
        logger.info('%s 表 数据更新完成', table_name)
def import_tushare_index_basic(chain_param=None):
    """Import index basic info for every market into tushare_stock_index_basic.

    Fix: ``fields`` was built as a tuple of separate strings
    (``'ts_code','name',...``); the tushare API expects a single
    comma-separated string, which is now passed.

    :param chain_param: placeholder for celery task chaining (unused here)
    :return: None
    """
    table_name = 'tushare_stock_index_basic'
    logging.info("更新 %s 开始", table_name)
    # Comma-separated field list as required by the tushare API
    fields = ('ts_code,name,fullname,market,publisher,index_type,category,'
              'base_date,base_point,list_date,weight_rule,desc,exp_date')
    market_list = ['MSCI', 'CSI', 'SSE', 'SZSE', 'CICC', 'SW', 'CNI', 'OTH']
    for mkt in market_list:
        data_df = invoke_index_basic(market=mkt, fields=fields)
        if len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_INDEX_BASIC,
                myisam_if_create_table=True, primary_keys=['ts_code'],
                schema=config.DB_SCHEMA_MD)
            logging.info("%s更新 %s 结束 %d 条信息被更新", mkt, table_name, data_count)
        else:
            logging.info("无数据信息可被更新")
def import_future_trade_cal(chain_param=None):
    """Import the futures trading calendar (DCE) into tushare_future_trade_cal.

    :param chain_param: placeholder for celery task chaining (unused here)
    :return: None
    """
    table_name = 'tushare_future_trade_cal'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    try:
        data_df = pro.trade_cal(exchange='DCE')
        if len(data_df) == 0:
            logging.info("无数据信息可被更新")
        else:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_FUTURE_TRADE_CAL,
                myisam_if_create_table=True)
            logging.info("更新期货交易日历数据结束, %d 条信息被更新", data_count)
    finally:
        logger.info('%s 表 数据更新完成', table_name)
def import_index_info(wind_codes, chain_param=None):
    """导入指数信息

    Fetches index metadata via wind ``wss`` and upserts it into
    wind_index_info, then refreshes the code_mapping table.

    :param wind_codes: wind codes whose index info will be fetched
    :param chain_param: placeholder for celery task chaining (unused here)
    :return: None
    """
    table_name = 'wind_index_info'
    table_existed_before = engine_md.has_table(table_name)
    col_name_param_list = [
        ('LAUNCHDATE', Date),
        ('BASEDATE', Date),
        ('BASEVALUE', DOUBLE),
        ('COUNTRY', String(20)),
        ('CRM_ISSUER', String(20)),
        ('SEC_NAME', String(20)),
    ]
    # Comma-separated field list, UPPER->lower rename map and dtype mapping
    fields = ",".join(name.lower() for name, _ in col_name_param_list)
    rename_dic = {name.upper(): name.lower() for name, _ in col_name_param_list}
    dtype = {name.lower(): col_type for name, col_type in col_name_param_list}
    dtype['wind_code'] = String(20)
    info_df = invoker.wss(wind_codes, fields)
    if info_df is None or info_df.shape[0] == 0:
        logger.warning("没有数据可导入")
        return
    info_df.rename(columns=rename_dic, inplace=True)
    info_df.index.rename("wind_code", inplace=True)
    info_df.reset_index(inplace=True)
    bunch_insert_on_duplicate_update(info_df, table_name, engine_md, dtype=dtype)
    logger.info('%d 条指数信息导入成功\n%s', info_df.shape[0], info_df)
    # First creation: storage engine conversion + primary key
    if not table_existed_before and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # 更新 code_mapping 表
    update_from_info_table(table_name)
def import_tushare_adj_factor(chain_param=None, ):
    """Import daily price adjustment factors, one trade date at a time, into
    tushare_stock_daily_adj_factor (incremental when the table exists).

    :param chain_param: placeholder for celery task chaining (unused here)
    :return: None
    """
    table_name = 'tushare_stock_daily_adj_factor'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Build the date-range query from the trade calendar.
    # NOTE: be careful that the referenced table matches this importer's own
    # table, otherwise date ranges from different feeds get mixed up.
    if has_table:
        sql_str = """
            select cal_date
            FROM
             (
              select * from tushare_trade_date trddate
              where( cal_date>(SELECT max(trade_date) FROM {table_name}))
             )tt
            where (is_open=1
                   and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                   and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
            SELECT cal_date FROM tushare_trade_date trddate
            WHERE (trddate.is_open=1
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
               AND exchange='SSE')
            ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)
    with with_db_session(engine_md) as session:
        # Fetch the list of open trade dates to import
        table = session.execute(sql_str)
        trddate = list(row[0] for row in table.fetchall())
    try:
        for i in range(len(trddate)):
            trade_date = datetime_2_str(trddate[i], STR_FORMAT_DATE_TS)
            # One API call per trade date, all stocks (ts_code='')
            data_df = pro.adj_factor(ts_code='', trade_date=trade_date)
            if len(data_df) > 0:
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR)
                logging.info(" %s 表 %s 日 %d 条信息被更新", table_name, trade_date, data_count)
            else:
                logging.info("无数据信息可被更新")
    finally:
        # If this run created the table, convert it to MyISAM and add the PK
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # build_primary_key([table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `trade_date` `trade_date` DATE NOT NULL AFTER `ts_code`,
                ADD PRIMARY KEY (`ts_code`, `trade_date`)""".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `ts_code`, `trade_date` 主键设置完成', table_name)
def save_ifind_stock_hk_daily_his(table_name, data_df_list, dtype):
    """保存数据到 table_name

    Concatenates the buffered dataframes and upserts them into
    *table_name*; returns the inserted row count (0 for an empty buffer).
    """
    if not data_df_list:
        return 0
    merged_df = pd.concat(data_df_list)
    # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
    saved_count = bunch_insert_on_duplicate_update(merged_df, table_name, engine_md, dtype)
    # logger.info('保存数据到 %s 成功,包含 %d 条记录', table_name, saved_count)
    return saved_count
def import_coin_info(chain_param=None, ):
    """插入基础信息数据到 cmc_coin_v1_info

    Pulls the full coinmarketcap v1 ticker list, normalises last_updated
    into datetimes and upserts everything into cmc_coin_v1_info; on first
    creation the table is converted to MyISAM and keyed by `id`.

    :param chain_param: placeholder for celery task chaining (unused here)
    """
    table_name = "cmc_coin_v1_info"
    logging.info("更新 %s 开始", table_name)
    table_existed_before = engine_md.has_table(table_name)
    # url = 'https://api.coinmarketcap.com/v2/listings/'
    # dtype = {
    #     'id': String(60),
    #     'name': String(60),
    #     'symbol': String(20),
    #     'website_slug': String(60),
    # }
    url = 'https://api.coinmarketcap.com/v1/ticker/?limit=0'
    dtype = {
        'id': String(60),
        'name': String(60),
        'symbol': String(20),
        'rank': Integer,
        'price_usd': DOUBLE,
        'price_btc': DOUBLE,
        '24h_volume_usd': DOUBLE,
        'market_cap_usd': DOUBLE,
        'available_supply': DOUBLE,
        'total_supply': DOUBLE,
        'max_supply': DOUBLE,
        'percent_change_1h': DOUBLE,
        'percent_change_24h': DOUBLE,
        'percent_change_7d': DOUBLE,
        'last_updated': DATETIME,
    }
    rsp = requests.get(url)
    if rsp.status_code != 200:
        raise ValueError('请求 listings 相应失败')
    data_df = pd.DataFrame(rsp.json())
    # epoch-seconds string -> datetime; None stays None
    data_df['last_updated'] = data_df['last_updated'].apply(
        lambda ts: None if ts is None else datetime.datetime.fromtimestamp(float(ts)))
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    # First creation: storage engine conversion + primary key on `id`
    if not table_existed_before and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        # build_primary_key([table_name])
        create_pk_str = """ALTER TABLE {table_name}
        CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST ,
        ADD PRIMARY KEY (`id`)""".format(table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)
def import_index_info(chain_param=None, ths_code=None):
    """Import ifind index info into ifind_index_info.

    :param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
    :param ths_code: comma-separated ths codes; when None, the full-market
        code set as of today is used
    :return: None
    """
    table_name = 'ifind_index_info'
    has_table = engine_md.has_table(table_name)
    logging.info("更新 ifind_index_info 开始")
    if ths_code is None:
        # 获取全市场股票代码及名称
        date_end = date.today()
        stock_code_set = set()
        stock_code_set_sub = get_stock_code_set(date_end)
        if stock_code_set_sub is not None:
            stock_code_set |= stock_code_set_sub
        ths_code = ','.join(stock_code_set)
    # (indicator name, request param, column type) triples
    indicator_param_list = [
        ('ths_index_short_name_index', '', String(20)),
        ('ths_index_code_index', '', String(10)),
        ('ths_index_category_index', '', String(20)),
        ('ths_index_base_period_index', '', Date),
        ('ths_index_base_point_index', '', DOUBLE),
        ('ths_publish_org_index', '', String(20)),
    ]
    # indicator' = 'ths_index_short_name_index;ths_index_code_index;ths_thscode_index;ths_index_category_index;
    # ths_index_base_period_index;ths_index_base_point_index;ths_publish_org_index',
    # param = ';;;;;;'
    indicator, param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
    data_df = invoker.THS_BasicData(ths_code, indicator, param)
    if data_df is None or data_df.shape[0] == 0:
        logging.info("没有可用的 index info 可以更新")
        return
    # dtype mapping for the upsert, keyed on ths_code
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    # First creation: storage engine conversion + primary key
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # 更新 code_mapping 表
    update_from_info_table(table_name)
def save_ifind_stock_daily_his(data_df_list, dtype):
    """保存数据到 ifind_stock_daily_his

    Concatenates the buffered dataframes and upserts them into
    ifind_stock_daily_his; returns the inserted row count (0 when the
    buffer is empty).
    """
    if not data_df_list:
        return 0
    table_name = 'ifind_stock_daily_his'
    merged_df = pd.concat(data_df_list)
    # TODO: resolve the duplicate-insert issue; switch to SQL-statement inserts later
    # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
    saved_count = bunch_insert_on_duplicate_update(merged_df, table_name, engine_md, dtype)
    logger.info('保存数据到 %s 成功,包含 %d 条记录', table_name, saved_count)
    return saved_count
def fund_nav_df_2_sql(table_name, fund_nav_df, engine_md, is_append=True):
    """Reorganise a wind fund-NAV dataframe and upsert it into *table_name*.

    Expects *fund_nav_df* indexed by trade date with a 'wind_code' column
    plus NAV_DATE / NAV / NAV_ACC columns (wind WSS naming).

    :param table_name: target table
    :param fund_nav_df: raw NAV dataframe; mutated in place
    :param engine_md: sqlalchemy engine
    :param is_append: kept for interface compatibility; not used by the
        upsert path below
    :return: latest trade date on success, None when NAV_DATE is unparsable
    """
    col_name_param_list = [
        ('NAV_DATE', Date),
        ('NAV', DOUBLE),
        ('NAV_ACC', DOUBLE),
    ]
    # UPPER -> lower rename map and dtype mapping for the upsert
    col_name_dic = {col_name.upper(): col_name.lower() for col_name, _ in col_name_param_list}
    dtype = {col_name.lower(): val for col_name, val in col_name_param_list}
    dtype['wind_code'] = String(200)
    dtype['trade_date'] = Date
    # print('reorg dfnav data[%d, %d]' % fund_nav_df.shape)
    try:
        # Normalise NAV_DATE values to datetime.date
        fund_nav_df['NAV_DATE'] = pd.to_datetime(fund_nav_df['NAV_DATE']).apply(lambda x: x.date())
    except Exception as exp:
        logger.exception(str(fund_nav_df['NAV_DATE']))
        return None
    # The index carries the trade dates; remember the most recent one
    trade_date_s = pd.to_datetime(fund_nav_df.index)
    trade_date_latest = trade_date_s.max().date()
    fund_nav_df['trade_date'] = trade_date_s
    fund_nav_df.rename(columns=col_name_dic, inplace=True)
    # fund_nav_df['trade_date'] = trade_date_s
    # Round-trip through the index to order (wind_code, trade_date) first
    fund_nav_df.set_index(['wind_code', 'trade_date'], inplace=True)
    fund_nav_df.reset_index(inplace=True)
    # action_str = 'append' if is_append else 'replace'
    # print('df--> sql fundnav table if_exists="%s"' % action_str)
    bunch_insert_on_duplicate_update(fund_nav_df, table_name, engine_md, dtype=dtype)
    # fund_nav_df.to_sql(table_name, engine_md, if_exists=action_str, index_label=['wind_code', 'trade_date'],
    #                    dtype={
    #                        'wind_code': String(200),
    #                        'nav_date': Date,
    #                        'trade_date': Date,
    #                    })  # , index=False
    logger.info('%d data inserted', fund_nav_df.shape[0])
    return trade_date_latest
def save_fund_info(fund_info_df, col_type):
    """Clean and persist updated fund info into wind_fund_info, then copy any
    new rows into fund_info.

    Fix: the dtype key for the fund code column was misspelled
    ``'win_code'`` — a dead key that left the real ``wind_code`` column
    untyped; it is now ``'wind_code'``.

    :param fund_info_df: dataframe indexed by wind_code with wind WSS columns
    :param col_type: unused by the current implementation (kept for interface
        compatibility)
    :return: the cleaned dataframe, or None when the input is empty
    """
    # Column catalogue: wind WSS names -> sql types
    col_name_param_list = [
        ('SEC_NAME', String(200)),
        ('FUND_SETUPDATE', Date),
        ('FUND_MATURITYDATE', Date),
        ('FUND_MGRCOMP', String(200)),
        ('FUND_EXISTINGYEAR', String(200)),
        ('FUND_PTMYEAR', String(200)),
        ('FUND_TYPE', String(200)),
        ('FUND_FUNDMANAGER', String(200)),
    ]
    col_name_param_dic = {col_name.upper(): col_name.lower() for col_name, _ in col_name_param_list}
    dtype = {col_name.lower(): val for col_name, val in col_name_param_list}
    dtype['wind_code'] = String(200)  # was 'win_code' (typo)
    dtype['strategy_type'] = String(200)
    if fund_info_df.shape[0] == 0:
        return
    table_name = 'wind_fund_info'
    fund_info_df.rename(columns=col_name_param_dic, inplace=True)
    fund_info_df.index.names = ['wind_code']
    fund_info_df.reset_index(inplace=True)
    fund_info_df.drop_duplicates(inplace=True)
    data_count = bunch_insert_on_duplicate_update(fund_info_df, table_name, engine_md, dtype=dtype)
    logging.info('%d funds inserted', data_count)
    # TODO: 对于存量数据,需要进行定期更新操作
    # wind_fund_info 表中增量数据插入到 fund_info
    sql_str = """
        INSERT INTO fund_info(wind_code, sec_name, strategy_type, fund_setupdate, fund_maturitydate,
        fund_mgrcomp, fund_existingyear, fund_ptmyear, fund_type, fund_fundmanager)
        SELECT wfi.wind_code, wfi.sec_name, wfi.strategy_type, wfi.fund_setupdate, wfi.fund_maturitydate,
        wfi.fund_mgrcomp, wfi.fund_existingyear, wfi.fund_ptmyear, wfi.fund_type, wfi.fund_fundmanager
        FROM wind_fund_info wfi LEFT OUTER JOIN fund_info fi ON wfi.wind_code=fi.wind_code
        WHERE fi.wind_code IS NULL"""
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
    logging.info('new data was inserted into fund_info from wind_fund_info table')
    return fund_info_df
def import_trade_date(chain_param=None):
    """Incrementally import trading-calendar dates into tushare_trade_date
    (roughly 300 future days per exchange: HKEX / SZSE / SSE).

    :param chain_param: placeholder for celery task chaining (unused here)
    :return: None
    """
    table_name = "tushare_trade_date"
    exch_code_trade_date_dic = {}
    with with_db_session(engine_md) as session:
        try:
            # Latest stored calendar date per exchange
            table = session.execute('SELECT exchange,max(cal_date) FROM {table_name} GROUP BY exchange'.format(
                table_name=table_name
            ))
            exch_code_trade_date_dic = {exch_code: trade_date
                                        for exch_code, trade_date in table.fetchall()}
        except Exception as exp:
            # e.g. table does not exist yet — fall back to the full history
            logger.exception("交易日获取异常")
    exchange_code_dict = {
        "HKEX": "香港联合交易所",
        "SZSE": "深圳证券交易所",
        "SSE": "上海证券交易所",
    }
    exchange_code_list = list(exchange_code_dict.keys())
    for exchange_code in exchange_code_list:
        if exchange_code in exch_code_trade_date_dic:
            trade_date_max = exch_code_trade_date_dic[exchange_code]
            start_date_str = (trade_date_max + timedelta(days=1)).strftime(STR_FORMAT_DATE_TS)
        else:
            start_date_str = '19900101'
        end_date_str = (date.today() + timedelta(days=310)).strftime(STR_FORMAT_DATE_TS)
        # NOTE(review): exchange_id='' is passed for every iteration, so each
        # loop pass appears to fetch the same (all-exchange) calendar rather
        # than the per-exchange one — confirm against the tushare trade_cal API.
        trade_date_df = pro.trade_cal(exchange_id='', start_date=start_date_str, end_date=end_date_str)
        if trade_date_df is None or trade_date_df.shape[0] == 0:
            logger.warning('%s[%s] [%s - %s] 没有查询到交易日期',
                           exchange_code_dict[exchange_code], exchange_code,
                           start_date_str, end_date_str)
            continue
        date_count = trade_date_df.shape[0]
        logger.info("%s[%s] %d 条交易日数据将被导入 %s",
                    exchange_code_dict[exchange_code], exchange_code, date_count, table_name)
        date_count = bunch_insert_on_duplicate_update(trade_date_df, table_name, engine_md,
                                                      dtype={
                                                          'exchange': String(10),
                                                          'cal_date': Date,
                                                          'is_open': DOUBLE,
                                                      },
                                                      myisam_if_create_table=True,
                                                      primary_keys=['exchange', 'cal_date'],
                                                      schema=config.DB_SCHEMA_MD)
        logger.info('%s[%s] %d 条交易日数据导入 %s 完成',
                    exchange_code_dict[exchange_code], exchange_code, date_count, table_name)
def import_tushare_namechange(chain_param=None):
    """Import stock name-change records into tushare_stock_namechange.

    On first run the window starts at the earliest list_date found in
    tushare_stock_info; on later runs it resumes from the latest start_date
    already stored. If this call created the table, it is converted to MyISAM
    and a composite (ts_code, start_date) primary key is added.

    :param chain_param: placeholder used by the task-chain framework; unused
    :return: None
    """
    table_name = 'tushare_stock_namechange'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Resume: fetch from the newest name-change start_date already imported.
        sql_str = """select max(start_date) start_date FROM md_integration.tushare_stock_namechange"""
    else:
        # First run: start from the earliest listing date of any stock.
        sql_str = """select min(list_date) start_date FROM md_integration.tushare_stock_info"""
    with with_db_session(engine_md) as session:
        # Fetch the boundary date computed by the query above.
        table = session.execute(sql_str)
        start_date = list(row[0] for row in table.fetchall())
        start_date = datetime_2_str(start_date[0], STR_FORMAT_DATE_TS)
    end_date = datetime_2_str(date.today(), STR_FORMAT_DATE_TS)
    try:
        data_df = pro.namechange(
            start_date=start_date,
            end_date=end_date,
            fields='ts_code,name,start_date,end_date,change_reason')
        if len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_NAMECHANGE)
            logging.info("更新 %s 结束 %d 条上市公司更名信息被更新", table_name, data_count)
        else:
            logging.info("无数据信息可被更新")
    finally:
        # Table created during this run: convert to MyISAM and set the PK.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # build_primary_key([table_name])
            # Columns must be NOT NULL before they can form a primary key.
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `start_date` `start_date` DATE NOT NULL AFTER `ts_code`,
                ADD PRIMARY KEY (`ts_code`, `start_date`)""".format(
                table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `ts_code`, `start_date` 主键设置完成', table_name)
def import_tushare_stock_info(chain_param=None, refresh=False):
    """Fetch the full-market stock list from tushare and upsert it into
    tushare_stock_info.

    :param chain_param: placeholder used by the task-chain framework; unused
    :param refresh: accepted for interface compatibility; not used here
    :return: None
    """
    table_name = 'tushare_stock_info'
    logging.info("更新 %s 开始", table_name)
    # (column name, SQL type) pairs — single source of truth for both the
    # request field list and the db column types.
    tushare_indicator_param_list = [
        ('ts_code', String(20)),
        ('symbol', String(20)),
        ('name', String(40)),
        ('area', String(100)),
        ('industry', String(200)),
        ('fullname', String(100)),
        ('enname', String(200)),
        ('market', String(100)),
        ('exchange', String(20)),
        ('curr_type', String(20)),
        ('list_status', String(20)),
        ('list_date', Date),
        ('delist_date', Date),
        ('is_hs', String(20)),
    ]
    # BUG FIX: the field list was a hard-coded string that repeated 'is_hs'
    # three times; derive it from the param list instead (also removes the
    # unused 'param' local and the redundant dtype['ts_code'] reassignment).
    fields = ",".join(key for key, _ in tushare_indicator_param_list)
    dtype = {key: val for key, val in tushare_indicator_param_list}
    # Pull the whole market in one request.
    stock_info_all_df = pro.stock_basic(exchange='', fields=fields)
    logging.info('%s stock data will be import', stock_info_all_df.shape[0])
    data_count = bunch_insert_on_duplicate_update(
        stock_info_all_df, table_name, engine_md, dtype=dtype,
        myisam_if_create_table=True, primary_keys=['ts_code'],
        schema=config.DB_SCHEMA_MD)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
def import_variety_info():
    """One-off import of futures-exchange variety (product) metadata.

    Walks a fixed list of exchanges, pulls each exchange's variety table from
    the iFinD data pool, tags every row with its exchange name, and bulk-upserts
    the concatenated result into ifind_future_variety_info. Partial results are
    still saved if a fetch fails midway. Rarely needed after the initial load.
    """
    table_name = 'ifind_future_variety_info'
    exchange_list = [
        '上海期货交易所',
        '大连商品交易所',
        '郑州商品交易所',
        '中国金融期货交易所',
        '纽约商业期货交易所(NYMEX)',
        '纽约商品交易所(COMEX)',
        # '纽约期货交易所(NYBOT)',  # no data available
        '芝加哥期货交易所(CBOT)',
        # '洲际交易所(ICE)',  # no data available
        '伦敦金属交易所(LME)',
        '马来西亚衍生品交易所(BMD)',
        '新加坡证券交易所(SGX)',
    ]
    # SQL column types for the persisted frame.
    dtype = {
        'exchange': String(20),
        'ID': String(20),
        'SECURITY_NAME': String(20),
    }
    frames = []
    total = len(exchange_list)
    try:
        for idx, exch in enumerate(exchange_list):
            logger.debug("%d/%d) %s 获取交易品种信息", idx, total, exch)
            variety_df = invoker.THS_DataPool('variety', exch, 'variety:Y,id:Y')
            variety_df['exchange'] = exch
            frames.append(variety_df)
    finally:
        # Persist whatever was collected, even if a fetch above raised.
        saved_count = 0
        if frames:
            saved_count = bunch_insert_on_duplicate_update(pd.concat(frames), table_name, engine_md, dtype=dtype)
        logger.info('保存交易所品种信息完成, %d条数据被保存', saved_count)
def import_tushare_fund_basic(chain_param=None):
    """Import fund basic info for both markets into tushare_fund_basic.

    Fetches tushare fund basics per market and upserts each non-empty
    result set.

    :param chain_param: placeholder used by the task-chain framework; unused
    :return: None
    """
    table_name = 'tushare_fund_basic'
    logging.info("更新 %s 开始", table_name)
    # Removed: dead `has_table` local and a stale comment copy-pasted from the
    # tushare_stock_daily importer; also dropped the redundant list([...]) wrapper.
    # 'E' = exchange-traded funds, 'O' = open-ended (OTC) funds.
    for market in ('E', 'O'):
        data_df = pro.fund_basic(market=market)
        if len(data_df) > 0:
            # NOTE(review): dtype constant DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR
            # looks borrowed from the daily-adj-factor importer — confirm it
            # actually matches the fund_basic columns.
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR)
            logging.info(" %s 表 %d 条基金信息被更新", table_name, data_count)
        else:
            logging.info("无数据信息可被更新")