def import_tushare_stock_index_daily(chain_param=None, ts_code_set=None):
    """
    Fetch daily index quotes through ``invoke_index_daily`` and upsert them into
    ``tushare_stock_index_daily_md``.

    The date window per index is computed in SQL from ``tushare_stock_index_basic``:
    it starts the day after the newest stored ``trade_date`` (or at ``base_date`` when
    nothing is stored / the table does not exist) and ends at the earlier of ``exp_date``
    and "yesterday before 16:00 database time, otherwise today".

    Each request may return only part of the window, so older pages are requested
    repeatedly until the oldest returned ``trade_date`` reaches the window start.
    Rows are buffered and written in batches of at least 500 rows; whatever is still
    buffered is written in the ``finally`` clause, even when the loop was interrupted.

    :param chain_param: not referenced in the function body
        (NOTE(review): presumably reserved for task chaining - confirm with callers)
    :param ts_code_set: optional collection of ts_code values; when given, only these codes are imported
    :return: None
    """
    table_name = 'tushare_stock_index_daily_md'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # Choose the date-range query depending on whether the target table already exists
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(trade_date, base_date) date_frm, exp_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM
                    tushare_stock_index_basic info
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY ts_code) daily
                ON info.ts_code = daily.ts_code
            ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date)
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, base_date date_frm, exp_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_index_basic info
            ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date)
            ORDER BY ts_code"""
        # The query above reads tushare_stock_index_basic, so the message names that table
        logger.warning('%s 不存在,仅使用 tushare_stock_index_basic 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # One row per index: (ts_code, date_from, date_to)
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_df_list, data_count, all_data_count = [], 0, 0
    data_len = len(code_date_range_dic)
    logger.info('%d stocks will been import into tushare_stock_index_daily_md', data_len)

    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            data_df = invoke_index_daily(
                ts_code=ts_code,
                start_date=start_date_str,
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))

            if data_df is not None and len(data_df) > 0:
                oldest_raw = data_df['trade_date'].iloc[-1]
                # Keep asking for older pages until the window start is covered
                while oldest_raw is not None:
                    oldest_date = try_2_date(oldest_raw)
                    if oldest_date <= date_from:
                        break
                    df2 = invoke_index_daily(
                        ts_code=ts_code,
                        start_date=start_date_str,
                        end_date=datetime_2_str(oldest_date - timedelta(days=1), STR_FORMAT_DATE_TS))
                    if df2 is None or len(df2) == 0:
                        break
                    next_raw = df2['trade_date'].iloc[-1]
                    # A page that does not move the oldest date backwards would repeat forever
                    if next_raw is None or try_2_date(next_raw) >= oldest_date:
                        break
                    data_df = pd.concat([data_df, df2])
                    oldest_raw = next_raw
                    logger.info('%d/%d) %d data of %s between %s and %s',
                                num, data_len, data_df.shape[0], ts_code, date_from, date_to)

            # Buffer the rows of this index
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # Debug only: stop after a handful of indexes
            if DEBUG and len(data_df_list) > 5:
                break

            # Flush the buffer once enough rows have been collected
            if data_count >= 500:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD)
                all_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # Write whatever is still buffered, also when the loop was interrupted by an error
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD)
            all_data_count += data_count
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
        # The table may have been created by an earlier batch, so check regardless of the last flush
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_tushare_stock_fina_audit(chain_param=None, ts_code_set=None):
    """
    Fetch financial audit opinions through ``invoke_fina_audit`` and upsert them into
    ``tushare_stock_fin_audit``, one stock at a time.

    The date window per stock is computed in SQL from ``tushare_stock_info``: it starts the
    day after the newest stored ``ann_date`` (falling back to a date derived from
    ``list_date``) and ends at the earlier of ``delist_date`` and "yesterday before 16:00
    database time, otherwise today".

    Older pages are requested until the oldest returned ``ann_date`` reaches the window
    start. If the loop is interrupted while a stock is only partially fetched, the rows
    fetched so far are written in the ``finally`` clause.

    :param chain_param: not referenced in the function body
        (NOTE(review): presumably reserved for task chaining - confirm with callers)
    :param ts_code_set: optional collection of ts_code values; when given, only these codes are imported
    :return: None
    """
    table_name = 'tushare_stock_fin_audit'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('ts_code', String(20)),
        ('ann_date', Date),
        ('end_date', Date),
        ('audit_result', Text),
        ('audit_fees', DOUBLE),
        ('audit_agency', String(100)),
        ('audit_sign', String(100)),
    ]

    has_table = engine_md.has_table(table_name)
    # Choose the date-range query depending on whether the target table already exists
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(ann_date, subdate(list_date,365*8)) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM
                    tushare_stock_info info
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(ann_date),1) ann_date
                    FROM {table_name} GROUP BY ts_code) fina_audit
                ON info.ts_code = fina_audit.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, subdate(list_date,365*10) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_info info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY ts_code DESC """
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # One row per stock: (ts_code, date_from, date_to)
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    # Column name -> SQL type mapping handed to the insert helper
    dtype = dict(param_list)
    data_len = len(code_date_range_dic)
    logger.info('%d stocks will been import into %s', data_len, table_name)

    cycles = 1
    # Bound before the try block so the finally clause can always inspect it
    data_df = None
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            data_df = invoke_fina_audit(
                ts_code=ts_code,
                start_date=start_date_str,
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))

            if data_df is not None and len(data_df) > 0:
                oldest_raw = data_df['ann_date'].iloc[-1]
                # Keep asking for older pages until the window start is covered
                while oldest_raw is not None:
                    oldest_date = try_2_date(oldest_raw)
                    if oldest_date <= date_from:
                        break
                    df2 = invoke_fina_audit(
                        ts_code=ts_code,
                        start_date=start_date_str,
                        end_date=datetime_2_str(oldest_date - timedelta(days=1), STR_FORMAT_DATE_TS))
                    if df2 is None or len(df2) == 0:
                        break
                    next_raw = df2['ann_date'].iloc[-1]
                    # A page that does not move the oldest date backwards would repeat forever
                    if next_raw is None or try_2_date(next_raw) >= oldest_date:
                        break
                    data_df = pd.concat([data_df, df2])
                    oldest_raw = next_raw
                    logger.info('%d/%d) %d data of %s between %s and %s',
                                num, data_len, data_df.shape[0], ts_code, date_from, date_to)

            # Write the rows of this stock
            if data_df is not None and len(data_df) > 0:
                data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
                logging.info("成功更新 %s 结束 %d 条信息被更新", table_name, data_count)
            else:
                logger.warning('%d/%d) %s has no data during %s %s',
                               num, data_len, ts_code, date_from, date_to)
            # Already handled: nothing left for the finally clause to write
            data_df = None

            # Debug only: stop after a handful of stocks
            cycles += 1
            if DEBUG and cycles > 10:
                break
    finally:
        # data_df is non-empty here only when the loop was interrupted before its insert completed
        if data_df is not None and len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
            logging.info("成功更新 %s 结束 %d 条信息被更新", table_name, data_count)
def import_tushare_stock_balancesheet(chain_param=None, ts_code_set=None):
    """
    Fetch balance sheets through ``invoke_balancesheet`` and upsert them into
    ``tushare_stock_balancesheet``.

    The date window per stock is computed in SQL from ``tushare_stock_info``: it starts the
    day after the newest stored ``ann_date`` (or at ``list_date``) and ends at the earlier of
    ``delist_date`` and "yesterday before 16:00 database time, otherwise today".

    Older pages are requested until the oldest returned ``ann_date`` reaches the window
    start. Rows are buffered and written in batches of at least 1000 rows; the remaining
    buffer is written in the ``finally`` clause, even when the loop was interrupted.

    :param chain_param: not referenced in the function body
        (NOTE(review): presumably reserved for task chaining - confirm with callers)
    :param ts_code_set: optional collection of ts_code values; when given, only these codes are imported
    :return: None
    """
    table_name = 'tushare_stock_balancesheet'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # Choose the date-range query depending on whether the target table already exists
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(ann_date, list_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM
                    tushare_stock_info info
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(ann_date),1) ann_date
                    FROM {table_name} GROUP BY ts_code) balancesheet
                ON info.ts_code = balancesheet.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, list_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_info info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY ts_code DESC """
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # One row per stock: (ts_code, date_from, date_to)
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_df_list, data_count, all_data_count = [], 0, 0
    data_len = len(code_date_range_dic)
    logger.info(
        '%d stock balancesheets will been import into tushare_stock_balancesheet', data_len)

    # Same table options for every insert, so the table is set up identically
    # no matter which batch happens to create it
    insert_kwargs = dict(primary_keys=['ts_code', 'ann_date'], schema=config.DB_SCHEMA_MD)

    cycles = 1
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            data_df = invoke_balancesheet(
                ts_code=ts_code,
                start_date=start_date_str,
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))

            if data_df is not None and data_df.shape[0] > 0:
                oldest_raw = data_df['ann_date'].iloc[-1]
                # Keep asking for older pages until the window start is covered
                while oldest_raw is not None:
                    oldest_date = try_2_date(oldest_raw)
                    if oldest_date <= date_from:
                        break
                    df2 = invoke_balancesheet(
                        ts_code=ts_code,
                        start_date=start_date_str,
                        end_date=datetime_2_str(oldest_date - timedelta(days=1), STR_FORMAT_DATE_TS))
                    if df2 is None or len(df2) == 0:
                        break
                    next_raw = df2['ann_date'].iloc[-1]
                    # Without a usable, strictly older date the next window cannot be computed
                    if next_raw is None or try_2_date(next_raw) >= oldest_date:
                        break
                    data_df = pd.concat([data_df, df2])
                    oldest_raw = next_raw

            if data_df is None or data_df.shape[0] == 0:
                logger.warning('%d/%d) %s has no data during %s %s',
                               num, data_len, ts_code, date_from, date_to)
                continue
            logger.debug('%d/%d), %d 条 %s 资产负债表被提取,起止时间为 %s 和 %s',
                         num, data_len, data_df.shape[0], ts_code, date_from, date_to)

            # Buffer the rows of this stock
            data_count += data_df.shape[0]
            data_df_list.append(data_df)

            # Flush the buffer once enough rows have been collected
            if data_count >= 1000 and len(data_df_list) > 0:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_BALABCESHEET,
                    **insert_kwargs)
                logger.info('%d 条资产负债表数据被插入 %s 表', data_count, table_name)
                all_data_count += data_count
                data_df_list, data_count = [], 0

            # Debug only: stop after a handful of stocks
            cycles += 1
            if DEBUG and cycles > 10:
                break
    finally:
        # Write whatever is still buffered, also when the loop was interrupted by an error
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_BALABCESHEET,
                **insert_kwargs)
            all_data_count = all_data_count + data_count
        logging.info("更新 %s 结束 %d 条资产负债表信息被更新", table_name, all_data_count)
def import_tushare_stock_top10_holders(ts_code_set=None, chain_param=None):
    """
    Fetch top-10 shareholder records through ``invoke_top10_holders`` and upsert them into
    ``tushare_stock_top10_holders``.

    The date window per stock is computed in SQL from ``tushare_stock_info``: it starts the
    day after the newest stored ``ann_date`` (falling back to ``list_date`` minus 3650 days)
    and ends at the earlier of ``delist_date`` and "yesterday before 16:00 database time,
    otherwise today".

    Older pages are requested until the oldest returned date reaches the window start; when
    the last row of a page has no ``ann_date`` its ``end_date`` is used instead. Rows are
    buffered and written in batches of at least 500 rows; the remaining buffer is written in
    the ``finally`` clause, even when the loop was interrupted.

    :param ts_code_set: optional collection of ts_code values; when given, only these codes are imported
    :param chain_param: not referenced in the function body
        (NOTE(review): presumably reserved for task chaining - confirm with callers)
    :return: None
    """
    table_name = 'tushare_stock_top10_holders'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # Choose the date-range query depending on whether the target table already exists
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date2, delist_date, end_date2) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(end_date, subdate(list_date,365*10)) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2
                FROM
                    tushare_stock_info info
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(ann_date),1) end_date
                    FROM {table_name} GROUP BY ts_code) top10_holders
                ON info.ts_code = top10_holders.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date2, delist_date, end_date2)
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date2, delist_date, end_date2) date_to
            FROM
            (
                SELECT info.ts_code, subdate(list_date,365*10) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2
                FROM tushare_stock_info info
            ) tt
            WHERE date_frm <= if(delist_date<end_date2, delist_date, end_date2)
            ORDER BY ts_code """
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # One row per stock: (ts_code, date_from, date_to)
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_df_list, data_count, all_data_count = [], 0, 0
    data_len = len(code_date_range_dic)
    logger.info('%d stocks will been import into %s', data_len, table_name)

    cycles = 1
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            data_df = invoke_top10_holders(
                ts_code=ts_code,
                start_date=start_date_str,
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))

            if data_df is not None and len(data_df) > 0 and data_df['ann_date'].iloc[-1] is not None:
                oldest_date = try_2_date(data_df['ann_date'].iloc[-1])
                # Keep asking for older pages until the window start is covered
                while oldest_date > date_from:
                    df2 = invoke_top10_holders(
                        ts_code=ts_code,
                        start_date=start_date_str,
                        end_date=datetime_2_str(oldest_date - timedelta(days=1), STR_FORMAT_DATE_TS))
                    if df2 is None or len(df2) == 0:
                        break
                    next_raw = df2['ann_date'].iloc[-1]
                    if next_raw is None:
                        # No announcement date on the last row: use the report period end instead
                        next_raw = df2['end_date'].iloc[-1]
                    if next_raw is None:
                        break
                    next_date = try_2_date(next_raw)
                    # Only a strictly older page makes progress; anything else would loop forever
                    if next_date >= oldest_date:
                        break
                    data_df = pd.concat([data_df, df2])
                    oldest_date = next_date

            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s',
                               num, data_len, ts_code, date_from, date_to)
            else:
                logger.info('整体进度:%d/%d), %d 条 %s 前10股东被提取,起止时间为 %s 和 %s',
                            num, data_len, data_df.shape[0], ts_code, date_from, date_to)

            # Buffer the rows of this stock
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # Flush the buffer once enough rows have been collected
            if data_count >= 500 and len(data_df_list) > 0:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_TOP10_HOLDERS)
                all_data_count += data_count
                data_df_list, data_count = [], 0

            # Debug only: stop after a handful of stocks
            cycles += 1
            if DEBUG and cycles > 25:
                break
    finally:
        # Write whatever is still buffered, also when the loop was interrupted by an error
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_TOP10_HOLDERS)
            all_data_count = all_data_count + data_count
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
def import_tushare_stock_fina_indicator(chain_param=None, ts_code_set=None):
    """
    Fetch financial indicators through ``invoke_fina_indicator`` and upsert them into
    ``tushare_stock_fin_indicator``.

    The date window per stock is computed in SQL from ``tushare_stock_info``: it starts the
    day after the newest stored ``ann_date`` (or at ``list_date``) and ends at the earlier of
    ``delist_date`` and "yesterday before 16:00 database time, otherwise today".

    Older pages are requested until the oldest returned ``ann_date`` reaches the window
    start. Rows are buffered and written in batches of at least 1000 rows; the remaining
    buffer is written in the ``finally`` clause, even when the loop was interrupted.

    NOTE(review): this file defines a second function with the same name further down;
    at import time the later definition replaces this one - confirm which is intended.

    :param chain_param: not referenced in the function body
        (NOTE(review): presumably reserved for task chaining - confirm with callers)
    :param ts_code_set: optional collection of ts_code values; when given, only these codes are imported
    :return: None
    """
    table_name = 'tushare_stock_fin_indicator'
    logging.info("更新 %s 开始", table_name)

    has_table = engine_md.has_table(table_name)
    # Choose the date-range query depending on whether the target table already exists
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(ann_date, list_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM
                    tushare_stock_info info
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(ann_date),1) ann_date
                    FROM {table_name} GROUP BY ts_code) fina_indicator
                ON info.ts_code = fina_indicator.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, list_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_info info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # One row per stock: (ts_code, date_from, date_to)
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    # Columns requested from the data source, passed through as the ``fields`` argument
    fields = (
        'ts_code', 'ann_date', 'end_date', 'eps', 'dt_eps', 'total_revenue_ps', 'revenue_ps',
        'capital_rese_ps', 'surplus_rese_ps', 'undist_profit_ps', 'extra_item', 'profit_dedt',
        'gross_margin', 'current_ratio', 'quick_ratio', 'cash_ratio', 'invturn_days', 'arturn_days',
        'inv_turn', 'ar_turn', 'ca_turn', 'fa_turn', 'assets_turn', 'op_income', 'valuechange_income',
        'interst_income', 'daa', 'ebit', 'ebitda', 'fcff', 'fcfe', 'current_exint', 'noncurrent_exint',
        'interestdebt', 'netdebt', 'tangible_asset', 'working_capital', 'networking_capital',
        'invest_capital', 'retained_earnings', 'diluted2_eps', 'bps', 'ocfps', 'retainedps', 'cfps',
        'ebit_ps', 'fcff_ps', 'fcfe_ps', 'netprofit_margin', 'grossprofit_margin', 'cogs_of_sales',
        'expense_of_sales', 'profit_to_gr', 'saleexp_to_gr', 'adminexp_of_gr', 'finaexp_of_gr',
        'impai_ttm', 'gc_of_gr', 'op_of_gr', 'ebit_of_gr', 'roe', 'roe_waa', 'roe_dt', 'roa', 'npta',
        'roic', 'roe_yearly', 'roa2_yearly', 'roe_avg', 'opincome_of_ebt', 'investincome_of_ebt',
        'n_op_profit_of_ebt', 'tax_to_ebt', 'dtprofit_to_profit', 'salescash_to_or', 'ocf_to_or',
        'ocf_to_opincome', 'capitalized_to_da', 'debt_to_assets', 'assets_to_eqt', 'dp_assets_to_eqt',
        'ca_to_assets', 'nca_to_assets', 'tbassets_to_totalassets', 'int_to_talcap',
        'eqt_to_talcapital', 'currentdebt_to_debt', 'longdeb_to_debt', 'ocf_to_shortdebt',
        'debt_to_eqt', 'eqt_to_debt', 'eqt_to_interestdebt', 'tangibleasset_to_debt',
        'tangasset_to_intdebt', 'tangibleasset_to_netdebt', 'ocf_to_debt', 'ocf_to_interestdebt',
        'ocf_to_netdebt', 'ebit_to_interest', 'longdebt_to_workingcapital', 'ebitda_to_debt',
        'turn_days', 'roa_yearly', 'roa_dp', 'fixed_assets', 'profit_prefin_exp', 'non_op_profit',
        'op_to_ebt', 'nop_to_ebt', 'ocf_to_profit', 'cash_to_liqdebt', 'cash_to_liqdebt_withinterest',
        'op_to_liqdebt', 'op_to_debt', 'roic_yearly', 'total_fa_trun', 'profit_to_op', 'q_opincome',
        'q_investincome', 'q_dtprofit', 'q_eps', 'q_netprofit_margin', 'q_gsprofit_margin',
        'q_exp_to_sales', 'q_profit_to_gr', 'q_saleexp_to_gr', 'q_adminexp_to_gr', 'q_finaexp_to_gr',
        'q_impair_to_gr_ttm', 'q_gc_to_gr', 'q_op_to_gr', 'q_roe', 'q_dt_roe', 'q_npta',
        'q_opincome_to_ebt', 'q_investincome_to_ebt', 'q_dtprofit_to_profit', 'q_salescash_to_or',
        'q_ocf_to_sales', 'q_ocf_to_or', 'basic_eps_yoy', 'dt_eps_yoy', 'cfps_yoy', 'op_yoy',
        'ebt_yoy', 'netprofit_yoy', 'dt_netprofit_yoy', 'ocf_yoy', 'roe_yoy', 'bps_yoy', 'assets_yoy',
        'eqt_yoy', 'tr_yoy', 'or_yoy', 'q_gr_yoy', 'q_gr_qoq', 'q_sales_yoy', 'q_sales_qoq',
        'q_op_yoy', 'q_op_qoq', 'q_profit_yoy', 'q_profit_qoq', 'q_netprofit_yoy', 'q_netprofit_qoq',
        'equity_yoy', 'rd_exp',
    )

    data_df_list, data_count, all_data_count = [], 0, 0
    data_len = len(code_date_range_dic)
    logger.info('%d 财务指标信息将被插入 tushare_stock_fin_indicator 表', data_len)

    cycles = 1
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            data_df = invoke_fina_indicator(
                ts_code=ts_code,
                start_date=start_date_str,
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS),
                fields=fields)

            if data_df is not None and len(data_df) > 0:
                oldest_raw = data_df['ann_date'].iloc[-1]
                # Keep asking for older pages until the window start is covered
                while oldest_raw is not None:
                    oldest_date = try_2_date(oldest_raw)
                    if oldest_date <= date_from:
                        break
                    df2 = invoke_fina_indicator(
                        ts_code=ts_code,
                        start_date=start_date_str,
                        end_date=datetime_2_str(oldest_date - timedelta(days=1), STR_FORMAT_DATE_TS),
                        fields=fields)
                    if df2 is None or len(df2) == 0:
                        break
                    next_raw = df2['ann_date'].iloc[-1]
                    # Without a usable, strictly older date the next window cannot be computed
                    if next_raw is None or try_2_date(next_raw) >= oldest_date:
                        break
                    data_df = pd.concat([data_df, df2])
                    oldest_raw = next_raw

            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s',
                               num, data_len, ts_code, date_from, date_to)
                continue
            logger.info('整体进度:%d/%d), %d 条 %s 财务指标已提取,起止时间 %s 和 %s',
                        num, data_len, data_df.shape[0], ts_code, date_from, date_to)

            # Buffer the rows of this stock
            if data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # Flush the buffer once enough rows have been collected
            if data_count >= 1000 and len(data_df_list) > 0:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_STOCK_FINA_INDICATOR)
                logger.info('%d 条财务指标将数据插入 %s 表', data_count, table_name)
                all_data_count += data_count
                data_df_list, data_count = [], 0

            # Debug only: stop after a handful of stocks
            cycles += 1
            if DEBUG and cycles > 10:
                break
    finally:
        # Write whatever is still buffered, also when the loop was interrupted by an error
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_STOCK_FINA_INDICATOR)
            all_data_count = all_data_count + data_count
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
def import_tushare_stock_fina_indicator(ts_code_set=None):
    """
    Back-fill ("patch") financial indicators into ``tushare_stock_fin_indicator`` for the
    period before each stock was listed.

    For every row of ``tushare_stock_info`` the window runs from ``list_date`` minus
    3650 days up to ``list_date``. Data is fetched through ``invoke_fina_indicator``; older
    pages are requested until the oldest returned ``ann_date`` reaches the window start, and
    each stock is written right after it has been fetched. If the loop is interrupted while
    a stock is only partially fetched, the rows fetched so far are written in the
    ``finally`` clause.

    NOTE(review): this file contains an earlier function with the same name and a different
    signature (it also accepts ``chain_param``); this later definition replaces it at import
    time - confirm which is intended.

    :param ts_code_set: optional collection of ts_code values; when given, only these codes are imported
    :return: None
    """
    table_name = 'tushare_stock_fin_indicator'
    logging.info("更新 %s 开始", table_name)

    # Key columns with their SQL types
    param_list = [
        ('ts_code', String(20)),
        ('ann_date', Date),
        ('end_date', Date),
    ]
    # Every remaining column is stored as DOUBLE
    double_columns = (
        'eps', 'dt_eps', 'total_revenue_ps', 'revenue_ps',
        'capital_rese_ps', 'surplus_rese_ps', 'undist_profit_ps', 'extra_item', 'profit_dedt',
        'gross_margin', 'current_ratio', 'quick_ratio', 'cash_ratio', 'invturn_days', 'arturn_days',
        'inv_turn', 'ar_turn', 'ca_turn', 'fa_turn', 'assets_turn', 'op_income', 'valuechange_income',
        'interst_income', 'daa', 'ebit', 'ebitda', 'fcff', 'fcfe', 'current_exint', 'noncurrent_exint',
        'interestdebt', 'netdebt', 'tangible_asset', 'working_capital', 'networking_capital',
        'invest_capital', 'retained_earnings', 'diluted2_eps', 'bps', 'ocfps', 'retainedps', 'cfps',
        'ebit_ps', 'fcff_ps', 'fcfe_ps', 'netprofit_margin', 'grossprofit_margin', 'cogs_of_sales',
        'expense_of_sales', 'profit_to_gr', 'saleexp_to_gr', 'adminexp_of_gr', 'finaexp_of_gr',
        'impai_ttm', 'gc_of_gr', 'op_of_gr', 'ebit_of_gr', 'roe', 'roe_waa', 'roe_dt', 'roa', 'npta',
        'roic', 'roe_yearly', 'roa2_yearly', 'roe_avg', 'opincome_of_ebt', 'investincome_of_ebt',
        'n_op_profit_of_ebt', 'tax_to_ebt', 'dtprofit_to_profit', 'salescash_to_or', 'ocf_to_or',
        'ocf_to_opincome', 'capitalized_to_da', 'debt_to_assets', 'assets_to_eqt', 'dp_assets_to_eqt',
        'ca_to_assets', 'nca_to_assets', 'tbassets_to_totalassets', 'int_to_talcap',
        'eqt_to_talcapital', 'currentdebt_to_debt', 'longdeb_to_debt', 'ocf_to_shortdebt',
        'debt_to_eqt', 'eqt_to_debt', 'eqt_to_interestdebt', 'tangibleasset_to_debt',
        'tangasset_to_intdebt', 'tangibleasset_to_netdebt', 'ocf_to_debt', 'ocf_to_interestdebt',
        'ocf_to_netdebt', 'ebit_to_interest', 'longdebt_to_workingcapital', 'ebitda_to_debt',
        'turn_days', 'roa_yearly', 'roa_dp', 'fixed_assets', 'profit_prefin_exp', 'non_op_profit',
        'op_to_ebt', 'nop_to_ebt', 'ocf_to_profit', 'cash_to_liqdebt', 'cash_to_liqdebt_withinterest',
        'op_to_liqdebt', 'op_to_debt', 'roic_yearly', 'total_fa_trun', 'profit_to_op', 'q_opincome',
        'q_investincome', 'q_dtprofit', 'q_eps', 'q_netprofit_margin', 'q_gsprofit_margin',
        'q_exp_to_sales', 'q_profit_to_gr', 'q_saleexp_to_gr', 'q_adminexp_to_gr', 'q_finaexp_to_gr',
        'q_impair_to_gr_ttm', 'q_gc_to_gr', 'q_op_to_gr', 'q_roe', 'q_dt_roe', 'q_npta',
        'q_opincome_to_ebt', 'q_investincome_to_ebt', 'q_dtprofit_to_profit', 'q_salescash_to_or',
        'q_ocf_to_sales', 'q_ocf_to_or', 'basic_eps_yoy', 'dt_eps_yoy', 'cfps_yoy', 'op_yoy',
        'ebt_yoy', 'netprofit_yoy', 'dt_netprofit_yoy', 'ocf_yoy', 'roe_yoy', 'bps_yoy', 'assets_yoy',
        'eqt_yoy', 'tr_yoy', 'or_yoy', 'q_gr_yoy', 'q_gr_qoq', 'q_sales_yoy', 'q_sales_qoq',
        'q_op_yoy', 'q_op_qoq', 'q_profit_yoy', 'q_profit_qoq', 'q_netprofit_yoy', 'q_netprofit_qoq',
        'equity_yoy', 'rd_exp',
    )
    param_list.extend((name, DOUBLE) for name in double_columns)

    sql_str = """SELECT ts_code,subdate(list_date,365*10) date_frm,list_date date_to FROM tushare_stock_info"""
    logger.warning('%s 打补丁,使用 tushare_stock_info 表进行计算需要补充提取的日期范围', table_name)

    with with_db_session(engine_md) as session:
        # One row per stock: (ts_code, date_from, date_to)
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    # Column name -> SQL type mapping handed to the insert helper
    dtype = dict(param_list)
    # The requested columns are exactly the typed columns, in the same order
    fields = tuple(name for name, _ in param_list)

    # Same table options for every insert, so the table is set up identically
    # no matter which insert happens to create it
    insert_kwargs = dict(
        myisam_if_create_table=True,
        primary_keys=['ts_code', 'ann_date', 'end_date'],
        schema=config.DB_SCHEMA_MD)

    data_len = len(code_date_range_dic)
    logger.info('%d data will been import into %s', data_len, table_name)

    cycles = 1
    # Bound before the try block so the finally clause can always inspect it
    data_df = None
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            data_df = invoke_fina_indicator(
                ts_code=ts_code,
                start_date=start_date_str,
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS),
                fields=fields)

            if data_df is not None and len(data_df) > 0:
                oldest_raw = data_df['ann_date'].iloc[-1]
                # Keep asking for older pages until the window start is covered
                while oldest_raw is not None:
                    oldest_date = try_2_date(oldest_raw)
                    if oldest_date <= date_from:
                        break
                    df2 = invoke_fina_indicator(
                        ts_code=ts_code,
                        start_date=start_date_str,
                        end_date=datetime_2_str(oldest_date - timedelta(days=1), STR_FORMAT_DATE_TS),
                        fields=fields)
                    if df2 is None or len(df2) == 0:
                        break
                    next_raw = df2['ann_date'].iloc[-1]
                    # A page that does not move the oldest date backwards would repeat forever
                    if next_raw is None or try_2_date(next_raw) >= oldest_date:
                        break
                    data_df = pd.concat([data_df, df2])
                    oldest_raw = next_raw
                    logger.info('%d/%d) %d data of %s between %s and %s',
                                num, data_len, data_df.shape[0], ts_code, date_from, date_to)

            # Write the rows of this stock
            if data_df is not None and len(data_df) > 0:
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md, dtype, **insert_kwargs)
                logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            else:
                logger.warning('%d/%d) %s has no data during %s %s',
                               num, data_len, ts_code, date_from, date_to)
            # Already handled: nothing left for the finally clause to write
            data_df = None

            # Debug only: stop after a handful of stocks
            cycles += 1
            if DEBUG and cycles > 10:
                break
    finally:
        # data_df is non-empty here only when the loop was interrupted before its insert completed
        if data_df is not None and len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, dtype, **insert_kwargs)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
def import_tushare_stock_cashflow(ts_code_set=None):
    """
    Backfill the ``tushare_stock_cashflow`` table with pre-listing statements.

    For every row of ``tushare_stock_info`` the requested window runs from
    ``list_date - 3650 days`` to ``list_date``. Each code is fetched with
    ``invoke_cashflow`` and written with ``bunch_insert_on_duplicate_update``
    as soon as its window has been fully paged through.

    :param ts_code_set: optional collection of ts_code values; when given,
        only those codes are processed, otherwise every code is processed
    :return: None
    """
    table_name = 'tushare_stock_cashflow'
    logging.info("更新 %s 开始", table_name)
    sql_str = """SELECT ts_code,subdate(list_date,365*10) date_frm,list_date date_to FROM tushare_stock_info;"""
    logger.warning('%s 打补丁,使用 tushare_stock_info 表进行计算需要补充提取的日期范围', table_name)
    with with_db_session(engine_md) as session:
        # One (date_from, date_to) window per code, optionally filtered
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_len = len(code_date_range_dic)
    logger.info('%d stocks will been import into %s', data_len, table_name)
    # Frame fetched for the current code but not yet written. It is
    # initialised here so the ``finally`` block never hits an unbound name.
    data_df = None
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            # Debug runs only process the first 10 codes
            if DEBUG and num > 10:
                break
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            df = invoke_cashflow(ts_code=ts_code, start_date=start_date_str,
                                 end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            if df is None or len(df) == 0:
                logger.warning('%d/%d) %s has no data during %s %s',
                               num, data_len, ts_code, date_from, date_to)
                continue

            data_df = df
            # Page backwards: the paging relies on the last row of a page
            # carrying its oldest ann_date (presumably results are sorted
            # newest first and truncated by the API -- TODO confirm).
            while try_2_date(df['ann_date'].iloc[-1]) > date_from:
                last_date = try_2_date(df['ann_date'].iloc[-1])
                df2 = invoke_cashflow(
                    ts_code=ts_code, start_date=start_date_str,
                    end_date=datetime_2_str(last_date - timedelta(days=1), STR_FORMAT_DATE_TS))
                if df2 is None or len(df2) == 0:
                    break
                cur_date = try_2_date(df2['ann_date'].iloc[-1])
                if cur_date >= last_date:
                    # No progress towards date_from: stop instead of
                    # requesting the same page forever.
                    break
                data_df = pd.concat([data_df, df2])
                df = df2
                logger.info('%d/%d) %d data of %s between %s and %s',
                            num, data_len, data_df.shape[0], ts_code, date_from, date_to)

            # Write this code's rows, then clear the pending frame
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_CASHFLOW)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            data_df = None
    finally:
        # Flush rows fetched for a code whose processing was interrupted
        if data_df is not None and len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_CASHFLOW)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
def import_tushare_stock_balancesheet(ts_code_set=None):
    """
    Backfill the ``tushare_stock_balancesheet`` table with pre-listing statements.

    For every row of ``tushare_stock_info`` the requested window runs from
    ``list_date - 3650 days`` to ``list_date``. Each code is fetched with
    ``invoke_balancesheet`` and written with
    ``bunch_insert_on_duplicate_update`` as soon as its window has been
    fully paged through.

    :param ts_code_set: optional collection of ts_code values; when given,
        only those codes are processed, otherwise every code is processed
    :return: None
    """
    table_name = 'tushare_stock_balancesheet'
    logging.info("更新 %s 开始", table_name)
    # Column name -> SQL column type used when writing the table
    param_list = [
        ('ts_code', String(20)), ('ann_date', Date), ('f_ann_date', Date), ('end_date', Date),
        ('report_type', DOUBLE), ('comp_type', DOUBLE), ('total_share', DOUBLE),
        ('cap_rese', DOUBLE), ('undistr_porfit', DOUBLE), ('surplus_rese', DOUBLE),
        ('special_rese', DOUBLE), ('money_cap', DOUBLE), ('trad_asset', DOUBLE),
        ('notes_receiv', DOUBLE), ('accounts_receiv', DOUBLE), ('oth_receiv', DOUBLE),
        ('prepayment', DOUBLE), ('div_receiv', DOUBLE), ('int_receiv', DOUBLE),
        ('inventories', DOUBLE), ('amor_exp', DOUBLE), ('nca_within_1y', DOUBLE),
        ('sett_rsrv', DOUBLE), ('loanto_oth_bank_fi', DOUBLE), ('premium_receiv', DOUBLE),
        ('reinsur_receiv', DOUBLE), ('reinsur_res_receiv', DOUBLE), ('pur_resale_fa', DOUBLE),
        ('oth_cur_assets', DOUBLE), ('total_cur_assets', DOUBLE), ('fa_avail_for_sale', DOUBLE),
        ('htm_invest', DOUBLE), ('lt_eqt_invest', DOUBLE), ('invest_real_estate', DOUBLE),
        ('time_deposits', DOUBLE), ('oth_assets', DOUBLE), ('lt_rec', DOUBLE),
        ('fix_assets', DOUBLE), ('cip', DOUBLE), ('const_materials', DOUBLE),
        ('fixed_assets_disp', DOUBLE), ('produc_bio_assets', DOUBLE),
        ('oil_and_gas_assets', DOUBLE), ('intan_assets', DOUBLE), ('r_and_d', DOUBLE),
        ('goodwill', DOUBLE), ('lt_amor_exp', DOUBLE), ('defer_tax_assets', DOUBLE),
        ('decr_in_disbur', DOUBLE), ('oth_nca', DOUBLE), ('total_nca', DOUBLE),
        ('cash_reser_cb', DOUBLE), ('depos_in_oth_bfi', DOUBLE), ('prec_metals', DOUBLE),
        ('deriv_assets', DOUBLE), ('rr_reins_une_prem', DOUBLE), ('rr_reins_outstd_cla', DOUBLE),
        ('rr_reins_lins_liab', DOUBLE), ('rr_reins_lthins_liab', DOUBLE),
        ('refund_depos', DOUBLE), ('ph_pledge_loans', DOUBLE), ('refund_cap_depos', DOUBLE),
        ('indep_acct_assets', DOUBLE), ('client_depos', DOUBLE), ('client_prov', DOUBLE),
        ('transac_seat_fee', DOUBLE), ('invest_as_receiv', DOUBLE), ('total_assets', DOUBLE),
        ('lt_borr', DOUBLE), ('st_borr', DOUBLE), ('cb_borr', DOUBLE),
        ('depos_ib_deposits', DOUBLE), ('loan_oth_bank', DOUBLE), ('trading_fl', DOUBLE),
        ('notes_payable', DOUBLE), ('acct_payable', DOUBLE), ('adv_receipts', DOUBLE),
        ('sold_for_repur_fa', DOUBLE), ('comm_payable', DOUBLE), ('payroll_payable', DOUBLE),
        ('taxes_payable', DOUBLE), ('int_payable', DOUBLE), ('oth_payable', DOUBLE),
        ('acc_exp', DOUBLE), ('deferred_inc', DOUBLE), ('st_bonds_payable', DOUBLE),
        ('payable_to_reinsurer', DOUBLE), ('rsrv_insur_cont', DOUBLE),
        ('acting_trading_sec', DOUBLE), ('acting_uw_sec', DOUBLE),
        ('non_cur_liab_due_1y', DOUBLE), ('oth_cur_liab', DOUBLE), ('total_cur_liab', DOUBLE),
        ('bond_payable', DOUBLE), ('lt_payable', DOUBLE), ('specific_payables', DOUBLE),
        ('estimated_liab', DOUBLE), ('defer_tax_liab', DOUBLE),
        ('defer_inc_non_cur_liab', DOUBLE), ('oth_ncl', DOUBLE), ('total_ncl', DOUBLE),
        ('depos_oth_bfi', DOUBLE), ('deriv_liab', DOUBLE), ('depos', DOUBLE),
        ('agency_bus_liab', DOUBLE), ('oth_liab', DOUBLE), ('prem_receiv_adva', DOUBLE),
        ('depos_received', DOUBLE), ('ph_invest', DOUBLE), ('reser_une_prem', DOUBLE),
        ('reser_outstd_claims', DOUBLE), ('reser_lins_liab', DOUBLE),
        ('reser_lthins_liab', DOUBLE), ('indept_acc_liab', DOUBLE), ('pledge_borr', DOUBLE),
        ('indem_payable', DOUBLE), ('policy_div_payable', DOUBLE), ('total_liab', DOUBLE),
        ('treasury_share', DOUBLE), ('ordin_risk_reser', DOUBLE), ('forex_differ', DOUBLE),
        ('invest_loss_unconf', DOUBLE), ('minority_int', DOUBLE),
        ('total_hldr_eqy_exc_min_int', DOUBLE), ('total_hldr_eqy_inc_min_int', DOUBLE),
        ('total_liab_hldr_eqy', DOUBLE), ('lt_payroll_payable', DOUBLE),
        ('oth_comp_income', DOUBLE), ('oth_eqt_tools', DOUBLE),
        ('oth_eqt_tools_p_shr', DOUBLE), ('lending_funds', DOUBLE),
        ('acc_receivable', DOUBLE), ('st_fin_payable', DOUBLE), ('payables', DOUBLE),
        ('hfs_assets', DOUBLE), ('hfs_sales', DOUBLE),
    ]
    sql_str = """SELECT ts_code,subdate(list_date,365*10) date_frm,list_date date_to FROM tushare_stock_info"""
    logger.warning('%s 打补丁,使用 tushare_stock_info 表进行计算需要补充提取的日期范围', table_name)
    with with_db_session(engine_md) as session:
        # One (date_from, date_to) window per code, optionally filtered
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    dtype = dict(param_list)
    data_len = len(code_date_range_dic)
    logger.info('%d stocks will been import into %s', data_len, table_name)
    # Frame fetched for the current code but not yet written. It is
    # initialised here so the ``finally`` block never hits an unbound name.
    data_df = None
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            # Debug runs only process the first 10 codes
            if DEBUG and num > 10:
                break
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            df = invoke_balancesheet(
                ts_code=ts_code, start_date=start_date_str,
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            if df is None or len(df) == 0:
                logger.warning('%d/%d) %s has no data during %s %s',
                               num, data_len, ts_code, date_from, date_to)
                continue

            data_df = df
            # Page backwards: the paging relies on the last row of a page
            # carrying its oldest ann_date (presumably results are sorted
            # newest first and truncated by the API -- TODO confirm).
            while try_2_date(df['ann_date'].iloc[-1]) > date_from:
                last_date = try_2_date(df['ann_date'].iloc[-1])
                df2 = invoke_balancesheet(
                    ts_code=ts_code, start_date=start_date_str,
                    end_date=datetime_2_str(last_date - timedelta(days=1), STR_FORMAT_DATE_TS))
                if df2 is None or len(df2) == 0:
                    break
                cur_date = try_2_date(df2['ann_date'].iloc[-1])
                if cur_date >= last_date:
                    # No progress towards date_from: stop instead of
                    # requesting the same page forever.
                    break
                data_df = pd.concat([data_df, df2])
                df = df2
                logger.info('%d/%d) %d data of %s between %s and %s',
                            num, data_len, data_df.shape[0], ts_code, date_from, date_to)

            # Write this code's rows, then clear the pending frame.
            # NOTE(review): this call does not pass the table-creation options
            # (myisam_if_create_table / primary_keys / schema) that the
            # ``finally`` insert passes -- confirm which variant is intended.
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            data_df = None
    finally:
        # Flush rows fetched for a code whose processing was interrupted
        if data_df is not None and len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, dtype,
                myisam_if_create_table=True,
                primary_keys=['ts_code', 'ann_date'],
                schema=config.DB_SCHEMA_MD)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
def import_tushare_stock_top10_floatholders(chain_param=None, ts_code_set=None):
    """
    Incrementally import top-10 float-holder records into ``tushare_stock_top10_floatholders``.

    The per-code window is computed in SQL from ``tushare_stock_info``:

    * start: the day after the latest ``ann_date`` already stored for the
      code, or ``list_date - 3650 days`` when nothing is stored (or the
      target table does not exist yet);
    * end: the earlier of ``delist_date`` and the cut-off day, where the
      cut-off is yesterday while ``hour(now()) < 16`` on the database
      server and today afterwards.

    Codes whose start lies after their end are skipped by the query.

    :param chain_param: not used inside this function
    :param ts_code_set: optional collection of ts_code values; when given,
        only those codes are processed, otherwise every code is processed
    :return: None
    """
    table_name = 'tushare_stock_top10_floatholders'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Resume from stored data when the target table exists, otherwise
    # derive the whole window from tushare_stock_info alone
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date2, delist_date, end_date2) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(end_date, subdate(list_date,365*10)) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2
                FROM
                    tushare_stock_info info
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(ann_date),1) end_date
                    FROM {table_name} GROUP BY ts_code) top10_floatholders
                ON info.ts_code = top10_floatholders.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date2, delist_date, end_date2)
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date2, delist_date, end_date2) date_to
            FROM
            (
                SELECT info.ts_code, subdate(list_date,365*10) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2
                FROM tushare_stock_info info
            ) tt
            WHERE date_frm <= if(delist_date<end_date2, delist_date, end_date2)
            ORDER BY ts_code """
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # One (date_from, date_to) window per code, optionally filtered
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_len = len(code_date_range_dic)
    logger.info('%d stocks will been import into top10_floatholders', data_len)
    # Frame fetched for the current code but not yet written. It is
    # initialised here so the ``finally`` block never hits an unbound name.
    data_df = None
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            # Debug runs only process the first 5 codes
            if DEBUG and num > 5:
                break
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            df = invoke_top10_floatholders(
                ts_code=ts_code, start_date=start_date_str,
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            if df is None or len(df) == 0:
                logger.warning('%d/%d) %s has no data during %s %s',
                               num, data_len, ts_code, date_from, date_to)
                continue

            data_df = df
            # Page backwards: the paging relies on the last row of a page
            # carrying its oldest ann_date (presumably results are sorted
            # newest first and truncated by the API -- TODO confirm).
            while try_2_date(df['ann_date'].iloc[-1]) > date_from:
                last_date = try_2_date(df['ann_date'].iloc[-1])
                df2 = invoke_top10_floatholders(
                    ts_code=ts_code, start_date=start_date_str,
                    end_date=datetime_2_str(last_date - timedelta(days=1), STR_FORMAT_DATE_TS))
                if df2 is None or len(df2) == 0:
                    break
                cur_date = try_2_date(df2['ann_date'].iloc[-1])
                if cur_date >= last_date:
                    # No progress towards date_from: stop instead of
                    # requesting the same page forever.
                    break
                data_df = pd.concat([data_df, df2])
                df = df2
                logger.info('%d/%d) %d data of %s between %s and %s',
                            num, data_len, data_df.shape[0], ts_code, date_from, date_to)

            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_TOP10_FLOATHOLDERS)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            # Clear the pending frame so ``finally`` does not insert the
            # same rows a second time.
            data_df = None
    finally:
        # Flush rows fetched for a code whose processing was interrupted
        if data_df is not None and len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_TOP10_FLOATHOLDERS)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
def import_tushare_stock_income(chain_param=None, ts_code_set=None):
    """
    Incrementally import income statements into ``tushare_stock_income``.

    The per-code window is computed in SQL from ``tushare_stock_info``:

    * start: the day after the latest ``ann_date`` already stored for the
      code, or ``list_date - 3650 days`` when nothing is stored (or the
      target table does not exist yet);
    * end: the earlier of ``delist_date`` and the cut-off day, where the
      cut-off is yesterday while ``hour(now()) < 16`` on the database
      server and today afterwards.

    Fetched frames are buffered and written in batches once at least 1000
    rows are pending; whatever is still buffered is written in ``finally``.

    :param chain_param: not used inside this function
    :param ts_code_set: optional collection of ts_code values; when given,
        only those codes are processed, otherwise every code is processed
    :return: None
    """
    table_name = 'tushare_stock_income'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Resume from stored data when the target table exists, otherwise
    # derive the whole window from tushare_stock_info alone
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(ann_date, subdate(list_date,365*10)) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM
                    tushare_stock_info info
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(ann_date),1) ann_date
                    FROM {table_name} GROUP BY ts_code) income
                ON info.ts_code = income.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, subdate(list_date,365*10) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_info info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY ts_code DESC """
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # One (date_from, date_to) window per code, optionally filtered
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    # data_df_list buffers frames; data_count counts buffered rows;
    # all_data_count accumulates rows already handed to the database
    data_df_list, data_count, all_data_count = [], 0, 0
    data_len = len(code_date_range_dic)
    logger.info('%d stocks will been import into %s', data_len, table_name)
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            # Debug runs only process the first 10 codes
            if DEBUG and num > 10:
                break
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            df = invoke_income(
                ts_code=ts_code, start_date=start_date_str,
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            if df is None or len(df) == 0:
                logger.warning('%d/%d) %s has no data during %s %s',
                               num, data_len, ts_code, date_from, date_to)
                continue

            data_df = df
            # Page backwards: the paging relies on the last row of a page
            # carrying its oldest ann_date (presumably results are sorted
            # newest first and truncated by the API -- TODO confirm).
            while try_2_date(df['ann_date'].iloc[-1]) > date_from:
                last_date = try_2_date(df['ann_date'].iloc[-1])
                df2 = invoke_income(
                    ts_code=ts_code, start_date=start_date_str,
                    end_date=datetime_2_str(last_date - timedelta(days=1), STR_FORMAT_DATE_TS))
                if df2 is None or len(df2) == 0:
                    break
                cur_date = try_2_date(df2['ann_date'].iloc[-1])
                if cur_date >= last_date:
                    # No progress towards date_from: stop instead of
                    # requesting the same page forever.
                    break
                data_df = pd.concat([data_df, df2])
                df = df2

            logger.info('整体进度:%d/%d), %d 条 %s 的利润表数据被提取,起止时间为 %s 和 %s',
                        num, data_len, data_df.shape[0], ts_code, date_from, date_to)

            # Buffer this code's rows
            data_count += data_df.shape[0]
            data_df_list.append(data_df)

            # Write the buffer once it is large enough
            if data_count >= 1000:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_INCOME)
                logger.info('%d 条财务指标将数据插入 %s 表', data_count, table_name)
                all_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # Write whatever is still buffered, also when the loop was interrupted
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_INCOME)
            all_data_count = all_data_count + data_count
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
def import_tushare_stock_fina_mainbz(chain_param=None, ts_code_set=None):
    """
    Incrementally import main-business composition data into ``tushare_stock_fin_mainbz``.

    The per-code window is computed in SQL from ``tushare_stock_info``:

    * start: the day after the latest ``end_date`` already stored for the
      code, or ``list_date - 3650 days`` when nothing is stored (or the
      target table does not exist yet);
    * end: the earlier of ``delist_date`` and the cut-off day, where the
      cut-off is yesterday while ``hour(now()) < 16`` on the database
      server and today afterwards.

    Every code is requested once per type in ``('P', 'D')``; the type is
    stored in the ``market_type`` column of the fetched rows. Frames are
    buffered and written in batches once at least 100 rows are pending;
    whatever is still buffered is written in ``finally``.

    :param chain_param: not used inside this function
    :param ts_code_set: optional collection of ts_code values; when given,
        only those codes are processed, otherwise every code is processed
    :return: None
    """
    table_name = 'tushare_stock_fin_mainbz'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Resume from stored data when the target table exists, otherwise
    # derive the whole window from tushare_stock_info alone
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date2, delist_date, end_date2) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(end_date, subdate(list_date,365*10)) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2
                FROM
                    tushare_stock_info info
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(end_date),1) end_date
                    FROM {table_name} GROUP BY ts_code) mainbz
                ON info.ts_code = mainbz.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date2, delist_date, end_date2)
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date2, delist_date, end_date2) date_to
            FROM
            (
                SELECT info.ts_code, subdate(list_date,365*10) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2
                FROM tushare_stock_info info
            ) tt
            WHERE date_frm <= if(delist_date<end_date2, delist_date, end_date2)
            ORDER BY ts_code """
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # One (date_from, date_to) window per code, optionally filtered
        table = session.execute(sql_str)
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    # data_df_list buffers frames; data_count counts buffered rows;
    # all_data_count accumulates rows already handed to the database
    data_df_list, data_count, all_data_count = [], 0, 0
    data_len = len(code_date_range_dic)
    logger.info('%d data will been import into %s', data_len, table_name)
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            # Debug runs only process the first 2 codes
            if DEBUG and num > 2:
                break
            start_date_str = datetime_2_str(date_from, STR_FORMAT_DATE_TS)
            for mainbz_type in ('P', 'D'):
                logger.debug('%d/%d) %s [%s - %s] %s',
                             num, data_len, ts_code, date_from, date_to, mainbz_type)
                data_df = invoke_fina_mainbz(
                    ts_code=ts_code, start_date=start_date_str,
                    end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS),
                    type=mainbz_type)
                if data_df is None or len(data_df) == 0:
                    logger.warning('%d/%d) %s 在 %s 到 %s 这段时间无数据',
                                   num, data_len, ts_code, date_from, date_to)
                    continue
                data_df['market_type'] = mainbz_type

                # Page backwards. The follow-up request ends ON the oldest
                # end_date seen so far (no one-day step back), so rows for
                # that date may be fetched twice; presumably this is meant
                # to avoid losing rows when a page is cut in the middle of
                # a reporting date -- TODO confirm.
                while try_2_date(data_df['end_date'].iloc[-1]) > date_from:
                    last_date = try_2_date(data_df['end_date'].iloc[-1])
                    df2 = invoke_fina_mainbz(
                        ts_code=ts_code, start_date=start_date_str,
                        end_date=datetime_2_str(last_date, STR_FORMAT_DATE_TS),
                        type=mainbz_type)
                    if df2 is None or len(df2) == 0:
                        break
                    df2['market_type'] = mainbz_type
                    cur_date = try_2_date(df2['end_date'].iloc[-1])
                    if cur_date >= last_date:
                        # No older date was reached: stop paging
                        break
                    data_df = pd.concat([data_df, df2])

                logger.info(
                    '整体进度:%d/%d), 提取出%d 条 %s 的主营业务数据,类型为%s,起止时间为 %s 和 %s',
                    num, data_len, data_df.shape[0], ts_code, mainbz_type, date_from, date_to)

                # Buffer this code/type's rows
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

                # Write the buffer once it is large enough.
                # NOTE(review): primary_keys names 'ann_date' although paging
                # and the resume query use 'end_date' -- confirm the key
                # columns against DTYPE_TUSHARE_STOCK_FINA_MAINBZ.
                if data_count >= 100:
                    data_df_all = pd.concat(data_df_list)
                    bunch_insert_on_duplicate_update(
                        data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_FINA_MAINBZ,
                        myisam_if_create_table=True,
                        primary_keys=['ts_code', 'ann_date'],
                        schema=config.DB_SCHEMA_MD)
                    all_data_count += data_count
                    data_df_list, data_count = [], 0
    finally:
        # Write whatever is still buffered, also when the loop was interrupted
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_FINA_MAINBZ,
                myisam_if_create_table=True,
                primary_keys=['ts_code', 'ann_date'],
                schema=config.DB_SCHEMA_MD)
            all_data_count = all_data_count + data_count
        # Only when this run created the table: convert it and build its key
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_tushare_tmt_twincome(chain_param=None, ts_code_set=None):
    """
    Incrementally import Taiwan TMT income data into ``tushare_tmt_twincome``.

    The per-item window is computed in SQL from ``tushare_tmt_twincome_info``:

    * start: the day after the latest ``date`` already stored for the item,
      or the item's ``start_date`` when nothing is stored (or the target
      table does not exist yet);
    * end: yesterday while ``hour(now()) < 16`` on the database server and
      today afterwards.

    Fetched frames are buffered and written in batches once at least 1000
    rows are pending; whatever is still buffered is written in ``finally``.

    :param chain_param: not used inside this function
    :param ts_code_set: accepted for signature compatibility but ignored;
        every item in ``tushare_tmt_twincome_info`` is always processed
    :return: None
    """
    table_name = 'tushare_tmt_twincome'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Resume from stored data when the target table exists, otherwise
    # take the window from tushare_tmt_twincome_info alone
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm start_date, end_date
            FROM
            (
                SELECT info.ts_code, ifnull(date, start_date) date_frm,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM
                    tushare_tmt_twincome_info info
                LEFT OUTER JOIN
                    (SELECT item, adddate(max(date),1) date
                    FROM {table_name} GROUP BY item ) income
                ON info.ts_code = income.item
            ) tt
            order by ts_code""".format(table_name=table_name)
    else:
        sql_str = """SELECT ts_code, start_date ,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM tushare_tmt_twincome_info info """
        logger.warning('%s 不存在,仅使用 tushare_tmt_twincome_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        # NOTE(review): the original code reset ts_code_set to None right
        # here, so the caller's filter never applied. That behaviour is
        # kept; confirm whether filtering by item code is wanted.
        code_date_range_dic = {
            ts_code: (date_from, date_to)
            for ts_code, date_from, date_to in table.fetchall()
        }

    # data_df_list buffers frames; data_count counts buffered rows;
    # all_data_count accumulates rows already handed to the database
    data_df_list, data_count, all_data_count = [], 0, 0
    data_len = len(code_date_range_dic)
    logger.info(
        '%d Taiwan TMT information will been import into tushare_tmt_twincome', data_len)
    try:
        for num, (ts_code, (start_date, end_date)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, start_date, end_date)
            start_date_str = datetime_2_str(start_date, STR_FORMAT_DATE_TS)
            data_df = invoke_tmt_twincome(
                item=ts_code, start_date=start_date_str,
                end_date=datetime_2_str(end_date, STR_FORMAT_DATE_TS))
            if data_df is None or len(data_df) == 0:
                logger.warning('%d/%d) %s has no data during %s %s',
                               num, data_len, ts_code, start_date, end_date)
                continue

            # Page backwards: the paging relies on the last row carrying
            # the oldest date fetched so far (presumably results are sorted
            # newest first and truncated by the API -- TODO confirm).
            while try_2_date(data_df['date'].iloc[-1]) > try_2_date(start_date):
                last_date = try_2_date(data_df['date'].iloc[-1])
                df2 = invoke_tmt_twincome(
                    item=ts_code, start_date=start_date_str,
                    end_date=datetime_2_str(last_date - timedelta(days=1), STR_FORMAT_DATE_TS))
                if df2 is None or len(df2) == 0:
                    break
                cur_date = try_2_date(df2['date'].iloc[-1])
                if cur_date >= last_date:
                    # No progress towards start_date: stop instead of
                    # requesting the same page forever.
                    break
                data_df = pd.concat([data_df, df2])
                logger.info('%d/%d) %d data of %s between %s and %s',
                            num, data_len, data_df.shape[0], ts_code, start_date, end_date)

            # Buffer this item's rows
            data_count += data_df.shape[0]
            data_df_list.append(data_df)

            # Write the buffer once it is large enough
            if data_count >= 1000:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_TUSHARE_TMT_TWINCOME)
                all_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # Write whatever is still buffered, also when the loop was interrupted
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_TMT_TWINCOME)
            all_data_count = all_data_count + data_count
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)