def stock_basic_crawler():
    """Fetch the ``stock_basic`` listing and hand it to ``insert_many``.

    Sets the API token on ``ts``, creates a pro client, queries the
    ``stock_basic`` endpoint for a fixed set of columns and passes the
    result to ``insert_many`` under the name ``stock_basic``.
    """
    # NOTE(review): the API token is hard-coded in source; consider loading
    # it from configuration instead.
    ts.set_token('7e33fd87cfa25664c9b20f637b9d75ce613aea3d57d7d41ba66cebcc')
    client = ts.pro_api()
    columns = ','.join((
        'ts_code', 'symbol', 'name', 'fullname', 'enname', 'exchange_id',
        'curr_type', 'list_date', 'list_status', 'delist_date', 'is_hs',
    ))
    listing = client.query('stock_basic', fields=columns)
    insert_many('stock_basic', listing, memo='更新股票列表')
def index_basic_crawler():
    """Fetch ``index_basic`` data for each listed market and store it.

    For every market code in a fixed list, calls ``pro.index_basic`` with a
    fixed column selection and passes the result to ``insert_many`` under
    the name ``index_basic``.
    """
    # NOTE(review): the API token is hard-coded in source; consider loading
    # it from configuration instead.
    ts.set_token('7e33fd87cfa25664c9b20f637b9d75ce613aea3d57d7d41ba66cebcc')
    client = ts.pro_api()
    columns = ','.join((
        'ts_code', 'name', 'fullname', 'market', 'publisher', 'index_type',
        'category', 'base_date', 'base_point', 'list_date', 'weight_rule',
        'desc', 'exp_date',
    ))
    markets = ('MSCI', 'CSI', 'SSE', 'SZSE', 'CICC', 'SW', 'CNI', 'OTH')
    for market in markets:
        index_info = client.index_basic(market=market, fields=columns)
        insert_many('index_basic', index_info,
                    memo='更新股票指数基本信息%s' % market)
def init_base_schedule(start=start1, end=end1):
    """Create one ``job_basic`` row per stock for the given date range.

    Does nothing (apart from logging) when ``mysql_execute`` reports a
    positive result for an existing ``job_basic`` entry with the same
    ``start`` and ``end``. Otherwise builds one row per ``ts_code`` found in
    ``stock_basic``, with every task flag set to ``'0'``, and passes the
    rows to ``insert_many`` as a DataFrame.

    Args:
        start: Value stored in (and matched against) the ``start`` column.
        end: Value stored in (and matched against) the ``end`` column.
    """
    existing = mysql_execute(
        "select * from job_basic where `start`='%s' and `end`='%s'" % (start, end))
    if existing > 0:
        logger.info("已设置初始任务")
        return

    code_rows = mysql_search("select ts_code from stock_basic order by ts_code")
    # Every per-stock task flag starts out as the string '0'.
    jobs = [
        {
            'ts_code': row[0],
            'start': start,
            'end': end,
            'daily': '0',
            'adj_factor': '0',
            'suspend': '0',
            'daily_basic': '0',
            'cal_hfq': '0',
            'index': '0',
        }
        for row in code_rows
    ]
    job_frame = pd.DataFrame(jobs)
    insert_many('job_basic', job_frame, memo='设置基础任务')
def init_finance_job_base(start=start1, end=end1):
    """Create one ``job_finance_basic`` row per stock for the given range.

    Logs and returns early when ``mysql_execute`` reports a positive result
    for an existing ``job_finance_basic`` entry with the same ``start`` and
    ``end``. Otherwise builds one row per ``ts_code`` found in
    ``stock_basic``, with every finance task flag set to ``'0'``, and passes
    the rows to ``insert_many`` as a DataFrame.

    Args:
        start: Value stored in (and matched against) the ``start`` column.
        end: Value stored in (and matched against) the ``end`` column.
    """
    existing = mysql_execute(
        "select * from job_finance_basic where `start`='%s' and `end`='%s'" % (start, end))
    if existing > 0:
        logger.info("已经设置财务任务[%s-%s]%d" % (start, end, existing))
        return

    code_rows = mysql_search("select ts_code from stock_basic order by ts_code")
    # Every per-stock finance task flag starts out as the string '0'.
    jobs = [
        {
            'ts_code': row[0],
            'start': start,
            'end': end,
            'income': '0',
            'balancesheet': '0',
            'cashflow': '0',
            'forecast': '0',
            'express': '0',
            'fina_indicator': '0',
            "fina_audit": '0',
            "fina_mainbz": '0',
        }
        for row in code_rows
    ]
    job_frame = pd.DataFrame(jobs)
    insert_many('job_finance_basic', job_frame, memo='设置财务数据基础任务')
def __daily_crawler_job(query_name, code_ls, fields=None):
    """Run one per-stock query for every job entry and record its completion.

    For each entry the matching ``pro.query`` call is made, the result is
    passed to ``insert_many`` under ``query_name``, and when ``insert_many``
    returns a positive number the ``job_basic`` column named ``query_name``
    is set to 1 for that ``(ts_code, start, end)`` row.

    Args:
        query_name: One of ``'daily'``, ``'adj_factor'``, ``'suspend'`` or
            ``'daily_basic'``. Any other value is logged and ends the
            function at the first entry.
        code_ls: Sequence of entries indexed as ``[0]`` ts_code, ``[1]``
            start and ``[2]`` end.
        fields: Unused; kept so existing callers keep working.
    """
    total = len(code_ls)
    for position, code in enumerate(code_ls, start=1):
        logger.info('%s抓取%s%s [%d/%d]' % (
            threading.current_thread().name, query_name, code, position, total))
        ts_code, start, end = code[0], code[1], code[2]

        if query_name in ('daily', 'daily_basic'):
            data = pro.query(query_name, ts_code=ts_code,
                             start_date=start, end_date=end)
        elif query_name == 'adj_factor':
            data = pro.query(query_name, ts_code=ts_code)
        elif query_name == 'suspend':
            data = pro.query(
                query_name, ts_code=ts_code,
                fields="ts_code,suspend_date,resume_date,ann_date,suspend_reason,reason_type")
        else:
            logger.info("不在列表之内")
            return

        memo = "执行任务%s[%s]" % (query_name, code)
        result = insert_many(query_name, data, memo=memo)

        # Wrap `code` in a 1-tuple: formatting a bare 3-item tuple against a
        # single %s raises "not all arguments converted".
        memo = "更新执行任务" + query_name + "[%s]状态" % (code,)
        if result is not None and result > 0:
            # The job_basic flag column carries the same name as the query.
            u_sql = "update job_basic set `%s`=1 where `ts_code`='%s' and `start`='%s' and `end`='%s'"
            mysql_execute(u_sql % (query_name, ts_code, start, end), memo=memo)
def __get_fina_indicator(query_name, code, start, end):
    """Fetch data for one stock in date windows and store each window.

    The range from ``start`` to ``end`` is cut into windows of at most 3600
    days. Each window is queried through ``pro.query`` and the result is
    passed to ``insert_many`` under ``fina_indicator``. When the summed
    ``insert_many`` results are positive, the ``fina_indicator`` column of
    the matching ``job_finance_basic`` row is set to 1.

    Nothing is queried when ``start`` is not strictly before ``end``.

    Args:
        query_name: Object whose ``.value`` is the API name for ``pro.query``.
        code: Stock code sent as ``ts_code``.
        start: Range start as a ``YYYYMMDD`` string.
        end: Range end as a ``YYYYMMDD`` string.

    Raises:
        ValueError: If ``start`` or ``end`` is not in ``YYYYMMDD`` format.
    """
    range_start = datetime.strptime(start, '%Y%m%d')
    range_end = datetime.strptime(end, '%Y%m%d')
    api_name = query_name.value

    # Cut the range into windows of at most 3600 days.
    # NOTE(review): consecutive windows share their boundary date; if the API
    # treats both bounds as inclusive that date is fetched twice - confirm.
    windows = []
    cursor = range_start
    while cursor < range_end:
        window_end = min(cursor + timedelta(days=360 * 10), range_end)
        windows.append((cursor.strftime('%Y%m%d'), window_end.strftime('%Y%m%d')))
        cursor = window_end

    inserted_total = 0
    update_memo = None
    total = len(windows)
    for position, (window_start, window_stop) in enumerate(windows, start=1):
        memo = "%s子线程执行%s[%s-%s]任务[%d/%d]%s" % (
            threading.current_thread().name, code, window_start, window_stop,
            position, total, api_name)
        logger.info(memo)
        data = pro.query(api_name, ts_code=code,
                         start_date=window_start, end_date=window_stop)
        inserted = insert_many('fina_indicator', data, memo=memo)
        update_memo = "更新执行任务" + api_name + "[%s][%s-%s]状态" % (
            code, window_start, window_stop)
        inserted_total += inserted if inserted is not None else 0

    # Mark the job as done once, after every window has been processed.
    if inserted_total > 0:
        u_sql = "update job_finance_basic set `%s`=1 where `ts_code`='%s' and `start`='%s' and `end`='%s'"
        mysql_execute(u_sql % ('fina_indicator', code, start, end), memo=update_memo)
def __daily_crawler_job(query_name, code_ls, fields=None):
    """Run one per-index query for every job entry and record its completion.

    For each entry the matching ``pro`` call is made, the result is passed
    to ``insert_many`` under ``query_name``, and when ``insert_many`` returns
    a positive number the ``job_basic_index`` column named ``query_name`` is
    set to 1 for that ``(index_code, start, end)`` row.

    Args:
        query_name: ``'index_daily'`` or ``'index_weigth'`` (spelling as used
            by the stored table/column names). Any other value is logged and
            ends the function at the first entry.
        code_ls: Sequence of entries indexed as ``[0]`` index code, ``[1]``
            start and ``[2]`` end.
        fields: Unused; kept so existing callers keep working.
    """
    total = len(code_ls)
    for position, code in enumerate(code_ls, start=1):
        logger.info('%s抓取%s%s [%d/%d]' % (
            threading.current_thread().name, query_name, code, position, total))
        ts_code, start, end = code[0], code[1], code[2]

        if query_name == 'index_daily':
            data = pro.index_daily(ts_code=ts_code, start_date=start, end_date=end)
        elif query_name == 'index_weigth':
            data = pro.index_weight(index_code=ts_code, start_date=start, end_date=end)
        else:
            logger.info("不在列表之内")
            return

        memo = "执行任务" + query_name
        result = insert_many(query_name, data, memo=memo)

        # Wrap `code` in a 1-tuple: formatting a bare 3-item tuple against a
        # single %s raises "not all arguments converted".
        memo = "更新执行任务" + query_name + "[%s]状态" % (code,)
        if result is not None and result > 0:
            # The job_basic_index flag column carries the same name as the query.
            u_sql = "update job_basic_index set `%s`=1 where `index_code`='%s' and `start`='%s' and `end`='%s'"
            mysql_execute(u_sql % (query_name, ts_code, start, end), memo=memo)
def trade_cal_crawler(start_date='19901220', end_date='20190101', is_before7=False):
    """Fetch ``trade_cal`` data for a date range and hand it to ``insert_many``.

    Args:
        start_date: Range start as a ``YYYYMMDD`` string.
        end_date: Range end as a ``YYYYMMDD`` string.
        is_before7: When true, both dates are replaced by the window from
            seven days before the current local time up to now.
    """
    # NOTE(review): the API token is hard-coded in source; consider loading
    # it from configuration instead.
    ts.set_token('7e33fd87cfa25664c9b20f637b9d75ce613aea3d57d7d41ba66cebcc')
    client = ts.pro_api()

    if is_before7:
        today = datetime.now()
        week_ago = today - timedelta(days=7)
        start_date = week_ago.strftime('%Y%m%d')
        end_date = today.strftime('%Y%m%d')

    calendar = client.query(
        'trade_cal',
        start_date=start_date,
        end_date=end_date,
        fields='exchange_id,cal_date,is_open,pretrade_date',
    )
    insert_many('trade_cal', calendar, memo='更新股票交易日期')
def init_base_schedule(start=start1, end=end1):
    """Create one ``job_basic_index`` row per index for the given date range.

    Does nothing (apart from logging) when ``mysql_execute`` reports a
    positive result for an existing ``job_basic_index`` entry with the same
    ``start`` and ``end``. Otherwise builds one row per ``ts_code`` found in
    ``index_basic``, with both task flags set to ``'0'``, and passes the
    rows to ``insert_many`` as a DataFrame.

    Args:
        start: Value stored in (and matched against) the ``start`` column.
        end: Value stored in (and matched against) the ``end`` column.
    """
    existing = mysql_execute(
        "select * from job_basic_index where `start`='%s' and `end`='%s'" % (start, end))
    if existing > 0:
        logger.info("已设置初始任务")
        return

    index_rows = mysql_search("select ts_code from index_basic order by ts_code")
    # Both per-index task flags start out as the string '0'.
    jobs = [
        {
            'index_code': row[0],
            'start': start,
            'end': end,
            'index_daily': '0',
            'index_weigth': '0',
        }
        for row in index_rows
    ]
    job_frame = pd.DataFrame(jobs)
    insert_many('job_basic_index', job_frame, memo='设置指数基础任务')
def __finance_crawler_job(query_name, code_ls):
    """Run one finance query for every job entry and record its completion.

    For each entry the data is fetched through ``pro``, passed to
    ``insert_many`` under ``query_name.value``, and when ``insert_many``
    returns a positive number the ``job_finance_basic`` column named
    ``query_name.value`` is set to 1 for that ``(ts_code, start, end)`` row.
    ``QueryName.fina_indicator`` entries are delegated to
    ``__get_fina_indicator``, which does its own storing and status update.

    Args:
        query_name: A ``QueryName`` member. An unhandled member is logged and
            ends the function at the first entry.
        code_ls: Sequence of entries indexed as ``[0]`` ts_code, ``[1]``
            start and ``[2]`` end.
    """
    # Members that all go through the generic pro.query call with the same
    # arguments.
    generic_queries = (
        QueryName.income,
        QueryName.balance_sheet,
        QueryName.cash_flow,
        QueryName.forecast,
        QueryName.express,
    )
    total = len(code_ls)
    for position, code in enumerate(code_ls, start=1):
        api_name = query_name.value
        logger.info('%s抓取%s%s [%d/%d]' % (
            threading.current_thread().name, api_name, code, position, total))
        ts_code, start, end = code[0], code[1], code[2]

        if query_name in generic_queries:
            data = pro.query(api_name, ts_code=ts_code,
                             start_date=start, end_date=end)
        elif query_name == QueryName.fina_indicator:
            __get_fina_indicator(query_name, ts_code, start, end)
            continue
        elif query_name == QueryName.fina_audit:
            data = pro.fina_audit(ts_code=ts_code, start_date=start, end_date=end)
        elif query_name == QueryName.fina_main_bz:
            # Keep the result: it was previously discarded, so insert_many
            # received None for this query type.
            data = pro.fina_mainbz(ts_code=ts_code, start_date=start, end_date=end)
        else:
            logger.info("不在业务范围内")
            return

        memo = "执行任务" + api_name
        res = insert_many(api_name, data, memo=memo)

        # Wrap `code` in a 1-tuple: formatting a bare 3-item tuple against a
        # single %s raises "not all arguments converted".
        memo = "更新执行任务" + api_name + "[%s]状态" % (code,)
        if res is not None and res > 0:
            u_sql = "update job_finance_basic set `%s`=1 where `ts_code`='%s' and `start`='%s' and `end`='%s'"
            mysql_execute(u_sql % (api_name, ts_code, start, end), memo=memo)