def index_daily_crawler_job(query_name, is_before7=False, is_thread=False, thread_n=3, fields=None):
    """Dispatch index-daily crawl jobs for every pending row in job_basic_index.

    Args:
        query_name: Name of the flag column in job_basic_index that marks a
            pending job (value 0 = not done). It is interpolated into the SQL
            as a column identifier, so it must come from trusted code and
            never from user input.
        is_before7: When True, (re)initialize the schedule table for the
            window covering the last 7 days before querying.
        is_thread: When True, split the pending rows across ``thread_n``
            worker threads; otherwise a single worker thread handles all rows.
        thread_n: Number of worker threads used when ``is_thread`` is True.
        fields: Optional field list forwarded to the crawler worker inside
            ``{'fields': fields}``.

    Side effects: starts daemon-less threads running ``__daily_crawler_job``
    and returns without joining them (fire-and-forget).
    """
    if is_before7:
        now = datetime.now()
        before = now - timedelta(days=7)
        end_date = now.strftime('%Y%m%d')
        start_date = before.strftime('%Y%m%d')
        # NOTE(review): two functions named init_base_schedule exist in this
        # file; the later (index-table) definition wins at import time —
        # confirm that is the one intended here.
        init_base_schedule(start=start_date, end=end_date)
    datas = mysql_search(
        "select `index_code`,`start`,`end` from job_basic_index where `%s`=0 order by 1" % query_name)
    # The original duplicated the np.array_split call in an if/else whose only
    # difference was the partition count; collapsed into a single expression.
    parts = thread_n if is_thread else 1
    for code_ls in np.array_split(np.asarray(datas), parts, axis=0):
        worker = threading.Thread(
            target=__daily_crawler_job,
            args=(query_name, code_ls, {'fields': fields}))
        worker.start()
def daily_crawler_job(query_name, is_thread=False, thread_n=3, fields=None):
    """Dispatch stock-daily crawl jobs for every pending row in job_basic.

    Args:
        query_name: Name of the flag column in job_basic that marks a pending
            job (value 0 = not done). Interpolated into the SQL as a column
            identifier — must come from trusted code, never user input.
        is_thread: When True, split the pending rows across ``thread_n``
            worker threads; otherwise a single worker thread handles all rows.
        thread_n: Number of worker threads used when ``is_thread`` is True.
        fields: Optional field list forwarded to the crawler worker inside
            ``{'fields': fields}``.

    Side effects: starts threads running ``__daily_crawler_job`` and returns
    without joining them (fire-and-forget).
    """
    data_s = mysql_search("select `ts_code`,`start`,`end` from job_basic where `%s`=0 " % query_name)
    # The original duplicated the np.array_split call in an if/else whose only
    # difference was the partition count; collapsed into a single expression.
    parts = thread_n if is_thread else 1
    for code_ls in np.array_split(np.asarray(data_s), parts, axis=0):
        worker = threading.Thread(
            target=__daily_crawler_job,
            args=(query_name, code_ls, {'fields': fields}))
        worker.start()
def init_base_schedule(start=start1, end=end1):
    """Seed job_basic with one pending-task row per stock code.

    Does nothing when rows for the given [start, end] window already exist.
    All task flags start at '0' (pending).

    NOTE(review): a second function named ``init_base_schedule`` is defined
    later in this file and shadows this one at import time — confirm which
    definition callers are meant to reach.
    """
    existing = mysql_execute(
        "select * from job_basic where `start`='%s' and `end`='%s'" % (start, end))
    if existing > 0:
        logger.info("已设置初始任务")
        return
    rows = mysql_search("select ts_code from stock_basic order by ts_code")
    # One document per stock code, every job flag initialized to pending.
    docs = [
        {
            'ts_code': row[0],
            'start': start,
            'end': end,
            'daily': '0',
            'adj_factor': '0',
            'suspend': '0',
            'daily_basic': '0',
            'cal_hfq': '0',
            'index': '0',
        }
        for row in rows
    ]
    insert_many('job_basic', pd.DataFrame(docs), memo='设置基础任务')
def init_finance_job_base(start=start1, end=end1):
    """Seed job_finance_basic with one pending finance-task row per stock code.

    Does nothing (after logging) when rows for the given [start, end] window
    already exist. All finance task flags start at '0' (pending).
    """
    existing = mysql_execute(
        "select * from job_finance_basic where `start`='%s' and `end`='%s'" % (start, end))
    if existing > 0:
        logger.info("已经设置财务任务[%s-%s]%d" % (start, end, existing))
        return
    rows = mysql_search("select ts_code from stock_basic order by ts_code")
    # One document per stock code, every finance job flag initialized to pending.
    docs = [
        {
            'ts_code': row[0],
            'start': start,
            'end': end,
            'income': '0',
            'balancesheet': '0',
            'cashflow': '0',
            'forecast': '0',
            'express': '0',
            'fina_indicator': '0',
            'fina_audit': '0',
            'fina_mainbz': '0',
        }
        for row in rows
    ]
    insert_many('job_finance_basic', pd.DataFrame(docs), memo='设置财务数据基础任务')
def init_base_schedule(start=start1, end=end1):
    """Seed job_basic_index with one pending-task row per index code.

    Does nothing when rows for the given [start, end] window already exist.
    Both index task flags start at '0' (pending).

    NOTE(review): this redefines ``init_base_schedule`` from earlier in the
    file (the stock-table variant) — the two should probably have distinct
    names; left unchanged so existing callers keep working.
    """
    existing = mysql_execute(
        "select * from job_basic_index where `start`='%s' and `end`='%s'" % (start, end))
    if existing > 0:
        logger.info("已设置初始任务")
        return
    rows = mysql_search("select ts_code from index_basic order by ts_code")
    # 'index_weigth' looks like a typo for 'index_weight', but it is a runtime
    # key that presumably matches the DB column spelling — do not "fix" it
    # without also checking the job_basic_index schema.
    docs = [
        {
            'index_code': row[0],
            'start': start,
            'end': end,
            'index_daily': '0',
            'index_weigth': '0',
        }
        for row in rows
    ]
    insert_many('job_basic_index', pd.DataFrame(docs), memo='设置指数基础任务')
def finance_crawler_job(query_name, thread_n=3):
    """Fan out finance crawl jobs across ``thread_n`` worker threads.

    Args:
        query_name: Presumably an enum member; its ``.value`` is used as the
            pending-flag column name in job_finance_basic (0 = not done).
        thread_n: Number of chunks/threads the pending rows are split into.

    Side effects: starts threads running ``__finance_crawler_job`` and returns
    without joining them (fire-and-forget).
    """
    pending = mysql_search(
        "select `ts_code`,`start`,`end` from job_finance_basic where `%s`=0" % query_name.value)
    for chunk in np.array_split(np.asarray(pending), thread_n, axis=0):
        worker = threading.Thread(target=__finance_crawler_job, args=(query_name, chunk))
        worker.start()