from common.util.sls_log_service import get_logger
from crawler.base.db_base.stock_db_base import StockDBBase
from multiprocessing import Queue
from crawler.worker.crawler.stock_crawl_worker import CrawlerWorker

MULTI_PROCESS_NO = 2
PROCESS_EXIT_TIMEOUT = 120

logger = get_logger()


class CrawlTaskScheduler(StockDBBase):
    def __init__(self):
        super(CrawlTaskScheduler, self).__init__()
        # Spawn a fixed set of worker processes that share one task queue
        # and report results back on one result queue.
        self.worker_pools = []
        self.task_queue = Queue()
        self.result_queue = Queue()
        for i in xrange(MULTI_PROCESS_NO):
            p = CrawlerWorker(i + 1, self.task_queue, self.result_queue)
            p.start()
            self.worker_pools.append(p)

    def start_schedule(self):
        # Pick up every enabled task whose last crawl day is before today,
        # stamp it with the current date, and hand it to the workers.
        t_sql = 'select * from stock_task_scheduler where status=1 and last_crawl_day<%s'
        c_tasks = self.query(t_sql, self.dt)
        logger.info("Need execute task: %s" % len(c_tasks))
        for task in c_tasks:
            task['cur_dt'] = self.dt
            self.task_queue.put(task)
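
# --- Hypothetical sketch, not the actual crawler.worker.crawler.stock_crawl_worker
# module (which is not shown here): a CrawlerWorker compatible with the scheduler
# above would be a Process that blocks on task_queue and reports on result_queue.
# The class name, the dispatch placeholder, and the None poison-pill shutdown
# convention are assumptions for illustration only.
from multiprocessing import Process


class CrawlerWorkerSketch(Process):
    def __init__(self, worker_no, task_queue, result_queue):
        super(CrawlerWorkerSketch, self).__init__()
        self.worker_no = worker_no
        self.task_queue = task_queue
        self.result_queue = result_queue

    def run(self):
        while True:
            task = self.task_queue.get()  # blocks until the scheduler enqueues a task
            if task is None:              # assumed poison pill: tells the worker to exit
                break
            # ... dispatch on task['crawl_type'] and execute the crawl ...
            self.result_queue.put({'task': task, 'crawl_status': 'done'})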
"gmt_modify": "2019-09-07 14:01:51", "last_crawl_day": "2018-01-15", "crawl_type": "BasicTradeInfo", "dt": "2019-09-07", "crawl_start": "2019-09-01 12:00:00", "crawl_status": "waiting", "cur_dt": "2019-09-08" } ''' API_NAME = 'daily_basic' ''' 每日指标: https://tushare.pro/document/2?doc_id=32 ''' logger = get_logger(uuid=arrow.now().date().strftime('%Y-%m-%d')) class DailyBasicInfoTask(BaseTask): def __init__(self): super(DailyBasicInfoTask, self).__init__() def close(self): super(DailyBasicInfoTask, self).close() def run(self, task_define): start_dt = arrow.get(task_define['last_crawl_day']) # .strftime('%Y%m%d') end_dt = arrow.get(task_define['cur_dt']) for r_dt in arrow.Arrow.range('day', start_dt, end_dt): logger.info('Start to crawl data of task: %s, dt: %s' % (self.__class__.__name__, r_dt)) t_dt = r_dt.strftime('%Y%m%d')