Example #1
from common.util.sls_log_service import get_logger
from crawler.base.db_base.stock_db_base import StockDBBase
from multiprocessing.pool import Pool
from multiprocessing import Queue
from crawler.worker.crawler.stock_crawl_worker import CrawlerWorker

MULTI_PROCESS_NO = 2
PROCESS_EXIT_TIMEOUT = 120

logger = get_logger()


class CrawlTaskScheduler(StockDBBase):
    def __init__(self):
        super(CrawlTaskScheduler, self).__init__()
        self.worker_pools = []
        self.task_queue = Queue()
        self.result_queue = Queue()
        # Spawn the worker processes up front; each one consumes tasks from task_queue.
        for i in range(MULTI_PROCESS_NO):
            p = CrawlerWorker(i + 1, self.task_queue, self.result_queue)
            p.start()
            self.worker_pools.append(p)

    def start_schedule(self):
        t_sql = 'select * from stock_task_scheduler where status=1 and last_crawl_day<%s'
        c_tasks = self.query(t_sql, self.dt)
        logger.info("Need execute task: %s" % len(c_tasks))
        for task in c_tasks:
            task['cur_dt'] = self.dt
            self.task_queue.put(task)
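
The scheduler above is only the producer half of the pipeline: it spawns CrawlerWorker processes and then pushes pending task rows onto task_queue. CrawlerWorker itself is not shown here, so the following is a minimal sketch of what such a consumer might look like, assuming it is a multiprocessing.Process that dispatches each queued task dict by its crawl_type field; the class name CrawlerWorkerSketch and the TASK_CLASSES registry are hypothetical stand-ins.

from multiprocessing import Process


class CrawlerWorkerSketch(Process):
    # Hypothetical registry: maps a task row's crawl_type to the task class that
    # handles it, e.g. {'BasicTradeInfo': DailyBasicInfoTask} in this code base.
    TASK_CLASSES = {}

    def __init__(self, worker_id, task_queue, result_queue):
        super(CrawlerWorkerSketch, self).__init__()
        self.worker_id = worker_id
        self.task_queue = task_queue
        self.result_queue = result_queue

    def run(self):
        while True:
            task = self.task_queue.get()      # blocks until the scheduler enqueues a task
            if task is None:                  # sentinel: ask the worker to exit
                break
            task_cls = self.TASK_CLASSES.get(task['crawl_type'])
            if task_cls is None:              # unknown crawl_type, nothing to do
                continue
            handler = task_cls()
            try:
                handler.run(task)
                self.result_queue.put({'task': task, 'status': 'done'})
            finally:
                handler.close()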
Example #2
    "gmt_modify": "2019-09-07 14:01:51",
    "last_crawl_day": "2018-01-15",
    "crawl_type": "BasicTradeInfo",
    "dt": "2019-09-07",
    "crawl_start": "2019-09-01 12:00:00",
    "crawl_status": "waiting",
    "cur_dt": "2019-09-08"
}
'''
API_NAME = 'daily_basic'

'''
Daily indicators: https://tushare.pro/document/2?doc_id=32
'''
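
API_NAME refers to Tushare Pro's daily_basic endpoint (the daily indicators table documented at the URL above). For orientation, here is a minimal sketch of fetching one trade day of that table directly with the tushare client; the token is a placeholder, the field list is abbreviated, and the actual task class presumably wraps this call through BaseTask rather than calling it inline.

import tushare as ts

# Placeholder token; a real Tushare Pro token is required for authenticated calls.
pro = ts.pro_api('YOUR_TUSHARE_TOKEN')

# Fetch the daily indicator table for a single trade date (YYYYMMDD, the same
# format produced by r_dt.strftime('%Y%m%d') in the run() loop below).
df = pro.daily_basic(trade_date='20190102',
                     fields='ts_code,trade_date,turnover_rate,pe,pb,total_mv')
print(df.head())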

logger = get_logger(uuid=arrow.now().date().strftime('%Y-%m-%d'))


class DailyBasicInfoTask(BaseTask):
    def __init__(self):
        super(DailyBasicInfoTask, self).__init__()

    def close(self):
        super(DailyBasicInfoTask, self).close()

    def run(self, task_define):
        start_dt = arrow.get(task_define['last_crawl_day'])  # .strftime('%Y%m%d')
        end_dt = arrow.get(task_define['cur_dt'])
        for r_dt in arrow.Arrow.range('day', start_dt, end_dt):
            logger.info('Start to crawl data of task: %s, dt: %s' % (self.__class__.__name__, r_dt))
            t_dt = r_dt.strftime('%Y%m%d')
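
The loop body is truncated in this excerpt; what remains is the per-day iteration that formats each date as the YYYYMMDD string the API expects. In the full pipeline the task is driven by CrawlTaskScheduler and CrawlerWorker from Example #1, but a stand-alone usage sketch with an illustrative task row (values mirror the docstring sample at the top of the file) might look like this:

if __name__ == '__main__':
    # Illustrative task row; in production it comes from the stock_task_scheduler table.
    task_define = {
        'crawl_type': 'BasicTradeInfo',
        'last_crawl_day': '2019-09-01',
        'cur_dt': '2019-09-08',
    }
    task = DailyBasicInfoTask()
    try:
        task.run(task_define)   # iterates day by day from last_crawl_day to cur_dt
    finally:
        task.close()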