def sync_job_status(self, project):
    """
    Sync the job status reported by the scrapyd servers into the system's
    job_execution table.
    :param project:
    :return:
    """
    for spider_service_instance in self.spider_service_instances_slave:
        # Ask scrapyd for the status of this project's spider jobs,
        # e.g. {'pending': [], 'running': [], 'finish': []}
        job_status = spider_service_instance.get_job_list(project.project_name)
        # Load the uncompleted ('pending', 'running') jobs from the database
        job_execution_list = JobExecution.list_uncomplete_job()
        # Build a {job execution id: job execution} dict from the job_execution rows
        job_execution_dict = dict(
            [(job_execution.service_job_execution_id.split('>')[-1], job_execution)
             for job_execution in job_execution_list])
        # Match the job_execution rows in the database against the job status
        # reported by scrapyd and update the corresponding fields.
        # running on scrapyd (still PENDING in the database)
        for job_execution_info in job_status[SpiderStatus.RUNNING]:
            job_execution = job_execution_dict.get(job_execution_info['id'])
            if job_execution and job_execution.running_status == SpiderStatus.PENDING:
                job_execution.start_time = job_execution_info['start_time']
                job_execution.running_status = SpiderStatus.RUNNING
        # finished on scrapyd (not yet FINISHED in the database)
        for job_execution_info in job_status[SpiderStatus.FINISHED]:
            job_execution = job_execution_dict.get(job_execution_info['id'])
            if job_execution and job_execution.running_status != SpiderStatus.FINISHED:
                job_execution.start_time = job_execution_info['start_time']
                job_execution.end_time = job_execution_info['end_time']
                job_execution.running_status = SpiderStatus.FINISHED
    db.session.commit()
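For reference, here is a minimal standalone sketch of the matching step above, with hypothetical in-memory stand-ins for the scrapyd response and the JobExecution rows. The SpiderStatus values, the sample job data, and the 'node_id>scrapyd_job_id' format of service_job_execution_id are assumptions made for illustration, not something guaranteed by the code above.

# Hedged sketch of the id-matching logic; all data below is made up.
class SpiderStatus:
    PENDING, RUNNING, FINISHED, CANCELED = range(4)

class FakeJobExecution:
    # Stand-in for a JobExecution row; not the project's real model.
    def __init__(self, service_job_execution_id, running_status):
        self.service_job_execution_id = service_job_execution_id
        self.running_status = running_status
        self.start_time = None
        self.end_time = None

# What get_job_list() might return for one project (hypothetical data).
job_status = {
    SpiderStatus.PENDING: [],
    SpiderStatus.RUNNING: [{'id': 'abc123', 'start_time': '2018-01-01 10:00:00'}],
    SpiderStatus.FINISHED: [{'id': 'def456', 'start_time': '2018-01-01 09:00:00',
                             'end_time': '2018-01-01 09:30:00'}],
}

# Uncompleted rows as they might sit in the database ('node>job_id' keys assumed).
rows = [FakeJobExecution('1>abc123', SpiderStatus.PENDING),
        FakeJobExecution('1>def456', SpiderStatus.RUNNING)]

# Key by the scrapyd job id, i.e. the part after '>'.
by_id = {row.service_job_execution_id.split('>')[-1]: row for row in rows}

for info in job_status[SpiderStatus.RUNNING]:
    row = by_id.get(info['id'])
    if row and row.running_status == SpiderStatus.PENDING:
        row.start_time = info['start_time']
        row.running_status = SpiderStatus.RUNNING

for info in job_status[SpiderStatus.FINISHED]:
    row = by_id.get(info['id'])
    if row and row.running_status != SpiderStatus.FINISHED:
        row.start_time = info['start_time']
        row.end_time = info['end_time']
        row.running_status = SpiderStatus.FINISHED

assert by_id['abc123'].running_status == SpiderStatus.RUNNING
assert by_id['def456'].end_time == '2018-01-01 09:30:00'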
def sync_job_status(self, project):
    for spider_service_instance in self.spider_service_instances:
        job_status = spider_service_instance.get_job_list(project.project_name)
        job_execution_list = JobExecution.list_uncomplete_job()
        job_execution_dict = dict(
            [(job_execution.service_job_execution_id, job_execution)
             for job_execution in job_execution_list])
        # running
        for job_execution_info in job_status[SpiderStatus.RUNNING]:
            job_execution = job_execution_dict.get(job_execution_info['id'])
            if job_execution and job_execution.running_status == SpiderStatus.PENDING:
                job_execution.start_time = job_execution_info['start_time']
                job_execution.running_status = SpiderStatus.RUNNING
        # finished
        for job_execution_info in job_status[SpiderStatus.FINISHED]:
            job_execution = job_execution_dict.get(job_execution_info['id'])
            if job_execution and job_execution.running_status != SpiderStatus.FINISHED:
                job_execution.start_time = job_execution_info['start_time']
                job_execution.end_time = job_execution_info['end_time']
                job_execution.running_status = SpiderStatus.FINISHED
    # commit
    try:
        db.session.commit()
    except:
        db.session.rollback()
        raise
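The try/except around the commit in this variant matters because, once a flush or commit fails, a SQLAlchemy session must be rolled back before it can be used again; without the rollback, later operations on the same session keep raising. A minimal sketch of that behavior, assuming SQLAlchemy 1.4+ and using a throwaway in-memory model rather than the project's JobExecution:

# Hedged sketch: why a failed commit should be followed by rollback.
from sqlalchemy import Column, Integer, create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class JobExecutionStub(Base):
    # Throwaway stand-in, not the project's real JobExecution model.
    __tablename__ = 'job_execution_stub'
    id = Column(Integer, primary_key=True)
    running_status = Column(Integer, nullable=False)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

# Violates the NOT NULL constraint, so the commit fails.
session.add(JobExecutionStub(id=1, running_status=None))
try:
    session.commit()
except Exception as exc:
    session.rollback()  # leave the session usable for the next sync pass
    print('commit failed:', exc)

# After the rollback the session works again and the bad row was discarded.
print(session.query(JobExecutionStub).count())  # 0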
def sync_job_status(self, project):
    for spider_service_instance in self.spider_service_instances:
        job_status = spider_service_instance.get_job_list(project.project_name)
        job_execution_list = JobExecution.list_uncomplete_job()
        job_execution_dict = dict(
            [(job_execution.service_job_execution_id, job_execution)
             for job_execution in job_execution_list])
        # running
        for job_execution_info in job_status[SpiderStatus.RUNNING]:
            job_execution = job_execution_dict.get(job_execution_info['id'])
            if job_execution and job_execution.running_status == SpiderStatus.PENDING:
                job_execution.start_time = job_execution_info['start_time']
                job_execution.running_status = SpiderStatus.RUNNING
        # finished
        for job_execution_info in job_status[SpiderStatus.FINISHED]:
            job_execution = job_execution_dict.get(job_execution_info['id'])
            if job_execution and job_execution.running_status != SpiderStatus.FINISHED:
                job_execution.start_time = job_execution_info['start_time']
                job_execution.end_time = job_execution_info['end_time']
                job_execution.running_status = SpiderStatus.FINISHED
    # commit
    db.session.commit()
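In practice a sync method like this is driven periodically rather than called by hand. The sketch below shows one way to do that with APScheduler; the 5-second interval, the FakeAgent/FakeProject stand-ins, and the overall wiring are illustrative assumptions, not the project's actual scheduler code.

# Hedged sketch: drive the sync on a fixed interval with APScheduler.
from apscheduler.schedulers.background import BackgroundScheduler
import time

class FakeProject:
    # Stand-in for a Project row loaded from the database.
    def __init__(self, project_name):
        self.project_name = project_name

class FakeAgent:
    # Stand-in for the agent object holding the scrapyd service instances.
    def sync_job_status(self, project):
        print('syncing', project.project_name)

agent = FakeAgent()
projects = [FakeProject('quotes'), FakeProject('news')]

def sync_job_execution_status_job():
    # One pass over all projects; the real app would read Project rows from the DB.
    for project in projects:
        agent.sync_job_status(project)

scheduler = BackgroundScheduler()
scheduler.add_job(sync_job_execution_status_job, 'interval', seconds=5)
scheduler.start()

time.sleep(12)   # let the job fire a couple of times, then stop
scheduler.shutdown()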