def api_remove_fail_worker(params):
    """Remove one failed-worker record together with its related queues.

    The spider is rebuilt from the stored record (schedule/spider class
    paths plus their keyword arguments), ``clear_all()`` is called on it,
    and the record is then removed through ``RecorderManager``.

    Args:
        params: dict of request parameters; must contain ``worker_name``.
            The ``worker_name`` key is popped from the dict.

    Returns:
        A ``result(400, ...)`` value when parameter validation fails, when
        no record exists for the worker, or when a ``ScheduleError``,
        ``SpiderError`` or ``WorkerError`` is raised while rebuilding and
        clearing. Nothing is returned explicitly on success.
    """
    is_ok, error = check_params(params, "worker_name")
    if not is_ok:
        return result(400, "params error", str(error))

    worker_name = params.pop("worker_name")
    recorder = RecorderManager.instance()
    record = recorder.get_fail_worker_record(worker_name)
    if not record:
        return result(400, "not exist this fail worker", worker_name)

    try:
        schedule_params = record.get('schedule_kwargs')
        spider_params = record.get('spider_kwargs')
        schedule_path = record.get('schedule_class')
        spider_path = record.get('spider_class')

        schedule_cls = get_schedule_class(schedule_path)
        schedule = schedule_cls(**schedule_params)
        spider_cls = get_spider_class(spider_path)
        spider = spider_cls(schedule, **spider_params)

        # Clear the spider's state first; the record is dropped only if
        # that did not raise.
        spider.clear_all()
        RecorderManager.instance().remove_last_fail_worker(worker_name)
    except ScheduleError as e:
        return result(400, message="init schedule failed", result=str(e))
    except SpiderError as e:
        return result(400, message="init spider failed", result=str(e))
    except WorkerError as e:
        # NOTE(review): the message says "recover" although this endpoint
        # removes a worker; left unchanged because clients may match on it.
        return result(400, message="recover worker failed", result=str(e))
def api_start_worker(params):
    """Start a worker for a spider built from the request parameters.

    NOTE(review): this file appears to define ``api_start_worker`` more
    than once; a later definition would shadow this one.

    Args:
        params: dict of request parameters. Must contain ``schedule_path``
            and ``spider_path``; both keys are popped. Every remaining key
            starting with ``schedule_`` is passed to the schedule class as
            a keyword argument with that prefix removed, and every key
            starting with ``spider_`` is passed to the spider class the
            same way.

    Returns:
        A ``result(400, ...)`` value when parameter validation fails or
        when a ``ScheduleError`` / ``SpiderError`` is raised. Nothing is
        returned explicitly on success.
    """
    is_ok, errors = check_params(params, 'schedule_path', 'spider_path')
    if not is_ok:
        return result(400, "params error", str(errors))

    schedule_prefix = 'schedule_'
    spider_prefix = 'spider_'
    try:
        schedule_path = params.pop('schedule_path')
        spider_path = params.pop('spider_path')

        # Slice by the real prefix length. The previous hard-coded
        # ``key[8:]`` for the 7-character 'spider_' prefix dropped the
        # first character of every spider keyword name.
        schedule_params = dict(
            (key[len(schedule_prefix):], value)
            for key, value in params.items()
            if key.startswith(schedule_prefix))
        spider_params = dict(
            (key[len(spider_prefix):], value)
            for key, value in params.items()
            if key.startswith(spider_prefix))

        schedule_cls = get_schedule_class(schedule_path)
        schedule = schedule_cls(**schedule_params)
        spider_cls = get_spider_class(spider_path)
        spider = spider_cls(schedule, **spider_params)
        start_worker(spider)
    except ScheduleError as e:
        return result(400, message="init schedule failed", result=str(e))
    except SpiderError as e:
        return result(400, message="init spider failed", result=str(e))
def api_recover_worker(params):
    """Start a worker in recover mode from a stored failed-worker record.

    NOTE(review): this file appears to define ``api_recover_worker`` more
    than once; a later definition would shadow this one.

    Args:
        params: dict of request parameters; must contain ``worker_name``.
            The ``worker_name`` key is popped from the dict.

    Returns:
        A ``result(400, ...)`` value when parameter validation fails, when
        no record exists for the worker, or when a ``ScheduleError`` /
        ``SpiderError`` is raised. Nothing is returned explicitly on
        success.
    """
    is_ok, errors = check_params(params, 'worker_name')
    if not is_ok:
        return result(400, "params error", str(errors))

    try:
        worker_name = params.pop('worker_name')
        recorder = RecorderManager.instance()
        record = recorder.get_fail_worker_record(worker_name)
        if not record:
            return result(400, "not exist this fail worker", worker_name)

        schedule_params = record.get('schedule_kwargs')
        spider_params = record.get('spider_kwargs')
        schedule_path = record.get('schedule_class')
        spider_path = record.get('spider_class')

        schedule_cls = get_schedule_class(schedule_path)
        schedule = schedule_cls(**schedule_params)
        spider_cls = get_spider_class(spider_path)
        spider = spider_cls(schedule, **spider_params)

        # The failure record is removed only after recover_worker()
        # returns without raising.
        recover_worker(spider)
        RecorderManager.instance().remove_last_fail_worker(worker_name)
    # NOTE(review): WorkerError is not handled here, although
    # api_remove_fail_worker maps it to "recover worker failed" -- confirm
    # whether recover_worker() can raise it.
    except ScheduleError as e:
        return result(400, message="init schedule failed", result=str(e))
    except SpiderError as e:
        return result(400, message="init spider failed", result=str(e))
def api_remove_all_fail_worker(params):
    """Remove every recorded failed worker.

    For each record returned by ``get_last_fail_worker()`` the spider is
    rebuilt from the stored class paths and keyword arguments,
    ``clear_all()`` is called on it, and the record is removed. A failure
    while handling one worker is noted in a per-worker outcome list and
    does not stop the loop.

    NOTE(review): this file appears to define
    ``api_remove_all_fail_worker`` more than once; a later definition
    would shadow this one.

    Args:
        params: dict of request parameters; not read by this function.

    Returns:
        ``result(500, "unsupported exception", ...)`` when iterating the
        records or reading a record's fields raises. Otherwise nothing is
        returned explicitly.
    """
    records = RecorderManager.instance().get_last_fail_worker()
    # NOTE(review): remove_rs collects the per-worker outcomes but is never
    # returned or read in this block -- confirm whether a success response
    # is missing.
    remove_rs = []
    try:
        for record in records:
            worker_name = record.get('worker_name')
            schedule_params = record.get('schedule_kwargs')
            spider_params = record.get('spider_kwargs')
            schedule_path = record.get('schedule_class')
            spider_path = record.get('spider_class')

            error_text = ""
            try:
                schedule_cls = get_schedule_class(schedule_path)
                schedule = schedule_cls(**schedule_params)
                spider_cls = get_spider_class(spider_path)
                spider = spider_cls(schedule, **spider_params)
                spider.clear_all()
                RecorderManager.instance().remove_last_fail_worker(
                    worker_name)
            except Exception as e:
                # Best effort: remember the failure and carry on with the
                # remaining workers.
                outcome = "fail"
                error_text = str(e)
            else:
                outcome = "success"

            remove_rs.append({
                "worker_name": worker_name,
                "result": outcome,
                "error": error_text,
            })
    except Exception as e:
        return result(500, "unsupported exception", result=str(e))
def api_recover_worker(params):
    """Start a worker in recover mode from a stored failed-worker record.

    The spider is rebuilt from the record's schedule/spider class paths
    and keyword arguments, handed to ``recover_worker()``, and the record
    is then removed through ``RecorderManager``.

    Args:
        params: dict of request parameters; must contain ``worker_name``.
            The ``worker_name`` key is popped from the dict.

    Returns:
        A ``result(400, ...)`` value when parameter validation fails, when
        no record exists for the worker, or when a ``ScheduleError`` /
        ``SpiderError`` is raised. Nothing is returned explicitly on
        success.
    """
    is_ok, errors = check_params(params, 'worker_name')
    if not is_ok:
        return result(400, "params error", str(errors))

    try:
        worker_name = params.pop('worker_name')
        recorder = RecorderManager.instance()
        record = recorder.get_fail_worker_record(worker_name)
        if not record:
            return result(400, "not exist this fail worker", worker_name)

        schedule_params = record.get('schedule_kwargs')
        spider_params = record.get('spider_kwargs')
        schedule_path = record.get('schedule_class')
        spider_path = record.get('spider_class')

        schedule_cls = get_schedule_class(schedule_path)
        schedule = schedule_cls(**schedule_params)
        spider_cls = get_spider_class(spider_path)
        spider = spider_cls(schedule, **spider_params)

        # The failure record is removed only after recover_worker()
        # returns without raising.
        recover_worker(spider)
        RecorderManager.instance().remove_last_fail_worker(worker_name)
    # NOTE(review): WorkerError is not handled here, although
    # api_remove_fail_worker maps it to "recover worker failed" -- confirm
    # whether recover_worker() can raise it.
    except ScheduleError as e:
        return result(400, message="init schedule failed", result=str(e))
    except SpiderError as e:
        return result(400, message="init spider failed", result=str(e))
def api_start_worker(params):
    """Start a worker for a spider built from the request parameters.

    Args:
        params: dict of request parameters. Must contain ``schedule_path``
            and ``spider_path``; both keys are popped. Every remaining key
            starting with ``schedule_`` is passed to the schedule class as
            a keyword argument with that prefix removed, and every key
            starting with ``spider_`` is passed to the spider class the
            same way.

    Returns:
        A ``result(400, ...)`` value when parameter validation fails or
        when a ``ScheduleError`` / ``SpiderError`` is raised. Nothing is
        returned explicitly on success.
    """
    is_ok, errors = check_params(params, 'schedule_path', 'spider_path')
    if not is_ok:
        return result(400, "params error", str(errors))

    schedule_prefix = 'schedule_'
    spider_prefix = 'spider_'
    try:
        schedule_path = params.pop('schedule_path')
        spider_path = params.pop('spider_path')

        # Slice by the real prefix length. The previous hard-coded
        # ``key[8:]`` for the 7-character 'spider_' prefix dropped the
        # first character of every spider keyword name.
        schedule_params = dict(
            (key[len(schedule_prefix):], value)
            for key, value in params.items()
            if key.startswith(schedule_prefix))
        spider_params = dict(
            (key[len(spider_prefix):], value)
            for key, value in params.items()
            if key.startswith(spider_prefix))

        schedule_cls = get_schedule_class(schedule_path)
        schedule = schedule_cls(**schedule_params)
        spider_cls = get_spider_class(spider_path)
        spider = spider_cls(schedule, **spider_params)
        start_worker(spider)
    except ScheduleError as e:
        return result(400, message="init schedule failed", result=str(e))
    except SpiderError as e:
        return result(400, message="init spider failed", result=str(e))
def api_remove_all_fail_worker(params):
    """Remove every recorded failed worker.

    For each record returned by ``get_last_fail_worker()`` the spider is
    rebuilt from the stored class paths and keyword arguments,
    ``clear_all()`` is called on it, and the record is removed. A failure
    while handling one worker is noted in a per-worker outcome list and
    does not stop the loop.

    Args:
        params: dict of request parameters; not read by this function.

    Returns:
        ``result(500, "unsupported exception", ...)`` when iterating the
        records or reading a record's fields raises. Otherwise nothing is
        returned explicitly.
    """
    records = RecorderManager.instance().get_last_fail_worker()
    # NOTE(review): remove_rs collects the per-worker outcomes but is never
    # returned or read in this block -- confirm whether a success response
    # is missing.
    remove_rs = []
    try:
        for record in records:
            worker_name = record.get('worker_name')
            schedule_params = record.get('schedule_kwargs')
            spider_params = record.get('spider_kwargs')
            schedule_path = record.get('schedule_class')
            spider_path = record.get('spider_class')

            error_text = ""
            try:
                schedule_cls = get_schedule_class(schedule_path)
                schedule = schedule_cls(**schedule_params)
                spider_cls = get_spider_class(spider_path)
                spider = spider_cls(schedule, **spider_params)
                spider.clear_all()
                RecorderManager.instance().remove_last_fail_worker(
                    worker_name)
            except Exception as e:
                # Best effort: remember the failure and carry on with the
                # remaining workers.
                outcome = "fail"
                error_text = str(e)
            else:
                outcome = "success"

            remove_rs.append({
                "worker_name": worker_name,
                "result": outcome,
                "error": error_text,
            })
    except Exception as e:
        return result(500, "unsupported exception", result=str(e))