Example #1
0
def api_remove_fail_worker(params):
    """Clear a failed worker's record together with its related queues.

    The schedule and spider are rebuilt from the stored fail record,
    ``clear_all()`` is called on the spider, and the fail record is then
    removed from the recorder.

    Args:
        params: dict of request parameters; must contain ``worker_name``
            (the key is popped from the dict).

    Returns:
        A ``result(400, ...)`` value when the parameters are invalid, when
        no fail record exists for the worker, or when a ScheduleError,
        SpiderError or WorkerError is raised while clearing.
        NOTE(review): nothing is returned explicitly on the success path
        of this block -- confirm whether callers expect a success result.
    """
    is_ok, error = check_params(params, "worker_name")
    if not is_ok:
        return result(400, "params error", str(error))

    worker_name = params.pop("worker_name")
    record = RecorderManager.instance().get_fail_worker_record(worker_name)
    if not record:
        return result(400, "not exist this fail worker", worker_name)

    # ``except X as e`` is valid on Python 2.6+ and Python 3, unlike the
    # legacy ``except X, e`` form.
    try:
        schedule_params = record.get('schedule_kwargs')
        spider_params = record.get('spider_kwargs')
        schedule_path = record.get('schedule_class')
        spider_path = record.get('spider_class')
        schedule = get_schedule_class(schedule_path)(**schedule_params)
        spider = get_spider_class(spider_path)(schedule, **spider_params)
        spider.clear_all()
        RecorderManager.instance().remove_last_fail_worker(worker_name)
    except ScheduleError as e:
        return result(400, message="init schedule failed", result=str(e))
    except SpiderError as e:
        return result(400, message="init spider failed", result=str(e))
    except WorkerError as e:
        # NOTE(review): the message says "recover" although this handler
        # removes a worker; left unchanged in case clients match on it.
        return result(400, message="recover worker failed", result=str(e))
Example #2
0
def api_start_worker(params):
    """Start a worker that runs a spider under a schedule.

    Args:
        params: dict of request parameters. Must contain ``schedule_path``
            and ``spider_path`` (both are popped). Every remaining key that
            starts with ``schedule_`` is passed, with that prefix removed,
            as a keyword argument to the schedule class; keys starting with
            ``spider_`` are passed the same way to the spider class.

    Returns:
        A ``result(400, ...)`` value when the parameters are invalid or
        when a ScheduleError / SpiderError is raised.
        NOTE(review): nothing is returned explicitly on the success path
        of this block -- confirm whether callers expect a success result.
    """
    is_ok, errors = check_params(params, 'schedule_path', 'spider_path')
    if not is_ok:
        return result(400, "params error", str(errors))

    schedule_prefix = 'schedule_'
    spider_prefix = 'spider_'
    try:
        schedule_path = params.pop('schedule_path')
        spider_path = params.pop('spider_path')
        # Strip exactly the prefix length. The previous hard-coded slice
        # for spider keys was [8:], one more than len('spider_') == 7, so
        # the first character of every spider kwarg name was dropped.
        schedule_params = dict(
            (key[len(schedule_prefix):], value)
            for key, value in params.items()
            if key.startswith(schedule_prefix))
        spider_params = dict(
            (key[len(spider_prefix):], value)
            for key, value in params.items()
            if key.startswith(spider_prefix))
        schedule = get_schedule_class(schedule_path)(**schedule_params)
        spider = get_spider_class(spider_path)(schedule, **spider_params)
        start_worker(spider)
    # ``except X as e`` is valid on Python 2.6+ and Python 3.
    except ScheduleError as e:
        return result(400, message="init schedule failed", result=str(e))
    except SpiderError as e:
        return result(400, message="init spider failed", result=str(e))
Example #3
0
def api_recover_worker(params):
    """Start a worker in recover mode from its stored fail record.

    The schedule and spider are rebuilt from the fail record, handed to
    ``recover_worker``, and the fail record is then removed.

    Args:
        params: dict of request parameters; must contain ``worker_name``
            (the key is popped from the dict).

    Returns:
        A ``result(400, ...)`` value when the parameters are invalid, when
        no fail record exists for the worker, or when a ScheduleError /
        SpiderError is raised.
        NOTE(review): nothing is returned explicitly on the success path
        of this block -- confirm whether callers expect a success result.
    """
    is_ok, errors = check_params(params, 'worker_name')
    if not is_ok:
        return result(400, "params error", str(errors))

    try:
        worker_name = params.pop('worker_name')
        record = RecorderManager.instance().get_fail_worker_record(worker_name)
        if not record:
            return result(400, "not exist this fail worker", worker_name)

        schedule_params = record.get('schedule_kwargs')
        spider_params = record.get('spider_kwargs')
        schedule_path = record.get('schedule_class')
        spider_path = record.get('spider_class')
        schedule = get_schedule_class(schedule_path)(**schedule_params)
        spider = get_spider_class(spider_path)(schedule, **spider_params)
        # NOTE(review): only ScheduleError and SpiderError are handled
        # below; anything else raised here propagates to the caller.
        recover_worker(spider)
        RecorderManager.instance().remove_last_fail_worker(worker_name)
    # ``except X as e`` is valid on Python 2.6+ and Python 3.
    except ScheduleError as e:
        return result(400, message="init schedule failed", result=str(e))
    except SpiderError as e:
        return result(400, message="init spider failed", result=str(e))
Example #4
0
def api_remove_all_fail_worker(params):
    """Remove every recorded failed worker.

    For each fail record the schedule and spider are rebuilt,
    ``clear_all()`` is called on the spider and the record is removed.
    A per-worker outcome dict (``worker_name``, ``result``, ``error``) is
    appended to a local list; a failure on one worker does not stop the
    remaining ones.

    Args:
        params: dict of request parameters (not read by this block).

    Returns:
        ``result(500, ...)`` when an exception escapes the loop itself.
        NOTE(review): the collected per-worker outcomes are not returned
        in this block -- confirm whether a success result is expected.
    """
    records = RecorderManager.instance().get_last_fail_worker()
    remove_rs = []

    # ``except X as e`` is valid on Python 2.6+ and Python 3.
    try:
        for record in records:
            worker_name = record.get('worker_name')
            schedule_params = record.get('schedule_kwargs')
            spider_params = record.get('spider_kwargs')
            schedule_path = record.get('schedule_class')
            spider_path = record.get('spider_class')
            try:
                schedule = get_schedule_class(schedule_path)(**schedule_params)
                spider = get_spider_class(spider_path)(schedule,
                                                       **spider_params)
                spider.clear_all()
                RecorderManager.instance().remove_last_fail_worker(worker_name)
            except Exception as e:
                # Best effort: record the failure and carry on with the
                # next worker.
                outcome = {
                    "worker_name": worker_name,
                    "result": "fail",
                    "error": str(e)
                }
            else:
                outcome = {
                    "worker_name": worker_name,
                    "result": "success",
                    "error": ""
                }
            remove_rs.append(outcome)
    except Exception as e:
        return result(500, "unsupported exception", result=str(e))
Example #5
0
def api_remove_fail_worker(params):
    """Clear a failed worker's record together with its related queues.

    The schedule and spider are rebuilt from the stored fail record,
    ``clear_all()`` is called on the spider, and the fail record is then
    removed from the recorder.

    Args:
        params: dict of request parameters; must contain ``worker_name``
            (the key is popped from the dict).

    Returns:
        A ``result(400, ...)`` value when the parameters are invalid, when
        no fail record exists for the worker, or when a ScheduleError,
        SpiderError or WorkerError is raised while clearing.
        NOTE(review): nothing is returned explicitly on the success path
        of this block -- confirm whether callers expect a success result.
    """
    is_ok, error = check_params(params, "worker_name")
    if not is_ok:
        return result(400, "params error", str(error))

    worker_name = params.pop("worker_name")
    record = RecorderManager.instance().get_fail_worker_record(worker_name)
    if not record:
        return result(400, "not exist this fail worker", worker_name)

    # ``except X as e`` is valid on Python 2.6+ and Python 3, unlike the
    # legacy ``except X, e`` form.
    try:
        schedule_params = record.get('schedule_kwargs')
        spider_params = record.get('spider_kwargs')
        schedule_path = record.get('schedule_class')
        spider_path = record.get('spider_class')
        schedule = get_schedule_class(schedule_path)(**schedule_params)
        spider = get_spider_class(spider_path)(schedule,
                                               **spider_params)
        spider.clear_all()
        RecorderManager.instance().remove_last_fail_worker(worker_name)
    except ScheduleError as e:
        return result(400,
                      message="init schedule failed",
                      result=str(e))
    except SpiderError as e:
        return result(400, message="init spider failed", result=str(e))
    except WorkerError as e:
        # NOTE(review): the message says "recover" although this handler
        # removes a worker; left unchanged in case clients match on it.
        return result(400,
                      message="recover worker failed",
                      result=str(e))
Example #6
0
def api_recover_worker(params):
    """Start a worker in recover mode from its stored fail record.

    The schedule and spider are rebuilt from the fail record, handed to
    ``recover_worker``, and the fail record is then removed.

    Args:
        params: dict of request parameters; must contain ``worker_name``
            (the key is popped from the dict).

    Returns:
        A ``result(400, ...)`` value when the parameters are invalid, when
        no fail record exists for the worker, or when a ScheduleError /
        SpiderError is raised.
        NOTE(review): nothing is returned explicitly on the success path
        of this block -- confirm whether callers expect a success result.
    """
    is_ok, errors = check_params(params, 'worker_name')
    if not is_ok:
        return result(400, "params error", str(errors))

    try:
        worker_name = params.pop('worker_name')
        record = RecorderManager.instance().\
            get_fail_worker_record(worker_name)
        if not record:
            return result(400, "not exist this fail worker", worker_name)

        schedule_params = record.get('schedule_kwargs')
        spider_params = record.get('spider_kwargs')
        schedule_path = record.get('schedule_class')
        spider_path = record.get('spider_class')
        schedule = get_schedule_class(schedule_path)(**schedule_params)
        spider = get_spider_class(spider_path)(schedule, **spider_params)
        # NOTE(review): only ScheduleError and SpiderError are handled
        # below; anything else raised here propagates to the caller.
        recover_worker(spider)
        RecorderManager.instance().remove_last_fail_worker(worker_name)
    # ``except X as e`` is valid on Python 2.6+ and Python 3.
    except ScheduleError as e:
        return result(400, message="init schedule failed", result=str(e))
    except SpiderError as e:
        return result(400, message="init spider failed", result=str(e))
Example #7
0
def api_start_worker(params):
    """Start a worker that runs a spider under a schedule.

    Args:
        params: dict of request parameters. Must contain ``schedule_path``
            and ``spider_path`` (both are popped). Every remaining key that
            starts with ``schedule_`` is passed, with that prefix removed,
            as a keyword argument to the schedule class; keys starting with
            ``spider_`` are passed the same way to the spider class.

    Returns:
        A ``result(400, ...)`` value when the parameters are invalid or
        when a ScheduleError / SpiderError is raised.
        NOTE(review): nothing is returned explicitly on the success path
        of this block -- confirm whether callers expect a success result.
    """
    is_ok, errors = check_params(params, 'schedule_path', 'spider_path')
    if not is_ok:
        return result(400, "params error", str(errors))

    schedule_prefix = 'schedule_'
    spider_prefix = 'spider_'
    try:
        schedule_path = params.pop('schedule_path')
        spider_path = params.pop('spider_path')
        # Strip exactly the prefix length. The previous hard-coded slice
        # for spider keys was [8:], one more than len('spider_') == 7, so
        # the first character of every spider kwarg name was dropped.
        schedule_params = dict(
            (key[len(schedule_prefix):], value)
            for key, value in params.items()
            if key.startswith(schedule_prefix))
        spider_params = dict(
            (key[len(spider_prefix):], value)
            for key, value in params.items()
            if key.startswith(spider_prefix))
        schedule = get_schedule_class(schedule_path)(**schedule_params)
        spider = get_spider_class(spider_path)(schedule, **spider_params)
        start_worker(spider)
    # ``except X as e`` is valid on Python 2.6+ and Python 3.
    except ScheduleError as e:
        return result(400, message="init schedule failed", result=str(e))
    except SpiderError as e:
        return result(400, message="init spider failed", result=str(e))
Example #8
0
def api_remove_all_fail_worker(params):
    """Remove every recorded failed worker.

    For each fail record the schedule and spider are rebuilt,
    ``clear_all()`` is called on the spider and the record is removed.
    A per-worker outcome dict (``worker_name``, ``result``, ``error``) is
    appended to a local list; a failure on one worker does not stop the
    remaining ones.

    Args:
        params: dict of request parameters (not read by this block).

    Returns:
        ``result(500, ...)`` when an exception escapes the loop itself.
        NOTE(review): the collected per-worker outcomes are not returned
        in this block -- confirm whether a success result is expected.
    """
    records = RecorderManager.instance().get_last_fail_worker()
    remove_rs = []

    # ``except X as e`` is valid on Python 2.6+ and Python 3.
    try:
        for record in records:
            worker_name = record.get('worker_name')
            schedule_params = record.get('schedule_kwargs')
            spider_params = record.get('spider_kwargs')
            schedule_path = record.get('schedule_class')
            spider_path = record.get('spider_class')
            try:
                schedule = get_schedule_class(schedule_path)(**schedule_params)
                spider = get_spider_class(spider_path)(schedule,
                                                       **spider_params)
                spider.clear_all()
                RecorderManager.instance().remove_last_fail_worker(worker_name)
            except Exception as e:
                # Best effort: record the failure and carry on with the
                # next worker.
                remove_rs.append({"worker_name": worker_name,
                                  "result": "fail",
                                  "error": str(e)})
            else:
                remove_rs.append({"worker_name": worker_name,
                                  "result": "success",
                                  "error": ""})
    except Exception as e:
        return result(500, "unsupported exception", result=str(e))