Example #1
from collections import defaultdict

from flask import jsonify, request
from scrapyd_api import ScrapydAPI

import scrapyd_utils  # local helpers: format_time(), get_timestamp()


def list_jobs():
    """
    List all jobs of a project on a Scrapyd server.
    """
    # Intended as a Flask view: reads the target server and project
    # from the request's query parameters.
    server_host = request.args.get("server_host")
    server_name = request.args.get("server_name")
    project_name = request.args.get("project_name")

    scrapyd = ScrapydAPI(server_host)
    # list_jobs() groups jobs by state ("pending", "running", "finished");
    # flatten the groups into a single list tagged with the state.
    jobs = scrapyd.list_jobs(project_name)
    lst = []
    for job_status, job_list in jobs.items():
        for job in job_list:
            item = {
                "status": job_status,
                "spider": job["spider"],
                "start_time": scrapyd_utils.format_time(job.get("start_time", "")),
                "end_time": scrapyd_utils.format_time(job.get("end_time", "")),
                "timestamp": scrapyd_utils.get_timestamp(job.get("end_time"), job.get("start_time")),
                "job_id": job["id"]
            }
            lst.append(item)

    data = {
        "server_host": server_host,
        "server_name": server_name,
        "project_name": project_name,
        "jobs": lst,
    }

    return jsonify(data)
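

# A sketch of the JSON the view above returns (the route path and query
# string below are assumptions; wiring to a Flask route is not shown here):
#   GET /jobs?server_host=http://127.0.0.1:6800&server_name=local&project_name=demo
#   -> {"server_host": "...", "server_name": "local", "project_name": "demo",
#       "jobs": [{"status": "finished", "spider": "...", "start_time": "...",
#                 "end_time": "...", "timestamp": ..., "job_id": "..."}]}
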
def get_server_status(server_list):
    """
    获取服务器状态  版本不一致
    scrapyd=1.2.0
    服务器使用 scrapyd=1.1.0 没有接口 daemon_status

    :param server_list:
    :return:
    """
    servers = []

    for count, item in enumerate(server_list, start=1):
        server_name = item["server_name"]
        server_host = item["server_host"]

        scrapyd = ScrapydAPI(server_host)
        server_status = scrapyd.daemon_status()

        # Fall back for older scrapyd versions without daemon_status:
        # infer health from list_projects and count jobs per state ourselves.
        if server_status.get("status") == "error":

            projects = scrapyd.list_projects()
            print("{}: {}".format(server_host, projects))

            server_status = {
                "status": "ok" if projects else "error",
            }

            job_counts = defaultdict(int)
            for project in set(projects):
                jobs = scrapyd.list_jobs(project)

                # Accumulate pending/running/finished counts across projects.
                for state, job_list in jobs.items():
                    job_counts[state] += len(job_list)

            server_status.update(job_counts)

        servers.append({
            "index": count,
            "server_name": server_name,
            "server_host": server_host,
            "server_status": server_status,
        })
    return servers
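

# Usage sketch; the server entry below is an illustrative assumption:
#   get_server_status([{"server_name": "local", "server_host": "http://127.0.0.1:6800"}])
#   -> [{"index": 1, "server_name": "local", "server_host": "http://127.0.0.1:6800",
#        "server_status": {"status": "ok", "pending": 0, "running": 1, "finished": 4}}]
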
def cancel_all_spider(server):
    """
    取消服务器上所有的爬虫任务
    :param server:
    :return:
    """
    scrapyd = ScrapydAPI(server)
    projects = scrapyd.list_projects()
    for project in projects:
        jobs = scrapyd.list_jobs(project)
        # jobs maps each state ("pending", "running", "finished") to a job list.
        for job_status, job_list in jobs.items():
            print(job_status, job_list)
            for job in job_list:
                uid = job.get("id")
                print("{}: {}".format(project, uid))

                scrapyd.cancel(project, uid)
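

# A minimal driver sketch, assuming a local Scrapyd instance; the host value
# below is an assumption for illustration only.
if __name__ == "__main__":
    demo_host = "http://127.0.0.1:6800"
    for server in get_server_status([{"server_name": "local", "server_host": demo_host}]):
        print(server)
    # Uncomment to stop every job on the demo server:
    # cancel_all_spider(demo_host)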