コード例 #1
0
    def get(self, *args, **kwargs):
        """Return a paginated JSON list of workers with checked-in data.

        Pagination (``skip`` / ``limit``) is read from the query string
        through ``SkipLimitSchema``. Every returned worker gets an extra
        ``status`` entry: "online" when it was last seen less than
        ``OFFLINE_DELAY`` seconds ago, "offline" otherwise.
        """
        pagination = SkipLimitSchema().load(request.args.to_dict())
        skip = pagination["skip"]
        limit = pagination["limit"]

        # no filtering: every known worker is listed
        query = {}
        count = Workers().count_documents(query)

        # restrict the returned documents to these fields (``_id`` excluded)
        fields = {
            "_id": 0,
            "name": 1,
            "username": 1,
            "offliners": 1,
            "resources": 1,
            "last_seen": 1,
        }
        cursor = Workers().find(query, fields)
        cursor = cursor.sort("name", pymongo.ASCENDING)
        cursor = cursor.skip(skip).limit(limit)

        workers = []
        for worker in cursor:
            # time elapsed since the worker last reported in
            elapsed = getnow() - worker["last_seen"]
            if elapsed.total_seconds() < OFFLINE_DELAY:
                worker["status"] = "online"
            else:
                worker["status"] = "offline"
            workers.append(worker)

        meta = {"skip": skip, "limit": limit, "count": count}
        return jsonify({"meta": meta, "items": workers})
コード例 #2
0
ファイル: requested_task.py プロジェクト: shuntaroy12/zimfarm
    def get(self, token: AccessToken.Payload):
        """List the requested task to be retrieved by a worker (auth-only).

        The ``worker`` query parameter names the calling worker. When both a
        token and a worker name are present, the worker's ``last_seen`` field
        is refreshed before the request arguments are validated.

        The response follows the usual ``meta`` / ``items`` shape and holds
        at most one task.
        """
        raw_args = request.args.to_dict()
        worker_name = raw_args.get("worker")

        # record we've seen a worker, if applicable
        if token and worker_name:
            worker_filter = {"name": worker_name, "username": token.username}
            Workers().update_one(
                worker_filter, {"$set": {"last_seen": getnow()}}
            )

        params = WorkerRequestedTaskSchema().load(raw_args)

        task = find_requested_task_for(
            token.username,
            worker_name,
            params["avail_cpu"],
            params["avail_memory"],
            params["avail_disk"],
        )

        # zero or one task is returned; count mirrors the list length
        items = [task] if task else []
        meta = {"skip": 0, "limit": 1, "count": len(items)}
        return jsonify({"meta": meta, "items": items})
コード例 #3
0
ファイル: user.py プロジェクト: zux-hidden/zimfarm
 def delete(self, token: AccessToken.Payload, username: str):
     """Delete the user named ``username`` along with its workers.

     Raises ``errors.NotFound`` when no user document was deleted.
     Returns an empty ``NO_CONTENT`` response on success.
     """
     result = Users().delete_one({"username": username})
     if result.deleted_count == 0:
         raise errors.NotFound()

     # also remove every worker registered under that username
     Workers().delete_many({"username": username})
     return Response(status=HTTPStatus.NO_CONTENT)
コード例 #4
0
    def put(self, name: str, *args, **kwargs):
        """Record a check-in for the worker called ``name``.

        The JSON body is validated with ``WorkerCheckInSchema``; the worker
        document stored under that name is then replaced (or created) and
        the check-in is broadcast.

        Raises ``InvalidRequestJSON`` when the body fails validation.
        Returns an empty ``NO_CONTENT`` response on success.
        """
        try:
            payload = WorkerCheckInSchema().load(request.get_json())
        except ValidationError as exc:
            raise InvalidRequestJSON(exc.messages)

        document = {
            "name": name,
            "username": payload["username"],
            "selfish": payload["selfish"],
            "resources": {
                resource: payload[resource]
                for resource in ("cpu", "memory", "disk")
            },
            "offliners": payload["offliners"],
            # platforms is optional in the payload
            "platforms": payload.get("platforms", {}),
            "last_seen": getnow(),
        }

        # upsert: a first check-in creates the worker document
        Workers().replace_one({"name": name}, document, upsert=True)

        BROADCASTER.broadcast_worker_checkin(document)

        return Response(status=HTTPStatus.NO_CONTENT)
コード例 #5
0
ファイル: scheduling.py プロジェクト: zux-hidden/zimfarm
def find_requested_task_for(username, worker_name, avail_cpu, avail_memory,
                            avail_disk):
    """Optimal requested_task to run now for a given worker.

    Accounts for:
     - longest tasks this worker can do (total resources)
     - available resources now (sent)
     - estimated duration to reclaim resources for longest tasks

    Args:
        username: username the worker is registered under; used together
            with ``worker_name`` to look the worker up.
        worker_name: name of the worker asking for a task.
        avail_cpu: CPU the worker reports as currently available.
        avail_memory: memory the worker reports as currently available.
        avail_disk: disk the worker reports as currently available.

    Returns:
        The selected requested task, or ``None`` when the worker is not
        checked-in, when nothing is doable by it, or when nothing fits in
        the currently available resources (the worker will have to wait).
    """

    # get total resources for that worker
    worker = Workers().find_one(
        {
            "username": username,
            "name": worker_name
        },
        {
            "resources": 1,
            "offliners": 1,
            "last_seen": 1,
            "name": 1,
            "selfish": 1,
            "platforms": 1,
        },
    )

    # worker is not checked-in
    if worker is None:
        logger.error(f"worker `{worker_name}` not checked-in")
        return None

    # retrieve list of tasks we are currently running with associated resources
    running_tasks = get_currently_running_tasks(worker_name)

    # find all requested tasks that this worker can do with its total resources
    #   sorted by priorities
    #   sorted by max durations
    tasks_worker_could_do = get_reqs_doable_by(worker)

    # filter-out requested tasks that are not doable now due to platform limitations
    worker_platform_filter = functools.partial(
        does_platform_allow_worker_to_run, worker, running_tasks)
    # filter() is lazy: consuming the first candidate below leaves only the
    # remaining ones (task[1+]) for the fallback search at the end
    tasks_worker_could_do = filter(worker_platform_filter,
                                   tasks_worker_could_do)

    # record available resources
    available_resources = {
        "cpu": avail_cpu,
        "memory": avail_memory,
        "disk": avail_disk
    }

    try:
        # candidate is task[0]
        candidate = next(tasks_worker_could_do)
    except StopIteration:
        logger.debug(
            f"no request doable by worker (selfish={worker.get('selfish')})")
        return None

    # can worker do task[0] ?
    #   if yes -> return task[0]
    if can_run(candidate, available_resources):
        logger.debug("first candidate can be run!")
        return candidate

    # we don't have enough resources for task[0].

    # find out missing resources (never negative)
    needed = candidate["config"]["resources"]
    missing_cpu = max(needed["cpu"] - avail_cpu, 0)
    missing_memory = max(needed["memory"] - avail_memory, 0)
    missing_disk = max(needed["disk"] - avail_disk, 0)
    logger.debug(
        f"missing cpu:{missing_cpu}, mem:{missing_memory}, dsk:{missing_disk}")

    # pile-up all of those which we need to complete to have enough resources
    preventing_tasks = []
    # running totals of what the piled-up tasks would free once completed;
    # kept incrementally so each running task is only visited once
    reclaimed_cpu = reclaimed_memory = reclaimed_disk = 0
    # sorted by ETA as it's the order in which they are going to complete
    for task in sorted(running_tasks, key=lambda x: x["eta"]):
        preventing_tasks.append(task)
        task_resources = task["config"]["resources"]
        reclaimed_cpu += task_resources["cpu"]
        reclaimed_memory += task_resources["memory"]
        reclaimed_disk += task_resources["disk"]
        if (reclaimed_cpu >= missing_cpu
                and reclaimed_memory >= missing_memory
                and reclaimed_disk >= missing_disk):
            # stop when we'd have reclaimed our missing resources
            break

    if not preventing_tasks:
        # we should not get there: no preventing task yet we don't have our total
        # resources available? problem.
        logger.error("we have no preventing tasks. oops")
        return None

    logger.debug(f"we have {len(preventing_tasks)} tasks blocking out way")
    # the last piled-up task is the latest to complete: resources open then
    opening_eta = preventing_tasks[-1]["eta"]
    logger.debug(f"opening_eta:{opening_eta}")

    # get the number of available seconds from now to that ETA
    available_time = (opening_eta - getnow()).total_seconds()
    logger.debug("we have approx. {}mn to reclaim resources".format(
        available_time / 60))

    # loop on task[1+] to find the first task which can fit
    temp_candidate = get_possible_task_with(tasks_worker_could_do,
                                            available_resources,
                                            available_time)
    if temp_candidate:
        return temp_candidate

    # if none in the loop are possible, return None (worker will wait)
    logger.debug(
        "unable to fit anything, you'll have to wait for task to complete")
    return None
コード例 #6
0
def list_of_requested_tasks(token: AccessToken.Payload = None):
    """Return a paginated JSON list of requested tasks.

    Optional query-string filters: ``schedule_name`` (repeatable),
    ``priority`` (minimum), ``worker``, ``matching_cpu`` /
    ``matching_memory`` / ``matching_disk`` (resource ceilings) and
    ``matching_offliners`` (repeatable).

    When both a token and a ``worker`` parameter are supplied, that
    worker's ``last_seen`` field is refreshed first.
    """
    raw_args = request.args.to_dict()
    worker = raw_args.get("worker")

    # record we've seen a worker, if applicable
    if token and worker:
        worker_filter = {"name": worker, "username": token.username}
        Workers().update_one(worker_filter, {"$set": {"last_seen": getnow()}})

    # repeatable parameters are read as lists via getlist()
    for list_param in ("matching_offliners", "schedule_name"):
        raw_args[list_param] = request.args.getlist(list_param)
    params = RequestedTaskSchema().load(raw_args)

    # unpack query parameters
    skip = params["skip"]
    limit = params["limit"]
    schedule_names = params["schedule_name"]
    priority = params.get("priority")

    # build the database query from whichever filters were supplied
    query = {}
    if schedule_names:
        query["schedule_name"] = {"$in": schedule_names}

    if priority:
        query["priority"] = {"$gte": priority}

    if worker:
        # tasks with no assigned worker, or assigned to this one
        query["worker"] = {"$in": [None, worker]}

    for resource in ("cpu", "memory", "disk"):
        ceiling_key = f"matching_{resource}"
        if ceiling_key not in params:
            continue
        query[f"config.resources.{resource}"] = {"$lte": params[ceiling_key]}

    matching_offliners = params.get("matching_offliners")
    if matching_offliners:
        query["config.task_name"] = {"$in": matching_offliners}

    # fields included in each returned task
    projection = dict.fromkeys(
        (
            "_id",
            "status",
            "schedule_name",
            "config.task_name",
            "config.resources",
            "timestamp.requested",
            "requested_by",
            "priority",
            "worker",
        ),
        1,
    )
    ordering = [
        ("priority", pymongo.DESCENDING),
        ("timestamp.reserved", pymongo.DESCENDING),
        ("timestamp.requested", pymongo.DESCENDING),
    ]
    cursor = RequestedTasks().find(query, projection).sort(ordering)
    cursor = cursor.skip(skip).limit(limit)

    # count covers every matching task, not just the returned page
    count = RequestedTasks().count_documents(query)

    meta = {"skip": skip, "limit": limit, "count": count}
    return jsonify({"meta": meta, "items": list(cursor)})