コード例 #1
0
    def cleanup_nodes(self) -> bool:
        """Run one garbage-collection pass over the nodes of this scaleset.

        A scaleset whose state is ``ScalesetState.halt`` is halted and no
        node is inspected.  Otherwise two sets of nodes are collected:
        nodes in one of the ``NodeState.ready_for_reset()`` states, and
        free nodes returned by ``Node.search_outdated``.

        Outdated nodes reporting version ``"1.0.0"`` are queued for
        reimaging; every other outdated node gets a stop command saved
        for it.  Nodes from the first set are deleted outright when
        ``scaleset_node_exists()`` is false, queued for deletion when in
        the ``shutdown`` or ``halt`` state, and queued for reimaging
        otherwise.

        Returns:
            ``False`` when neither search returned any node; ``True`` in
            every other case, including the halt path and the case where
            the scaleset update raised ``UnableToUpdate``.
        """
        if self.state == ScalesetState.halt:
            self.halt()
            return True

        resettable = Node.search_states(
            scaleset_id=self.scaleset_id, states=NodeState.ready_for_reset()
        )
        stale = Node.search_outdated(
            scaleset_id=self.scaleset_id, states=[NodeState.free]
        )

        if not resettable and not stale:
            logging.debug(
                "scaleset node gc done (no nodes) %s", self.scaleset_id
            )
            return False

        pending_delete = []
        pending_reimage = []

        for stale_node in stale:
            # NOTE(review): presumably 1.0.0 agents cannot act on a stop
            # command, hence the reimage instead -- confirm.
            if stale_node.version == "1.0.0":
                pending_reimage.append(stale_node)
                continue
            NodeMessage(
                agent_id=stale_node.machine_id,
                message=NodeCommand(stop=StopNodeCommand()),
            ).save()

        for candidate in resettable:
            # delete nodes that are not waiting on the scaleset GC
            if not candidate.scaleset_node_exists():
                candidate.delete()
                continue
            if candidate.state in (NodeState.shutdown, NodeState.halt):
                pending_delete.append(candidate)
            else:
                pending_reimage.append(candidate)

        # Deletion runs before reimaging; the first operation that raises
        # UnableToUpdate ends the pass and the rest is skipped.
        try:
            if pending_delete:
                self.delete_nodes(pending_delete)
                for removed in pending_delete:
                    removed.state = NodeState.halt
                    removed.save()

            if pending_reimage:
                self.reimage_nodes(pending_reimage)
        except UnableToUpdate:
            logging.info(
                "scaleset update already in progress: %s", self.scaleset_id
            )
        return True
コード例 #2
0
ファイル: nodes.py プロジェクト: ranweiler/onefuzz
    def add_ssh_public_key(self, public_key: str) -> Result[None]:
        """Send an ``add_ssh_key`` command carrying ``public_key`` to this node.

        Args:
            public_key: The SSH public key text.  A trailing newline is
                appended when the key does not already end with one.

        Returns:
            An ``Error`` with ``ErrorCode.INVALID_REQUEST`` when this node
            has no ``scaleset_id``; ``None`` once the command has been
            handed to ``send_message``.
        """
        if self.scaleset_id is None:
            return Error(
                code=ErrorCode.INVALID_REQUEST,
                errors=["only able to add ssh keys to scaleset nodes"],
            )

        normalized_key = (
            public_key if public_key.endswith("\n") else public_key + "\n"
        )
        command = NodeCommand(
            add_ssh_key=NodeCommandAddSshKey(public_key=normalized_key)
        )
        self.send_message(command)
        return None
コード例 #3
0
ファイル: nodes.py プロジェクト: ranweiler/onefuzz
    def stop_task(cls, task_id: UUID) -> None:
        """Ask every node associated with ``task_id`` to stop that task.

        Each node returned by ``NodeTasks.get_nodes_by_task_id`` is sent a
        ``stop_task`` command, then ``stop_if_complete()`` is called on it.
        When that call returns a falsy value, an info message is logged
        saying the node was not reimaged because of other tasks.

        Args:
            task_id: Identifier of the task to stop.
        """
        for node in NodeTasks.get_nodes_by_task_id(task_id):
            command = NodeCommand(
                stop_task=StopTaskNodeCommand(task_id=task_id)
            )
            node.send_message(command)

            if node.stop_if_complete():
                continue

            logging.info(
                "nodes: stopped task on node, "
                "but not reimaging due to other tasks: task_id:%s machine_id:%s",
                task_id,
                node.machine_id,
            )
コード例 #4
0
def post(req: func.HttpRequest) -> func.HttpResponse:
    """Answer a ``CanScheduleRequest`` from a node.

    The request is rejected with ``not_ok`` when it fails to parse or when
    no node matches ``machine_id``.  Otherwise the response is a
    ``CanSchedule`` where:

    * ``work_stopped`` is true when the task lookup returned an ``Error``
      or the task's state is not ``TaskState.scheduled``;
    * ``allowed`` is true only when the node is not outdated and
      ``work_stopped`` is false.

    An outdated node additionally has a stop command saved for it.
    """
    request = parse_request(CanScheduleRequest, req)
    if isinstance(request, Error):
        return not_ok(request, context="CanScheduleRequest")

    node = Node.get_by_machine_id(request.machine_id)
    if not node:
        missing = Error(
            code=ErrorCode.UNABLE_TO_FIND, errors=["unable to find node"]
        )
        return not_ok(missing, context=request.machine_id)

    outdated = node.is_outdated()
    if outdated:
        logging.info(
            "received can_schedule request from outdated node '%s' version '%s'",
            node.machine_id,
            node.version,
        )
        NodeMessage(
            agent_id=node.machine_id,
            message=NodeCommand(stop=StopNodeCommand()),
        ).save()

    task = Task.get_by_task_id(request.task_id)

    # The isinstance check must come first: an Error has no usable state.
    work_stopped = isinstance(task, Error) or (
        task.state != TaskState.scheduled
    )
    allowed = not (outdated or work_stopped)

    return ok(CanSchedule(allowed=allowed, work_stopped=work_stopped))
コード例 #5
0
ファイル: nodes.py プロジェクト: ranweiler/onefuzz
 def stop(self, done: bool = False) -> None:
     """Call ``to_reimage`` on this node, then send it a stop command.

     Args:
         done: Forwarded unchanged to ``to_reimage``.
     """
     self.to_reimage(done=done)
     stop_command = NodeCommand(stop=StopNodeCommand())
     self.send_message(stop_command)
コード例 #6
0
ファイル: nodes.py プロジェクト: ranweiler/onefuzz
 def send_stop_if_free(self) -> None:
     """Send a ``stop_if_free`` command if the node's version allows it.

     Nothing is sent unless ``is_minimum_version`` accepts this node's
     version against the minimum ``"2.16.1"``.
     """
     if not is_minimum_version(version=self.version, minimum="2.16.1"):
         return

     command = NodeCommand(stop_if_free=NodeCommandStopIfFree())
     self.send_message(command)
コード例 #7
0
 def stop(self) -> None:
     """Call ``to_reimage`` on this node, then send it a stop command."""
     self.to_reimage()
     stop_command = NodeCommand(stop=StopNodeCommand())
     self.send_message(stop_command)