Esempio n. 1
0
    def handle_nodeset(self, data):
        """Record a node's self-reported state and refresh its repositories.

        ``data`` is the node's report. This method reads its "name" and
        "repositories" entries, and its "activetasks" entry when the node is
        expected to be running at least one task.
        """
        node_name = data["name"]

        # Forget the type of every repository this node reported last time.
        # The node may be shutting some of them down; a repository that is
        # still alive is re-registered from the new report further below, and
        # jobs are not aborted just because a repository went away.
        if node_name in self.nodes:
            for stale_repo in self.nodes[node_name]["repositories"]:
                if stale_repo not in self.repositories:
                    continue
                self.repositories[stale_repo].pop("type", None)

        # Any task we believe is running on this node but that the node does
        # not list as active has vanished (the node may have crashed), so it
        # is closed out as terminated.
        for expected_task in self.tasks.get_tasks_running_on(node_name):
            if expected_task["name"] in data["activetasks"]:
                continue
            expected_task["status"] = "complete"
            expected_task["success"] = "terminated"
            self.task_finalize_and_write(expected_task)

        # Keep the whole report. It holds more than we need, but it must
        # include the repository paths so they can be shut down if the node
        # stops responding later.
        self.nodes[node_name] = data

        # Register (or re-register) every repository the node reported.
        for repo_name, reported in data["repositories"].items():
            entry = self.repositories.setdefault(repo_name, {})
            entry["type"] = reported["type"]
            entry["local"] = reported["local"]
            entry["node"] = node_name

        coordinator_db.repositories_save(self.repositories)
Esempio n. 2
0
    def task_child_completion_notify(self, taskname):
        """Release repositories claimed by a finished task, then notify its parent.

        Nothing happens unless ``self.task_and_children_complete(taskname)``
        is truthy. When it is, the "task" entry is removed from every
        repository whose "task" equals ``taskname``, the repository table is
        saved, and the same notification is sent for the task's parent if it
        has one.
        """
        if not self.task_and_children_complete(taskname):
            # If this task (or one of its children) is unfinished, its parent's
            # children cannot all be finished either, so there is nothing to
            # propagate upwards.
            return

        # There are so few repositories that a plain scan over all of them is
        # good enough; no index from task to repository is kept.
        for repo_entry in self.repositories.values():
            if "task" not in repo_entry or repo_entry["task"] != taskname:
                continue
            del repo_entry["task"]
            # TODO: clean up repository here

        coordinator_db.repositories_save(self.repositories)

        task_record = self.tasks.get_task_by_name(taskname)
        parent_name = task_record["parent"] if "parent" in task_record else None
        if parent_name:
            self.task_child_completion_notify(parent_name)
Esempio n. 3
0
    def update_node(self, node):
        """Select an idle task that can run on ``node`` and build its start command.

        Idle tasks are examined in the order ``self.tasks.get_tasks_idle()``
        yields them, and the first task whose repository requirements can all
        be met is started. Starting a task marks each chosen repository with
        the task's name, records the repository under ``repo_<request name>``
        in the task's environment, fills in the task's node, status, start
        time and log path, and persists both the task and the repository
        table.

        Args:
            node: Name of the node to find work for. It is compared against
                the "node" entry of a repository whenever a request asks for
                a local repository.

        Returns:
            A dict describing a "task-run" command for the chosen task, or
            None when no idle task can currently be started on ``node``.
        """
        # TODO: look for tasks to cancel
        # TODO: test to see if we should reboot the node

        taskToStart = None
        taskToStartRepos = None

        # TODO: this should be a priority test; for now the first startable task wins
        for task in self.tasks.get_tasks_idle():
            # TODO: ensure resources are available on this node

            # Task can be used; ensure we have the right repos available
            chosenRepos = {}
            for requestRepoName, requestRepoData in task["repositories"].items():
                if "request" in requestRepoData:
                    # Requesting an entire new repo.
                    # A repo already picked for an earlier request of this same
                    # task is excluded, so two requests for the same type can
                    # never be handed the same repository.
                    alreadyChosen = set(chosenRepos.values())

                    repoOptions = []
                    for reponame, repodata in self.repositories.items():
                        if reponame in alreadyChosen:
                            # repo was just picked for another request of this task
                            continue

                        if "task" in repodata:
                            # repo is currently being used
                            continue

                        if "type" not in repodata:
                            # repo is currently not available
                            continue

                        if repodata["type"] != requestRepoData["request"]:
                            # repo is of the wrong kind
                            continue

                        if "local" in requestRepoData and repodata["node"] != node:
                            # repo request is for a local repo, and this isn't one
                            continue

                        # success! this repo can be used
                        repoOptions.append(reponame)

                    if not repoOptions:
                        # no repo available, abort
                        chosenRepos = None
                        break

                    chosenRepos[requestRepoName] = random.choice(repoOptions)
                elif "local" in requestRepoData:
                    # Merely verifying that an already-claimed repo is local
                    if ("repo_" + requestRepoName) not in task["environment"]:
                        # no such repo even exists, this is confusing ;.;
                        chosenRepos = None
                        break

                    reponame = task["environment"]["repo_" + requestRepoName]
                    if reponame in self.repositories and self.repositories[reponame]["node"] != node:
                        # repo mismatch, abort
                        chosenRepos = None
                        break

            if chosenRepos is None:
                # couldn't find a repo :(
                continue

            # First startable task wins; no need to inspect the remaining ones.
            taskToStart = task
            taskToStartRepos = chosenRepos
            break

        if taskToStart is None:
            return None

        # We are actually starting!

        # Lock repos, add to environment
        for envname, reponame in taskToStartRepos.items():
            self.repositories[reponame]["task"] = taskToStart["name"]
            taskToStart["environment"]["repo_" + envname] = reponame

        taskToStart["node"] = node
        taskToStart["status"] = "working"
        taskToStart["time-start"] = time.time()
        taskToStart["log"] = logdir + taskToStart["name"] + ".log"

        self.tasks.update_task(taskToStart)
        coordinator_db.repositories_save(self.repositories)

        # NOTE(review): this requires a "repo_env" entry in the task's
        # environment, i.e. presumably every task has a repository named
        # "env"; a task without one raises KeyError here - confirm.
        startCommand = {
            "command": "task-run",
            "name": taskToStart["name"],
            "path": taskToStart["environment"]["repo_env"],
            "log": taskToStart["log"],
        }

        # Forward whichever executable description the task carries.
        if "executable_pyscript" in taskToStart:
            startCommand["executable_pyscript"] = taskToStart["executable_pyscript"]
        if "executable_pyfile" in taskToStart:
            startCommand["executable_pyfile"] = taskToStart["executable_pyfile"]

        return startCommand