Example #1
0
class BuildDispatcher(multiprocessing.Process):
    """
    Dispatcher process that turns frontend build tasks into running workers.

    1) Fetch build task from frontend
    2) Get a free VM for it
    3) Create a worker for the job
    4) Start it asynchronously and go to 1)
    """
    def __init__(self, opts):
        """
        Set up logging, the frontend client, the VM manager and the
        architecture/group lookup tables.

        :param opts: backend options object; this class reads its
            ``build_groups``, ``frontend_base_url``, ``frontend_auth`` and
            ``sleeptime`` attributes and passes the whole object on to the
            helpers it creates.
        """
        multiprocessing.Process.__init__(self, name="build-dispatcher")

        self.opts = opts
        self.log = get_redis_logger(self.opts, "backend.build_dispatcher",
                                    "build_dispatcher")
        self.frontend_client = FrontendClient(self.opts, self.log)
        self.vm_manager = VmManager(self.opts)

        # Maps e.g. x86_64 && i386 => PC
        self.arch_to_group = dict()
        # PC => max N builders per user
        self.group_to_usermax = dict()

        # Fills both mappings above from opts.build_groups.
        self.init_internal_structures()

    def get_vm_group_id(self, arch):
        """
        Return the id of the VM group configured for ``arch``.

        :param arch: architecture name, looked up in ``arch_to_group``
        :raises DispatchBuildError: when no build group lists ``arch``
        """
        try:
            return self.arch_to_group[arch]
        except KeyError:
            raise DispatchBuildError("Unknown architecture {0}".format(arch))

    def update_process_title(self, msg=None):
        """
        Set the process title to "Build dispatcher", optionally followed by
        " - <msg>".

        :param msg: optional status text appended to the title
        """
        proc_title = "Build dispatcher"
        if msg:
            proc_title += " - " + msg
        setproctitle(proc_title)

    def init_internal_structures(self):
        """
        (Re)build ``arch_to_group`` and ``group_to_usermax`` from
        ``opts.build_groups``.

        Each group is read as a mapping with the keys ``"id"``, ``"archs"``
        and ``"max_vm_per_user"``.
        """
        self.arch_to_group = dict()
        self.group_to_usermax = dict()
        for group in self.opts.build_groups:
            group_id = group["id"]
            for arch in group["archs"]:
                self.arch_to_group[arch] = group_id
                self.log.debug("mapping {0} to {1} group".format(
                    arch, group_id))

            self.log.debug("user might use only {0}VMs for {1} group".format(
                group["max_vm_per_user"], group_id))
            self.group_to_usermax[group_id] = group["max_vm_per_user"]

    def load_job(self):
        """
        Retrieve a single build job from frontend.

        Polls ``<frontend_base_url>/backend/waiting/`` until the JSON
        response carries a non-empty ``"build"`` entry, sleeping
        ``opts.sleeptime`` seconds after every unsuccessful attempt.

        :return: ``BuildJob`` created from the received task
        """
        self.log.info("Waiting for a job from frontend...")
        get_task_init_time = time.time()

        task = None
        while not task:
            # Expose the waiting time in the process title.
            self.update_process_title(
                "Waiting for a job from frontend for {} s".format(
                    int(time.time() - get_task_init_time)))
            try:
                r = get("{0}/backend/waiting/".format(
                    self.opts.frontend_base_url),
                        auth=("user", self.opts.frontend_auth))
                task = r.json().get("build")
            except (RequestException, ValueError) as error:
                # Request failures and undecodable responses are logged and
                # the poll is retried.
                self.log.exception(
                    "Retrieving build job from {} failed with error: {}".
                    format(self.opts.frontend_base_url, error))
            finally:
                # Back off both after an error and after an empty answer.
                if not task:
                    time.sleep(self.opts.sleeptime)

        self.log.info("Got new build job {}".format(task['task_id']))
        return BuildJob(task, self.opts)

    def can_build_start(self, job):
        """
        Announce to the frontend that the build is going to start so that
        it can confirm that and draw out another job for building.

        :param job: job to announce; its ``build_id``, ``chroot`` and
            ``task_id`` attributes are used

        Returns
        -------
        True if the build can start
        False if the build can not start (build is cancelled)
        """
        try:
            can_build_start = self.frontend_client.starting_build(
                job.build_id, job.chroot)
        except (RequestException, ValueError) as error:
            self.log.exception(
                "Communication with Frontend to confirm build start failed with error: {}"
                .format(error))
            return False

        if not can_build_start:
            # No exception is being handled here, so log.exception() would
            # append a meaningless "NoneType: None" traceback; log.error()
            # keeps the same severity without it.
            self.log.error("Frontend forbade to start the job {}".format(
                job.task_id))

        return can_build_start

    def clean_finished_workers(self, workers):
        """
        Join every worker that is no longer alive and drop it from
        ``workers``.

        :param workers: list of started workers; modified in place
        """
        # Iterate over a snapshot: removing from the list that is being
        # iterated would skip the element following each removed worker.
        for worker in list(workers):
            if not worker.is_alive():
                worker.join(5)
                workers.remove(worker)
                self.log.info("Removed finished worker {} for job {}".format(
                    worker.worker_id, worker.job.task_id))

    def run(self):
        """
        Executes build dispatching process.

        Loops forever: reaps finished workers, loads a job, acquires a VM
        for it, asks the frontend for confirmation and starts a worker.
        """
        self.log.info("Build dispatching started.")
        self.update_process_title()

        workers = []
        next_worker_id = 1
        while True:
            self.clean_finished_workers(workers)

            job = self.load_job()

            # NOTE(review): only NoVmAvailable is caught below; unless
            # DispatchBuildError derives from it, a job with an unknown
            # architecture ends this loop -- confirm that is intended.
            try:
                self.log.info("Acquiring VM for job {}...".format(str(job)))
                vm_group_id = self.get_vm_group_id(job.arch)
                vm = self.vm_manager.acquire_vm(vm_group_id, job.project_owner,
                                                os.getpid(), job.task_id,
                                                job.build_id, job.chroot)
            except NoVmAvailable as error:
                self.log.info(
                    "No available resources for task {} (Reason: {}). Deferring job."
                    .format(job.task_id, error))
                self.frontend_client.defer_build(job.build_id, job.chroot)
                continue
            else:
                self.log.info("VM {} for job {} successfully acquired".format(
                    vm.vm_name, job.task_id))

            if not self.can_build_start(job):
                # The build will not run, so give the VM back.
                self.vm_manager.release_vm(vm.vm_name)
                continue

            worker = Worker(opts=self.opts,
                            frontend_client=self.frontend_client,
                            vm_manager=self.vm_manager,
                            worker_id=next_worker_id,
                            vm=vm,
                            job=job)
            workers.append(worker)
            worker.start()
            self.log.info("Started new worker {} for job {}".format(
                worker.worker_id, worker.job.task_id))
            # Worker ids wrap around below 2**15.
            next_worker_id = (next_worker_id + 1) % 2**15
Example #2
0
class BuildDispatcher(multiprocessing.Process):
    """
    Dispatcher process that turns frontend build tasks into running workers.

    1) Fetch build task from frontend
    2) Get a free VM for it
    3) Create a worker for the job
    4) Start it asynchronously and go to 1)
    """

    def __init__(self, opts):
        """
        Set up logging, the frontend client, the VM manager and the
        architecture/group lookup tables.

        :param opts: backend options object; this class reads its
            ``build_groups``, ``frontend_base_url``, ``frontend_auth`` and
            ``sleeptime`` attributes and passes the whole object on to the
            helpers it creates.
        """
        multiprocessing.Process.__init__(self, name="build-dispatcher")

        self.opts = opts
        self.log = get_redis_logger(self.opts, "backend.build_dispatcher", "build_dispatcher")
        self.frontend_client = FrontendClient(self.opts, self.log)
        self.vm_manager = VmManager(self.opts)

        # Maps e.g. x86_64 && i386 => PC
        self.arch_to_group = dict()
        # PC => max N builders per user
        self.group_to_usermax = dict()

        # Fills both mappings above from opts.build_groups.
        self.init_internal_structures()

    def get_vm_group_id(self, arch):
        """
        Return the id of the VM group configured for ``arch``.

        :param arch: architecture name, looked up in ``arch_to_group``
        :raises DispatchBuildError: when no build group lists ``arch``
        """
        try:
            return self.arch_to_group[arch]
        except KeyError:
            raise DispatchBuildError("Unknown architecture {0}".format(arch))

    def update_process_title(self, msg=None):
        """
        Set the process title to "Build dispatcher", optionally followed by
        " - <msg>".

        :param msg: optional status text appended to the title
        """
        proc_title = "Build dispatcher"
        if msg:
            proc_title += " - " + msg
        setproctitle(proc_title)

    def init_internal_structures(self):
        """
        (Re)build ``arch_to_group`` and ``group_to_usermax`` from
        ``opts.build_groups``.

        Each group is read as a mapping with the keys ``"id"``, ``"archs"``
        and ``"max_vm_per_user"``.
        """
        self.arch_to_group = dict()
        self.group_to_usermax = dict()
        for group in self.opts.build_groups:
            group_id = group["id"]
            for arch in group["archs"]:
                self.arch_to_group[arch] = group_id
                self.log.debug("mapping {0} to {1} group".format(arch, group_id))

            self.log.debug("user might use only {0}VMs for {1} group".format(group["max_vm_per_user"], group_id))
            self.group_to_usermax[group_id] = group["max_vm_per_user"]

    def load_job(self):
        """
        Retrieve a single build job from frontend.

        Polls ``<frontend_base_url>/backend/waiting/`` until the JSON
        response carries a non-empty ``"build"`` entry, sleeping
        ``opts.sleeptime`` seconds after every unsuccessful attempt.

        :return: ``BuildJob`` created from the received task
        """
        self.log.info("Waiting for a job from frontend...")
        get_task_init_time = time.time()

        task = None
        while not task:
            # Expose the waiting time in the process title.
            self.update_process_title("Waiting for a job from frontend for {} s"
                                      .format(int(time.time() - get_task_init_time)))
            try:
                r = get("{0}/backend/waiting/".format(self.opts.frontend_base_url),
                        auth=("user", self.opts.frontend_auth))
                task = r.json().get("build")
            except (RequestException, ValueError) as error:
                # Request failures and undecodable responses are logged and
                # the poll is retried.
                self.log.exception("Retrieving build job from {} failed with error: {}"
                                   .format(self.opts.frontend_base_url, error))
            finally:
                # Back off both after an error and after an empty answer.
                if not task:
                    time.sleep(self.opts.sleeptime)

        self.log.info("Got new build job {}".format(task['task_id']))
        return BuildJob(task, self.opts)

    def acquire_vm_for_job(self, job, vm_group_id):
        """
        Ask the VM manager for a VM from ``vm_group_id`` to run ``job``.

        The VM is requested on behalf of ``job.project_owner`` and tagged
        with this process' pid and the job's task id, build id and chroot.

        :param job: job the VM is acquired for
        :param vm_group_id: id of the VM group to take the VM from
        :return: whatever ``vm_manager.acquire_vm`` returns
        """
        return self.vm_manager.acquire_vm(vm_group_id, job.project_owner, os.getpid(),
                                          job.task_id, job.build_id, job.chroot)

    def can_build_start(self, job):
        """
        Announce to the frontend that the build is going to start so that
        it can confirm that and draw out another job for building.

        :param job: job to announce; its ``build_id``, ``chroot`` and
            ``task_id`` attributes are used

        Returns
        -------
        True if the build can start
        False if the build can not start (build is cancelled)
        """
        try:
            can_build_start = self.frontend_client.starting_build(job.build_id, job.chroot)
        except (RequestException, ValueError) as error:
            self.log.exception("Communication with Frontend to confirm build start failed with error: {}".format(error))
            return False

        if not can_build_start:
            # Use the job passed in (the dispatcher has no ``job``
            # attribute). No exception is being handled here, so
            # log.error() is used instead of log.exception(), which would
            # append a meaningless "NoneType: None" traceback.
            self.log.error("Frontend forbade to start the job {}".format(job.task_id))

        return can_build_start

    def join_finished_workers(self, workers):
        """
        Join every worker that is no longer alive and drop it from
        ``workers``.

        :param workers: list of started workers; modified in place
        """
        # Iterate over a snapshot: removing from the list that is being
        # iterated would skip the element following each removed worker.
        for worker in list(workers):
            if not worker.is_alive():
                worker.join(5)
                workers.remove(worker)
                self.log.info("Removed finished worker {} for job {}"
                              .format(worker.worker_id, worker.job.task_id))

    def run(self):
        """
        Executes build dispatching process.

        Loops forever: reaps finished workers, loads a job, acquires a VM
        for it, asks the frontend for confirmation and starts a worker.
        """
        self.log.info("Build dispatching started.")
        self.update_process_title()

        workers = []
        next_worker_id = 1
        while True:
            self.join_finished_workers(workers)

            job = self.load_job()

            # NOTE(review): only NoVmAvailable is caught below; unless
            # DispatchBuildError derives from it, a job with an unknown
            # architecture ends this loop -- confirm that is intended.
            try:
                self.log.info("Acquiring VM for job {}...".format(str(job)))
                vm_group_id = self.get_vm_group_id(job.arch)
                vm = self.acquire_vm_for_job(job, vm_group_id)
            except NoVmAvailable as error:
                self.log.info("No available resources for task {} (Reason: {}). Deferring job."
                              .format(job.task_id, error))
                self.frontend_client.defer_build(job.build_id, job.chroot)
                continue
            else:
                self.log.info("VM {} for job {} successfully acquired".format(vm.vm_name, job.task_id))

            if not self.can_build_start(job):
                # The build will not run, so give the VM back.
                self.vm_manager.release_vm(vm.vm_name)
                continue

            worker = Worker(
                opts=self.opts,
                frontend_client=self.frontend_client,
                vm_manager=self.vm_manager,
                worker_id=next_worker_id,
                vm=vm, job=job
            )
            worker.start()
            workers.append(worker)
            self.log.info("Started new worker {} for job {}"
                          .format(worker.worker_id, worker.job.task_id))
            # Worker ids wrap around below 2**15.
            next_worker_id = (next_worker_id + 1) % 2**15