Python Queue.dequeue Examples

Programming Language: Python

Namespace/Package Name: retask.queue

Class/Type: Queue

Method/Function: dequeue

Examples at hotexamples.com: 13

Python Queue.dequeue - 13 examples found. These are the top-rated real-world Python examples of retask.queue.Queue.dequeue extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

Queue(16)

connect(16)

enqueue(8)

dequeue(5)

wait(3)

send(2)

Example #1

Show file

File: worker.py Project: pep8bot/pep8bot

class Worker(object):
    """ Represents the worker process.  Waits for tasks to come in from the
    webapp and then acts on them.

    Tasks are taken from the ``commits`` queue.  The only fields read from a
    task's ``data`` are ``repository.url``, ``repository.name`` and
    ``repository.owner.name``.

    ``print`` is always called with one parenthesised argument so the code
    parses and prints the same text under both Python 2 and Python 3.
    """

    def __init__(self):
        """ Connect to the queue and make sure the scratch dir exists. """
        self.queue = Queue('commits')
        self.queue.connect()
        # TODO -- set both of these with the config file.
        # Use pyramid tools to load config.
        self.sleep_interval = 1
        self.scratch_dir = "/home/threebean/scratch/pep8bot-scratch"
        try:
            os.makedirs(self.scratch_dir)
        except OSError:
            # An already existing scratch_dir is fine; any other failure
            # (e.g. permissions) should not be hidden.
            if not os.path.isdir(self.scratch_dir):
                raise

    def run(self):
        """ Poll the queue forever; clone and tidy each announced repo. """
        # Loop-invariant, so resolve it once instead of once per file.
        script = os.path.expanduser("~/devel/PythonTidy/PythonTidy.py")

        while True:
            time.sleep(self.sleep_interval)
            print("Waking")
            if self.queue.length == 0:
                continue

            task = self.queue.dequeue()
            if not task:
                # Nothing was handed back although the queue looked
                # non-empty a moment ago (e.g. another consumer took it).
                continue

            data = task.data
            url = data['repository']['url']

            # TODO -- don't clone this url.  But fork and clone our url.

            name = data['repository']['name']
            owner = data['repository']['owner']['name']
            self.working_dir = tempfile.mkdtemp(
                prefix=owner + '-' + name,
                dir=self.scratch_dir,
            )
            print("** Cloning to %s" % self.working_dir)
            print(sh.git.clone(url, self.working_dir))
            print("** Processing files.")
            for root, dirs, files in os.walk(self.working_dir):

                # Leave git's own bookkeeping alone.
                if '.git' in root:
                    continue

                for filename in files:
                    if not filename.endswith(".py"):
                        continue
                    infile = os.path.join(root, filename)
                    print("** Tidying %s" % infile)
                    # Tidy into a sibling file, then move it over the
                    # original.
                    tmpfile = infile + ".bak"
                    sh.python(script, infile, tmpfile)
                    shutil.move(tmpfile, infile)

            with directory(self.working_dir):
                print(sh.pwd())
                print(sh.git.status())

Example #2

Show file

def monitor_buildqueue():
    """
    This function monitors the build queue.

    If the build is still on then it puts it back to the queue.
    If the build is finished then it goes to the job queue.
    A build that koji returns nothing for is logged as deleted; builds in any
    other state are not re-queued.

    Returns None.  It returns early when no redis connection is available and
    leaves the loop once ``check_shutdown()`` is true.
    """
    key = get_key('darkbuildqueue')
    config = get_redis_config()
    jobqueue = Queue('jobqueue', config)
    jobqueue.connect()
    buildqueue = Queue('buildqueue', config)
    buildqueue.connect()
    rdb = redis_connection()
    if not rdb:
        log(key, 'redis is missing', 'error')
        return None
    rdb.set('darkbuildqueue-status', '1')
    while True:
        if check_shutdown():
            break
        try:
            time.sleep(60)
            if buildqueue.length == 0:
                log(key, "Sleeping, no buildqueue job", 'info')
                time.sleep(60)
                continue
            task = buildqueue.dequeue()
            if not task:
                # Nothing was handed back although the queue looked
                # non-empty; try again on the next round.
                continue
            kojiurl = task.data['kojiurl']
            idx = task.data['jobid']
            kc = koji.ClientSession(kojiurl, {'debug': False, 'password': None,
                                              'debug_xmlrpc': False, 'user': None})

            res = kc.getBuild(idx)
            if not res:
                # koji returned nothing for this build id; report and drop it.
                log(key, "build deleted %s" % idx, 'error')
                continue

            state = res['state']
            if state == 1:
                # completed build now push to our redis queue
                jobqueue.enqueue(task)
                log(key, "in job queue %s" % idx, 'info')
            elif state == 0:
                # building state, check again later
                buildqueue.enqueue(task)
                log(key, "in build queue %s" % idx, 'info')

        except Exception as error:
            # Keep the monitor alive whatever a single iteration throws.
            log(key, str(error), 'error')

Example #3

Show file

if __name__ == '__main__':
    # Job worker entry point: load the config, connect to the job queue and
    # import each queued build until the process is stopped.
    libimporter.loadconfig()
    create_rundir()
    key = 'darkjobworker'
    config = get_redis_config()
    jobqueue = Queue('jobqueue', config)
    jobqueue.connect()
    log_status(key, 'Starting worker module')
    try:
        while True:

            if jobqueue.length == 0:
                log(key, "Sleeping, no jobqueue job", 'info')
                time.sleep(60)
                continue
            try:
                task = jobqueue.dequeue()
                if not task:
                    continue
                instance = task.data['instance']
                idx = task.data['build_id']
                distro = task.data['release']
                utils.msgtext = instance
                log(key, "Import started %s" % idx, 'info')
                do_buildid_import(instance, idx, distro, key)
                log(key, "Import finished %s" % idx, 'info')
            except Exception as err:
                # A failing import must not take the worker down.
                log(key, str(err), 'error')
            print("one more done or crashed")
    finally:
        # The loop has no normal exit, so the cleanup has to live in a
        # finally block to run at all (e.g. on KeyboardInterrupt).
        remove_redis_keys(key)

Example #4

Show file

File: jobgrabcontrol.py Project: danvratil/copr

class Channel(object):
    """
    Abstraction above retask (the set of "channels" between backend(s),
    jobgrabber and workers).  We could use multiple backends and/or a
    different "atomic" medium (an implementation other than Queue) in future.
    But make sure nobody needs to touch the "medium" directly.
    """

    def __init__(self, opts, log=None):
        """
        Create the control queue and block until it is connected.

        :param opts: options object; only stored here
        :param log: optional logger, passed to ``wait_log`` and used to
            report enqueue failures
        """
        self.log = log
        self.opts = opts
        # channel for Backend <--> JobGrabber communication
        self.jg_start = Queue("jg_control_start")
        # channel for JobGrabber <--> [[Builders]] communication
        self.build_queues = dict()
        while not self.jg_start.connect():
            wait_log(self.log, "waiting for redis", 5)

    def _get_queue(self, bgroup):
        """
        Return the connected queue of build group ``bgroup``.

        The queue is created on first use and remembered in
        ``self.build_queues`` so that later calls (and
        ``remove_all_builds``) see the same object.

        :raises Exception: when the new queue cannot be connected
        """
        try:
            return self.build_queues[bgroup]
        except KeyError:
            pass

        q = Queue("copr-be-{0}".format(bgroup))
        if not q.connect():
            # As we already connected to jg_control_message, this should
            # be also OK.
            raise Exception("can't connect to redis, should never happen!")
        self.build_queues[bgroup] = q
        return q

    def add_build(self, bgroup, build):
        """
        Enqueue ``build`` for group ``bgroup``.

        This should be used by job_grab only for now.

        :return: True when the build was enqueued, False when enqueueing
            raised (the error is logged if a logger is available)
        """
        q = self._get_queue(bgroup)
        try:
            q.enqueue(Task(build))
        except Exception as err:
            # I've seen issues where Task() was not able to jsonify urllib
            # exceptions
            if self.log:
                self.log.error("can't enqueue build {0}, reason:\n{1}".format(
                    build, err
                ))
            return False

        return True

    # Builder's API
    def get_build(self, bgroup):
        """
        Return the data of the next task in the group's queue, or None when
        nothing was dequeued.
        """
        q = self._get_queue(bgroup)
        t = q.dequeue()
        return t.data if t else None

    # JobGrab's API
    def backend_started(self):
        """ Return the length of the control queue. """
        return self.jg_start.length

    def job_graber_initialized(self):
        """ Drain the control queue. """
        while self.jg_start.dequeue():
            pass

    def remove_all_builds(self):
        """ Drain every known build queue and forget the queues. """
        for q in self.build_queues.values():
            while q.dequeue():
                pass
        self.build_queues = dict()

    # Backend's API
    def backend_start(self):
        """ Notify jobgrab about service start. """
        if not self.jg_start.enqueue(Task("start")):
            raise Exception("can't append to retask queue, should never happen!")

        # Wait until the control queue has been emptied again.
        while self.jg_start.length:
            wait_log(self.log, "waiting until jobgrabber initializes queue")

Example #5

Show file

File: tests.py Project: d1ffuz0r/retask

 def runTest(self):
     """Dequeue one task from ``testqueue`` and check its ``name`` field."""
     queue = Queue('testqueue')
     queue.connect()
     task = queue.dequeue()
     self.assertEqual(task.data['name'], u'kushal')

Example #6

Show file

File: dispatcher.py Project: 1dot75cm/Copr

class Worker(multiprocessing.Process):

    """
    Worker process dispatches building tasks. Backend spin-up multiple workers, each
    worker associated to one group_id and process one task at the each moment.

    Worker listens for the new tasks from :py:class:`retask.Queue` associated with its group_id

    :param Bunch opts: backend config
    :param events: (:py:class:`multiprocessing.Queue`) queue to announce new events
    :param int worker_num: worker number
    :param int group_id: group_id from the set of groups defined in config
    :param callback: callback object to handle internal workers events. Should implement method ``log(msg)``.
    :param lock: (:py:class:`multiprocessing.Lock`) global backend lock

    """

    def __init__(self, opts, events, worker_num, group_id,
                 callback=None, lock=None):
        """
        Initialise the process, connect the group's task queue and choose the
        logging callback.

        Without an explicit ``callback`` a ``WorkerCallback`` is created whose
        logfile is ``worker-<group_name>-<worker_num>.log`` inside
        ``opts.worker_logdir``.
        """
        multiprocessing.Process.__init__(self, name="worker-builder")

        # plain state handed in by the caller
        self.opts = opts
        self.events = events  # reports back to the dispatcher go here
        self.worker_num = worker_num
        self.group_id = group_id
        self.lock = lock

        # nothing spawned and no shutdown requested yet
        self.kill_received = False
        self.vm_name = None
        self.vm_ip = None

        # build tasks for this group arrive through this queue
        queue_name = "copr-be-{0}".format(str(group_id))
        self.task_queue = Queue(queue_name)
        self.task_queue.connect()

        self.frontend_callback = FrontendClient(opts, events)

        self.callback = callback
        if not self.callback:
            log_name = "worker-{0}-{1}.log".format(
                self.group_name, self.worker_num)
            self.logfile = os.path.join(self.opts.worker_logdir, log_name)
            self.callback = WorkerCallback(logfile=self.logfile)

        self.callback.log("creating worker: dynamic ip")

    @property
    def group_name(self):
        try:
            return self.opts.build_groups[self.group_id]["name"]
        except Exception as error:
            self.callback.log("Failed to get builder group name from config, using group_id as name."
                              "Original error: {}".format(error))
            return self.group_id

    def event(self, topic, template, content=None):
        """ Multi-purpose logging method.

        Logs messages to three different destinations:
            - To log file
            - The internal "events" queue for communicating back to the
              dispatcher.
            - The fedmsg bus.  Messages are posted asynchronously to a
              zmq.PUB socket.

        """

        content = content or {}
        what = template.format(**content)
        who = "worker-{0}".format(self.worker_num)

        self.callback.log("event: who: {0}, what: {1}".format(who, what))
        self.events.put({"when": time.time(), "who": who, "what": what})

        if self.opts.fedmsg_enabled and fedmsg:
            content["who"] = who
            content["what"] = what
            try:
                fedmsg.publish(modname="copr", topic=topic, msg=content)
            # pylint: disable=W0703
            except Exception as e:
                # XXX - Maybe log traceback as well with traceback.format_exc()
                self.callback.log("failed to publish message: {0}".format(e))

    def _announce_start(self, job):
        """
        Announce everywhere that a build process started now.
        """
        job.started_on = time.time()
        self.mark_started(job)

        template = "build start: user:{user} copr:{copr}" \
            "pkg: {pkg} build:{build} ip:{ip}  pid:{pid}"

        content = dict(user=job.submitter, copr=job.project_name,
                       owner=job.project_owner, pkg=job.pkg_name,
                       build=job.build_id, ip=self.vm_ip, pid=self.pid)
        self.event("build.start", template, content)

        template = "chroot start: chroot:{chroot} user:{user}" \
            "copr:{copr} pkg: {pkg} build:{build} ip:{ip}  pid:{pid}"

        content = dict(chroot=job.chroot, user=job.submitter,
                       owner=job.project_owner, pkg=job.pkg_name,
                       copr=job.project_name, build=job.build_id,
                       ip=self.vm_ip, pid=self.pid)

        self.event("chroot.start", template, content)

    def _announce_end(self, job):
        """
        Announce everywhere that a build process ended now.
        """
        job.ended_on = time.time()

        self.return_results(job)
        self.callback.log("worker finished build: {0}".format(self.vm_ip))
        template = "build end: user:{user} copr:{copr} build:{build}" \
            "  pkg: {pkg}  version: {version} ip:{ip}  pid:{pid} status:{status}"

        content = dict(user=job.submitter, copr=job.project_name,
                       owner=job.project_owner,
                       pkg=job.pkg_name, version=job.pkg_version,
                       build=job.build_id, ip=self.vm_ip, pid=self.pid,
                       status=job.status, chroot=job.chroot)
        self.event("build.end", template, content)

    def run_ansible_playbook(self, args, name="running playbook", attempts=9):
        """
        Run the ansible playbook command, retrying on failure.

        The command is run through the shell up to ``attempts`` times; the
        first run that exits cleanly ends the loop.  Each failing run has its
        output logged and written to stderr, followed by a pause of
        ``opts.sleeptime``.

        :param args: argument string appended to the playbook executable
        :param name: label used as prefix of the log lines
        :param attempts: maximum number of runs
        :return: output of the successful run, or None when every run failed
        """

        # Ansible playbook python API does not work here, dunno why.  See:
        # https://groups.google.com/forum/#!topic/ansible-project/DNBD2oHv5k8

        command = "{0} {1}".format(ansible_playbook, args)

        output = None
        for attempt in range(attempts):
            try:
                phase = ": begin: " if attempt == 0 else ": retry: "
                self.callback.log(name + phase + command)
                output = subprocess.check_output(command, shell=True)
                self.callback.log(
                    "Raw playbook output:\n{0}\n".format(output))
                break
            except CalledProcessError as failure:
                self.callback.log(
                    "CalledProcessError: \n{0}\n".format(failure.output))
                sys.stderr.write("{0}\n".format(failure.output))
                # FIXME: this is not purpose of opts.sleeptime
                time.sleep(self.opts.sleeptime)

        self.callback.log(name + ": end")
        return output

    def validate_vm(self):
        """
        Test connectivity to the VM at ``self.vm_ip`` by running
        ``echo hello`` on it through an ansible runner.

        :raises: :py:class:`~backend.exceptions.CoprWorkerSpawnFailError`: validation fails
            (the runner raised, or the VM is missing from the "contacted"
            part of the response)
        """
        # we were getting some dead instances
        # that's why I'm testing the connectivity here
        runner_options = dict(
            remote_user="******",
            host_list="{},".format(self.vm_ip),
            pattern=self.vm_ip,
            forks=1,
            transport=self.opts.ssh.transport,
            timeout=500
        )
        connection = ansible.runner.Runner(**runner_options)
        connection.module_name = "shell"
        connection.module_args = "echo hello"

        try:
            res = connection.run()
        except Exception as exception:
            # Adjacent literals are joined verbatim; keep the separating
            # spaces inside the literals.
            raise CoprWorkerSpawnFailError(
                "Failed to check created VM ({}) "
                "due to ansible error: {}".format(self.vm_ip, exception))

        if self.vm_ip not in res.get("contacted", {}):
            self.callback.log(
                "Worker is not responding to the testing playbook. Terminating it. "
                "Runner options: {} "
                "Ansible raw response:\n{}".format(runner_options, res))
            raise CoprWorkerSpawnFailError("Created VM ({}) was unresponsive "
                                           "and therefore terminated".format(self.vm_ip))

    def try_spawn(self, args):
        """
        Make one attempt to spawn a new VM through the ansible playbook.

        The playbook output is searched for ``IP=...`` (required) and
        ``vm_name=...`` (optional, stored in ``self.vm_name``).

        :param args: arguments for the ansible command which spawns the VM
        :return str: syntactically valid ip address of the new machine
            (nobody guarantees the machine is available)
        :raises CoprWorkerSpawnFailError: no output, no IP in the output, or
            the reported IP does not parse
        """
        result = self.run_ansible_playbook(args, "spawning instance")
        if not result:
            raise CoprWorkerSpawnFailError("No result, trying again")

        ip_match = re.search(r'IP=([^\{\}"]+)', result, re.MULTILINE)
        if not ip_match:
            raise CoprWorkerSpawnFailError("No ip in the result, trying again")
        ipaddr = ip_match.group(1)

        name_match = re.search(r'vm_name=([^\{\}"]+)', result, re.MULTILINE)
        if name_match:
            self.vm_name = name_match.group(1)

        self.callback.log("got instance ip: {0}".format(ipaddr))

        try:
            IP(ipaddr)
        except ValueError:
            # if we get here we're in trouble
            raise CoprWorkerSpawnFailError(
                "Invalid IP back from spawn_instance - dumping cache output\n"
                + str(result))

        return ipaddr

    def spawn_instance(self):
        """
        Spawn new VM, executing the following steps:

            - call the spawn playbook to startup/provision a building instance
            - get an IP and test if the builder responds
            - repeat this until you get an IP of working builder

        :param BuildJob job:
        :return ip: of created VM
        :return None: if couldn't find playbook to spin ip VM
        """

        start = time.time()

        # Ansible playbook python API does not work here, dunno why.  See:
        # https://groups.google.com/forum/#!topic/ansible-project/DNBD2oHv5k8

        try:
            spawn_playbook = self.opts.build_groups[self.group_id]["spawn_playbook"]
        except KeyError:
            return

        spawn_args = "-c ssh {}".format(spawn_playbook)

        # TODO: replace with for i in range(MAX_SPAWN_TRIES): ... else raise FatalError
        i = 0
        while self.vm_ip is None:
            i += 1
            try:
                self.callback.log("Spawning a builder. Try No. {0}".format(i))

                self.vm_ip = self.try_spawn(spawn_args)
                self.update_process_title()
                try:
                    self.validate_vm()
                except CoprWorkerSpawnFailError:
                    self.terminate_instance()
                    raise

                self.callback.log("Instance spawn/provision took {0} sec"
                                  .format(time.time() - start))

            except CoprWorkerSpawnFailError as exception:
                self.callback.log("VM Spawn attempt failed with message: {}"
                                  .format(exception.msg))

    def terminate_instance(self):
        """
        Call the terminate playbook to destroy the building instance
        """
        self.update_process_title(suffix="Terminating VM")
        term_args = {}
        if "ip" in self.opts.terminate_vars:
            term_args["ip"] = self.vm_ip
        if "vm_name" in self.opts.terminate_vars:
            term_args["vm_name"] = self.vm_name

        try:
            playbook = self.opts.build_groups[self.group_id]["terminate_playbook"]
        except KeyError:
            self.callback.log(
                "Fatal error: no terminate playbook for group_id: {}; exiting"
                .format(self.group_id))
            sys.exit(255)

        # args = "-c ssh -i '{0},' {1} {2}".format(
        args = "-c ssh {} {}".format(
            # self.vm_ip,
            playbook,
            ans_extra_vars_encode(term_args, "copr_task"))

        try:
            self.run_ansible_playbook(args, "terminate instance")
        except Exception as error:
            self.callback.log("Failed to terminate an instance: vm_name={}, vm_ip={}. Original error: {}"
                              .format(self.vm_name, self.vm_ip, error))

        # TODO: should we check that machine was destroyed?
        self.vm_ip = None
        self.vm_name = None
        self.update_process_title()

    def mark_started(self, job):
        """
        Send data about started build to the frontend
        """

        job.status = 3  # running
        build = job.to_dict()
        self.callback.log("build: {}".format(build))

        data = {"builds": [build]}
        try:
            self.frontend_callback.update(data)
        except:
            raise CoprWorkerError(
                "Could not communicate to front end to submit status info")

    def return_results(self, job):
        """
        Send the build results to the frontend
        """
        self.callback.log(
            "{0} status {1}. Took {2} seconds".format(
                job.build_id, job.status, job.ended_on - job.started_on))

        self.callback.log("build: {}".format(job.to_dict()))
        data = {"builds": [job.to_dict()]}

        try:
            self.frontend_callback.update(data)
        except Exception as err:
            raise CoprWorkerError(
                "Could not communicate to front end to submit results: {}"
                .format(err)
            )

    def starting_build(self, job):
        """
        Announce to the frontend that a build is starting.

        :return True: if the build can start
        :return False: if the build can not start (build is cancelled)
        """

        try:
            can_start = self.frontend_callback.starting_build(job.build_id, job.chroot)
        except Exception as err:
            raise CoprWorkerError(
                "Could not communicate to front end to submit results: {}"
                .format(err)
            )

        return can_start

    @classmethod
    def pkg_built_before(cls, pkg, chroot, destdir):
        """
        Check whether the package has already been built in this chroot.
        """
        s_pkg = os.path.basename(pkg)
        pdn = s_pkg.replace(".src.rpm", "")
        resdir = "{0}/{1}/{2}".format(destdir, chroot, pdn)
        resdir = os.path.normpath(resdir)
        if os.path.exists(resdir) and os.path.exists(os.path.join(resdir, "success")):
            return True
        return False

    def spawn_instance_with_check(self):
        """
        Wrapper around self.spawn_instance() with exception checking

        :param BuildJob job:

        :return str: ip of spawned vm
        :raises:

            - :py:class:`~backend.exceptions.CoprWorkerError`: spawn function doesn't return ip
            - :py:class:`AnsibleError`: failure during anible command execution
        """
        self.update_process_title(suffix="Spawning a new VM")
        try:
            self.spawn_instance()
            if not self.vm_ip:
                # TODO: maybe add specific exception?
                raise CoprWorkerError(
                    "No IP found from creating instance")
        except AnsibleError as e:
            register_build_result(self.opts, failed=True)

            self.callback.log("failure to setup instance: {0}".format(e))
            raise

    def init_fedmsg(self):
        """
        Initialize Fedmsg
        (this assumes there are certs and a fedmsg config on disk)
        """

        if not (self.opts.fedmsg_enabled and fedmsg):
            return

        try:
            fedmsg.init(name="relay_inbound", cert_prefix="copr", active=True)
        except Exception as e:
            self.callback.log(
                "failed to initialize fedmsg: {0}".format(e))

    def on_pkg_skip(self, job):
        """
        Handle a package that does not need building: announce the start,
        log the skip, set the status to SKIPPED and announce the end.
        """
        self._announce_start(job)

        notice = "Skipping: package {0} has been already built before.".format(
            job.pkg)
        self.callback.log(notice)

        job.status = BuildStatus.SKIPPED  # skipped
        self._announce_end(job)

    def obtain_job(self):
        """
        Retrieves new build task from queue.
        Checks if the new job can be started and not skipped.
        """
        self.update_process_title(suffix="No task")

        # this sometimes caused TypeError in random worker
        # when another one  picekd up a task to build
        # why?
        try:
            task = self.task_queue.dequeue()
        except TypeError:
            return
        if not task:
            return

        # import ipdb; ipdb.set_trace()
        job = BuildJob(task.data, self.opts)

        self.update_process_title(suffix="Task: {} chroot: {}".format(job.build_id, job.chroot))

        # Checking whether the build is not cancelled
        if not self.starting_build(job):
            return

        # Checking whether to build or skip
        if self.pkg_built_before(job.pkg, job.chroot, job.destdir):
            self.on_pkg_skip(job)
            return

        # FIXME
        # this is our best place to sanity check the job before starting
        # up any longer process

        return job

    def do_job(self, job):
        """
        Run one build job on the current builder VM and report the outcome.

        The start is announced, the local results directory is prepared and
        the package is built through ``MockRemote``.  Finally the status is
        stored on the job and the end is announced.  A results directory that
        cannot be created, or a ``MockRemoteError``, marks the job as failed.

        :param job: :py:class:`~backend.job.BuildJob`
        """
        def summary(verb):
            # formatted on demand: job.update() may change the job in between
            return (
                "{0} build: id={1} builder={2} timeout={3} destdir={4}"
                " chroot={5} repos={6}"
            ).format(verb, job.build_id, self.vm_ip, job.timeout,
                     job.destdir, job.chroot, str(job.repos))

        self._announce_start(job)
        outcome = BuildStatus.SUCCEEDED
        results_dir = os.path.normpath(job.destdir + '/' + job.chroot)

        # setup our target dir locally
        if not os.path.exists(results_dir):
            try:
                os.makedirs(results_dir)
            except (OSError, IOError) as e:
                self.callback.log(
                    "Could not make results dir for job: {0} - {1}"
                    .format(results_dir, str(e)))
                outcome = BuildStatus.FAILURE

        if outcome == BuildStatus.SUCCEEDED:
            # FIXME
            # need a plugin hook or some mechanism to check random
            # info about the pkgs
            # this should use ansible to download the pkg on
            # the remote system
            # and run a series of checks on the package before we
            # start the build - most importantly license checks.

            self.callback.log(summary("Starting"))
            self.callback.log("Building pkgs: {0}".format(job.pkg))

            # the job's repos plus this chroot's own result repositories
            repos = list(job.repos)
            repo_base = job.results + job.chroot
            repos.extend([repo_base + '/', repo_base + '/devel/'])

            build_log = "{0}/build-{1}.log".format(results_dir, job.build_id)

            rpm_macros = {
                "copr_username": job.project_owner,
                "copr_projectname": job.project_name,
                "vendor": "Fedora Project COPR ({0}/{1})".format(
                    job.project_owner, job.project_name)
            }

            try:
                remote = MockRemote(
                    builder_host=self.vm_ip, job=job, repos=repos,
                    macros=rpm_macros, opts=self.opts, lock=self.lock,
                    callback=CliLogCallBack(quiet=True, logfn=build_log),
                )
                remote.check()

                job.update(remote.build_pkg())

                if self.opts.do_sign:
                    remote.add_pubkey()

                register_build_result(self.opts)

            except MockRemoteError as e:
                # record and carry on to the end-of-build reporting
                self.callback.log("{0} - {1}".format(self.vm_ip, e))
                outcome = BuildStatus.FAILURE
                register_build_result(self.opts, failed=True)

            self.callback.log(summary("Finished"))

        job.status = outcome
        self._announce_end(job)

        done_title = "Task: {} chroot: {} done".format(job.build_id, job.chroot)
        self.update_process_title(suffix=done_title)

    def check_vm_still_alive(self):
        """
        Ensure that if we have vm_ip it is alive.
        Terminates unresponsive instance.
        """
        if self.vm_ip:
            # TODO: extract method: check_vm_still_alive
            try:
                self.validate_vm()
            except CoprWorkerSpawnFailError:
                self.terminate_instance()

    def update_process_title(self, suffix=None):
        """
        Set the process title to the worker's group name and number, followed
        by the VM ip and name (when set) and the optional ``suffix``.
        """
        pieces = ["worker-{} {} ".format(self.group_name, self.worker_num)]
        if self.vm_ip:
            pieces.append("VM_IP={} ".format(self.vm_ip))
        if self.vm_name:
            pieces.append("VM_NAME={} ".format(self.vm_name))
        if suffix:
            pieces.append(str(suffix))

        setproctitle("".join(pieces))

    def run(self):
        """
        Worker should startup and check if it can function
        for each job it takes from the jobs queue
        run opts.setup_playbook to create the instance
        do the build (mockremote)
        terminate the instance.

        """
        self.init_fedmsg()

        while not self.kill_received:
            self.update_process_title()
            self.check_vm_still_alive()

            if self.opts.spawn_in_advance and not self.vm_ip:
                self.spawn_instance_with_check()

            job = self.obtain_job()
            if not job:
                time.sleep(self.opts.sleeptime)
                continue

            if not self.vm_ip:
                self.spawn_instance_with_check()

            try:
                self.do_job(job)
            except Exception as error:
                self.callback.log("Unhandled build error: {}".format(error))
            finally:
                # clean up the instance
                self.terminate_instance()

Example #7

Show file

File: dispatcher.py Project: evilkost/copr

class Worker(multiprocessing.Process):
    """
    Worker process dispatches building tasks. Backend spin-up multiple workers, each
    worker associated to one group_id and process one task at the each moment.

    Worker listens for the new tasks from :py:class:`retask.Queue` associated with its group_id

    :param Munch opts: backend config
    :param int worker_num: worker number
    :param int group_id: group_id from the set of groups defined in config

    """

    def __init__(self, opts, frontend_client, worker_num, group_id):
        """
        Store the configuration, create the logger, connect to the task
        queue of the group and create the VM manager.

        :param opts: backend config
        :param frontend_client: client used to report to the frontend
        :param worker_num: worker number
        :param group_id: id of the builder group served by this worker
        """
        multiprocessing.Process.__init__(self, name="worker-builder")

        # plain configuration
        self.opts = opts
        self.group_id = group_id
        self.worker_num = worker_num
        self.frontend_client = frontend_client

        # runtime state
        self.kill_received = False
        self.vm_ip = None
        self.vm_name = None
        self.rc = None

        # logger_name is derived from opts/group_id/worker_num set above
        self.log = get_redis_logger(self.opts, self.logger_name, "worker")

        # job management stuff
        queue_name = "copr-be-{0}".format(str(group_id))
        self.task_queue = Queue(queue_name)
        self.task_queue.connect()

        self.vmm = VmManager(self.opts)

    @property
    def logger_name(self):
        return "backend.worker-{}-{}".format(self.group_name, self.worker_num)

    @property
    def group_name(self):
        try:
            return self.opts.build_groups[self.group_id]["name"]
        except Exception as error:
            self.log.exception("Failed to get builder group name from config, using group_id as name."
                               "Original error: {}".format(error))
            return str(self.group_id)

    def fedmsg_notify(self, topic, template, content=None):
        """
        Publish message to fedmsg bus when it is available
        :param topic:
        :param template:
        :param content:
        """
        if self.opts.fedmsg_enabled and fedmsg:

            who = "worker-{0}".format(self.worker_num)

            content = content or {}
            content["who"] = who
            content["what"] = template.format(**content)

            try:
                fedmsg.publish(modname="copr", topic=topic, msg=content)
            # pylint: disable=W0703
            except Exception as e:
                self.log.exception("failed to publish message: {0}".format(e))

    def _announce_start(self, job):
        """
        Announce everywhere that a build process started now.
        """
        job.started_on = time.time()
        self.mark_started(job)

        template = "build start: user:{user} copr:{copr}" \
            "pkg: {pkg} build:{build} ip:{ip}  pid:{pid}"

        content = dict(user=job.submitter, copr=job.project_name,
                       owner=job.project_owner, pkg=job.package_name,
                       build=job.build_id, ip=self.vm_ip, pid=self.pid)
        self.fedmsg_notify("build.start", template, content)

        template = "chroot start: chroot:{chroot} user:{user}" \
            "copr:{copr} pkg: {pkg} build:{build} ip:{ip}  pid:{pid}"

        content = dict(chroot=job.chroot, user=job.submitter,
                       owner=job.project_owner, pkg=job.package_name,
                       copr=job.project_name, build=job.build_id,
                       ip=self.vm_ip, pid=self.pid)

        self.fedmsg_notify("chroot.start", template, content)

    def _announce_end(self, job):
        """
        Announce everywhere that a build process ended now.
        """
        job.ended_on = time.time()

        self.return_results(job)
        self.log.info("worker finished build: {0}".format(self.vm_ip))
        template = "build end: user:{user} copr:{copr} build:{build}" \
            "  pkg: {pkg}  version: {version} ip:{ip}  pid:{pid} status:{status}"

        content = dict(user=job.submitter, copr=job.project_name,
                       owner=job.project_owner,
                       pkg=job.package_name, version=job.package_version,
                       build=job.build_id, ip=self.vm_ip, pid=self.pid,
                       status=job.status, chroot=job.chroot)
        self.fedmsg_notify("build.end", template, content)

    def mark_started(self, job):
        """
        Send data about started build to the frontend

        Sets ``job.status`` to ``BuildStatus.RUNNING`` before sending.

        :param job: build job that is starting
        :raises CoprWorkerError: when the frontend update fails
        """

        job.status = BuildStatus.RUNNING
        build = job.to_dict()
        self.log.info("starting build: {}".format(build))

        data = {"builds": [build]}
        try:
            self.frontend_client.update(data)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must not
            # be turned into CoprWorkerError.
            raise CoprWorkerError(
                "Could not communicate to front end to submit status info")

    def return_results(self, job):
        """
        Send the build results to the frontend
        """
        self.log.info("Build {} finished with status {}. Took {} seconds"
                      .format(job.build_id, job.status, job.ended_on - job.started_on))

        data = {"builds": [job.to_dict()]}

        try:
            self.frontend_client.update(data)
        except Exception as err:
            raise CoprWorkerError(
                "Could not communicate to front end to submit results: {}"
                .format(err)
            )

    def starting_build(self, job):
        """
        Announce to the frontend that a build is starting.
        Checks if we can and/or should start job

        :return True: if the build can start
        :return False: if the build can not start (build is cancelled)
        """

        try:
            return self.frontend_client.starting_build(job.build_id, job.chroot)
        except Exception as err:
            msg = "Could not communicate to front end to confirm build start"
            self.log.exception(msg)
            raise CoprWorkerError(msg)

    @classmethod
    def pkg_built_before(cls, pkg, chroot, destdir):
        """
        Check whether the package has already been built in this chroot.
        """
        s_pkg = os.path.basename(pkg)
        pdn = s_pkg.replace(".src.rpm", "")
        resdir = "{0}/{1}/{2}".format(destdir, chroot, pdn)
        resdir = os.path.normpath(resdir)
        if os.path.exists(resdir) and os.path.exists(os.path.join(resdir, "success")):
            return True
        return False

    def init_fedmsg(self):
        """
        Initialize Fedmsg
        (this assumes there are certs and a fedmsg config on disk)
        """

        if not (self.opts.fedmsg_enabled and fedmsg):
            return

        try:
            fedmsg.init(name="relay_inbound", cert_prefix="copr", active=True)
        except Exception as e:
            self.log.exception("Failed to initialize fedmsg: {}".format(e))

    # TODO: doing skip logic on fronted during @start_build query
    # def on_pkg_skip(self, job):
    #     """
    #     Handle package skip
    #     """
    #     self._announce_start(job)
    #     self.log.info("Skipping: package {} has been already built before.".format(job.pkg))
    #     job.status = BuildStatus.SKIPPED
    #     self.notify_job_grab_about_task_end(job)
    #     self._announce_end(job)

    def obtain_job(self):
        """
        Retrieves new build task from queue.
        Checks if the new job can be started and not skipped.
        """
        # ToDo: remove retask, use redis lua fsm logic similiar to VMM
        # this sometimes caused TypeError in random worker
        # when another one  picekd up a task to build
        # why?
        try:
            task = self.task_queue.dequeue()
        except TypeError:
            return
        if not task:
            return

        job = BuildJob(task.data, self.opts)
        self.update_process_title(suffix="Task: {} chroot: {}, obtained at {}"
                                  .format(job.build_id, job.chroot, str(datetime.now())))

        return job

    def do_job(self, job):
        """
        Executes new job.

        Steps, in order: announce the start of the build, make sure the
        local chroot directory exists, clean results of a previous build,
        run the build through ``MockRemote``, copy the logs into the results
        directory, store the final status in ``job.status`` and announce
        the end of the build.

        Only ``MockRemoteError`` is caught here (the build is then marked as
        failed); other exceptions are not handled in this method.

        :param job: :py:class:`~backend.job.BuildJob`
        """

        self._announce_start(job)
        self.update_process_title(suffix="Task: {} chroot: {} build started"
                                  .format(job.build_id, job.chroot))
        # start optimistic; switched to FAILURE below when something goes wrong
        status = BuildStatus.SUCCEEDED

        # setup our target dir locally
        if not os.path.exists(job.chroot_dir):
            try:
                os.makedirs(job.chroot_dir)
            except (OSError, IOError):
                self.log.exception("Could not make results dir for job: {}"
                                   .format(job.chroot_dir))
                status = BuildStatus.FAILURE

        # runs even when the directory could not be created
        self.clean_result_directory(job)

        # the build itself is skipped when the preparation above failed
        if status == BuildStatus.SUCCEEDED:
            # FIXME
            # need a plugin hook or some mechanism to check random
            # info about the pkgs
            # this should use ansible to download the pkg on
            # the remote system
            # and run a series of checks on the package before we
            # start the build - most importantly license checks.

            self.log.info("Starting build: id={} builder={} job: {}"
                          .format(job.build_id, self.vm_ip, job))

            # build_logger is handed to MockRemote as its logger
            with local_file_logger(
                "{}.builder.mr".format(self.logger_name),
                job.chroot_log_path,
                fmt=build_log_format) as build_logger:
                try:
                    mr = MockRemote(
                        builder_host=self.vm_ip,
                        job=job,
                        logger=build_logger,
                        opts=self.opts
                    )
                    mr.check()

                    build_details = mr.build_pkg_and_process_results()
                    job.update(build_details)

                    if self.opts.do_sign:
                        mr.add_pubkey()

                    register_build_result(self.opts)

                except MockRemoteError as e:
                    # record and break
                    self.log.exception(
                        "Error during the build, host={}, build_id={}, chroot={}, error: {}"
                        .format(self.vm_ip, job.build_id, job.chroot, e)
                    )
                    status = BuildStatus.FAILURE
                    register_build_result(self.opts, failed=True)

            # logged for both successful and failed builds
            self.log.info(
                "Finished build: id={} builder={} timeout={} destdir={}"
                " chroot={} repos={}"
                .format(job.build_id, self.vm_ip, job.timeout, job.destdir,
                        job.chroot, str(job.repos)))

            self.copy_mock_logs(job)

        # publish the final status
        job.status = status
        self._announce_end(job)
        self.update_process_title(suffix="Task: {} chroot: {} done"
                                  .format(job.build_id, job.chroot))

    def copy_mock_logs(self, job):
        if not os.path.isdir(job.results_dir):
            self.log.info("Job results dir doesn't exists, couldn't copy main log; path: {}"
                          .format(job.results_dir))
            return

        log_names = [(job.chroot_log_name, "mockchain.log.gz"),
                     (job.rsync_log_name, "rsync.log.gz")]

        for src_name, dst_name in log_names:
            src = os.path.join(job.chroot_dir, src_name)
            dst = os.path.join(job.results_dir, dst_name)
            try:
                with open(src, "rb") as f_src, gzip.open(dst, "wb") as f_dst:
                    f_dst.writelines(f_src)
            except IOError:
                self.log.info("File {} not found".format(src))

    def clean_result_directory(self, job):
        """
        Create backup directory and move there results from previous build.
        """
        if not os.path.exists(job.results_dir) or os.listdir(job.results_dir) == []:
            return

        backup_dir_name = "prev_build_backup"
        backup_dir = os.path.join(job.results_dir, backup_dir_name)
        self.log.info("Cleaning target directory, results from previous build storing in {}"
                      .format(backup_dir))

        if not os.path.exists(backup_dir):
            os.makedirs(backup_dir)

        files = (x for x in os.listdir(job.results_dir) if x != backup_dir_name)
        for filename in files:
            file_path = os.path.join(job.results_dir, filename)
            if os.path.isfile(file_path):
                if file_path.endswith((".info", ".log", ".log.gz")):
                    os.rename(file_path, os.path.join(backup_dir, filename))

                elif not file_path.endswith(".rpm"):
                    os.remove(file_path)
            else:
                shutil.rmtree(file_path)

    def update_process_title(self, suffix=None):
        """
        Set the process title describing this worker.

        The title starts with the group name and worker number, followed by
        the VM ip and VM name when they are set, and ends with ``suffix``.

        :param suffix: optional text appended at the end of the title
        """
        pieces = ["worker-{} {} ".format(self.group_name, self.worker_num)]
        if self.vm_ip:
            pieces.append("VM_IP={} ".format(self.vm_ip))
        if self.vm_name:
            pieces.append("VM_NAME={} ".format(self.vm_name))
        if suffix:
            pieces.append(str(suffix))

        setproctitle("".join(pieces))

    def notify_job_grab_about_task_end(self, job, do_reschedule=False):
        """
        Publish a JSON message about the end of a task on the
        ``JOB_GRAB_TASK_END_PUBSUB`` channel.

        :param job: build job the message is about
        :param do_reschedule: when true the action is "reschedule",
            otherwise "remove"
        """
        # TODO: Current notification method is unreliable,
        # we should retask and use redis + lua for atomic acquire/release tasks
        action = "reschedule" if do_reschedule else "remove"
        request = dict(
            action=action,
            build_id=job.build_id,
            task_id=job.task_id,
            chroot=job.chroot,
        )
        message = json.dumps(request)

        self.rc.publish(JOB_GRAB_TASK_END_PUBSUB, message)

    def acquire_vm_for_job(self, job):
        # TODO: replace acquire/release with context manager

        self.log.info("got job: {}, acquiring VM for build".format(str(job)))
        start_vm_wait_time = time.time()
        vmd = None
        while vmd is None:
            try:
                self.update_process_title(suffix="trying to acquire VM for job {} for {}s"
                                          .format(job.task_id, time.time() - start_vm_wait_time))
                vmd = self.vmm.acquire_vm(self.group_id, job.project_owner, os.getpid(),
                                          job.task_id, job.build_id, job.chroot)
            except NoVmAvailable as error:
                self.log.debug("No VM yet: {}".format(error))
                time.sleep(self.opts.sleeptime)
                continue
            except Exception as error:
                self.log.exception("Unhandled exception during VM acquire :{}".format(error))
                break
        return vmd

    def run_cycle(self):
        self.update_process_title(suffix="trying to acquire job")

        time.sleep(self.opts.sleeptime)
        job = self.obtain_job()
        if not job:
            return

        try:
            if not self.starting_build(job):
                self.notify_job_grab_about_task_end(job)
                return
        except Exception:
            self.log.exception("Failed to check if job can be started")
            self.notify_job_grab_about_task_end(job)
            return

        vmd = self.acquire_vm_for_job(job)

        if vmd is None:
            self.notify_job_grab_about_task_end(job, do_reschedule=True)
        else:
            self.log.info("acquired VM: {} ip: {} for build {}".format(vmd.vm_name, vmd.vm_ip, job.task_id))
            # TODO: store self.vmd = vmd and use it
            self.vm_name = vmd.vm_name
            self.vm_ip = vmd.vm_ip

            try:
                self.do_job(job)
                self.notify_job_grab_about_task_end(job)
            except VmError as error:
                self.log.exception("Builder error, re-scheduling task: {}".format(error))
                self.notify_job_grab_about_task_end(job, do_reschedule=True)
            except Exception as error:
                self.log.exception("Unhandled build error: {}".format(error))
                self.notify_job_grab_about_task_end(job, do_reschedule=True)
            finally:
                # clean up the instance
                self.vmm.release_vm(vmd.vm_name)
                self.vm_ip = None
                self.vm_name = None

    def run(self):
        """
        Process entry point: initialize fedmsg, the VM manager and the redis
        connection, then call ``run_cycle`` until ``kill_received`` is set.
        """
        self.log.info("Starting worker")
        self.init_fedmsg()
        self.vmm.post_init()

        self.rc = get_redis_connection(self.opts)
        self.update_process_title(suffix="trying to acquire job")
        while True:
            if self.kill_received:
                break
            self.run_cycle()

Example #8

Show file

class Worker(multiprocessing.Process):
    """
    Worker process dispatches building tasks. Backend spin-up multiple workers, each
    worker associated to one group_id and process one task at the each moment.

    Worker listens for the new tasks from :py:class:`retask.Queue` associated with its group_id

    :param Munch opts: backend config
    :param int worker_num: worker number
    :param int group_id: group_id from the set of groups defined in config

    """
    def __init__(self, opts, frontend_client, worker_num, group_id):
        """
        Store the configuration, create the logger, connect to the task
        queue of the group and create the VM manager.

        :param opts: backend config
        :param frontend_client: client used to report to the frontend
        :param worker_num: worker number
        :param group_id: id of the builder group served by this worker
        """
        multiprocessing.Process.__init__(self, name="worker-builder")

        # plain configuration
        self.opts = opts
        self.group_id = group_id
        self.worker_num = worker_num
        self.frontend_client = frontend_client

        # runtime state
        self.kill_received = False
        self.vm_ip = None
        self.vm_name = None
        self.rc = None

        # logger_name is derived from opts/group_id/worker_num set above
        self.log = get_redis_logger(self.opts, self.logger_name, "worker")

        # job management stuff
        queue_name = "copr-be-{0}".format(str(group_id))
        self.task_queue = Queue(queue_name)
        self.task_queue.connect()

        self.vmm = VmManager(self.opts)

    @property
    def logger_name(self):
        return "backend.worker-{}-{}".format(self.group_name, self.worker_num)

    @property
    def group_name(self):
        try:
            return self.opts.build_groups[self.group_id]["name"]
        except Exception as error:
            self.log.exception(
                "Failed to get builder group name from config, using group_id as name."
                "Original error: {}".format(error))
            return str(self.group_id)

    def fedmsg_notify(self, topic, template, content=None):
        """
        Publish message to fedmsg bus when it is available
        :param topic:
        :param template:
        :param content:
        """
        if self.opts.fedmsg_enabled and fedmsg:

            who = "worker-{0}".format(self.worker_num)

            content = content or {}
            content["who"] = who
            content["what"] = template.format(**content)

            try:
                fedmsg.publish(modname="copr", topic=topic, msg=content)
            # pylint: disable=W0703
            except Exception as e:
                self.log.exception("failed to publish message: {0}".format(e))

    def _announce_start(self, job):
        """
        Announce everywhere that a build process started now.
        """
        job.started_on = time.time()
        self.mark_started(job)

        template = "build start: user:{user} copr:{copr}" \
            "pkg: {pkg} build:{build} ip:{ip}  pid:{pid}"

        content = dict(user=job.submitter,
                       copr=job.project_name,
                       owner=job.project_owner,
                       pkg=job.package_name,
                       build=job.build_id,
                       ip=self.vm_ip,
                       pid=self.pid)
        self.fedmsg_notify("build.start", template, content)

        template = "chroot start: chroot:{chroot} user:{user}" \
            "copr:{copr} pkg: {pkg} build:{build} ip:{ip}  pid:{pid}"

        content = dict(chroot=job.chroot,
                       user=job.submitter,
                       owner=job.project_owner,
                       pkg=job.package_name,
                       copr=job.project_name,
                       build=job.build_id,
                       ip=self.vm_ip,
                       pid=self.pid)

        self.fedmsg_notify("chroot.start", template, content)

    def _announce_end(self, job):
        """
        Announce everywhere that a build process ended now.
        """
        job.ended_on = time.time()

        self.return_results(job)
        self.log.info("worker finished build: {0}".format(self.vm_ip))
        template = "build end: user:{user} copr:{copr} build:{build}" \
            "  pkg: {pkg}  version: {version} ip:{ip}  pid:{pid} status:{status}"

        content = dict(user=job.submitter,
                       copr=job.project_name,
                       owner=job.project_owner,
                       pkg=job.package_name,
                       version=job.package_version,
                       build=job.build_id,
                       ip=self.vm_ip,
                       pid=self.pid,
                       status=job.status,
                       chroot=job.chroot)
        self.fedmsg_notify("build.end", template, content)

    def mark_started(self, job):
        """
        Send data about started build to the frontend

        Sets ``job.status`` to ``BuildStatus.RUNNING`` before sending.

        :param job: build job that is starting
        :raises CoprWorkerError: when the frontend update fails
        """

        job.status = BuildStatus.RUNNING
        build = job.to_dict()
        self.log.info("starting build: {}".format(build))

        data = {"builds": [build]}
        try:
            self.frontend_client.update(data)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must not
            # be turned into CoprWorkerError.
            raise CoprWorkerError(
                "Could not communicate to front end to submit status info")

    def return_results(self, job):
        """
        Send the build results to the frontend
        """
        self.log.info(
            "Build {} finished with status {}. Took {} seconds".format(
                job.build_id, job.status, job.ended_on - job.started_on))

        data = {"builds": [job.to_dict()]}

        try:
            self.frontend_client.update(data)
        except Exception as err:
            raise CoprWorkerError(
                "Could not communicate to front end to submit results: {}".
                format(err))

    def starting_build(self, job):
        """
        Announce to the frontend that a build is starting.
        Checks if we can and/or should start job

        :return True: if the build can start
        :return False: if the build can not start (build is cancelled)
        """

        try:
            return self.frontend_client.starting_build(job.build_id,
                                                       job.chroot)
        except Exception as err:
            msg = "Could not communicate to front end to confirm build start"
            self.log.exception(msg)
            raise CoprWorkerError(msg)

    @classmethod
    def pkg_built_before(cls, pkg, chroot, destdir):
        """
        Check whether the package has already been built in this chroot.
        """
        s_pkg = os.path.basename(pkg)
        pdn = s_pkg.replace(".src.rpm", "")
        resdir = "{0}/{1}/{2}".format(destdir, chroot, pdn)
        resdir = os.path.normpath(resdir)
        if os.path.exists(resdir) and os.path.exists(
                os.path.join(resdir, "success")):
            return True
        return False

    def init_fedmsg(self):
        """
        Initialize Fedmsg
        (this assumes there are certs and a fedmsg config on disk)
        """

        if not (self.opts.fedmsg_enabled and fedmsg):
            return

        try:
            fedmsg.init(name="relay_inbound", cert_prefix="copr", active=True)
        except Exception as e:
            self.log.exception("Failed to initialize fedmsg: {}".format(e))

    # TODO: doing skip logic on fronted during @start_build query
    # def on_pkg_skip(self, job):
    #     """
    #     Handle package skip
    #     """
    #     self._announce_start(job)
    #     self.log.info("Skipping: package {} has been already built before.".format(job.pkg))
    #     job.status = BuildStatus.SKIPPED
    #     self.notify_job_grab_about_task_end(job)
    #     self._announce_end(job)

    def obtain_job(self):
        """
        Retrieves new build task from queue.
        Checks if the new job can be started and not skipped.
        """
        # ToDo: remove retask, use redis lua fsm logic similiar to VMM
        # this sometimes caused TypeError in random worker
        # when another one  picekd up a task to build
        # why?
        try:
            task = self.task_queue.dequeue()
        except TypeError:
            return
        if not task:
            return

        job = BuildJob(task.data, self.opts)
        self.update_process_title(
            suffix="Task: {} chroot: {}, obtained at {}".format(
                job.build_id, job.chroot, str(datetime.now())))

        return job

    def do_job(self, job):
        """
        Executes new job.

        Steps, in order: announce the start of the build, make sure the
        local chroot directory exists, clean results of a previous build,
        run the build through ``MockRemote``, copy the logs into the results
        directory, store the final status in ``job.status`` and announce
        the end of the build.

        Only ``MockRemoteError`` is caught here (the build is then marked as
        failed); other exceptions are not handled in this method.

        :param job: :py:class:`~backend.job.BuildJob`
        """

        self._announce_start(job)
        self.update_process_title(
            suffix="Task: {} chroot: {} build started".format(
                job.build_id, job.chroot))
        # start optimistic; switched to FAILURE below when something goes wrong
        status = BuildStatus.SUCCEEDED

        # setup our target dir locally
        if not os.path.exists(job.chroot_dir):
            try:
                os.makedirs(job.chroot_dir)
            except (OSError, IOError):
                self.log.exception(
                    "Could not make results dir for job: {}".format(
                        job.chroot_dir))
                status = BuildStatus.FAILURE

        # runs even when the directory could not be created
        self.clean_result_directory(job)

        # the build itself is skipped when the preparation above failed
        if status == BuildStatus.SUCCEEDED:
            # FIXME
            # need a plugin hook or some mechanism to check random
            # info about the pkgs
            # this should use ansible to download the pkg on
            # the remote system
            # and run a series of checks on the package before we
            # start the build - most importantly license checks.

            self.log.info("Starting build: id={} builder={} job: {}".format(
                job.build_id, self.vm_ip, job))

            # build_logger is handed to MockRemote as its logger
            with local_file_logger("{}.builder.mr".format(self.logger_name),
                                   job.chroot_log_path,
                                   fmt=build_log_format) as build_logger:
                try:
                    mr = MockRemote(builder_host=self.vm_ip,
                                    job=job,
                                    logger=build_logger,
                                    opts=self.opts)
                    mr.check()

                    build_details = mr.build_pkg_and_process_results()
                    job.update(build_details)

                    if self.opts.do_sign:
                        mr.add_pubkey()

                    register_build_result(self.opts)

                except MockRemoteError as e:
                    # record and break
                    self.log.exception(
                        "Error during the build, host={}, build_id={}, chroot={}, error: {}"
                        .format(self.vm_ip, job.build_id, job.chroot, e))
                    status = BuildStatus.FAILURE
                    register_build_result(self.opts, failed=True)

            # logged for both successful and failed builds
            self.log.info(
                "Finished build: id={} builder={} timeout={} destdir={}"
                " chroot={} repos={}".format(job.build_id, self.vm_ip,
                                             job.timeout, job.destdir,
                                             job.chroot, str(job.repos)))

            self.copy_mock_logs(job)

        # publish the final status
        job.status = status
        self._announce_end(job)
        self.update_process_title(
            suffix="Task: {} chroot: {} done".format(job.build_id, job.chroot))

    def copy_mock_logs(self, job):
        if not os.path.isdir(job.results_dir):
            self.log.info(
                "Job results dir doesn't exists, couldn't copy main log; path: {}"
                .format(job.results_dir))
            return

        log_names = [(job.chroot_log_name, "mockchain.log.gz"),
                     (job.rsync_log_name, "rsync.log.gz")]

        for src_name, dst_name in log_names:
            src = os.path.join(job.chroot_dir, src_name)
            dst = os.path.join(job.results_dir, dst_name)
            try:
                with open(src, "rb") as f_src, gzip.open(dst, "wb") as f_dst:
                    f_dst.writelines(f_src)
            except IOError:
                self.log.info("File {} not found".format(src))

    def clean_result_directory(self, job):
        """
        Create backup directory and move there results from previous build.
        """
        if not os.path.exists(job.results_dir) or os.listdir(
                job.results_dir) == []:
            return

        backup_dir_name = "prev_build_backup"
        backup_dir = os.path.join(job.results_dir, backup_dir_name)
        self.log.info(
            "Cleaning target directory, results from previous build storing in {}"
            .format(backup_dir))

        if not os.path.exists(backup_dir):
            os.makedirs(backup_dir)

        files = (x for x in os.listdir(job.results_dir)
                 if x != backup_dir_name)
        for filename in files:
            file_path = os.path.join(job.results_dir, filename)
            if os.path.isfile(file_path):
                if file_path.endswith((".info", ".log", ".log.gz")):
                    os.rename(file_path, os.path.join(backup_dir, filename))

                elif not file_path.endswith(".rpm"):
                    os.remove(file_path)
            else:
                shutil.rmtree(file_path)

    def update_process_title(self, suffix=None):
        """
        Set the process title describing this worker.

        The title starts with the group name and worker number, followed by
        the VM ip and VM name when they are set, and ends with ``suffix``.

        :param suffix: optional text appended at the end of the title
        """
        pieces = ["worker-{} {} ".format(self.group_name, self.worker_num)]
        if self.vm_ip:
            pieces.append("VM_IP={} ".format(self.vm_ip))
        if self.vm_name:
            pieces.append("VM_NAME={} ".format(self.vm_name))
        if suffix:
            pieces.append(str(suffix))

        setproctitle("".join(pieces))

    def notify_job_grab_about_task_end(self, job, do_reschedule=False):
        """
        Publish a JSON message about the end of a task on the
        ``JOB_GRAB_TASK_END_PUBSUB`` channel.

        :param job: build job the message is about
        :param do_reschedule: when true the action is "reschedule",
            otherwise "remove"
        """
        # TODO: Current notification method is unreliable,
        # we should retask and use redis + lua for atomic acquire/release tasks
        action = "reschedule" if do_reschedule else "remove"
        request = dict(
            action=action,
            build_id=job.build_id,
            task_id=job.task_id,
            chroot=job.chroot,
        )
        message = json.dumps(request)

        self.rc.publish(JOB_GRAB_TASK_END_PUBSUB, message)

    def acquire_vm_for_job(self, job):
        # TODO: replace acquire/release with context manager

        self.log.info("got job: {}, acquiring VM for build".format(str(job)))
        start_vm_wait_time = time.time()
        vmd = None
        while vmd is None:
            try:
                self.update_process_title(
                    suffix="trying to acquire VM for job {} for {}s".format(
                        job.task_id,
                        time.time() - start_vm_wait_time))
                vmd = self.vmm.acquire_vm(self.group_id, job.project_owner,
                                          os.getpid(), job.task_id,
                                          job.build_id, job.chroot)
            except NoVmAvailable as error:
                self.log.debug("No VM yet: {}".format(error))
                time.sleep(self.opts.sleeptime)
                continue
            except Exception as error:
                self.log.exception(
                    "Unhandled exception during VM acquire :{}".format(error))
                break
        return vmd

    def run_cycle(self):
        self.update_process_title(suffix="trying to acquire job")

        time.sleep(self.opts.sleeptime)
        job = self.obtain_job()
        if not job:
            return

        try:
            if not self.starting_build(job):
                self.notify_job_grab_about_task_end(job)
                return
        except Exception:
            self.log.exception("Failed to check if job can be started")
            self.notify_job_grab_about_task_end(job)
            return

        vmd = self.acquire_vm_for_job(job)

        if vmd is None:
            self.notify_job_grab_about_task_end(job, do_reschedule=True)
        else:
            self.log.info("acquired VM: {} ip: {} for build {}".format(
                vmd.vm_name, vmd.vm_ip, job.task_id))
            # TODO: store self.vmd = vmd and use it
            self.vm_name = vmd.vm_name
            self.vm_ip = vmd.vm_ip

            try:
                self.do_job(job)
                self.notify_job_grab_about_task_end(job)
            except VmError as error:
                self.log.exception(
                    "Builder error, re-scheduling task: {}".format(error))
                self.notify_job_grab_about_task_end(job, do_reschedule=True)
            except Exception as error:
                self.log.exception("Unhandled build error: {}".format(error))
                self.notify_job_grab_about_task_end(job, do_reschedule=True)
            finally:
                # clean up the instance
                self.vmm.release_vm(vmd.vm_name)
                self.vm_ip = None
                self.vm_name = None

    def run(self):
        """
        Process entry point: initialize fedmsg, the VM manager and the redis
        connection, then call ``run_cycle`` until ``kill_received`` is set.
        """
        self.log.info("Starting worker")
        self.init_fedmsg()
        self.vmm.post_init()

        self.rc = get_redis_connection(self.opts)
        self.update_process_title(suffix="trying to acquire job")
        while True:
            if self.kill_received:
                break
            self.run_cycle()

Example #9

Show file

File: print_queues.py Project: tedwardia/copr

#!/usr/bin/python
# coding: utf-8
"""Print the data of every task waiting in the copr backend queues.

Tasks are taken out of each queue to be printed and put back afterwards.
"""

NUM_QUEUES = 2

import sys
sys.path.append("/usr/share/copr/")

from retask.task import Task
from retask.queue import Queue
from backend.helpers import BackendConfigReader

opts = BackendConfigReader().read()
redis_config = {
    'host': opts['redis_host'],
    'port': opts['redis_port'],
    'db': opts['redis_db'],
}

for i in range(NUM_QUEUES):
    print("## Queue {}".format(i))
    q = Queue("copr-be-{}".format(i), config=redis_config)
    q.connect()
    save_q = []
    try:
        while q.length != 0:
            task = q.dequeue()
            if not task:
                # the queue was emptied between the length check and dequeue
                break
            # call form works as a statement on Python 2 and a function on 3
            print(task.data)
            save_q.append(task)
    finally:
        # put the tasks back even when printing one of them failed
        for t in save_q:
            q.enqueue(t)

Example #10

Show file

if __name__ == '__main__':
    # Worker entry point: load the configuration, connect to the 'jobqueue'
    # redis queue and import every build id that shows up there.
    libimporter.loadconfig()
    create_rundir()
    key = 'darkjobworker'
    config = get_redis_config()
    jobqueue = Queue('jobqueue', config)
    jobqueue.connect()
    log_status('darkjobworker', 'Starting worker module')
    try:
        while True:

            if jobqueue.length == 0:
                log(key, "Sleeping, no jobqueue job", 'info')
                time.sleep(60)
                continue
            try:
                task = jobqueue.dequeue()
                if not task:
                    continue
                instance = task.data['instance']
                idx = task.data['build_id']
                distro = task.data['release']
                utils.msgtext = task.data['instance']
                log(key, "Import started %s" % idx, 'info')
                do_buildid_import(instance, idx, distro, key)
                log(key, "Import finished %s" % idx, 'info')
            except Exception as err:
                # A failing import must not stop the worker; record it and
                # go on with the next task.
                log(key, str(err), 'error')
            print("one more done or crashed")
    finally:
        # The loop above never ends on its own, so the cleanup has to live in
        # a ``finally`` to run when the worker is interrupted or exits.
        remove_redis_keys('darkjobworker')

Example #11

Show file

class Worker(multiprocessing.Process):
    """
    Worker process dispatches building tasks. The backend spins up multiple workers; each
    worker is associated with one group_id and processes one task at a time.

    The worker listens for new tasks on the :py:class:`retask.queue.Queue` associated with its group_id.

    :param Bunch opts: backend config
    :param events: (:py:class:`multiprocessing.Queue`) queue to announce new events
    :param int worker_num: worker number
    :param int group_id: group_id from the set of groups defined in config
    :param callback: callback object to handle internal workers events. Should implement method ``log(msg)``.
    :param lock: (:py:class:`multiprocessing.Lock`) global backend lock

    """
    def __init__(self,
                 opts,
                 events,
                 worker_num,
                 group_id,
                 callback=None,
                 lock=None):
        """
        Prepare the worker process.

        Connects to the ``copr-be-<group_id>`` task queue, creates the
        frontend client and, when no ``callback`` is given, a file based
        ``WorkerCallback`` under ``opts.worker_logdir``. No VM is assigned
        yet: ``vm_name`` and ``vm_ip`` start as None.
        """
        # base class initialization
        multiprocessing.Process.__init__(self, name="worker-builder")

        # plain state, no side effects
        self.opts = opts
        self.events = events  # event queue for communicating back to dispatcher
        self.worker_num = worker_num
        self.group_id = group_id
        self.lock = lock
        self.kill_received = False
        self.vm_name = None
        self.vm_ip = None

        # job management stuff
        queue_name = "copr-be-{0}".format(str(group_id))
        self.task_queue = Queue(queue_name)
        self.task_queue.connect()

        self.frontend_callback = FrontendClient(opts, events)

        self.callback = callback
        if not self.callback:
            # No callback supplied: fall back to a per-worker log file.
            log_name = "worker-{0}-{1}.log".format(self.group_name,
                                                   self.worker_num)
            self.logfile = os.path.join(self.opts.worker_logdir, log_name)
            self.callback = WorkerCallback(logfile=self.logfile)

        self.callback.log("creating worker: dynamic ip")

    @property
    def group_name(self):
        try:
            return self.opts.build_groups[self.group_id]["name"]
        except Exception as error:
            self.callback.log(
                "Failed to get builder group name from config, using group_id as name."
                "Original error: {}".format(error))
            return self.group_id

    def event(self, topic, template, content=None):
        """ Multi-purpose logging method.

        Logs messages to three different destinations:
            - To log file
            - The internal "events" queue for communicating back to the
              dispatcher.
            - The fedmsg bus.  Messages are posted asynchronously to a
              zmq.PUB socket.

        """

        content = content or {}
        what = template.format(**content)
        who = "worker-{0}".format(self.worker_num)

        self.callback.log("event: who: {0}, what: {1}".format(who, what))
        self.events.put({"when": time.time(), "who": who, "what": what})

        if self.opts.fedmsg_enabled and fedmsg:
            content["who"] = who
            content["what"] = what
            try:
                fedmsg.publish(modname="copr", topic=topic, msg=content)
            # pylint: disable=W0703
            except Exception as e:
                # XXX - Maybe log traceback as well with traceback.format_exc()
                self.callback.log("failed to publish message: {0}".format(e))

    def _announce_start(self, job):
        """
        Announce everywhere that a build process started now.
        """
        job.started_on = time.time()
        self.mark_started(job)

        template = "build start: user:{user} copr:{copr}" \
            "pkg: {pkg} build:{build} ip:{ip}  pid:{pid}"

        content = dict(user=job.submitter,
                       copr=job.project_name,
                       owner=job.project_owner,
                       pkg=job.pkg_name,
                       build=job.build_id,
                       ip=self.vm_ip,
                       pid=self.pid)
        self.event("build.start", template, content)

        template = "chroot start: chroot:{chroot} user:{user}" \
            "copr:{copr} pkg: {pkg} build:{build} ip:{ip}  pid:{pid}"

        content = dict(chroot=job.chroot,
                       user=job.submitter,
                       owner=job.project_owner,
                       pkg=job.pkg_name,
                       copr=job.project_name,
                       build=job.build_id,
                       ip=self.vm_ip,
                       pid=self.pid)

        self.event("chroot.start", template, content)

    def _announce_end(self, job):
        """
        Announce everywhere that a build process ended now.
        """
        job.ended_on = time.time()

        self.return_results(job)
        self.callback.log("worker finished build: {0}".format(self.vm_ip))
        template = "build end: user:{user} copr:{copr} build:{build}" \
            "  pkg: {pkg}  version: {version} ip:{ip}  pid:{pid} status:{status}"

        content = dict(user=job.submitter,
                       copr=job.project_name,
                       owner=job.project_owner,
                       pkg=job.pkg_name,
                       version=job.pkg_version,
                       build=job.build_id,
                       ip=self.vm_ip,
                       pid=self.pid,
                       status=job.status,
                       chroot=job.chroot)
        self.event("build.end", template, content)

    def run_ansible_playbook(self, args, name="running playbook", attempts=9):
        """
        Call ansible playbook through the shell, retrying on failure:

            - well mostly we run out of space in OpenStack so we rather try
              multiple times (attempts param)
            - dump any attempt failure

        :param args: arguments appended to the ``ansible_playbook`` command
        :param name: prefix used for the log messages of this call
        :param attempts: maximum number of times the command is executed
        :return: output of the first successful run, None if all runs failed
        """

        # Ansible playbook python API does not work here, dunno why.  See:
        # https://groups.google.com/forum/#!topic/ansible-project/DNBD2oHv5k8

        command = "{0} {1}".format(ansible_playbook, args)

        output = None
        for attempt in range(attempts):
            try:
                phase = ": begin: " if attempt == 0 else ": retry: "
                self.callback.log(name + phase + command)
                output = subprocess.check_output(command, shell=True)
                self.callback.log("Raw playbook output:\n{0}\n".format(output))
                break

            except CalledProcessError as failure:
                failed_output = "{0}\n".format(failure.output)
                self.callback.log("CalledProcessError: \n" + failed_output)
                sys.stderr.write(failed_output)
                # FIXME: this is not purpose of opts.sleeptime
                time.sleep(self.opts.sleeptime)

        self.callback.log(name + ": end")
        return output

    def validate_vm(self):
        """
        Test connectivity to the VM at ``self.vm_ip``.

        Runs ``echo hello`` with the ansible ``shell`` module and expects the
        VM address among the contacted hosts of the response.

        :raises: :py:class:`~backend.exceptions.CoprWorkerSpawnFailError`: validation fails
        """
        # we were getting some dead instances
        # that's why I'm testing the connectivity here
        runner_options = {
            "remote_user": "******",
            "host_list": "{},".format(self.vm_ip),
            "pattern": self.vm_ip,
            "forks": 1,
            "transport": self.opts.ssh.transport,
            "timeout": 500,
        }
        probe = ansible.runner.Runner(**runner_options)
        probe.module_name = "shell"
        probe.module_args = "echo hello"

        try:
            response = probe.run()
        except Exception as exception:
            raise CoprWorkerSpawnFailError("Failed to check created VM ({})"
                                           "due to ansible error: {}".format(
                                               self.vm_ip, exception))

        if self.vm_ip in response.get("contacted", {}):
            return

        self.callback.log(
            "Worker is not responding to the testing playbook. Terminating it."
            "Runner options: {}".format(runner_options) +
            "Ansible raw response:\n{}".format(response))
        raise CoprWorkerSpawnFailError("Created VM ({}) was unresponsive "
                                       "and therefore terminated".format(
                                           self.vm_ip))

    def try_spawn(self, args):
        """
        Tries to spawn new vm using ansible

        The playbook output is searched for ``IP=...`` (mandatory) and
        ``vm_name=...`` (optional, stored in ``self.vm_name``).

        :param args: arguments for the ansible command which spawns the VM
        :return str: valid ip address of new machine (nobody guarantee machine availability)
        :raises CoprWorkerSpawnFailError: no output, no IP in the output, or
            the reported IP is not a valid address
        """
        output = self.run_ansible_playbook(args, "spawning instance")
        if not output:
            raise CoprWorkerSpawnFailError("No result, trying again")

        ip_match = re.search(r'IP=([^\{\}"]+)', output, re.MULTILINE)
        if not ip_match:
            raise CoprWorkerSpawnFailError("No ip in the result, trying again")
        ipaddr = ip_match.group(1)

        name_match = re.search(r'vm_name=([^\{\}"]+)', output, re.MULTILINE)
        if name_match:
            self.vm_name = name_match.group(1)

        self.callback.log("got instance ip: {0}".format(ipaddr))

        try:
            IP(ipaddr)
        except ValueError:
            # if we get here we"re in trouble
            raise CoprWorkerSpawnFailError(
                "Invalid IP back from spawn_instance - dumping cache output\n"
                + str(output))

        return ipaddr

    def spawn_instance(self):
        """
        Spawn new VM, executing the following steps:

            - call the spawn playbook to startup/provision a building instance
            - get an IP and test if the builder responds
            - repeat this until you get an IP of working builder

        :param BuildJob job:
        :return ip: of created VM
        :return None: if couldn't find playbook to spin ip VM
        """

        start = time.time()

        # Ansible playbook python API does not work here, dunno why.  See:
        # https://groups.google.com/forum/#!topic/ansible-project/DNBD2oHv5k8

        try:
            spawn_playbook = self.opts.build_groups[
                self.group_id]["spawn_playbook"]
        except KeyError:
            return

        spawn_args = "-c ssh {}".format(spawn_playbook)

        # TODO: replace with for i in range(MAX_SPAWN_TRIES): ... else raise FatalError
        i = 0
        while self.vm_ip is None:
            i += 1
            try:
                self.callback.log("Spawning a builder. Try No. {0}".format(i))

                self.vm_ip = self.try_spawn(spawn_args)
                self.update_process_title()
                try:
                    self.validate_vm()
                except CoprWorkerSpawnFailError:
                    self.terminate_instance()
                    raise

                self.callback.log(
                    "Instance spawn/provision took {0} sec".format(
                        time.time() - start))

            except CoprWorkerSpawnFailError as exception:
                self.callback.log(
                    "VM Spawn attempt failed with message: {}".format(
                        exception.msg))

    def terminate_instance(self):
        """
        Call the terminate playbook to destroy the building instance
        """
        self.update_process_title(suffix="Terminating VM")
        term_args = {}
        if "ip" in self.opts.terminate_vars:
            term_args["ip"] = self.vm_ip
        if "vm_name" in self.opts.terminate_vars:
            term_args["vm_name"] = self.vm_name

        try:
            playbook = self.opts.build_groups[
                self.group_id]["terminate_playbook"]
        except KeyError:
            self.callback.log(
                "Fatal error: no terminate playbook for group_id: {}; exiting".
                format(self.group_id))
            sys.exit(255)

        # args = "-c ssh -i '{0},' {1} {2}".format(
        args = "-c ssh {} {}".format(
            # self.vm_ip,
            playbook,
            ans_extra_vars_encode(term_args, "copr_task"))

        try:
            self.run_ansible_playbook(args, "terminate instance")
        except Exception as error:
            self.callback.log(
                "Failed to terminate an instance: vm_name={}, vm_ip={}. Original error: {}"
                .format(self.vm_name, self.vm_ip, error))

        # TODO: should we check that machine was destroyed?
        self.vm_ip = None
        self.vm_name = None
        self.update_process_title()

    def mark_started(self, job):
        """
        Send data about started build to the frontend
        """

        job.status = 3  # running
        build = job.to_dict()
        self.callback.log("build: {}".format(build))

        data = {"builds": [build]}
        try:
            self.frontend_callback.update(data)
        except:
            raise CoprWorkerError(
                "Could not communicate to front end to submit status info")

    def return_results(self, job):
        """
        Send the build results to the frontend
        """
        self.callback.log("{0} status {1}. Took {2} seconds".format(
            job.build_id, job.status, job.ended_on - job.started_on))

        self.callback.log("build: {}".format(job.to_dict()))
        data = {"builds": [job.to_dict()]}

        try:
            self.frontend_callback.update(data)
        except Exception as err:
            raise CoprWorkerError(
                "Could not communicate to front end to submit results: {}".
                format(err))

    def starting_build(self, job):
        """
        Announce to the frontend that a build is starting.

        :return True: if the build can start
        :return False: if the build can not start (build is cancelled)
        """

        try:
            can_start = self.frontend_callback.starting_build(
                job.build_id, job.chroot)
        except Exception as err:
            raise CoprWorkerError(
                "Could not communicate to front end to submit results: {}".
                format(err))

        return can_start

    @classmethod
    def pkg_built_before(cls, pkg, chroot, destdir):
        """
        Check whether the package has already been built in this chroot.
        """
        s_pkg = os.path.basename(pkg)
        pdn = s_pkg.replace(".src.rpm", "")
        resdir = "{0}/{1}/{2}".format(destdir, chroot, pdn)
        resdir = os.path.normpath(resdir)
        if os.path.exists(resdir) and os.path.exists(
                os.path.join(resdir, "success")):
            return True
        return False

    def spawn_instance_with_check(self):
        """
        Wrapper around self.spawn_instance() with exception checking

        :param BuildJob job:

        :return str: ip of spawned vm
        :raises:

            - :py:class:`~backend.exceptions.CoprWorkerError`: spawn function doesn't return ip
            - :py:class:`AnsibleError`: failure during anible command execution
        """
        self.update_process_title(suffix="Spawning a new VM")
        try:
            self.spawn_instance()
            if not self.vm_ip:
                # TODO: maybe add specific exception?
                raise CoprWorkerError("No IP found from creating instance")
        except AnsibleError as e:
            register_build_result(self.opts, failed=True)

            self.callback.log("failure to setup instance: {0}".format(e))
            raise

    def init_fedmsg(self):
        """
        Initialize Fedmsg
        (this assumes there are certs and a fedmsg config on disk)
        """

        if not (self.opts.fedmsg_enabled and fedmsg):
            return

        try:
            fedmsg.init(name="relay_inbound", cert_prefix="copr", active=True)
        except Exception as e:
            self.callback.log("failed to initialize fedmsg: {0}".format(e))

    def on_pkg_skip(self, job):
        """
        Handle package skip

        The skipped job is still announced as started and ended, with its
        status set to ``BuildStatus.SKIPPED`` in between.
        """
        self._announce_start(job)

        message = "Skipping: package {0} has been already built before.".format(
            job.pkg)
        self.callback.log(message)

        job.status = BuildStatus.SKIPPED  # skipped
        self._announce_end(job)

    def obtain_job(self):
        """
        Retrieves new build task from queue.
        Checks if the new job can be started and not skipped.
        """
        self.update_process_title(suffix="No task")

        # this sometimes caused TypeError in random worker
        # when another one  picekd up a task to build
        # why?
        try:
            task = self.task_queue.dequeue()
        except TypeError:
            return
        if not task:
            return

        # import ipdb; ipdb.set_trace()
        job = BuildJob(task.data, self.opts)

        self.update_process_title(
            suffix="Task: {} chroot: {}".format(job.build_id, job.chroot))

        # Checking whether the build is not cancelled
        if not self.starting_build(job):
            return

        # Checking whether to build or skip
        if self.pkg_built_before(job.pkg, job.chroot, job.destdir):
            self.on_pkg_skip(job)
            return

        # FIXME
        # this is our best place to sanity check the job before starting
        # up any longer process

        return job

    def do_job(self, job):
        """
        Executes new job.

        Announces the start, makes sure the local results directory
        ``<job.destdir>/<job.chroot>`` exists, runs the build through
        ``MockRemote`` on the VM at ``self.vm_ip`` and finally stores the
        resulting status on the job and announces the end.

        A failure to create the results directory or a ``MockRemoteError``
        does not propagate: it is logged and the job ends with
        ``BuildStatus.FAILURE``.

        :param job: :py:class:`~backend.job.BuildJob`
        """
        self._announce_start(job)
        # Stays SUCCEEDED unless one of the steps below fails.
        status = BuildStatus.SUCCEEDED
        chroot_destdir = os.path.normpath(job.destdir + '/' + job.chroot)

        # setup our target dir locally
        if not os.path.exists(chroot_destdir):
            try:
                os.makedirs(chroot_destdir)
            except (OSError, IOError) as e:
                msg = "Could not make results dir" \
                      " for job: {0} - {1}".format(chroot_destdir, str(e))

                self.callback.log(msg)
                status = BuildStatus.FAILURE

        # The build itself is attempted only when the results dir is usable.
        if status == BuildStatus.SUCCEEDED:
            # FIXME
            # need a plugin hook or some mechanism to check random
            # info about the pkgs
            # this should use ansible to download the pkg on
            # the remote system
            # and run a series of checks on the package before we
            # start the build - most importantly license checks.

            self.callback.log(
                "Starting build: id={0} builder={1} timeout={2} destdir={3}"
                " chroot={4} repos={5}".format(job.build_id, self.vm_ip,
                                               job.timeout, job.destdir,
                                               job.chroot, str(job.repos)))

            self.callback.log("Building pkgs: {0}".format(job.pkg))

            # Work on a copy so that job.repos itself is not modified; two
            # locations derived from job.results and the chroot are added
            # (presumably the project's own result repos - TODO confirm).
            chroot_repos = list(job.repos)
            chroot_repos.append(job.results + job.chroot + '/')
            chroot_repos.append(job.results + job.chroot + '/devel/')

            chroot_logfile = "{0}/build-{1}.log".format(
                chroot_destdir, job.build_id)

            # Macros handed over to MockRemote for this build.
            macros = {
                "copr_username":
                job.project_owner,
                "copr_projectname":
                job.project_name,
                "vendor":
                "Fedora Project COPR ({0}/{1})".format(job.project_owner,
                                                       job.project_name)
            }

            try:
                mr = MockRemote(
                    builder_host=self.vm_ip,
                    job=job,
                    repos=chroot_repos,
                    macros=macros,
                    opts=self.opts,
                    lock=self.lock,
                    callback=CliLogCallBack(quiet=True, logfn=chroot_logfile),
                )
                mr.check()

                build_details = mr.build_pkg()
                # Merge what the build reported back into the job.
                job.update(build_details)

                if self.opts.do_sign:
                    mr.add_pubkey()

                register_build_result(self.opts)

            except MockRemoteError as e:
                # record and break
                self.callback.log("{0} - {1}".format(self.vm_ip, e))
                status = BuildStatus.FAILURE
                register_build_result(self.opts, failed=True)

            # Logged for successful and failed builds alike; the job fields
            # are read again here, i.e. after job.update() above.
            self.callback.log(
                "Finished build: id={0} builder={1} timeout={2} destdir={3}"
                " chroot={4} repos={5}".format(job.build_id, self.vm_ip,
                                               job.timeout, job.destdir,
                                               job.chroot, str(job.repos)))

        job.status = status
        self._announce_end(job)
        self.update_process_title(
            suffix="Task: {} chroot: {} done".format(job.build_id, job.chroot))

    def check_vm_still_alive(self):
        """
        Ensure that if we have vm_ip it is alive.
        Terminates unresponsive instance.
        """
        if self.vm_ip:
            # TODO: extract method: check_vm_still_alive
            try:
                self.validate_vm()
            except CoprWorkerSpawnFailError:
                self.terminate_instance()

    def update_process_title(self, suffix=None):
        """
        Set the process title to describe the current state of the worker.

        The title consists of the group name and worker number, followed by
        the VM ip and VM name when they are set, and the optional ``suffix``.

        :param suffix: extra text appended to the title (converted with ``str``)
        """
        parts = ["worker-{} {} ".format(self.group_name, self.worker_num)]
        if self.vm_ip:
            parts.append("VM_IP={} ".format(self.vm_ip))
        if self.vm_name:
            parts.append("VM_NAME={} ".format(self.vm_name))
        if suffix:
            parts.append(str(suffix))

        setproctitle("".join(parts))

    def run(self):
        """
        Worker should startup and check if it can function
        for each job it takes from the jobs queue
        run opts.setup_playbook to create the instance
        do the build (mockremote)
        terminate the instance.

        """
        self.init_fedmsg()

        while not self.kill_received:
            self.update_process_title()
            self.check_vm_still_alive()

            if self.opts.spawn_in_advance and not self.vm_ip:
                self.spawn_instance_with_check()

            job = self.obtain_job()
            if not job:
                time.sleep(self.opts.sleeptime)
                continue

            if not self.vm_ip:
                self.spawn_instance_with_check()

            try:
                self.do_job(job)
            except Exception as error:
                self.callback.log("Unhandled build error: {}".format(error))
            finally:
                # clean up the instance
                self.terminate_instance()

Example #12

Show file

File: print_queues.py Project: 0-T-0/copr

# coding: utf-8
"""
Dump the content of every copr backend task queue to stdout.

The inspection is destructive: each queue is drained, every task is printed
and the drained tasks are then enqueued again in the order they were taken.
"""

NUM_QUEUES = 2

import sys
sys.path.append("/usr/share/copr/")

from retask.task import Task
from retask.queue import Queue
from backend.helpers import BackendConfigReader

opts = BackendConfigReader().read()
redis_config = {
    'host': opts['redis_host'],
    'port': opts['redis_port'],
    'db': opts['redis_db'],
}

for i in range(0, NUM_QUEUES):
    print("## Queue {}".format(i))
    q = Queue("copr-be-{}".format(i), config=redis_config)
    q.connect()
    save_q = []
    try:
        while q.length != 0:
            task = q.dequeue()
            if not task:
                # The queue was emptied by somebody else between the length
                # check and the dequeue; there is nothing to print.
                break
            # Remember the task before printing it, so it is restored even
            # when printing fails.
            save_q.append(task)
            print(task.data)
    finally:
        # Always put the drained tasks back; otherwise any error above would
        # silently drop pending tasks.
        for t in save_q:
            q.enqueue(t)

Example #13

Show file

File: consumer.py Project: d1ffuz0r/retask

from retask.task import Task
from retask.queue import Queue

# Consume and print every task currently waiting in the 'example' queue.
queue = Queue('example')
queue.connect()
while queue.length != 0:
    task = queue.dequeue()
    if not task:
        # dequeue() came back empty (e.g. another consumer was faster);
        # there is nothing to print.
        break
    print(task.data)