Exemple #1
0
def _send_zmq_msg(job_id, command, data, address):
    """
    Send one request to the job monitor and return its decoded reply.

    A fresh ZMQ context and REQ socket are created for the call, connected
    to ``address``, and released again before returning (also on error).

    :param job_id: identifier placed in the message under ``"job_id"``.
    :param command: command string placed under ``"command"``.
    :param data: payload placed under ``"data"``; must be serializable by
                 ``zdumps``.
    :param address: ZMQ endpoint the REQ socket connects to.

    :returns: the reply received on the socket, decoded with ``zloads``.
    """
    logger = logging.getLogger(__name__)
    context = zmq.Context()
    try:
        zsocket = context.socket(zmq.REQ)
        try:
            logger.debug('Connecting to JobMonitor (%s)', address)
            zsocket.connect(address)

            # identify the sending machine in every message
            host_name = socket.gethostname()
            ip_address = socket.gethostbyname(host_name)

            msg_container = {
                "job_id": job_id,
                "host_name": host_name,
                "ip_address": ip_address,
                "command": command,
                "data": data,
            }

            # Send request
            logger.debug('Sending message: %s', msg_container)
            msg_string = zdumps(msg_container)
            zsocket.send(msg_string)

            # Get reply (blocks until the peer answers)
            return zloads(zsocket.recv())
        finally:
            # linger=0: once the reply has arrived nothing is pending, and
            # on an error path an unsent request must not make term() block.
            zsocket.close(linger=0)
    finally:
        # release the context's I/O thread instead of leaking one per call
        context.term()
Exemple #2
0
def _send_zmq_msg(job_id, command, data, address):
    """
    Send one request to the job monitor and return its decoded reply.

    A fresh ZMQ context and REQ socket are created for the call, connected
    to ``address``, and released again before returning (also on error).

    :param job_id: identifier placed in the message under ``"job_id"``.
    :param command: command string placed under ``"command"``.
    :param data: payload placed under ``"data"``; must be serializable by
                 ``zdumps``.
    :param address: ZMQ endpoint the REQ socket connects to.

    :returns: the reply received on the socket, decoded with ``zloads``.
    """
    logger = logging.getLogger(__name__)
    context = zmq.Context()
    try:
        zsocket = context.socket(zmq.REQ)
        try:
            logger.debug('Connecting to JobMonitor (%s)', address)
            zsocket.connect(address)

            # identify the sending machine in every message
            host_name = socket.gethostname()
            ip_address = socket.gethostbyname(host_name)

            msg_container = {
                "job_id": job_id,
                "host_name": host_name,
                "ip_address": ip_address,
                "command": command,
                "data": data,
            }

            # Send request
            logger.debug('Sending message: %s', msg_container)
            msg_string = zdumps(msg_container)
            zsocket.send(msg_string)

            # Get reply (blocks until the peer answers)
            return zloads(zsocket.recv())
        finally:
            # linger=0: once the reply has arrived nothing is pending, and
            # on an error path an unsent request must not make term() block.
            zsocket.close(linger=0)
    finally:
        # release the context's I/O thread instead of leaking one per call
        context.term()
Exemple #3
0
    def check(self, session_id, jobs):
        """
        Run the request/reply loop that serves input and output data.

        Stores ``jobs`` and ``session_id`` on the instance, starts a local
        heartbeat child process, and then answers one message at a time on
        ``self.socket`` until ``self.all_jobs_done()`` is true. Every
        received message gets exactly one reply.

        Messages from a known job are handled by ``msg["command"]``:

        * ``fetch_input``  -- reply with the job object.
        * ``store_output`` -- copy ``ret``/``traceback`` from the payload
          onto the job and reply ``"thanks"``.
        * ``heart_beat``   -- store the payload on the job, record memory
          and CPU load, and reply ``"all good"``.
        * ``get_job``      -- reply with the job object.

        Any other command from a known job gets an empty-string reply.
        Messages with ``job_id == -1`` (the local heartbeat) trigger
        ``self.check_if_alive()``; with command ``get_jobs`` they are
        answered with the full job list.

        :param session_id: DRMAA session id, kept for resubmissions.
        :param jobs: list of jobs to monitor; each must have an ``id``.
        """
        # save list of jobs
        self.jobs = jobs
        self.id_to_job = {job.id: job for job in self.jobs}

        # keep track of DRMAA session_id (for resubmissions)
        self.session_id = session_id

        # determines in which interval to check if jobs are alive
        self.logger.debug('Starting local hearbeat')
        local_heart = multiprocessing.Process(target=_heart_beat,
                                              args=(-1, self.home_address, -1,
                                                    "", CHECK_FREQUENCY))
        local_heart.start()
        try:
            self.logger.debug("Starting ZMQ event loop")
            # main loop
            while not self.all_jobs_done():
                self.logger.debug('Waiting for message')
                msg_str = self.socket.recv()
                # NOTE(review): if zloads unpickles, this trusts every peer
                # that can reach the socket -- confirm against zloads.
                msg = zloads(msg_str)
                self.logger.debug('Received message: %s', msg)
                return_msg = ""

                job_id = msg["job_id"]

                # only if its not the local beat
                if job_id != -1:
                    # If message is from a valid job, process that message
                    if job_id in self.id_to_job:
                        job = self.id_to_job[job_id]
                        command = msg["command"]

                        if command == "fetch_input":
                            return_msg = job
                            job.timestamp = datetime.now()
                            self.logger.debug("Received input request from %s",
                                              job_id)

                        elif command == "store_output":
                            # be nice
                            return_msg = "thanks"

                            # store tmp job object
                            if isinstance(msg["data"], Job):
                                tmp_job = msg["data"]
                                # copy relevant fields
                                job.ret = tmp_job.ret
                                job.traceback = tmp_job.traceback
                                self.logger.info("Received output from %s",
                                                  job_id)
                            # Returned exception instead of job, so store that
                            elif isinstance(msg["data"], tuple):
                                job.ret, job.traceback = msg["data"]
                                self.logger.info("Received exception from %s",
                                                  job_id)
                            else:
                                self.logger.error(("Received message with " +
                                                   "invalid data: %s"), msg)
                                job.ret = msg["data"]
                            job.timestamp = datetime.now()

                        elif command == "heart_beat":
                            job.heart_beat = msg["data"]

                            # keep track of mem and cpu
                            try:
                                job.track_mem.append(job.heart_beat["memory"])
                                job.track_cpu.append(job.heart_beat["cpu_load"])
                            except (ValueError, TypeError):
                                self.logger.error("Error decoding heart-beat",
                                                  exc_info=True)
                            return_msg = "all good"
                            job.timestamp = datetime.now()

                        if command == "get_job":
                            # serve job for display
                            return_msg = job
                        else:
                            # update host name (every command except the
                            # display-only get_job request)
                            job.host_name = msg["host_name"]
                    # If this is an unknown job, report it and reply
                    else:
                        self.logger.error(('Received message from unknown job' +
                                           ' with ID %s. Known job IDs are: ' +
                                           '%s'), job_id,
                                          list(self.id_to_job.keys()))
                        return_msg = 'thanks, but no thanks'
                else:
                    # run check
                    self.check_if_alive()

                    if msg["command"] == "get_jobs":
                        # serve list of jobs for display
                        return_msg = self.jobs

                # send back compressed response
                self.logger.debug('Sending reply: %s', return_msg)
                self.socket.send(zdumps(return_msg))
        finally:
            # Kill child processes that we don't need anymore
            local_heart.terminate()
            # Reap the terminated child so it does not linger as a zombie;
            # bounded so a child that ignores SIGTERM cannot hang us here.
            local_heart.join(timeout=1)