예제 #1
0
    def get_jobs(self, req):
        """
        Request up to ``req.num_jobs`` job definitions from the PanDA server.

        One ``getJob`` call is made per requested job. Polling stops early when
        the server gives no response or reports a non-zero status other than
        the "no jobs in PanDA" case.

        :param req: request object; ``num_jobs`` is required, and any of the
                    attributes named as values in ``kmap`` below are forwarded
                    to the server when present.
        :return: CommunicationResponse. ``status`` is 0 and ``content`` is the
                 list of server replies when at least one job was obtained;
                 otherwise ``content`` is None and ``exception`` describes the
                 failure.
        """

        self.get_jobs_lock.acquire()

        try:
            jobs = []
            resp_attrs = None

            data = {'getProxyKey': 'False'}
            # Server-side field name -> attribute name on the request object.
            kmap = {'node': 'node', 'mem': 'mem', 'getProxyKey': 'getProxyKey', 'computingElement': 'queue', 'diskSpace': 'disk_space',
                    'siteName': 'site', 'prodSourceLabel': 'job_label', 'workingGroup': 'working_group', 'cpu': 'cpu'}
            for key, value in kmap.items():
                if hasattr(req, value):
                    data[key] = getattr(req, value)

            for _ in range(req.num_jobs):
                logger.info("Getting jobs: %s" % data)
                res = https.request('{pandaserver}/server/panda/getJob'.format(pandaserver=config.Pilot.pandaserver),
                                    data=data)
                logger.info("Got jobs returns: %s" % res)

                if res is None:
                    resp_attrs = {'status': None, 'content': None, 'exception': exception.CommunicationFailure("Get job failed to get response from Panda.")}
                    break
                elif res['StatusCode'] == 20 and 'no jobs in PanDA' in res['errorDialog']:
                    # NOTE(review): unlike the other failure branches this one
                    # does not break, so the server is asked again for each
                    # remaining job - confirm that this is intended.
                    resp_attrs = {'status': res['StatusCode'],
                                  'content': None,
                                  'exception': exception.CommunicationFailure("No jobs in panda")}
                elif res['StatusCode'] != 0:
                    resp_attrs = {'status': res['StatusCode'],
                                  'content': None,
                                  'exception': exception.CommunicationFailure("Get job from Panda returns a non-zero value: %s" % res['StatusCode'])}
                    break
                else:
                    jobs.append(res)

            # Any job obtained counts as success, even if a later call failed.
            if jobs:
                resp_attrs = {'status': 0, 'content': jobs, 'exception': None}
            elif not resp_attrs:
                resp_attrs = {'status': -1, 'content': None, 'exception': exception.UnknownException("Failed to get jobs")}

            resp = CommunicationResponse(resp_attrs)
        except Exception as e:  # Python 2/3
            logger.error("Failed to get jobs: %s, %s" % (e, traceback.format_exc()))
            resp_attrs = {'status': -1, 'content': None, 'exception': exception.UnknownException("Failed to get jobs: %s" % (traceback.format_exc()))}
            resp = CommunicationResponse(resp_attrs)
        finally:
            # Release here so the lock is freed even if the handler above raises.
            self.get_jobs_lock.release()

        return resp
예제 #2
0
    def get_events(self, req):
        """
        Download event ranges for a job from the PanDA server.

        :param req: request object providing ``jobid``, ``jobsetid``, ``taskid``
                    and ``num_ranges``; a falsy ``num_ranges`` is replaced by 1
                    on the request itself.
        :return: CommunicationResponse. ``status`` is 0 and ``content`` holds the
                 server's ``eventRanges`` on success; otherwise ``content`` is
                 None and ``exception`` describes the failure.
        """
        self.get_events_lock.acquire()

        resp = None
        # Start with the module logger so the except handler can always log,
        # even when creating the per-job logger is the call that failed.
        log = logger
        try:
            log = get_logger(str(req.jobid), logger)

            if not req.num_ranges:
                # ToBeFix num_ranges with corecount
                req.num_ranges = 1

            data = {
                'pandaID': req.jobid,
                'jobsetID': req.jobsetid,
                'taskID': req.taskid,
                'nRanges': req.num_ranges
            }

            log.info("Downloading new event ranges: %s" % data)
            res = https.request(
                '{pandaserver}/server/panda/getEventRanges'.format(
                    pandaserver=config.Pilot.pandaserver),
                data=data)
            log.info("Downloaded event ranges: %s" % res)

            if res is None:
                # No reply at all: report a communication failure rather than
                # letting the subscript below fail with a TypeError.
                resp_attrs = {
                    'status': None,
                    'content': None,
                    'exception': exception.CommunicationFailure(
                        "Get events failed to get response from Panda.")
                }
            elif res['StatusCode'] == 0 or str(res['StatusCode']) == '0':
                # The status code is accepted both as a number and as a string.
                resp_attrs = {
                    'status': 0,
                    'content': res['eventRanges'],
                    'exception': None
                }
            else:
                resp_attrs = {
                    'status': res['StatusCode'],
                    'content': None,
                    'exception': exception.CommunicationFailure(
                        "Get events from panda returns non-zero value: %s" %
                        res['StatusCode'])
                }

            resp = CommunicationResponse(resp_attrs)
        except Exception as e:  # Python 2/3
            log.error("Failed to download event ranges: %s, %s" %
                      (e, traceback.format_exc()))
            resp_attrs = {
                'status': -1,
                'content': None,
                'exception': exception.UnknownException(
                    "Failed to get events: %s" % (traceback.format_exc()))
            }
            resp = CommunicationResponse(resp_attrs)
        finally:
            # Always free the lock, otherwise every later call would block.
            self.get_events_lock.release()

        return resp
예제 #3
0
    def get_event_ranges(self, num_event_ranges=1, post_hook=None, job=None):
        """
        Queue a request for event ranges and, unless a post hook is given, wait for the reply.

        :param num_event_ranges: number of event ranges to ask for.
        :param post_hook: optional callback stored on the request; when it is
                          set, this method returns right after queueing.
        :param job: job mapping providing 'PandaID', 'jobsetID' and 'taskID'.
        :returns: None when the manager is stopping, when a post hook was
                  supplied, or when the response status is False; otherwise
                  the content of the response.
        :raise: CommunicationFailure when no job is given, or the exception
                carried by the response.
        """

        if self.is_stop():
            return None

        if not job:
            failure = exception.CommunicationFailure(
                "Get events failed because job info missing(job: %s)" % job)
            resp = CommunicationResponse(
                {'status': -1, 'content': None, 'exception': failure})
            raise resp.exception

        req = CommunicationRequest({
            'request_type': CommunicationRequest.RequestType.RequestEvents,
            'num_event_ranges': num_event_ranges,
            'post_hook': post_hook,
            'jobid': job['PandaID'],
            'jobsetid': job['jobsetID'],
            'taskid': job['taskID'],
            'num_ranges': num_event_ranges,
        })
        self.queues['request_get_events'].put(req)

        # With a post hook the caller does not wait for the response here.
        if req.post_hook:
            return None

        # Poll until a response has been attached to the request.
        while req.response is None:
            time.sleep(0.1)

        if req.response.exception:
            raise req.response.exception
        return None if req.response.status is False else req.response.content
예제 #4
0
    def run(self):
        """
        Main loop to handle communication requests.

        Loads the communication plugin, then repeatedly walks over the request
        queues named in ``processor``. For each queue that can be processed,
        one request is taken and passed to the plugin handler. Once the
        manager is asked to stop, every pending request is aborted with a
        CommunicationFailure response and the loop ends.
        """

        confs = self.get_plugin_confs()
        logger.info("Communication plugin confs: %s" % confs)
        communicator = self.get_plugin(confs)
        logger.info("Communication: %s" % communicator)

        # Per queue name:
        #   pre_check             - called without arguments; the queue is only
        #                           served when the returned status equals 0
        #   handler               - called with the request taken from the queue
        #   next_queue            - queue the request is forwarded to afterwards,
        #                           or None when the handler result is final
        #   process_req_post_hook - whether the request's post_hook is called
        #                           with the handler result
        processor = {
            'request_get_jobs': {
                'pre_check': communicator.pre_check_get_jobs,
                'handler': communicator.request_get_jobs,
                'next_queue': 'processing_get_jobs',
                'process_req_post_hook': False
            },
            'request_get_events': {
                'pre_check': communicator.pre_check_get_events,
                'handler': communicator.request_get_events,
                'next_queue': 'processing_get_events',
                'process_req_post_hook': False
            },
            'update_jobs': {
                'pre_check': communicator.pre_check_update_jobs,
                'handler': communicator.update_jobs,
                'next_queue': None,
                'process_req_post_hook': True
            },
            'update_events': {
                'pre_check': communicator.pre_check_update_events,
                'handler': communicator.update_events,
                'next_queue': None,
                'process_req_post_hook': True
            },
            'processing_get_jobs': {
                'pre_check': communicator.check_get_jobs_status,
                'handler': communicator.get_jobs,
                'next_queue': None,
                'process_req_post_hook': True
            },
            'processing_get_events': {
                'pre_check': communicator.check_get_events_status,
                'handler': communicator.get_events,
                'next_queue': None,
                'process_req_post_hook': True
            }
        }

        logger.info("Starting communication manager")
        while True:
            has_req = False
            for process_type in processor:
                if self.is_stop():
                    # Stopping: drain the queue and fail every pending request
                    # so that callers waiting on req.response are released.
                    while not self.queues[process_type].empty():
                        req = self.queues[process_type].get()
                        logger.info("Is going to stop, aborting request: %s" %
                                    req)
                        req.abort = True
                        resp_attrs = {
                            'status': None,
                            'content': None,
                            'exception': exception.CommunicationFailure(
                                "Communication manager is stopping, abort this request"
                            )
                        }
                        req.response = CommunicationResponse(resp_attrs)
                elif self.can_process_request(processor, process_type):
                    pre_check_resp = processor[process_type]['pre_check']()
                    if not pre_check_resp.status == 0:
                        continue

                    logger.info("Processing %s" % process_type)

                    has_req = True
                    req = self.queues[process_type].get()

                    logger.info("Processing %s request: %s" %
                                (process_type, req))
                    res = processor[process_type]['handler'](req)
                    logger.info("Processing %s respone: %s" %
                                (process_type, res))

                    # NOTE(review): this is an identity test against False, so
                    # numeric failure codes (e.g. -1) take the else branch -
                    # confirm which status values handlers return.
                    if res.status is False:
                        req.response = res
                    else:
                        next_queue = processor[process_type]['next_queue']
                        if next_queue:
                            # Two-step request: hand it to the follow-up queue
                            # and leave the response unset for now.
                            self.queues[next_queue].put(req)
                        else:
                            req.response = res
                        process_req_post_hook = processor[process_type][
                            'process_req_post_hook']
                        if process_req_post_hook and req.post_hook:
                            req.post_hook(res)

            if not has_req and self.is_stop():
                break
            # Pause on every pass, including idle ones, so the loop does not
            # spin at full speed while the queues are empty.
            time.sleep(0.01)
        logger.info("Communication manager stopped.")