def get_jobs(self, req):
    """
    Get job definitions from the PanDA server.

    Issues up to ``req.num_jobs`` getJob requests while holding
    ``self.get_jobs_lock``. The lock is always released before returning,
    including when the error handling itself fails.

    :param req: request object; must provide ``num_jobs`` and may provide any
                of the attributes named as values in ``kmap`` below.
    :return: CommunicationResponse. Status 0 with the list of job definitions
             as content when at least one job was received; otherwise the
             failure recorded by the last unsuccessful request, or status -1
             with an UnknownException.
    """
    self.get_jobs_lock.acquire()
    try:
        jobs = []
        resp_attrs = None

        # Server field name -> name of the request attribute that feeds it.
        kmap = {'node': 'node',
                'mem': 'mem',
                'getProxyKey': 'getProxyKey',
                'computingElement': 'queue',
                'diskSpace': 'disk_space',
                'siteName': 'site',
                'prodSourceLabel': 'job_label',
                'workingGroup': 'working_group',
                'cpu': 'cpu'}

        # Default payload; overwritten below when the request carries getProxyKey.
        data = {'getProxyKey': 'False'}
        for key, attr in kmap.items():
            if hasattr(req, attr):
                data[key] = getattr(req, attr)

        for _ in range(req.num_jobs):
            logger.info("Getting jobs: %s" % data)
            res = https.request('{pandaserver}/server/panda/getJob'.format(pandaserver=config.Pilot.pandaserver),
                                data=data)
            logger.info("Got jobs returns: %s" % res)

            if res is None:
                resp_attrs = {'status': None,
                              'content': None,
                              'exception': exception.CommunicationFailure("Get job failed to get response from Panda.")}
                break
            elif res['StatusCode'] == 20 and 'no jobs in PanDA' in res['errorDialog']:
                # No break here: the remaining requests are still attempted.
                resp_attrs = {'status': res['StatusCode'],
                              'content': None,
                              'exception': exception.CommunicationFailure("No jobs in panda")}
            elif res['StatusCode'] != 0:
                resp_attrs = {'status': res['StatusCode'],
                              'content': None,
                              'exception': exception.CommunicationFailure("Get job from Panda returns a non-zero value: %s" % res['StatusCode'])}
                break
            else:
                jobs.append(res)

        if jobs:
            # Any received job wins over a failure recorded by another request.
            resp_attrs = {'status': 0, 'content': jobs, 'exception': None}
        elif not resp_attrs:
            resp_attrs = {'status': -1,
                          'content': None,
                          'exception': exception.UnknownException("Failed to get jobs")}

        resp = CommunicationResponse(resp_attrs)
    except Exception as e:  # Python 2/3
        trace = traceback.format_exc()
        logger.error("Failed to get jobs: %s, %s" % (e, trace))
        resp_attrs = {'status': -1,
                      'content': None,
                      'exception': exception.UnknownException("Failed to get jobs: %s" % trace)}
        resp = CommunicationResponse(resp_attrs)
    finally:
        # Release on every exit path so a failure cannot leave the lock held.
        self.get_jobs_lock.release()

    return resp
def get_events(self, req):
    """
    Get event ranges from the PanDA server.

    Sends one getEventRanges request while holding ``self.get_events_lock``.
    The lock is always released before returning.

    :param req: request object providing ``jobid``, ``jobsetid``, ``taskid``
                and ``num_ranges``; a falsy ``num_ranges`` is set to 1 on the
                request itself.
    :return: CommunicationResponse. Status 0 with ``res['eventRanges']`` as
             content on success; the server status code with a
             CommunicationFailure on a non-zero status; status -1 with an
             UnknownException when anything raises.
    """
    self.get_events_lock.acquire()

    resp = None
    # Start from the module logger so the error path can always log, even
    # when building the per-job logger is the step that fails.
    log = logger
    try:
        log = get_logger(str(req.jobid), logger)

        if not req.num_ranges:
            # ToBeFix num_ranges with corecount
            req.num_ranges = 1

        data = {'pandaID': req.jobid,
                'jobsetID': req.jobsetid,
                'taskID': req.taskid,
                'nRanges': req.num_ranges}

        log.info("Downloading new event ranges: %s" % data)
        res = https.request('{pandaserver}/server/panda/getEventRanges'.format(pandaserver=config.Pilot.pandaserver),
                            data=data)
        log.info("Downloaded event ranges: %s" % res)

        # The status code is accepted both as the number 0 and as the string '0'.
        if res['StatusCode'] == 0 or str(res['StatusCode']) == '0':
            resp_attrs = {'status': 0, 'content': res['eventRanges'], 'exception': None}
        else:
            resp_attrs = {'status': res['StatusCode'],
                          'content': None,
                          'exception': exception.CommunicationFailure("Get events from panda returns non-zero value: %s" % res['StatusCode'])}

        resp = CommunicationResponse(resp_attrs)
    except Exception as e:  # Python 2/3
        trace = traceback.format_exc()
        log.error("Failed to download event ranges: %s, %s" % (e, trace))
        resp_attrs = {'status': -1,
                      'content': None,
                      'exception': exception.UnknownException("Failed to get events: %s" % trace)}
        resp = CommunicationResponse(resp_attrs)
    finally:
        # Release on every exit path so a failure cannot leave the lock held.
        self.get_events_lock.release()

    return resp
def get_event_ranges(self, num_event_ranges=1, post_hook=None, job=None):
    """
    Queue a get-event-ranges request and, unless a post hook is given, wait
    for its response.

    :param num_event_ranges: number of event ranges to ask for.
    :param post_hook: optional callback stored on the request; when the
                      request carries one, this function returns right after
                      queueing the request.
    :param job: job dictionary providing 'PandaID', 'jobsetID' and 'taskID'.
    :returns: the response content (event ranges); None when the manager is
              stopping, when a post hook is used, or when the response status
              is False.
    :raise: the exception held by a failure response when ``job`` is missing,
            or the exception carried by the received response.
    """
    if self.is_stop():
        return None

    if not job:
        failure = CommunicationResponse({
            'status': -1,
            'content': None,
            'exception': exception.CommunicationFailure(
                "Get events failed because job info missing(job: %s)" % job),
        })
        raise failure.exception

    req = CommunicationRequest({
        'request_type': CommunicationRequest.RequestType.RequestEvents,
        'num_event_ranges': num_event_ranges,
        'post_hook': post_hook,
        'jobid': job['PandaID'],
        'jobsetid': job['jobsetID'],
        'taskid': job['taskID'],
        'num_ranges': num_event_ranges,
    })
    self.queues['request_get_events'].put(req)

    if req.post_hook:
        # Nothing to wait for here; the caller supplied a hook for the result.
        return

    # Poll until a response has been attached to the request.
    while req.response is None:
        time.sleep(0.1)

    response = req.response
    if response.exception:
        raise response.exception
    return None if response.status is False else response.content
def run(self):
    """
    Main loop to handle communication requests.

    Loads the communication plugin, builds a table describing how each
    request queue is served, then repeatedly walks that table until
    ``self.is_stop()`` is true and a full pass handled no request.
    """
    confs = self.get_plugin_confs()
    logger.info("Communication plugin confs: %s" % confs)
    communicator = self.get_plugin(confs)
    logger.info("Communication: %s" % communicator)

    # One entry per queue name in self.queues:
    #   pre_check             - called without arguments; the queue is only
    #                           served when the returned status equals 0
    #   handler               - called with the request taken from the queue
    #   next_queue            - queue that receives the request after the
    #                           handler ran, or None to store the handler
    #                           result on the request instead
    #   process_req_post_hook - whether the request's post_hook may be called
    #                           with the handler result
    processor = {
        'request_get_jobs': {
            'pre_check': communicator.pre_check_get_jobs,
            'handler': communicator.request_get_jobs,
            'next_queue': 'processing_get_jobs',
            'process_req_post_hook': False
        },
        'request_get_events': {
            'pre_check': communicator.pre_check_get_events,
            'handler': communicator.request_get_events,
            'next_queue': 'processing_get_events',
            'process_req_post_hook': False
        },
        'update_jobs': {
            'pre_check': communicator.pre_check_update_jobs,
            'handler': communicator.update_jobs,
            'next_queue': None,
            'process_req_post_hook': True
        },
        'update_events': {
            'pre_check': communicator.pre_check_update_events,
            'handler': communicator.update_events,
            'next_queue': None,
            'process_req_post_hook': True
        },
        'processing_get_jobs': {
            'pre_check': communicator.check_get_jobs_status,
            'handler': communicator.get_jobs,
            'next_queue': None,
            'process_req_post_hook': True
        },
        'processing_get_events': {
            'pre_check': communicator.check_get_events_status,
            'handler': communicator.get_events,
            'next_queue': None,
            'process_req_post_hook': True
        }
    }

    logger.info("Starting communication manager")
    while True:
        # Set when at least one request was handled during this pass.
        has_req = False
        for process_type in processor:
            if self.is_stop():
                # Stopping: drain the queue and answer every pending request
                # with a failure response so nothing is left without one.
                while not self.queues[process_type].empty():
                    req = self.queues[process_type].get()
                    logger.info("Is going to stop, aborting request: %s" % req)
                    req.abort = True
                    resp_attrs = {
                        'status': None,
                        'content': None,
                        'exception': exception.CommunicationFailure(
                            "Communication manager is stopping, abort this request"
                        )
                    }
                    req.response = CommunicationResponse(resp_attrs)
            elif self.can_process_request(processor, process_type):
                pre_check_resp = processor[process_type]['pre_check']()
                if not pre_check_resp.status == 0:
                    # Pre-check did not report status 0; skip this queue for now.
                    continue
                logger.info("Processing %s" % process_type)
                has_req = True
                req = self.queues[process_type].get()
                logger.info("Processing %s request: %s" % (process_type, req))
                res = processor[process_type]['handler'](req)
                logger.info("Processing %s respone: %s" % (process_type, res))
                # NOTE(review): the get_jobs/get_events handlers visible in
                # this file build statuses of 0, -1, None or a server code,
                # so this identity test against False may never match - confirm.
                if res.status is False:
                    req.response = res
                else:
                    next_queue = processor[process_type]['next_queue']
                    if next_queue:
                        # Hand the request over to the follow-up queue; its
                        # response is set when that queue is served.
                        self.queues[next_queue].put(req)
                    else:
                        req.response = res
                    # NOTE(review): the post-hook step is placed inside this
                    # branch; the original layout did not make the nesting
                    # explicit - confirm it should not also run on failure.
                    process_req_post_hook = processor[process_type][
                        'process_req_post_hook']
                    if process_req_post_hook and req.post_hook:
                        req.post_hook(res)
        if not has_req:
            if self.is_stop():
                break
            else:
                # Nothing was handled in this pass; pause briefly before polling again.
                time.sleep(0.01)
    logger.info("Communication manager stopped.")