def __init__(self,
             radb_busname=RADB_BUSNAME,
             radb_servicename=RADB_SERVICENAME,
             re_busname=RE_BUSNAME,
             re_servicename=RE_SERVICENAME,
             ssdb_busname=DEFAULT_SSDB_BUSNAME,
             ssdb_servicename=DEFAULT_SSDB_SERVICENAME,
             otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
             otdb_servicename=DEFAULT_OTDB_SERVICENAME,
             broker=None):
    """
    ResourceAssigner inserts/updates tasks in the radb and assigns resources to it based on incoming parset.

    Only constructs the RPC clients; nothing is opened here.

    :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
    :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
    :param re_busname: busname on which the resource estimator service listens (default: RE_BUSNAME)
    :param re_servicename: servicename of the resource estimator service (default: RE_SERVICENAME)
    :param ssdb_busname: busname on which the ssdb service listens (default: DEFAULT_SSDB_BUSNAME)
    :param ssdb_servicename: servicename of the ssdb service (default: DEFAULT_SSDB_SERVICENAME)
    :param otdb_busname: busname on which the otdb service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
    :param otdb_servicename: servicename of the otdb service (default: DEFAULT_OTDB_SERVICENAME)
    :param broker: broker host handed to every RPC client (default: None)
    """
    self.radbrpc = RARPC(servicename=radb_servicename,
                         busname=radb_busname,
                         broker=broker)
    # The estimator uses the generic RPC class, so exception forwarding is requested explicitly.
    self.rerpc = RPC(re_servicename,
                     busname=re_busname,
                     broker=broker,
                     ForwardExceptions=True)
    self.ssdbrpc = SSDBRPC(servicename=ssdb_servicename,
                           busname=ssdb_busname,
                           broker=broker)
    ## , ForwardExceptions=True hardcoded in RPCWrapper right now
    self.otdbrpc = OTDBRPC(busname=otdb_busname,
                           servicename=otdb_servicename,
                           broker=broker)
def __init__(self,
             otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
             otdb_service_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
             **kwargs):
    """
    Set up the listener on the OTDB notification bus plus the helpers it needs.

    :param otdb_notification_busname: bus passed to the parent listener as ``busname``
    :param otdb_service_busname: bus used by the OTDB RPC client; also stored on the instance
    :param kwargs: forwarded unchanged to the parent constructor
    """
    super(PipelineControl, self).__init__(busname=otdb_notification_busname,
                                          **kwargs)

    self.otdb_service_busname = otdb_service_busname
    self.otdbrpc = OTDBRPC(busname=otdb_service_busname)
    self.slurm = Slurm()
class TaskManagementHandler(ServiceMessageHandler):
    """Service handler that aborts tasks identified by their OTDB id."""

    def handle_message(self, msg):
        """Deliberately does nothing with the incoming message."""
        pass

    def __init__(self):
        """Create the RADB, OTDB and observation-control RPC clients with their defaults."""
        super(TaskManagementHandler, self).__init__()
        self.radb = RADBRPC()
        self.otdb = OTDBRPC()
        self.obs_ctrl = ObservationControlRPCClient()

    def AbortTask(self, otdb_id):
        """aborts tasks based on otdb id

        Active observations are aborted through observation control; any other
        task is set to 'aborted' in OTDB.

        :param otdb_id: OTDB id of the task to abort
        :return: dict with aborted key saying if aborting was succesful and otdb_id key
        """
        if self._is_active_observation(otdb_id):
            abort = self._abort_active_observation
        else:
            abort = self._abort_inactive_task
        return {"aborted": abort(otdb_id), "otdb_id": otdb_id}

    def _is_active_observation(self, otdb_id):
        """Return True when the task is an observation that is running or queued."""
        task_type, task_status = self._get_task_type_and_status(otdb_id)
        is_observation = task_type == "observation"
        is_active = task_status == "running" or task_status == "queued"
        return is_observation and is_active

    def _abort_inactive_task(self, otdb_id):
        """Set the OTDB status to 'aborted'; return False if the OTDB RPC raised."""
        logger.info("Aborting inactive task: %s", otdb_id)
        try:
            self.otdb.taskSetStatus(otdb_id=otdb_id, new_status="aborted")
        except OTDBPRCException:
            return False
        return True

    def _abort_active_observation(self, otdb_id):
        """Ask observation control to abort; only a literal True counts as success."""
        logger.info("Aborting active task: %s", otdb_id)
        response = self.obs_ctrl.abort_observation(otdb_id)
        return response["aborted"] is True

    def _get_task_type_and_status(self, otdb_id):
        """Fetch the task from the RADB and return its (type, status) pair."""
        # NOTE(review): the id is passed positionally -- confirm that getTask's
        # first positional parameter is meant to receive an OTDB id.
        radb_task = self.radb.getTask(otdb_id)
        return radb_task["type"], radb_task['status']
class PipelineControl(OTDBBusListener):
    """OTDB bus listener that holds an OTDB RPC client and a Slurm helper."""

    def __init__(self,
                 otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
                 otdb_service_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 **kwargs):
        """
        :param otdb_notification_busname: bus passed to the parent listener as ``busname``
        :param otdb_service_busname: bus used by the OTDB RPC client; also stored on the instance
        :param kwargs: forwarded unchanged to the parent constructor
        """
        super(PipelineControl, self).__init__(busname=otdb_notification_busname, **kwargs)

        self.otdb_service_busname = otdb_service_busname
        self.otdbrpc = OTDBRPC(busname=otdb_service_busname)
        self.slurm = Slurm()

    def _setStatus(self, obsid, status):
        """
        Request a status change for a task in OTDB, ignoring RPC timeouts.

        :param obsid: OTDB id of the task
        :param status: new status to set
        """
        try:
            self.otdbrpc.taskSetStatus(otdb_id=obsid, new_status=status)
        except RPCTimeoutException:
            # "except X, e" only parses on Python 2; this form is valid on
            # Python 2 and 3 and the exception object was never used.
            # We use a queue, so delivery is guaranteed. We don't care about the answer.
            pass
def __init__(self,
             exchange=DEFAULT_BUSNAME,
             broker=DEFAULT_BROKER,
             radb_dbcreds=None):
    """
    Creates a ResourceAssigner instance

    :param exchange: name of the bus on which the services listen (default: DEFAULT_BUSNAME)
    :param broker: broker host used for every RPC client and the notification bus (default: DEFAULT_BROKER)
    :param radb_dbcreds: the credentials to be used for accessing the RADB (default: None)
    """
    # Every bus-based collaborator is created with the same exchange/broker pair.
    bus_args = {"exchange": exchange, "broker": broker}

    self.radb = RADatabase(dbcreds=radb_dbcreds)
    self.rerpc = ResourceEstimatorRPC.create(**bus_args)
    self.otdbrpc = OTDBRPC.create(**bus_args)
    self.momrpc = MoMQueryRPC.create(**bus_args)
    self.sqrpc = StorageQueryRPC.create(**bus_args)
    self.curpc = CleanupRPC.create(**bus_args)
    self.ra_notification_bus = ToBus(**bus_args)
    self.obscontrol = ObservationControlRPCClient.create(**bus_args)

    self.resource_availability_checker = ResourceAvailabilityChecker(self.radb)

    # For the DwellScheduler instances created during run-time we store the following variables
    self.radb_creds = radb_dbcreds
    self.broker = broker
def __init__(self, exchange=DEFAULT_BUSNAME, broker=DEFAULT_BROKER):
    """
    Creates a TaskInfoCache instance, which listens for OTDB task status events,
    and then fetches and caches relevant info for the current active task(s).

    :param exchange: exchange handed to each internal RPC client
    :param broker: broker handed to each internal RPC client
    """
    # otdb_id -> TaskInfo
    self._cache = {}

    # project_name -> project_info_dict
    self._project_cache = {}

    # list of the currently used stations
    self._stations_cache = []

    # internal rpc's to fetch the needed information; all share one configuration
    rpc_args = {"exchange": exchange,
                "broker": broker,
                "timeout": DEFAULT_RPC_TIMEOUT}
    self._otdbrpc = OTDBRPC.create(**rpc_args)
    self._momrpc = MoMQueryRPC.create(**rpc_args)
    self._radbrpc = RADBRPC.create(**rpc_args)
def __init__(self,
             radb_busname=RADB_BUSNAME,
             radb_servicename=RADB_SERVICENAME,
             radb_broker=None,
             otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
             otdb_servicename=DEFAULT_OTDB_SERVICENAME,
             mom_busname=DEFAULT_MOMQUERY_BUSNAME,
             mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
             otdb_broker=None,
             mom_broker=None,
             broker=None):
    """
    RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.

    :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
    :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
    :param radb_broker: broker host for the radb rpc (default: None)
    :param otdb_busname: busname on which the OTDB service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
    :param otdb_servicename: servicename of the OTDB service (default: DEFAULT_OTDB_SERVICENAME)
    :param mom_busname: busname on which the MoM query service listens (default: DEFAULT_MOMQUERY_BUSNAME)
    :param mom_servicename: servicename of the MoM query service (default: DEFAULT_MOMQUERY_SERVICENAME)
    :param otdb_broker: broker host for the otdb rpc (default: None)
    :param mom_broker: broker host for the mom rpc (default: None)
    :param broker: if truthy, overrules radb_broker, otdb_broker and mom_broker (default: None)
    """
    if broker:
        # One shared broker wins over the three service-specific ones.
        radb_broker = otdb_broker = mom_broker = broker

    ## , ForwardExceptions=True hardcoded in RPCWrapper right now
    self.radbrpc = RADBRPC(busname=radb_busname,
                           servicename=radb_servicename,
                           broker=radb_broker)
    ## , ForwardExceptions=True hardcoded in RPCWrapper right now
    self.otdbrpc = OTDBRPC(busname=otdb_busname,
                           servicename=otdb_servicename,
                           broker=otdb_broker)
    self.momrpc = MoMQueryRPC(busname=mom_busname,
                              servicename=mom_servicename,
                              broker=mom_broker)
    self.translator = RAtoOTDBTranslator()
def _send_filtered_event_message(self, otdb_id: int, modificationTime: datetime, state: str):
    """
    Send a filtered OTDB status event whose priority depends on the task type.

    The task's parset is fetched from OTDB: process type "Observation" gets
    priority 6, anything else priority 2. If that lookup fails for any reason
    priority 4 is used. Failures while building or sending the message are
    logged and swallowed.

    :param otdb_id: OTDB id, sent as "treeID"
    :param modificationTime: sent as "time_of_change"
    :param state: sent as "state"
    """
    try:
        with OTDBRPC.create(exchange=self.exchange, broker=self.broker, timeout=2) as otdbrpc:
            spec_response = otdbrpc.taskGetSpecification(otdb_id=otdb_id)
            parset = parameterset(spec_response.get("specification", ''))
            if parset.get("ObsSW.Observation.processType") == "Observation":
                priority = 6
            else:
                priority = 2
    except Exception as e:
        logger.warning('Could not determine task type for otdb_id=%s, using default priority=4: %s',
                       otdb_id, e)
        priority = 4

    try:
        content = {"treeID": otdb_id,
                   "state": state,
                   "time_of_change": modificationTime}
        msg = EventMessage(subject=DEFAULT_FILTERED_OTDB_NOTIFICATION_SUBJECT,
                           content=content,
                           priority=priority)
        logger.info("sending filtered event message subject:'%s' content: %s",
                    msg.subject, content)
        self.send(msg)
    except Exception as e:
        logger.error('Could not send event message: %s', e)
def __init__(self, exchange, broker):
    """
    Create the handler's OTDB rpc, dependency resolver and Slurm helper.

    :param exchange: exchange used by the rpc and dependency helper; also stored on the instance
    :param broker: broker used by the rpc and dependency helper
    """
    super(PipelineControlHandler, self).__init__()

    logger.info('PipelineControl busname=%s', exchange)
    self.exchange = exchange

    bus_args = {"exchange": exchange, "broker": broker}
    self.otdbrpc = OTDBRPC.create(**bus_args)
    self.dependencies = PipelineDependencies(**bus_args)
    self.slurm = Slurm()
def do_qa(self, otdb_id):
    '''
    try to do all qa (quality assurance) steps for the given otdb_id
    resulting in an h5 MS-extract file and inspection plots

    Returns early (without sending an event) when no parset is found or when
    neither correlated nor coherent-stokes output is enabled.

    :param int otdb_id: observation/pipeline otdb id for which the conversion needs to be done.
    :return: None
    '''
    h5_path = None

    with OTDBRPC.create(exchange=self.exchange, broker=self.broker, timeout=5) as otdbrpc:
        spec_response = otdbrpc.taskGetSpecification(otdb_id=otdb_id)
        parset = parameterset(spec_response.get("specification", ''))

        if not parset:
            logger.warning("could not find a parset for otdb_id %s.", otdb_id)
            return

        # Pick the converter that matches the enabled output dataproduct.
        if parset.getBool('ObsSW.Observation.DataProducts.Output_Correlated.enabled'):
            h5_path = self._convert_ms2hdf5(otdb_id)
        elif parset.getBool('ObsSW.Observation.DataProducts.Output_CoherentStokes.enabled'):
            h5_path = self._convert_bf2hdf5(otdb_id)
        else:
            logger.info("No uv or cs dataproducts avaiblable to convert for otdb_id %s", otdb_id)
            return

    if not h5_path:
        return

    # keep a note of where the h5 file was stored for this unfinished otdb_id
    self._unfinished_otdb_id_map[otdb_id] = h5_path

    # cluster it
    self._cluster_h5_file(h5_path, otdb_id)

    self._copy_hdf5_to_nfs_dir(h5_path)

    local_plot_dir = self._create_plots_for_h5_file(h5_path, otdb_id)
    plot_dir_path = self._move_plots_to_nfs_dir(local_plot_dir)

    # and notify that we're finished
    finished_content = {'otdb_id': otdb_id,
                        'hdf5_file_path': h5_path,
                        'plot_dir_path': plot_dir_path or ''}
    self._send_event_message('Finished', finished_content)
def __init__(self, exchange=DEFAULT_BUSNAME, broker=DEFAULT_BROKER):
    """
    Create the OTDB, RADB and MoM rpc clients and the outgoing bus.

    :param exchange: name of the exchange to listen on.
    :param broker: name of the broker to connect to.
    """
    super().__init__()

    bus_args = {"exchange": exchange, "broker": broker}
    self.otdbrpc = OTDBRPC.create(**bus_args)
    self.radbrpc = RADBRPC.create(**bus_args)
    self.momrpc = MoMQueryRPC.create(**bus_args)
    self.send_bus = ToBus(**bus_args)
def __init__(self, exchange=DEFAULT_BUSNAME, broker=DEFAULT_BROKER):
    """
    RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.

    :param exchange: exchange on which the services listen (default: DEFAULT_BUSNAME)
    :param broker: broker host handed to every rpc client (default: DEFAULT_BROKER)
    """
    bus_args = {"exchange": exchange, "broker": broker}

    self.radbrpc = RADBRPC.create(**bus_args)
    self.otdbrpc = OTDBRPC.create(**bus_args)
    self.momrpc = MoMQueryRPC.create(**bus_args)
    self.translator = RAtoOTDBTranslator()
def __init__(self,
             otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
             otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT,
             otdb_service_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
             otdb_service_subject=DEFAULT_OTDB_SERVICENAME,
             notification_busname=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_BUSNAME,
             notification_subject=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_SUBJECT,
             broker=None,
             **kwargs):
    """
    Listen for OTDB notifications and prepare the OTDB rpc and outgoing bus.

    :param otdb_notification_busname: bus passed to the parent listener as ``busname``
    :param otdb_notification_subject: subject passed to the parent listener
    :param otdb_service_busname: bus of the OTDB service used by the rpc client
    :param otdb_service_subject: passed to the rpc client as ``servicename``
    :param notification_busname: bus part of the outgoing "<bus>/<subject>" address
    :param notification_subject: subject part of the outgoing address
    :param broker: broker host passed to the OTDB rpc client only
    :param kwargs: forwarded unchanged to the parent constructor
    """
    super(RATaskSpecified, self).__init__(busname=otdb_notification_busname,
                                          subject=otdb_notification_subject,
                                          **kwargs)

    ## , ForwardExceptions=True hardcoded in RPCWrapper right now
    self.otdbrpc = OTDBRPC(busname=otdb_service_busname,
                           servicename=otdb_service_subject,
                           broker=broker)

    # NOTE(review): broker is not forwarded to ToBus -- confirm that is intended.
    send_address = "%s/%s" % (notification_busname, notification_subject)
    self.send_bus = ToBus(send_address)
def __init__(self, exchange=DEFAULT_BUSNAME, broker=DEFAULT_BROKER):
    """
    Initialise the idle state and create the rpc clients.

    :param exchange: exchange handed to each rpc client
    :param broker: broker handed to each rpc client
    """
    # No worker thread yet, and not running.
    self._thread = None
    self._running = False

    bus_args = {"exchange": exchange, "broker": broker}
    self._radbrpc = RADBRPC.create(**bus_args)
    self._momrpc = MoMQueryRPC.create(**bus_args)
    self._curpc = CleanupRPC.create(**bus_args)
    # Only the OTDB client gets an explicit timeout.
    self._otdbrpc = OTDBRPC.create(timeout=180, **bus_args)
def __init__(self,
             otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
             otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT,
             otdb_service_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
             otdb_service_subject=DEFAULT_OTDB_SERVICENAME,
             notification_busname=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_BUSNAME,
             notification_subject=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_SUBJECT,
             broker=None,
             **kwargs):
    """
    Listen for OTDB notifications and prepare the OTDB rpc and outgoing bus.

    :param otdb_notification_busname: bus passed to the parent listener as ``busname``
    :param otdb_notification_subject: subject passed to the parent listener
    :param otdb_service_busname: bus of the OTDB service used by the rpc client
    :param otdb_service_subject: passed to the rpc client as ``servicename``
    :param notification_busname: bus part of the outgoing "<bus>/<subject>" address
    :param notification_subject: subject part of the outgoing address
    :param broker: broker host passed to the OTDB rpc client only
    :param kwargs: forwarded unchanged to the parent constructor
    """
    listener_args = dict(kwargs,
                         busname=otdb_notification_busname,
                         subject=otdb_notification_subject) \
        if not ("busname" in kwargs or "subject" in kwargs) else None
    if listener_args is None:
        # Keep the duplicate-keyword TypeError of a direct call.
        super(RATaskSpecified, self).__init__(busname=otdb_notification_busname,
                                              subject=otdb_notification_subject,
                                              **kwargs)
    else:
        super(RATaskSpecified, self).__init__(**listener_args)

    ## , ForwardExceptions=True hardcoded in RPCWrapper right now
    self.otdbrpc = OTDBRPC(busname=otdb_service_busname,
                           servicename=otdb_service_subject,
                           broker=broker)

    # NOTE(review): broker is not forwarded to ToBus -- confirm that is intended.
    self.send_bus = ToBus("%s/%s" % (notification_busname, notification_subject))
def __init__(self,
             radb_busname=RADB_BUSNAME,
             radb_servicename=RADB_SERVICENAME,
             radb_broker=None,
             otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
             otdb_servicename=DEFAULT_OTDB_SERVICENAME,
             mom_busname=DEFAULT_MOMQUERY_BUSNAME,
             mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
             otdb_broker=None,
             mom_broker=None,
             broker=None):
    """
    RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.

    :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
    :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
    :param radb_broker: broker host for the radb rpc (default: None)
    :param otdb_busname: busname on which the OTDB service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
    :param otdb_servicename: servicename of the OTDB service (default: DEFAULT_OTDB_SERVICENAME)
    :param mom_busname: busname on which the MoM query service listens (default: DEFAULT_MOMQUERY_BUSNAME)
    :param mom_servicename: servicename of the MoM query service (default: DEFAULT_MOMQUERY_SERVICENAME)
    :param otdb_broker: broker host for the otdb rpc (default: None)
    :param mom_broker: broker host for the mom rpc (default: None)
    :param broker: if truthy, overrules radb_broker, otdb_broker and mom_broker (default: None)
    """
    # A truthy shared broker replaces all three service-specific brokers.
    chosen_radb_broker = broker if broker else radb_broker
    chosen_otdb_broker = broker if broker else otdb_broker
    chosen_mom_broker = broker if broker else mom_broker

    ## , ForwardExceptions=True hardcoded in RPCWrapper right now
    self.radbrpc = RADBRPC(busname=radb_busname,
                           servicename=radb_servicename,
                           broker=chosen_radb_broker)
    ## , ForwardExceptions=True hardcoded in RPCWrapper right now
    self.otdbrpc = OTDBRPC(busname=otdb_busname,
                           servicename=otdb_servicename,
                           broker=chosen_otdb_broker)
    self.momrpc = MoMQueryRPC(busname=mom_busname,
                              servicename=mom_servicename,
                              broker=chosen_mom_broker)
    self.translator = RAtoOTDBTranslator()
def __init__(self,
             radb_busname=RADB_BUSNAME,
             radb_servicename=RADB_SERVICENAME,
             re_busname=RE_BUSNAME,
             re_servicename=RE_SERVICENAME,
             ssdb_busname=DEFAULT_SSDB_BUSNAME,
             ssdb_servicename=DEFAULT_SSDB_SERVICENAME,
             otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
             otdb_servicename=DEFAULT_OTDB_SERVICENAME,
             broker=None):
    """
    ResourceAssigner inserts/updates tasks in the radb and assigns resources to it based on incoming parset.

    Only constructs the RPC clients; nothing is opened here.

    :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
    :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
    :param re_busname: busname on which the resource estimator service listens (default: RE_BUSNAME)
    :param re_servicename: servicename of the resource estimator service (default: RE_SERVICENAME)
    :param ssdb_busname: busname on which the ssdb service listens (default: DEFAULT_SSDB_BUSNAME)
    :param ssdb_servicename: servicename of the ssdb service (default: DEFAULT_SSDB_SERVICENAME)
    :param otdb_busname: busname on which the otdb service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
    :param otdb_servicename: servicename of the otdb service (default: DEFAULT_OTDB_SERVICENAME)
    :param broker: broker host handed to every RPC client (default: None)
    """
    self.radbrpc = RARPC(servicename=radb_servicename,
                         busname=radb_busname,
                         broker=broker)
    # The estimator uses the generic RPC class, so exception forwarding is requested explicitly.
    self.rerpc = RPC(re_servicename,
                     busname=re_busname,
                     broker=broker,
                     ForwardExceptions=True)
    self.ssdbrpc = SSDBRPC(servicename=ssdb_servicename,
                           busname=ssdb_busname,
                           broker=broker)
    ## , ForwardExceptions=True hardcoded in RPCWrapper right now
    self.otdbrpc = OTDBRPC(busname=otdb_busname,
                           servicename=otdb_servicename,
                           broker=broker)
def __init__(self):
    """Initialise the parent handler and create the three rpc clients with their defaults."""
    super(TaskManagementHandler, self).__init__()

    # RADB supplies task type/status, OTDB sets statuses, observation control aborts.
    self.radb = RADBRPC()
    self.otdb = OTDBRPC()
    self.obs_ctrl = ObservationControlRPCClient()
def __init__(self, exchange=DEFAULT_BUSNAME, broker=DEFAULT_BROKER):
    """
    Create the RADB and OTDB rpc clients used to resolve pipeline dependencies.

    :param exchange: exchange handed to both rpc clients
    :param broker: broker handed to both rpc clients
    """
    bus_args = {"exchange": exchange, "broker": broker}

    self.rarpc = RADBRPC.create(**bus_args)
    logger.info('PipelineDependencies busname=%s', exchange)
    self.otdbrpc = OTDBRPC.create(**bus_args)
def init_tobus(self, exchange: str = DEFAULT_BUSNAME, broker: str = DEFAULT_BROKER):
    """
    Run the parent's bus initialisation, then create the OTDB and RADB rpc clients.

    :param exchange: exchange passed to the parent and to both rpc clients
    :param broker: broker passed to the parent and to both rpc clients
    """
    super().init_tobus(exchange, broker)

    bus_args = {"exchange": exchange, "broker": broker}
    self.otdb = OTDBRPC.create(**bus_args)
    self.radb = RADBRPC.create(**bus_args)
def main():
    """
    Entry point of the resource assignment editor web service.

    Parses the command line, configures logging, creates the module-level rpc
    clients and changes handler, and runs the web app while all of them are open.
    """
    global _radb_dbcreds, rarpc, otdbrpc, curpc, sqrpc, momqueryrpc, changeshandler

    # make sure we run in UTC timezone
    import os
    import time
    os.environ['TZ'] = 'UTC'
    # Setting TZ alone is not guaranteed to change the time module's notion of
    # local time; tzset() re-reads the variable. It only exists on Unix.
    if hasattr(time, 'tzset'):
        time.tzset()

    # Check the invocation arguments
    parser = OptionParser('%prog [options]',
                          description='run the resource assignment editor web service')
    parser.add_option('--webserver_port', dest='webserver_port', type='int', default=7412,
                      help='port number on which to host the webservice, default: %default')
    parser.add_option('-q', '--broker', dest='broker', type='string', default=DEFAULT_BROKER,
                      help='Address of the qpid broker, default: %default')
    parser.add_option('--exchange', dest='exchange', type='string', default=DEFAULT_BUSNAME,
                      help='Name of the bus exchange on the qpid broker, default: %default')
    parser.add_option('-V', '--verbose', dest='verbose', action='store_true',
                      help='verbose logging')
    parser.add_option_group(dbcredentials.options_group(parser))
    parser.set_defaults(dbcredentials="RADB")
    (options, args) = parser.parse_args()

    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.DEBUG if options.verbose else logging.INFO)

    # Keep the parsed credentials only when they name a database; otherwise
    # the module-level value is None.
    _radb_dbcreds = dbcredentials.parse_options(options)
    if _radb_dbcreds.database:
        logger.info("Using dbcreds for direct RADB access: %s",
                    _radb_dbcreds.stringWithHiddenPassword())
    else:
        _radb_dbcreds = None

    rarpc = RADBRPC.create(exchange=options.exchange, broker=options.broker)
    otdbrpc = OTDBRPC.create(exchange=options.exchange, broker=options.broker)
    curpc = CleanupRPC.create(exchange=options.exchange, broker=options.broker)
    sqrpc = StorageQueryRPC.create(exchange=options.exchange, timeout=10, broker=options.broker)
    momqueryrpc = MoMQueryRPC.create(exchange=options.exchange, timeout=10, broker=options.broker)
    changeshandler = ChangesHandler(exchange=options.exchange,
                                    broker=options.broker,
                                    momqueryrpc=momqueryrpc,
                                    radbrpc=rarpc,
                                    sqrpc=sqrpc)

    with changeshandler, rarpc, otdbrpc, curpc, sqrpc, momqueryrpc:
        # Start the webserver
        app.run(debug=options.verbose,
                threaded=True,
                host='0.0.0.0',
                port=options.webserver_port)
class RATaskSpecified(OTDBBusListener):
    """
    OTDB bus listener that, for approved/prescheduled tasks, collects the task's
    specification together with those of its predecessors and sends the
    resulting tree on the notification bus.
    """

    def __init__(self,
                 otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
                 otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT,
                 otdb_service_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_service_subject=DEFAULT_OTDB_SERVICENAME,
                 notification_busname=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_BUSNAME,
                 notification_subject=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_SUBJECT,
                 broker=None, **kwargs):
        """
        :param otdb_notification_busname: bus passed to the parent listener as ``busname``
        :param otdb_notification_subject: subject passed to the parent listener
        :param otdb_service_busname: bus of the OTDB service used by the rpc client
        :param otdb_service_subject: passed to the rpc client as ``servicename``
        :param notification_busname: bus part of the outgoing "<bus>/<subject>" address
        :param notification_subject: subject part of the outgoing address
        :param broker: broker host passed to the OTDB rpc client
        :param kwargs: forwarded unchanged to the parent constructor
        """
        super(RATaskSpecified, self).__init__(busname=otdb_notification_busname,
                                              subject=otdb_notification_subject,
                                              **kwargs)
        ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.otdbrpc = OTDBRPC(busname=otdb_service_busname,
                               servicename=otdb_service_subject,
                               broker=broker)
        self.send_bus = ToBus("%s/%s" % (notification_busname, notification_subject))

    def start_listening(self, **kwargs):
        """Open the OTDB rpc and the send bus, then start the parent listener."""
        self.otdbrpc.open()
        self.send_bus.open()
        super(RATaskSpecified, self).start_listening(**kwargs)

    def stop_listening(self, **kwargs):
        """Stop the parent listener, then close the send bus and the OTDB rpc."""
        super(RATaskSpecified, self).stop_listening(**kwargs)
        self.send_bus.close()
        self.otdbrpc.close()

    def get_predecessors(self, parset):
        """
        Extract the list of predecessor obs IDs from the given parset.

        :param parset: mapping containing the Scheduler predecessors key
        :return: list of {'source': 'mom'|'otdb'|'other', 'id': int}; entries
                 whose numeric part cannot be parsed are logged and skipped
        """
        key = PARSET_PREFIX + "Observation.Scheduler.predecessors"
        stringlist = PyParameterValue(str(parset[key]), True).getStringVector()

        # Key contains values starting with 'S' = Scheduler, 'L'/'T' = OTDB, 'M' = MoM
        # 'S' we can probably ignore? Might be only internal in the Scheduler?
        result = []
        for s in stringlist:
            try:
                # Made the source a string for readability, but it's not efficient
                if s.startswith('M'):
                    result.append({'source': 'mom', 'id': int(s[1:])})
                elif s.startswith('L') or s.startswith('T'):
                    result.append({'source': 'otdb', 'id': int(s[1:])})
                else:  # 'S'
                    logger.info("found a predecessor ID I can't handle: %s" % s)
                    result.append({'source': 'other', 'id': int(s[1:])})
            except ValueError:
                logger.warning("found a predecessor ID that I can't parse %s" % s)
        return result

    def get_specification_with_predecessors(self, id, id_source, state, found_parsets):
        """
        Build the specification tree for a task, recursing into its predecessors.

        :param id: task id, interpreted according to id_source
        :param id_source: "otdb", "mom" (translated to an OTDB id via the rpc) or "other"
        :param state: stored under "state" in the result when truthy
        :param found_parsets: dict otdb_id -> parset, used and filled as a cache
        :return: dict with otdb_id, predecessors, specification, task_type and
                 task_subtype (plus state when given); None for source "other"
                 or an unrecognised source
        """
        logger.info("Processing ID %s from %s" % (id, id_source))
        if id_source == "other":
            return None
        elif id_source == "mom":
            otdb_id = self.otdbrpc.taskGetIDs(mom_id=id)['otdb_id']
        elif id_source == "otdb":
            otdb_id = id
        else:
            # Previously execution continued and failed on the unassigned
            # otdb_id; treat an unknown source like an unusable one.
            logger.warning("Error in understanding id %s", id)
            return None

        logger.info("Processing OTDB ID %s", otdb_id)
        result = {"otdb_id": otdb_id, "predecessors": []}
        if state:
            result["state"] = state  # TODO should be status not state
        # otherwise: otdbrpc.taskGetStatus not implemented and maybe not needed?

        if otdb_id in found_parsets:
            parset = found_parsets[otdb_id]
        else:
            parset = self.otdbrpc.taskGetSpecification(otdb_id=otdb_id)['specification']
            found_parsets[otdb_id] = parset

        logger.info("parset [%s]: %s" % (otdb_id, parset))
        result['specification'] = resourceIndicatorsFromParset(parset)

        key = PARSET_PREFIX + "Observation.processSubtype"
        result['task_type'], result['task_subtype'] = convertSchedulerProcessSubtype(parset.get(key, ""))

        logger.info("Processing predecessors")
        for predecessor in self.get_predecessors(parset):
            predecessor_result = self.get_specification_with_predecessors(
                predecessor['id'], predecessor['source'], "", found_parsets)
            if predecessor_result:
                result["predecessors"].append(predecessor_result)
        return result

    def onObservationApproved(self, main_id, modificationTime):
        """Send the specification tree for a task that became 'approved'."""
        self.createAndSendSpecifiedTask(main_id, 'approved')

    def onObservationPrescheduled(self, main_id, modificationTime):
        """Send the specification tree for a task that became 'prescheduled'."""
        self.createAndSendSpecifiedTask(main_id, 'prescheduled')

    def createAndSendSpecifiedTask(self, main_id, status):
        """
        Build the specification tree rooted at main_id and send it on the bus.

        :param main_id: OTDB id of the root task
        :param status: status recorded in the root node
        """
        # Construct root node of tree
        resultTree = self.get_specification_with_predecessors(main_id, "otdb", status, {})
        logger.info("Sending result: %s" % resultTree)

        # Put result on bus
        msg = EventMessage(content=resultTree)
        self.send_bus.send(msg)
        logger.info("Result sent")
def __init__(self, exchange=DEFAULT_BUSNAME, broker=DEFAULT_BROKER):
    """
    Create the OTDB and MoM query rpc clients and a default RADatabase.

    :param exchange: exchange handed to both rpc clients
    :param broker: broker handed to both rpc clients
    """
    super().__init__()

    bus_args = {"exchange": exchange, "broker": broker}
    self.otdbrpc = OTDBRPC.create(**bus_args)
    self.momquery = MoMQueryRPC.create(**bus_args)
    # The database object is created without arguments.
    self.radb = RADatabase()
logger = logging.getLogger(__name__)

# Check the invocation arguments
parser = OptionParser("%prog -o obsid -s status [options]")
parser.add_option("-B", "--busname",
                  dest="busname",
                  type="string",
                  default=DEFAULT_OTDB_SERVICE_BUSNAME,
                  help="Busname on which OTDB commands are sent")
parser.add_option("-o", "--obsid",
                  dest="obsid",
                  type="int",
                  default=0,
                  help="Observation/tree ID to set status for")
parser.add_option("-s", "--status",
                  dest="status",
                  type="string",
                  default="",
                  help="New status")
options, args = parser.parse_args()

# All three values are required; show the usage and bail out otherwise.
if not (options.busname and options.obsid and options.status):
    parser.print_help()
    sys.exit(1)

# Set the requested status through the OTDB rpc.
with OTDBRPC(busname=options.busname) as otdbrpc:
    otdbrpc.taskSetStatus(otdb_id=options.obsid, new_status=options.status)
class RAtoOTDBPropagator():
    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 radb_broker=None,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 mom_busname=DEFAULT_MOMQUERY_BUSNAME,
                 mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
                 otdb_broker=None,
                 mom_broker=None,
                 broker=None):
        """
        RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.

        :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
        :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
        :param radb_broker: broker host for the radb rpc (default: None)
        :param otdb_busname: busname on which the OTDB service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
        :param otdb_servicename: servicename of the OTDB service (default: DEFAULT_OTDB_SERVICENAME)
        :param mom_busname: busname on which the MoM query service listens (default: DEFAULT_MOMQUERY_BUSNAME)
        :param mom_servicename: servicename of the MoM query service (default: DEFAULT_MOMQUERY_SERVICENAME)
        :param otdb_broker: broker host for the otdb rpc (default: None)
        :param mom_broker: broker host for the mom rpc (default: None)
        :param broker: if truthy, overrules radb_broker, otdb_broker and mom_broker (default: None)
        """
        if broker:
            radb_broker = otdb_broker = mom_broker = broker

        ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.radbrpc = RADBRPC(busname=radb_busname,
                               servicename=radb_servicename,
                               broker=radb_broker)
        ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.otdbrpc = OTDBRPC(busname=otdb_busname,
                               servicename=otdb_servicename,
                               broker=otdb_broker)
        self.momrpc = MoMQueryRPC(busname=mom_busname,
                                  servicename=mom_servicename,
                                  broker=mom_broker)
        self.translator = RAtoOTDBTranslator()

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open the rpc connections to the radb, otdb and mom services, in that order."""
        self.radbrpc.open()
        self.otdbrpc.open()
        self.momrpc.open()

    def close(self):
        """Close the rpc connections to the radb, otdb and mom services, in that order."""
        self.radbrpc.close()
        self.otdbrpc.close()
        self.momrpc.close()

    def doTaskConflict(self, otdb_id):
        """Set the task's OTDB status to 'conflict'; rpc errors are only logged."""
        logger.info('doTaskConflict: otdb_id=%s' % (otdb_id,))

        if otdb_id:
            try:
                self.otdbrpc.taskSetStatus(otdb_id, 'conflict')
            except Exception as e:
                logger.error(e)
        else:
            logger.warning('doTaskConflict no valid otdb_id: otdb_id=%s' % (otdb_id,))

    def doTaskScheduled(self, ra_id, otdb_id, mom_id):
        """
        Write the RA results into the OTDB specification and mark the task 'scheduled'.

        Nothing is written when otdb_id is falsy or when no storage was claimed.
        Any exception is logged and the task is put into 'conflict' instead.

        :param ra_id: id of the task in the RADB
        :param otdb_id: id of the task in OTDB
        :param mom_id: id used to look up the project name in MoM
        """
        try:
            logger.info('doTaskScheduled: ra_id=%s otdb_id=%s mom_id=%s' % (ra_id, otdb_id, mom_id))
            if not otdb_id:
                logger.warning('doTaskScheduled no valid otdb_id: otdb_id=%s' % (otdb_id,))
                return

            ra_info = self.getRAinfo(ra_id)
            logger.info('RA info for ra_id=%s otdb_id=%s: %s' % (ra_id, otdb_id, ra_info))

            # check if this is a CEP4 task, or an old CEP2 task
            # at this moment the most simple check is to see if RA claimed (CEP4) storage
            # TODO: do proper check on cluster/storage/etc
            if not ra_info['storage']:
                logger.info("No (CEP4) storage claimed for ra_id=%s otdb_id=%s, skipping otdb specification update." % (ra_id, otdb_id))
                return

            # get mom project name; fall back to 'unknown' when MoM cannot provide it
            try:
                project_details = self.momrpc.getProjectDetails(mom_id)
                logger.info(project_details)
                raw_name = project_details[str(mom_id)]['project_name']
                # whitespace in the project name is collapsed to single underscores
                project_name = "_".join(raw_name.split())
            except (RPCException, KeyError) as e:
                logger.error('Could not get project name from MoM for mom_id %s: %s' % (mom_id, str(e)))
                logger.info('Using \'unknown\' as project name.')
                project_name = 'unknown'

            otdb_info = self.translator.CreateParset(otdb_id, ra_info, project_name)
            logger.debug("Parset info for OTDB: %s" % otdb_info)
            self.setOTDBinfo(otdb_id, otdb_info, 'scheduled')
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)

    def getRAinfo(self, ra_id):
        """
        Collect what the RADB knows about a task.

        :param ra_id: id of the task in the RADB
        :return: dict with 'storage' (the last storage claim found, or {}),
                 'starttime', 'endtime' and 'status'
        """
        info = {"storage": {}}

        task = self.radbrpc.getTask(ra_id)
        claims = self.radbrpc.getResourceClaims(task_ids=ra_id,
                                                extended=True,
                                                include_properties=True)
        for claim in claims:
            logger.debug("Processing claim: %s" % claim)
            if claim['resource_type_name'] == 'storage':
                info['storage'] = claim

        for field in ("starttime", "endtime", "status"):
            info[field] = task[field]
        return info

    def setOTDBinfo(self, otdb_id, otdb_info, otdb_status):
        """
        Store the specification in OTDB, prepare the task for scheduling and set its status.

        On any exception the error is logged and the task is put into 'conflict'.

        :param otdb_id: id of the task in OTDB
        :param otdb_info: specification dict; must hold the Observation start/stop time keys
        :param otdb_status: status to set after the specification was stored
        """
        try:
            logger.info('Setting specticication for otdb_id %s: %s' % (otdb_id, otdb_info))
            self.otdbrpc.taskSetSpecification(otdb_id, otdb_info)

            start_time = otdb_info["LOFAR.ObsSW.Observation.startTime"]
            stop_time = otdb_info["LOFAR.ObsSW.Observation.stopTime"]
            self.otdbrpc.taskPrepareForScheduling(otdb_id, start_time, stop_time)

            logger.info('Setting status (%s) for otdb_id %s' % (otdb_status, otdb_id))
            self.otdbrpc.taskSetStatus(otdb_id, otdb_status)
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)
class RATaskSpecified(OTDBBusListener):
    """
    OTDB bus listener that, for approved/prescheduled tasks, collects the task's
    specification together with those of its predecessors and sends the
    resulting tree on the notification bus.
    """

    def __init__(
            self,
            otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
            otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT,
            otdb_service_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
            otdb_service_subject=DEFAULT_OTDB_SERVICENAME,
            notification_busname=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_BUSNAME,
            notification_subject=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_SUBJECT,
            broker=None,
            **kwargs):
        """
        :param otdb_notification_busname: bus passed to the parent listener as ``busname``
        :param otdb_notification_subject: subject passed to the parent listener
        :param otdb_service_busname: bus of the OTDB service used by the rpc client
        :param otdb_service_subject: passed to the rpc client as ``servicename``
        :param notification_busname: bus part of the outgoing "<bus>/<subject>" address
        :param notification_subject: subject part of the outgoing address
        :param broker: broker host passed to the OTDB rpc client
        :param kwargs: forwarded unchanged to the parent constructor
        """
        super(RATaskSpecified, self).__init__(busname=otdb_notification_busname,
                                              subject=otdb_notification_subject,
                                              **kwargs)
        ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.otdbrpc = OTDBRPC(busname=otdb_service_busname,
                               servicename=otdb_service_subject,
                               broker=broker)
        self.send_bus = ToBus("%s/%s" % (notification_busname, notification_subject))

    def start_listening(self, **kwargs):
        """Open the OTDB rpc and the send bus, then start the parent listener."""
        self.otdbrpc.open()
        self.send_bus.open()
        super(RATaskSpecified, self).start_listening(**kwargs)

    def stop_listening(self, **kwargs):
        """Stop the parent listener, then close the send bus and the OTDB rpc."""
        super(RATaskSpecified, self).stop_listening(**kwargs)
        self.send_bus.close()
        self.otdbrpc.close()

    def get_predecessors(self, parset):
        """
        Extract the list of predecessor obs IDs from the given parset.

        :param parset: mapping containing the Scheduler predecessors key
        :return: list of {'source': 'mom'|'otdb'|'other', 'id': int}; entries
                 whose numeric part cannot be parsed are logged and skipped
        """
        key = PARSET_PREFIX + "Observation.Scheduler.predecessors"
        stringlist = PyParameterValue(str(parset[key]), True).getStringVector()

        # Key contains values starting with 'S' = Scheduler, 'L'/'T' = OTDB, 'M' = MoM
        # 'S' we can probably ignore? Might be only internal in the Scheduler?
        result = []
        for s in stringlist:
            try:
                # Made the source a string for readability, but it's not efficient
                if s.startswith('M'):
                    result.append({'source': 'mom', 'id': int(s[1:])})
                elif s.startswith('L') or s.startswith('T'):
                    result.append({'source': 'otdb', 'id': int(s[1:])})
                else:  # 'S'
                    logger.info("found a predecessor ID I can't handle: %s" % s)
                    result.append({'source': 'other', 'id': int(s[1:])})
            except ValueError:
                logger.warning("found a predecessor ID that I can't parse %s" % s)
        return result

    def get_specification_with_predecessors(self, id, id_source, state,
                                            found_parsets):
        """
        Build the specification tree for a task, recursing into its predecessors.

        :param id: task id, interpreted according to id_source
        :param id_source: "otdb", "mom" (translated to an OTDB id via the rpc) or "other"
        :param state: stored under "state" in the result when truthy
        :param found_parsets: dict otdb_id -> parset, used and filled as a cache
        :return: dict with otdb_id, predecessors, specification, task_type and
                 task_subtype (plus state when given); None for source "other"
                 or an unrecognised source
        """
        logger.info("Processing ID %s from %s" % (id, id_source))
        if id_source == "other":
            return None
        elif id_source == "mom":
            otdb_id = self.otdbrpc.taskGetIDs(mom_id=id)['otdb_id']
        elif id_source == "otdb":
            otdb_id = id
        else:
            # Previously execution continued and failed on the unassigned
            # otdb_id; treat an unknown source like an unusable one.
            logger.warning("Error in understanding id %s", id)
            return None

        logger.info("Processing OTDB ID %s", otdb_id)
        result = {"otdb_id": otdb_id, "predecessors": []}
        if state:
            result["state"] = state  # TODO should be status not state
        # otherwise: otdbrpc.taskGetStatus not implemented and maybe not needed?

        if otdb_id in found_parsets:
            parset = found_parsets[otdb_id]
        else:
            parset = self.otdbrpc.taskGetSpecification(
                otdb_id=otdb_id)['specification']
            found_parsets[otdb_id] = parset

        logger.info("parset [%s]: %s" % (otdb_id, parset))
        result['specification'] = resourceIndicatorsFromParset(parset)

        key = PARSET_PREFIX + "Observation.processSubtype"
        result['task_type'], result['task_subtype'] = \
            convertSchedulerProcessSubtype(parset.get(key, ""))

        logger.info("Processing predecessors")
        for predecessor in self.get_predecessors(parset):
            predecessor_result = self.get_specification_with_predecessors(
                predecessor['id'], predecessor['source'], "", found_parsets)
            if predecessor_result:
                result["predecessors"].append(predecessor_result)
        return result

    def onObservationApproved(self, main_id, modificationTime):
        """Send the specification tree for a task that became 'approved'."""
        self.createAndSendSpecifiedTask(main_id, 'approved')

    def onObservationPrescheduled(self, main_id, modificationTime):
        """Send the specification tree for a task that became 'prescheduled'."""
        self.createAndSendSpecifiedTask(main_id, 'prescheduled')

    def createAndSendSpecifiedTask(self, main_id, status):
        """
        Build the specification tree rooted at main_id and send it on the bus.

        :param main_id: OTDB id of the root task
        :param status: status recorded in the root node
        """
        # Construct root node of tree
        resultTree = self.get_specification_with_predecessors(
            main_id, "otdb", status, {})
        logger.info("Sending result: %s" % resultTree)

        # Put result on bus
        msg = EventMessage(content=resultTree)
        self.send_bus.send(msg)
        logger.info("Result sent")
class RAtoOTDBPropagator():
    """Updates tasks in the OTDB after the ResourceAssigner is done with them."""

    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 radb_broker=None,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 mom_busname=DEFAULT_MOMQUERY_BUSNAME,
                 mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
                 otdb_broker=None,
                 mom_broker=None,
                 broker=None):
        """
        Create the rpc clients for the radb, otdb and mom query services.
        No connection is opened here; see open().

        :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
        :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
        :param radb_broker: broker host for the radb rpc (default: None)
        :param otdb_busname: busname on which the OTDB service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
        :param otdb_servicename: servicename of the OTDB service (default: DEFAULT_OTDB_SERVICENAME)
        :param mom_busname: busname on which the mom query service listens (default: DEFAULT_MOMQUERY_BUSNAME)
        :param mom_servicename: servicename of the mom query service (default: DEFAULT_MOMQUERY_SERVICENAME)
        :param otdb_broker: broker host for the otdb rpc (default: None)
        :param mom_broker: broker host for the mom query rpc (default: None)
        :param broker: if specified, overrules radb_broker, otdb_broker and mom_broker (default: None)
        """
        if broker:
            # one broker for all three services
            radb_broker = otdb_broker = mom_broker = broker

        ## ForwardExceptions=True is hardcoded in RPCWrapper right now
        self.radbrpc = RADBRPC(busname=radb_busname,
                               servicename=radb_servicename,
                               broker=radb_broker)
        self.otdbrpc = OTDBRPC(busname=otdb_busname,
                               servicename=otdb_servicename,
                               broker=otdb_broker)
        self.momrpc = MoMQueryRPC(busname=mom_busname,
                                  servicename=mom_servicename,
                                  broker=mom_broker)
        self.translator = RAtoOTDBTranslator()

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open the rpc connections to the radb, otdb and mom query services"""
        self.radbrpc.open()
        self.otdbrpc.open()
        self.momrpc.open()

    def close(self):
        """Close the rpc connections to the radb, otdb and mom query services"""
        self.radbrpc.close()
        self.otdbrpc.close()
        self.momrpc.close()

    def doTaskConflict(self, otdb_id):
        """Set the status of the task to 'conflict' in the OTDB.

        Errors raised by the otdb rpc are logged, not propagated.

        :param otdb_id: otdb id of the task; nothing is done when it is falsy
        """
        logger.info('doTaskConflict: otdb_id=%s' % (otdb_id, ))
        if otdb_id:
            try:
                self.otdbrpc.taskSetStatus(otdb_id, 'conflict')
            except Exception as e:
                logger.error(e)
        else:
            logger.warning('doTaskConflict no valid otdb_id: otdb_id=%s' % (otdb_id, ))

    def doTaskScheduled(self, ra_id, otdb_id, mom_id):
        """Write the resource assignment result of a scheduled task to the OTDB.

        On any error the task is put to 'conflict' via doTaskConflict.

        :param ra_id: id of the task in the radb
        :param otdb_id: otdb id of the task; nothing is done when it is falsy
        :param mom_id: mom id of the task, used to look up the project name
        """
        try:
            logger.info('doTaskScheduled: ra_id=%s otdb_id=%s mom_id=%s' % (ra_id, otdb_id, mom_id))
            if not otdb_id:
                logger.warning('doTaskScheduled no valid otdb_id: otdb_id=%s' % (otdb_id, ))
                return

            ra_info = self.getRAinfo(ra_id)
            logger.info('RA info for ra_id=%s otdb_id=%s: %s' % (ra_id, otdb_id, ra_info))

            # check if this is a CEP4 task, or an old CEP2 task
            # at this moment the most simple check is to see if RA claimed (CEP4) storage
            # TODO: do proper check on cluster/storage/etc
            if not ra_info['storage']:
                logger.info("No (CEP4) storage claimed for ra_id=%s otdb_id=%s, skipping otdb specification update." % (ra_id, otdb_id))
                return

            project_name = self._getProjectName(mom_id)
            otdb_info = self.translator.CreateParset(otdb_id, ra_info, project_name)
            logger.debug("Parset info for OTDB: %s" % otdb_info)
            self.setOTDBinfo(otdb_id, otdb_info, 'scheduled')
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)

    def _getProjectName(self, mom_id):
        """Return the MoM project name with whitespace replaced by '_', or 'unknown' if the lookup fails."""
        try:
            project = self.momrpc.getProjectDetails(mom_id)
            logger.info(project)
            name_parts = project[str(mom_id)]['project_name'].split()
            return "_".join(name_parts)
        except (RPCException, KeyError) as e:
            logger.error('Could not get project name from MoM for mom_id %s: %s' % (mom_id, str(e)))
            logger.info('Using \'unknown\' as project name.')
            return 'unknown'

    def getRAinfo(self, ra_id):
        """Collect the radb information needed to update the OTDB.

        :param ra_id: id of the task in the radb
        :return: dict with the keys "storage" (the last storage claim of the task,
                 or an empty dict when there is none), "starttime", "endtime" and "status"
        """
        task = self.radbrpc.getTask(ra_id)
        claims = self.radbrpc.getResourceClaims(task_ids=ra_id, extended=True, include_properties=True)

        storage_claim = {}
        for claim in claims:
            logger.debug("Processing claim: %s" % claim)
            if claim['resource_type_name'] == 'storage':
                storage_claim = claim

        info = {}
        info["storage"] = storage_claim
        for key in ("starttime", "endtime", "status"):
            info[key] = task[key]
        return info

    def setOTDBinfo(self, otdb_id, otdb_info, otdb_status):
        """Store the specification, the start/stop times and the status of a task in the OTDB.

        On any error the task is put to 'conflict' via doTaskConflict.

        :param otdb_id: otdb id of the task
        :param otdb_info: dict of parset keys and values; the start and stop time are read from it
        :param otdb_status: status to set once the specification is stored
        """
        try:
            logger.info('Setting specticication for otdb_id %s: %s' % (otdb_id, otdb_info))
            self.otdbrpc.taskSetSpecification(otdb_id, otdb_info)

            start_time = otdb_info["LOFAR.ObsSW.Observation.startTime"]
            stop_time = otdb_info["LOFAR.ObsSW.Observation.stopTime"]
            self.otdbrpc.taskPrepareForScheduling(otdb_id, start_time, stop_time)

            logger.info('Setting status (%s) for otdb_id %s' % (otdb_status, otdb_id))
            self.otdbrpc.taskSetStatus(otdb_id, otdb_status)
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)
class ResourceAssigner():
    """Inserts/updates tasks in the radb and assigns resources to them based on the incoming parset."""

    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 re_busname=RE_BUSNAME,
                 re_servicename=RE_SERVICENAME,
                 ssdb_busname=DEFAULT_SSDB_BUSNAME,
                 ssdb_servicename=DEFAULT_SSDB_SERVICENAME,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 broker=None):
        """
        ResourceAssigner inserts/updates tasks in the radb and assigns resources to it based on incoming parset.
        :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
        :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
        :param re_busname: busname on which the resource estimator service listens (default: RE_BUSNAME)
        :param re_servicename: servicename of the resource estimator service (default: RE_SERVICENAME)
        :param ssdb_busname: busname on which the ssdb service listens (default: DEFAULT_SSDB_BUSNAME)
        :param ssdb_servicename: servicename of the ssdb service (default: DEFAULT_SSDB_SERVICENAME)
        :param otdb_busname: busname on which the otdb service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
        :param otdb_servicename: servicename of the otdb service (default: DEFAULT_OTDB_SERVICENAME)
        :param broker: broker host used for all rpc's (default: None)
        """
        self.radbrpc = RARPC(servicename=radb_servicename, busname=radb_busname, broker=broker)
        self.rerpc = RPC(re_servicename, busname=re_busname, broker=broker, ForwardExceptions=True)
        self.ssdbrpc = SSDBRPC(servicename=ssdb_servicename, busname=ssdb_busname, broker=broker)
        self.otdbrpc = OTDBRPC(busname=otdb_busname, servicename=otdb_servicename, broker=broker) ## , ForwardExceptions=True hardcoded in RPCWrapper right now

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open rpc connections to the radb, resource estimator, otdb and ssdb services"""
        self.radbrpc.open()
        self.rerpc.open()
        self.otdbrpc.open()
        self.ssdbrpc.open()

    def close(self):
        """Close rpc connections to the radb, resource estimator, otdb and ssdb services"""
        self.radbrpc.close()
        self.rerpc.close()
        self.otdbrpc.close()
        self.ssdbrpc.close()

    def doAssignment(self, specification_tree):
        """Insert the task of the specification tree in the radb and claim its resources.

        The task is always inserted for status 'approved' or 'prescheduled'.
        Resources are only claimed for 'prescheduled' tasks of which all enabled
        data products are stored on CEP4. When all claims are inserted without
        conflict the task is set to 'scheduled'.

        :param specification_tree: dict with at least the keys 'otdb_id' and
                                   'specification'; 'task_type', 'state' and
                                   'predecessors' are used when present
        """
        logger.info('doAssignment: specification_tree=%s' % (specification_tree, ))

        otdb_id = specification_tree['otdb_id']
        taskType = specification_tree.get('task_type', '').lower()
        status = specification_tree.get('state', '').lower()

        if status not in ['approved', 'prescheduled']: # cep2 accepts both, cep4 only prescheduled, see below
            # the format arguments are passed one by one: a single tuple for two
            # placeholders makes the logging module fail to format the record
            logger.info('skipping specification for otdb_id=%s because status=%s', otdb_id, status)
            return

        #parse main parset...
        mainParset = parameterset(specification_tree['specification'])
        momId = mainParset.getInt('Observation.momID', -1)
        try:
            startTime = datetime.strptime(mainParset.getString('Observation.startTime'), '%Y-%m-%d %H:%M:%S')
            endTime = datetime.strptime(mainParset.getString('Observation.stopTime'), '%Y-%m-%d %H:%M:%S')
        except ValueError:
            # without start/end time there is nothing sensible to insert;
            # continuing would fail on the unbound startTime/endTime
            logger.warning('cannot parse for start/end time from specification for otdb_id=%s', otdb_id)
            return

        # insert new task and specification in the radb
        # any existing specification and task with same otdb_id will be deleted automatically
        logger.info('doAssignment: insertSpecification momId=%s, otdb_id=%s, status=%s, taskType=%s, startTime=%s, endTime=%s' %
                    (momId, otdb_id, status, taskType, startTime, endTime))
        result = self.radbrpc.insertSpecificationAndTask(momId, otdb_id, status, taskType, startTime, endTime, str(mainParset))

        if not result['inserted']:
            logger.error('could not insert specification and task')
            return

        specificationId = result['specification_id']
        taskId = result['task_id']
        logger.info('doAssignment: inserted specification (id=%s) and task (id=%s)' % (specificationId, taskId))

        # do not assign resources to task for other clusters than cep4
        if not self.checkClusterIsCEP4(mainParset):
            return

        if status != 'prescheduled':
            logger.info('skipping resource assignment for CEP4 task otdb_id=%s because status=%s' % (otdb_id, status))
            return

        needed = self.getNeededResouces(specification_tree)
        logger.info('doAssignment: getNeededResouces=%s' % (needed, ))

        # the estimator results are keyed on the otdb id as string
        if str(otdb_id) not in needed:
            logger.error("no otdb_id %s found in estimator results %s" % (otdb_id, needed))
            return

        if taskType not in needed[str(otdb_id)]:
            logger.error("no task type %s found in estimator results %s" % (taskType, needed[str(otdb_id)]))
            return

        # make sure the availability in the radb is up to date
        # TODO: this should be updated regularly
        try:
            self.updateAvailableResources('cep4')
        except Exception as e:
            logger.warning("Exception while updating available resources: %s" % str(e))

        # claim the resources for this task
        # during the claim inserts the claims are automatically validated
        # and if not enough resources are available, then they are put to conflict status
        # also, if any claim is in conflict state, then the task is put to conflict status as well
        main_needed = needed[str(otdb_id)]
        task = self.radbrpc.getTask(taskId)
        claimed, claim_ids = self.claimResources(main_needed, task)
        if claimed:
            conflictingClaims = self.radbrpc.getResourceClaims(task_ids=taskId, status='conflict')

            if conflictingClaims:
                logger.warning('doAssignment: %s conflicting claims detected. Task cannot be scheduled. %s' %
                               (len(conflictingClaims), conflictingClaims))
            else:
                logger.info('doAssignment: all claims for task %s were succesfully claimed. Setting task status to scheduled' % (taskId, ))
                self.radbrpc.updateTaskAndResourceClaims(taskId, task_status='scheduled', claim_status='allocated')

        self.processPredecessors(specification_tree)

    def processPredecessors(self, specification_tree):
        """Store the predecessor relations of the specification tree in the radb, recursively.

        Any exception is logged, not propagated.

        :param specification_tree: dict with the keys 'otdb_id' and 'predecessors'
        """
        try:
            predecessor_trees = specification_tree['predecessors']

            if predecessor_trees:
                otdb_id = specification_tree['otdb_id']
                task = self.radbrpc.getTask(otdb_id=otdb_id)

                for predecessor_tree in predecessor_trees:
                    pred_otdb_id = predecessor_tree['otdb_id']
                    predecessor_task = self.radbrpc.getTask(otdb_id=pred_otdb_id)
                    if predecessor_task:
                        self.radbrpc.insertTaskPredecessor(task['id'], predecessor_task['id'])
                    self.processPredecessors(predecessor_tree)
        except Exception as e:
            logger.error(e)

    def checkClusterIsCEP4(self, parset):
        """Check the storageClusterName of all enabled data products.

        :param parset: parset providing getBool and getString
        :return: False as soon as an enabled data product is not stored on CEP4, else True
        """
        # check storageClusterName for enabled DataProducts
        # if any storageClusterName is not CEP4, we do not accept this parset
        keys = ['Output_Correlated',
                'Output_IncoherentStokes',
                'Output_CoherentStokes',
                'Output_InstrumentModel',
                'Output_SkyImage',
                'Output_Pulsar']
        for key in keys:
            if parset.getBool('Observation.DataProducts.%s.enabled' % key, False):
                if parset.getString('Observation.DataProducts.%s.storageClusterName' % key, '') != 'CEP4':
                    # logger.warn is a deprecated alias of logger.warning
                    logger.warning("storageClusterName not CEP4, rejecting specification.")
                    return False

        logger.info("all enabled storageClusterName's are CEP4, accepting specification.")
        return True

    def getNeededResouces(self, specification_tree):
        """Ask the resource estimator which resources the specification tree needs.

        :param specification_tree: the specification tree to estimate
        :return: the reply message of the resource estimator
        """
        replymessage, status = self.rerpc({"specification_tree": specification_tree}, timeout=10)
        logger.info('getNeededResouces: %s' % replymessage)
        return replymessage

    def updateAvailableResources(self, cluster):
        """Copy the current storage availability of the cluster from the ssdb to the radb.

        Raises StopIteration when the cluster group or the cep4 storage resource
        cannot be found.

        :param cluster: name of the cluster group in the ssdb
        """
        # find out which resources are available
        # and what is their capacity
        # For now, only look at CEP4 storage
        # Later, also look at stations up/down for short term scheduling

        #get all active groupnames, find id for cluster group
        groupnames = self.ssdbrpc.getactivegroupnames()
        cluster_group_id = next(k for k, v in groupnames.items() if v == cluster)

        # for CEP4 cluster, do hard coded lookup of first and only node
        node_info = self.ssdbrpc.gethostsforgid(cluster_group_id)['nodes'][0]

        storage_resources = self.radbrpc.getResources(resource_types='storage', include_availability=True)
        cep4_storage_resource = next(x for x in storage_resources if 'cep4' in x['name'])
        active = node_info['statename'] == 'Active'
        total_capacity = node_info['totalspace']
        available_capacity = total_capacity - node_info['usedspace']

        logger.info("Updating resource availability of %s (id=%s) to active=%s available_capacity=%s total_capacity=%s" %
                    (cep4_storage_resource['name'], cep4_storage_resource['id'], active, available_capacity, total_capacity))

        self.radbrpc.updateResourceAvailability(cep4_storage_resource['id'],
                                                active=active,
                                                available_capacity=available_capacity,
                                                total_capacity=total_capacity)

    def claimResources(self, needed_resources, task):
        """Insert claims in the radb for the resources needed by the task.

        :param needed_resources: estimator result for the task: a dict keyed on
                                 task type, holding per resource type name a dict
                                 with one int (the claim size) and optionally a
                                 dict with groups of claim properties
        :param task: radb task dict; 'id', 'type', 'starttime' and 'endtime' are used
        :return: tuple (True when as many claims were inserted as were created, list of claim ids)
        """
        logger.info('claimResources: task %s needed_resources=%s' % (task, needed_resources))

        # get the needed resources for the task type
        needed_resources_for_task_type = needed_resources[task['type']]

        # get db lists
        rc_property_types = {rcpt['name']: rcpt['id'] for rcpt in self.radbrpc.getResourceClaimPropertyTypes()}
        resource_types = {rt['name']: rt['id'] for rt in self.radbrpc.getResourceTypes()}
        resources = self.radbrpc.getResources()

        def processProperties(claim, propertiesDict, sap_nr=None):
            # append the known properties of one group to the claim
            for prop_type_name, prop_value in propertiesDict.items():
                if prop_type_name in rc_property_types:
                    prop = {'type': rc_property_types[prop_type_name], 'value': prop_value}
                    if sap_nr is not None:
                        prop['sap_nr'] = sap_nr
                    claim['properties'].append(prop)
                else:
                    logger.error('claimResources: unknown prop_type:%s' % prop_type_name)

        # loop over needed_resources -> resource_type -> claim (and props)
        # flatten the tree dict to a list of claims (with props)
        claims = []
        for resource_type_name, needed_claim_for_resource_type in needed_resources_for_task_type.items():
            if resource_type_name not in resource_types:
                logger.error('claimResources: unknown resource_type:%s' % resource_type_name)
                continue

            logger.info('claimResources: processing resource_type: %s' % resource_type_name)
            db_resource_type_id = resource_types[resource_type_name]
            db_resources_for_type = [r for r in resources if r['type_id'] == db_resource_type_id]

            # needed_claim_for_resource_type is a dict containing exactly one kvp of which the value is an int
            # that value is the value for the claim
            needed_claim_value = next(v for v in needed_claim_for_resource_type.values() if isinstance(v, int))

            # FIXME: right now we just pick the first resource from the 'cep4' resources.
            # estimator will deliver this info in the future
            db_cep4_resources_for_type = [r for r in db_resources_for_type if 'cep4' in r['name'].lower()]
            if not db_cep4_resources_for_type:
                continue

            db_resource = db_cep4_resources_for_type[0]
            claim = {'resource_id': db_resource['id'],
                     'starttime': task['starttime'],
                     'endtime': task['endtime'],
                     'status': 'claimed',
                     'claim_size': needed_claim_value}

            #FIXME: find proper way to extend storage time with a month
            if 'storage' in db_resource['name']:
                claim['endtime'] += timedelta(days=31)

            # if the needed_claim_for_resource_type dict contains more kvp's,
            # then the subdict contains groups of properties for the claim
            if len(needed_claim_for_resource_type) > 1:
                claim['properties'] = []
                # the groups are iterated with .items(), so select on dict;
                # collections.Iterable no longer exists since python 3.10
                needed_prop_groups = next((v for v in needed_claim_for_resource_type.values() if isinstance(v, dict)), {})

                for group_name, needed_prop_group in needed_prop_groups.items():
                    if group_name == 'saps':
                        for sap_dict in needed_prop_group:
                            processProperties(claim, sap_dict['properties'], sap_dict['sap_nr'])
                    else:
                        processProperties(claim, needed_prop_group)

            logger.info('claimResources: created claim:%s' % claim)
            claims.append(claim)

        logger.info('claimResources: inserting %d claims in the radb' % len(claims))
        claim_ids = self.radbrpc.insertResourceClaims(task['id'], claims, 1, 'anonymous', -1)['ids']
        logger.info('claimResources: %d claims were inserted in the radb' % len(claim_ids))
        return len(claim_ids) == len(claims), claim_ids
class ResourceAssigner():
    """Inserts/updates tasks in the radb and assigns resources to them based on the incoming parset."""

    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 re_busname=RE_BUSNAME,
                 re_servicename=RE_SERVICENAME,
                 ssdb_busname=DEFAULT_SSDB_BUSNAME,
                 ssdb_servicename=DEFAULT_SSDB_SERVICENAME,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 broker=None):
        """
        ResourceAssigner inserts/updates tasks in the radb and assigns resources to it based on incoming parset.
        :param radb_busname: busname on which the radb service listens (default: RADB_BUSNAME)
        :param radb_servicename: servicename of the radb service (default: RADB_SERVICENAME)
        :param re_busname: busname on which the resource estimator service listens (default: RE_BUSNAME)
        :param re_servicename: servicename of the resource estimator service (default: RE_SERVICENAME)
        :param ssdb_busname: busname on which the ssdb service listens (default: DEFAULT_SSDB_BUSNAME)
        :param ssdb_servicename: servicename of the ssdb service (default: DEFAULT_SSDB_SERVICENAME)
        :param otdb_busname: busname on which the otdb service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
        :param otdb_servicename: servicename of the otdb service (default: DEFAULT_OTDB_SERVICENAME)
        :param broker: broker host used for all rpc's (default: None)
        """
        self.radbrpc = RARPC(servicename=radb_servicename,
                             busname=radb_busname,
                             broker=broker)
        self.rerpc = RPC(re_servicename,
                         busname=re_busname,
                         broker=broker,
                         ForwardExceptions=True)
        self.ssdbrpc = SSDBRPC(servicename=ssdb_servicename,
                               busname=ssdb_busname,
                               broker=broker)
        ## ForwardExceptions=True is hardcoded in RPCWrapper right now
        self.otdbrpc = OTDBRPC(busname=otdb_busname,
                               servicename=otdb_servicename,
                               broker=broker)

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open rpc connections to the radb, resource estimator, otdb and ssdb services"""
        self.radbrpc.open()
        self.rerpc.open()
        self.otdbrpc.open()
        self.ssdbrpc.open()

    def close(self):
        """Close rpc connections to the radb, resource estimator, otdb and ssdb services"""
        self.radbrpc.close()
        self.rerpc.close()
        self.otdbrpc.close()
        self.ssdbrpc.close()

    def doAssignment(self, specification_tree):
        """Insert the task of the specification tree in the radb and claim its resources.

        The task is always inserted for status 'approved' or 'prescheduled'.
        Resources are only claimed for 'prescheduled' tasks of which all enabled
        data products are stored on CEP4. When all claims are inserted without
        conflict the task is set to 'scheduled'.

        :param specification_tree: dict with at least the keys 'otdb_id' and
                                   'specification'; 'task_type', 'state' and
                                   'predecessors' are used when present
        """
        logger.info('doAssignment: specification_tree=%s' % (specification_tree, ))

        otdb_id = specification_tree['otdb_id']
        taskType = specification_tree.get('task_type', '').lower()
        status = specification_tree.get('state', '').lower()

        # cep2 accepts both, cep4 only prescheduled, see below
        if status not in ['approved', 'prescheduled']:
            # pass the arguments one by one: one tuple for two placeholders
            # cannot be formatted by the logging module
            logger.info('skipping specification for otdb_id=%s because status=%s', otdb_id, status)
            return

        #parse main parset...
        mainParset = parameterset(specification_tree['specification'])
        momId = mainParset.getInt('Observation.momID', -1)

        times = self._parseStartEndTime(mainParset, otdb_id)
        if times is None:
            # nothing sensible can be inserted without a start and end time
            return
        startTime, endTime = times

        # insert new task and specification in the radb
        # any existing specification and task with same otdb_id will be deleted automatically
        logger.info(
            'doAssignment: insertSpecification momId=%s, otdb_id=%s, status=%s, taskType=%s, startTime=%s, endTime=%s'
            % (momId, otdb_id, status, taskType, startTime, endTime))
        result = self.radbrpc.insertSpecificationAndTask(
            momId, otdb_id, status, taskType, startTime, endTime,
            str(mainParset))

        if not result['inserted']:
            logger.error('could not insert specification and task')
            return

        specificationId = result['specification_id']
        taskId = result['task_id']
        logger.info(
            'doAssignment: inserted specification (id=%s) and task (id=%s)' %
            (specificationId, taskId))

        # do not assign resources to task for other clusters than cep4
        if not self.checkClusterIsCEP4(mainParset):
            return

        if status != 'prescheduled':
            logger.info(
                'skipping resource assignment for CEP4 task otdb_id=%s because status=%s'
                % (otdb_id, status))
            return

        needed = self.getNeededResouces(specification_tree)
        logger.info('doAssignment: getNeededResouces=%s' % (needed, ))

        # the estimator results are keyed on the otdb id as string
        needed_key = str(otdb_id)
        if needed_key not in needed:
            logger.error("no otdb_id %s found in estimator results %s" %
                         (otdb_id, needed))
            return

        main_needed = needed[needed_key]
        if taskType not in main_needed:
            logger.error("no task type %s found in estimator results %s" %
                         (taskType, main_needed))
            return

        # make sure the availability in the radb is up to date
        # TODO: this should be updated regularly
        try:
            self.updateAvailableResources('cep4')
        except Exception as e:
            logger.warning(
                "Exception while updating available resources: %s" % str(e))

        # claim the resources for this task
        # during the claim inserts the claims are automatically validated
        # and if not enough resources are available, then they are put to conflict status
        # also, if any claim is in conflict state, then the task is put to conflict status as well
        task = self.radbrpc.getTask(taskId)
        claimed, claim_ids = self.claimResources(main_needed, task)
        if claimed:
            conflictingClaims = self.radbrpc.getResourceClaims(
                task_ids=taskId, status='conflict')

            if conflictingClaims:
                logger.warning(
                    'doAssignment: %s conflicting claims detected. Task cannot be scheduled. %s'
                    % (len(conflictingClaims), conflictingClaims))
            else:
                logger.info(
                    'doAssignment: all claims for task %s were succesfully claimed. Setting task status to scheduled'
                    % (taskId, ))
                self.radbrpc.updateTaskAndResourceClaims(
                    taskId, task_status='scheduled', claim_status='allocated')

        self.processPredecessors(specification_tree)

    def _parseStartEndTime(self, mainParset, otdb_id):
        """Return (startTime, endTime) parsed from the parset, or None when they cannot be parsed."""
        time_format = '%Y-%m-%d %H:%M:%S'
        try:
            startTime = datetime.strptime(
                mainParset.getString('Observation.startTime'), time_format)
            endTime = datetime.strptime(
                mainParset.getString('Observation.stopTime'), time_format)
        except ValueError:
            logger.warning(
                'cannot parse for start/end time from specification for otdb_id=%s',
                otdb_id)
            return None
        return startTime, endTime

    def processPredecessors(self, specification_tree):
        """Store the predecessor relations of the specification tree in the radb, recursively.

        Any exception is logged, not propagated.

        :param specification_tree: dict with the keys 'otdb_id' and 'predecessors'
        """
        try:
            predecessor_trees = specification_tree['predecessors']

            if predecessor_trees:
                otdb_id = specification_tree['otdb_id']
                task = self.radbrpc.getTask(otdb_id=otdb_id)

                for predecessor_tree in predecessor_trees:
                    pred_otdb_id = predecessor_tree['otdb_id']
                    predecessor_task = self.radbrpc.getTask(
                        otdb_id=pred_otdb_id)
                    if predecessor_task:
                        self.radbrpc.insertTaskPredecessor(
                            task['id'], predecessor_task['id'])
                    self.processPredecessors(predecessor_tree)
        except Exception as e:
            logger.error(e)

    def checkClusterIsCEP4(self, parset):
        """Check the storageClusterName of all enabled data products.

        :param parset: parset providing getBool and getString
        :return: False as soon as an enabled data product is not stored on CEP4, else True
        """
        # check storageClusterName for enabled DataProducts
        # if any storageClusterName is not CEP4, we do not accept this parset
        keys = [
            'Output_Correlated', 'Output_IncoherentStokes',
            'Output_CoherentStokes', 'Output_InstrumentModel',
            'Output_SkyImage', 'Output_Pulsar'
        ]
        for key in keys:
            enabled = parset.getBool(
                'Observation.DataProducts.%s.enabled' % key, False)
            if not enabled:
                continue
            cluster_name = parset.getString(
                'Observation.DataProducts.%s.storageClusterName' % key, '')
            if cluster_name != 'CEP4':
                # logger.warn is a deprecated alias of logger.warning
                logger.warning(
                    "storageClusterName not CEP4, rejecting specification.")
                return False

        logger.info(
            "all enabled storageClusterName's are CEP4, accepting specification."
        )
        return True

    def getNeededResouces(self, specification_tree):
        """Ask the resource estimator which resources the specification tree needs.

        :param specification_tree: the specification tree to estimate
        :return: the reply message of the resource estimator
        """
        replymessage, status = self.rerpc(
            {"specification_tree": specification_tree}, timeout=10)
        logger.info('getNeededResouces: %s' % replymessage)
        return replymessage

    def updateAvailableResources(self, cluster):
        """Copy the current storage availability of the cluster from the ssdb to the radb.

        Raises StopIteration when the cluster group or the cep4 storage resource
        cannot be found.

        :param cluster: name of the cluster group in the ssdb
        """
        # find out which resources are available
        # and what is their capacity
        # For now, only look at CEP4 storage
        # Later, also look at stations up/down for short term scheduling

        #get all active groupnames, find id for cluster group
        groupnames = self.ssdbrpc.getactivegroupnames()
        cluster_group_id = next(k for k, v in groupnames.items()
                                if v == cluster)

        # for CEP4 cluster, do hard coded lookup of first and only node
        node_info = self.ssdbrpc.gethostsforgid(cluster_group_id)['nodes'][0]

        storage_resources = self.radbrpc.getResources(
            resource_types='storage', include_availability=True)
        cep4_storage_resource = next(x for x in storage_resources
                                     if 'cep4' in x['name'])
        active = node_info['statename'] == 'Active'
        total_capacity = node_info['totalspace']
        available_capacity = total_capacity - node_info['usedspace']

        logger.info(
            "Updating resource availability of %s (id=%s) to active=%s available_capacity=%s total_capacity=%s"
            % (cep4_storage_resource['name'], cep4_storage_resource['id'],
               active, available_capacity, total_capacity))

        self.radbrpc.updateResourceAvailability(
            cep4_storage_resource['id'],
            active=active,
            available_capacity=available_capacity,
            total_capacity=total_capacity)

    def claimResources(self, needed_resources, task):
        """Insert claims in the radb for the resources needed by the task.

        :param needed_resources: estimator result for the task: a dict keyed on
                                 task type, holding per resource type name a dict
                                 with one int (the claim size) and optionally a
                                 dict with groups of claim properties
        :param task: radb task dict; 'id', 'type', 'starttime' and 'endtime' are used
        :return: tuple (True when as many claims were inserted as were created, list of claim ids)
        """
        logger.info('claimResources: task %s needed_resources=%s' %
                    (task, needed_resources))

        # get the needed resources for the task type
        needed_resources_for_task_type = needed_resources[task['type']]

        # get db lists
        rc_property_types = {
            rcpt['name']: rcpt['id']
            for rcpt in self.radbrpc.getResourceClaimPropertyTypes()
        }
        resource_types = {
            rt['name']: rt['id']
            for rt in self.radbrpc.getResourceTypes()
        }
        resources = self.radbrpc.getResources()

        # loop over needed_resources -> resource_type -> claim (and props)
        # flatten the tree dict to a list of claims (with props)
        claims = []
        for resource_type_name, needed_claim_for_resource_type in needed_resources_for_task_type.items():
            if resource_type_name not in resource_types:
                logger.error('claimResources: unknown resource_type:%s' %
                             resource_type_name)
                continue

            logger.info('claimResources: processing resource_type: %s' %
                        resource_type_name)
            db_resource_type_id = resource_types[resource_type_name]

            # needed_claim_for_resource_type is a dict containing exactly one kvp of which the value is an int
            # that value is the value for the claim
            needed_claim_value = next(
                v for v in needed_claim_for_resource_type.values()
                if isinstance(v, int))

            # FIXME: right now we just pick the first resource from the 'cep4' resources.
            # estimator will deliver this info in the future
            db_cep4_resources_for_type = [
                r for r in resources
                if r['type_id'] == db_resource_type_id
                and 'cep4' in r['name'].lower()
            ]
            if not db_cep4_resources_for_type:
                continue

            db_resource = db_cep4_resources_for_type[0]
            claim = {
                'resource_id': db_resource['id'],
                'starttime': task['starttime'],
                'endtime': task['endtime'],
                'status': 'claimed',
                'claim_size': needed_claim_value
            }

            #FIXME: find proper way to extend storage time with a month
            if 'storage' in db_resource['name']:
                claim['endtime'] += timedelta(days=31)

            # if the needed_claim_for_resource_type dict contains more kvp's,
            # then the subdict contains groups of properties for the claim
            if len(needed_claim_for_resource_type) > 1:
                claim['properties'] = self._createClaimProperties(
                    needed_claim_for_resource_type, rc_property_types)

            logger.info('claimResources: created claim:%s' % claim)
            claims.append(claim)

        logger.info('claimResources: inserting %d claims in the radb' %
                    len(claims))
        claim_ids = self.radbrpc.insertResourceClaims(task['id'], claims, 1,
                                                      'anonymous', -1)['ids']
        logger.info('claimResources: %d claims were inserted in the radb' %
                    len(claim_ids))
        return len(claim_ids) == len(claims), claim_ids

    def _createClaimProperties(self, needed_claim_for_resource_type, rc_property_types):
        """Flatten the property groups of one needed claim into a list of radb claim properties."""
        properties = []

        def addProperties(propertiesDict, sap_nr=None):
            # translate the property names to radb property type ids
            for prop_type_name, prop_value in propertiesDict.items():
                if prop_type_name not in rc_property_types:
                    logger.error('claimResources: unknown prop_type:%s' %
                                 prop_type_name)
                    continue
                prop = {
                    'type': rc_property_types[prop_type_name],
                    'value': prop_value
                }
                if sap_nr is not None:
                    prop['sap_nr'] = sap_nr
                properties.append(prop)

        # the groups are iterated with .items(), so select on dict;
        # collections.Iterable no longer exists since python 3.10
        needed_prop_groups = next(
            (v for v in needed_claim_for_resource_type.values()
             if isinstance(v, dict)), {})

        for group_name, needed_prop_group in needed_prop_groups.items():
            if group_name == 'saps':
                for sap_dict in needed_prop_group:
                    addProperties(sap_dict['properties'], sap_dict['sap_nr'])
            else:
                addProperties(needed_prop_group)
        return properties