Beispiel #1
0
class RAtoOTDBPropagator():
    """Propagates the outcome of resource assignment for a task to the OTDB.

    Wraps rpc clients for the radb, OTDB and MoM query services together with
    a RAtoOTDBTranslator. Can be used in a 'with' statement, which opens the
    rpc connections on entry and closes them on exit.
    """

    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 radb_broker=None,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 mom_busname=DEFAULT_MOMQUERY_BUSNAME,
                 mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
                 otdb_broker=None,
                 mom_broker=None,
                 broker=None):
        """
        RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.
        :param radb_busname: busname on which the radb service listens (default: lofar.ra.command)
        :param radb_servicename: servicename of the radb service (default: RADBService)
        :param radb_broker: valid Qpid broker host (default: None, which means localhost)
        :param otdb_busname: busname on which the OTDB service listens (default: lofar.otdb.command)
        :param otdb_servicename: servicename of the OTDB service (default: OTDBService)
        :param mom_busname: busname handed to the MoM query rpc client (default: DEFAULT_MOMQUERY_BUSNAME)
        :param mom_servicename: servicename handed to the MoM query rpc client (default: DEFAULT_MOMQUERY_SERVICENAME)
        :param otdb_broker: valid Qpid broker host (default: None, which means localhost)
        :param mom_broker: valid Qpid broker host (default: None, which means localhost)
        :param broker: if specified, overrules radb_broker, otdb_broker and mom_broker. Valid Qpid broker host (default: None, which means localhost)
        """
        if broker:
            # one broker for all three services
            radb_broker = broker
            otdb_broker = broker
            mom_broker = broker

        self.radbrpc = RADBRPC(
            busname=radb_busname,
            servicename=radb_servicename,
            broker=radb_broker
        )  ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.otdbrpc = OTDBRPC(
            busname=otdb_busname,
            servicename=otdb_servicename,
            broker=otdb_broker
        )  ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.momrpc = MoMQueryRPC(busname=mom_busname,
                                  servicename=mom_servicename,
                                  broker=mom_broker)
        self.translator = RAtoOTDBTranslator()

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open rpc connections to the radb, OTDB and MoM query services"""
        self.radbrpc.open()
        self.otdbrpc.open()
        self.momrpc.open()

    def close(self):
        """Close rpc connections to the radb, OTDB and MoM query services.

        Every connection gets its close() call, even when closing an earlier
        one raises; the exception still propagates to the caller afterwards.
        """
        try:
            self.radbrpc.close()
        finally:
            try:
                self.otdbrpc.close()
            finally:
                self.momrpc.close()

    def doTaskConflict(self, otdb_id):
        """Set the status of the given task in the OTDB to 'conflict'.

        A falsy otdb_id is only logged. Exceptions raised by the OTDB rpc are
        logged and not propagated.
        :param otdb_id: id of the task in the OTDB
        """
        logger.info('doTaskConflict: otdb_id=%s', otdb_id)
        if not otdb_id:
            logger.warning('doTaskConflict no valid otdb_id: otdb_id=%s',
                           otdb_id)
            return
        try:
            self.otdbrpc.taskSetStatus(otdb_id, 'conflict')
        except Exception as e:
            logger.error(e)

    def doTaskScheduled(self, ra_id, otdb_id, mom_id):
        """Write the RA results for a task to the OTDB and mark it 'scheduled'.

        Tasks without a valid otdb_id or without a storage claim are skipped.
        Any exception is logged, after which the task is put to 'conflict'.
        :param ra_id: id of the task in the radb
        :param otdb_id: id of the task in the OTDB
        :param mom_id: id used to look up the project details in MoM
        """
        try:
            logger.info('doTaskScheduled: ra_id=%s otdb_id=%s mom_id=%s',
                        ra_id, otdb_id, mom_id)
            if not otdb_id:
                logger.warning('doTaskScheduled no valid otdb_id: otdb_id=%s',
                               otdb_id)
                return
            ra_info = self.getRAinfo(ra_id)

            logger.info('RA info for ra_id=%s otdb_id=%s: %s',
                        ra_id, otdb_id, ra_info)

            # check if this is a CEP4 task, or an old CEP2 task
            # at this moment the most simple check is to see if RA claimed (CEP4) storage
            # TODO: do proper check on cluster/storage/etc
            if not ra_info['storage']:
                logger.info(
                    "No (CEP4) storage claimed for ra_id=%s otdb_id=%s, skipping otdb specification update.",
                    ra_id, otdb_id)
                return

            # get mom project name, whitespace in the name is replaced by underscores
            try:
                project = self.momrpc.getProjectDetails(mom_id)
                logger.info(project)
                project_name = "_".join(
                    project[str(mom_id)]['project_name'].split())
            except (RPCException, KeyError) as e:
                logger.error(
                    'Could not get project name from MoM for mom_id %s: %s',
                    mom_id, str(e))
                logger.info("Using 'unknown' as project name.")
                project_name = 'unknown'

            otdb_info = self.translator.CreateParset(otdb_id, ra_info,
                                                     project_name)
            logger.debug("Parset info for OTDB: %s", otdb_info)
            self.setOTDBinfo(otdb_id, otdb_info, 'scheduled')
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)

    def getRAinfo(self, ra_id):
        """Collect the radb information of a task that is needed for the OTDB.

        :param ra_id: id of the task in the radb
        :return: dict with keys 'storage' (the storage claim of the task, or
                 an empty dict when there is none), 'starttime', 'endtime'
                 and 'status' (taken from the radb task)
        """
        task = self.radbrpc.getTask(ra_id)
        claims = self.radbrpc.getResourceClaims(task_ids=ra_id,
                                                extended=True,
                                                include_properties=True)
        info = {"storage": {}}
        for claim in claims:
            logger.debug("Processing claim: %s", claim)
            if claim['resource_type_name'] == 'storage':
                # when there are several storage claims, the last one wins
                info['storage'] = claim
        info["starttime"] = task["starttime"]
        info["endtime"] = task["endtime"]
        info["status"] = task["status"]
        return info

    def setOTDBinfo(self, otdb_id, otdb_info, otdb_status):
        """Store the specification in the OTDB and set the task status.

        Any exception is logged, after which the task is put to 'conflict'.
        :param otdb_id: id of the task in the OTDB
        :param otdb_info: specification dict; has to contain the keys
                          LOFAR.ObsSW.Observation.startTime and
                          LOFAR.ObsSW.Observation.stopTime
        :param otdb_status: status to give the task in the OTDB
        """
        try:
            logger.info('Setting specticication for otdb_id %s: %s',
                        otdb_id, otdb_info)
            self.otdbrpc.taskSetSpecification(otdb_id, otdb_info)
            self.otdbrpc.taskPrepareForScheduling(
                otdb_id, otdb_info["LOFAR.ObsSW.Observation.startTime"],
                otdb_info["LOFAR.ObsSW.Observation.stopTime"])
            logger.info('Setting status (%s) for otdb_id %s',
                        otdb_status, otdb_id)
            self.otdbrpc.taskSetStatus(otdb_id, otdb_status)
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)
Beispiel #2
0
class RATaskSpecified(OTDBBusListener):
    """Listens for OTDB task status notifications and publishes the task's
    specification tree (the task plus its predecessors) on a notification bus.
    """

    def __init__(self,
                 otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
                 otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT,
                 otdb_service_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_service_subject=DEFAULT_OTDB_SERVICENAME,
                 notification_busname=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_BUSNAME,
                 notification_subject=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_SUBJECT,
                 broker=None, **kwargs):
        """
        :param otdb_notification_busname: busname handed to the OTDBBusListener base class
        :param otdb_notification_subject: subject handed to the OTDBBusListener base class
        :param otdb_service_busname: busname for the OTDB rpc client
        :param otdb_service_subject: servicename for the OTDB rpc client
        :param notification_busname: bus on which the specification trees are sent
        :param notification_subject: subject with which the specification trees are sent
        :param broker: broker for the OTDB rpc client (it is not passed to the listener or the send bus)
        :param kwargs: passed on to the OTDBBusListener base class
        """
        super(RATaskSpecified, self).__init__(busname=otdb_notification_busname,
                                              subject=otdb_notification_subject,
                                              **kwargs)
        self.otdbrpc = OTDBRPC(busname=otdb_service_busname,
                               servicename=otdb_service_subject,
                               broker=broker)  ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.send_bus = ToBus("%s/%s" % (notification_busname, notification_subject))

    def start_listening(self, **kwargs):
        """Open the OTDB rpc and the send bus, then start listening."""
        self.otdbrpc.open()
        self.send_bus.open()
        super(RATaskSpecified, self).start_listening(**kwargs)

    def stop_listening(self, **kwargs):
        """Stop listening, then close the send bus and the OTDB rpc.

        The send bus and the rpc get their close() call even when an earlier
        step raises; the exception still propagates to the caller afterwards.
        """
        try:
            super(RATaskSpecified, self).stop_listening(**kwargs)
        finally:
            try:
                self.send_bus.close()
            finally:
                self.otdbrpc.close()

    def get_predecessors(self, parset):
        """ Extract the list of predecessor obs IDs from the given parset.

        :param parset: mapping which contains the Observation.Scheduler.predecessors key
        :return: list of dicts {'source': 'mom'|'otdb'|'other', 'id': int};
                 entries of which the numeric part cannot be parsed are left out
        """
        key = PARSET_PREFIX + "Observation.Scheduler.predecessors"
        stringlist = PyParameterValue(str(parset[key]), True).getStringVector()

        # Key contains values starting with 'S' = Scheduler, 'L'/'T' = OTDB, 'M' = MoM
        # 'S' we can probably ignore? Might be only internal in the Scheduler?
        result = []
        for s in stringlist:
            try:  # Made the source a string for readability, but it's not efficient
                if s.startswith('M'):
                    result.append({'source': 'mom', 'id': int(s[1:])})
                elif s.startswith(('L', 'T')):
                    result.append({'source': 'otdb', 'id': int(s[1:])})
                else:  # 'S'
                    logger.info("found a predecessor ID I can't handle: %s", s)
                    result.append({'source': 'other', 'id': int(s[1:])})
            except ValueError:
                logger.warning("found a predecessor ID that I can't parse %s", s)
        return result

    def get_specification_with_predecessors(self, id, id_source, state, found_parsets):
        """Build the specification tree of a task, recursing into its predecessors.

        :param id: id of the task, interpreted according to id_source
        :param id_source: 'otdb' (id is an OTDB id), 'mom' (id is looked up in
                          the OTDB by mom id) or 'other' (not handled)
        :param state: stored under 'state' in the result when not empty
        :param found_parsets: dict otdb_id -> parset, used as cache and updated in place
        :return: dict with the keys 'otdb_id', 'predecessors', 'specification',
                 'task_type', 'task_subtype' and, when given, 'state';
                 None when id_source is 'other' or not recognised
        """
        logger.info("Processing ID %s from %s", id, id_source)
        if id_source == "other":
            return None
        elif id_source == "mom":
            otdb_id = self.otdbrpc.taskGetIDs(mom_id=id)['otdb_id']
        elif id_source == "otdb":
            otdb_id = id
        else:
            logger.warning("Error in understanding id %s", id)
            # without a known source there is no otdb_id to continue with
            return None

        logger.info("Processing OTDB ID %s", otdb_id)
        result = {"otdb_id": otdb_id, "predecessors": []}
        if state:
            result["state"] = state  # TODO should be status not state
        # else: otdbrpc.taskGetStatus not implemented and maybe not needed?

        if otdb_id in found_parsets:
            parset = found_parsets[otdb_id]
        else:
            parset = self.otdbrpc.taskGetSpecification(otdb_id=otdb_id)['specification']
            found_parsets[otdb_id] = parset

        logger.info("parset [%s]: %s", otdb_id, parset)
        result['specification'] = resourceIndicatorsFromParset(parset)

        key = PARSET_PREFIX + "Observation.processSubtype"
        result['task_type'], result['task_subtype'] = convertSchedulerProcessSubtype(parset.get(key, ""))

        logger.info("Processing predecessors")
        for predecessor in self.get_predecessors(parset):
            # predecessors are processed without a state
            predecessor_result = self.get_specification_with_predecessors(
                predecessor['id'], predecessor['source'], "", found_parsets)
            if predecessor_result:
                result["predecessors"].append(predecessor_result)
        return result

    def onObservationApproved(self, main_id, modificationTime):
        """Send the specification tree of the task with status 'approved'."""
        self.createAndSendSpecifiedTask(main_id, 'approved')

    def onObservationPrescheduled(self, main_id, modificationTime):
        """Send the specification tree of the task with status 'prescheduled'."""
        self.createAndSendSpecifiedTask(main_id, 'prescheduled')

    def createAndSendSpecifiedTask(self, main_id, status):
        """Build the specification tree for OTDB task main_id and send it on the send bus.

        :param main_id: OTDB id of the task at the root of the tree
        :param status: status stored in the root node of the tree
        """
        # Construct root node of tree
        resultTree = self.get_specification_with_predecessors(main_id, "otdb", status, {})
        logger.info("Sending result: %s", resultTree)

        # Put result on bus
        msg = EventMessage(content=resultTree)
        self.send_bus.send(msg)
        logger.info("Result sent")
Beispiel #3
0
class RATaskSpecified(OTDBBusListener):
    """Listens for OTDB task status notifications and publishes the task's
    specification tree (the task plus its predecessors) on a notification bus.
    """

    def __init__(
            self,
            otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
            otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT,
            otdb_service_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
            otdb_service_subject=DEFAULT_OTDB_SERVICENAME,
            notification_busname=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_BUSNAME,
            notification_subject=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_SUBJECT,
            broker=None,
            **kwargs):
        """
        :param otdb_notification_busname: busname handed to the OTDBBusListener base class
        :param otdb_notification_subject: subject handed to the OTDBBusListener base class
        :param otdb_service_busname: busname for the OTDB rpc client
        :param otdb_service_subject: servicename for the OTDB rpc client
        :param notification_busname: bus on which the specification trees are sent
        :param notification_subject: subject with which the specification trees are sent
        :param broker: broker for the OTDB rpc client (it is not passed to the listener or the send bus)
        :param kwargs: passed on to the OTDBBusListener base class
        """
        super(RATaskSpecified,
              self).__init__(busname=otdb_notification_busname,
                             subject=otdb_notification_subject,
                             **kwargs)
        self.otdbrpc = OTDBRPC(
            busname=otdb_service_busname,
            servicename=otdb_service_subject,
            broker=broker
        )  ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.send_bus = ToBus("%s/%s" %
                              (notification_busname, notification_subject))

    def start_listening(self, **kwargs):
        """Open the OTDB rpc and the send bus, then start listening."""
        self.otdbrpc.open()
        self.send_bus.open()
        super(RATaskSpecified, self).start_listening(**kwargs)

    def stop_listening(self, **kwargs):
        """Stop listening, then close the send bus and the OTDB rpc.

        The send bus and the rpc get their close() call even when an earlier
        step raises; the exception still propagates to the caller afterwards.
        """
        try:
            super(RATaskSpecified, self).stop_listening(**kwargs)
        finally:
            try:
                self.send_bus.close()
            finally:
                self.otdbrpc.close()

    def get_predecessors(self, parset):
        """ Extract the list of predecessor obs IDs from the given parset.

        :param parset: mapping which contains the Observation.Scheduler.predecessors key
        :return: list of dicts {'source': 'mom'|'otdb'|'other', 'id': int};
                 entries of which the numeric part cannot be parsed are left out
        """
        key = PARSET_PREFIX + "Observation.Scheduler.predecessors"
        stringlist = PyParameterValue(str(parset[key]), True).getStringVector()

        # Key contains values starting with 'S' = Scheduler, 'L'/'T' = OTDB, 'M' = MoM
        # 'S' we can probably ignore? Might be only internal in the Scheduler?
        result = []
        for s in stringlist:
            try:  # Made the source a string for readability, but it's not efficient
                if s.startswith('M'):
                    result.append({'source': 'mom', 'id': int(s[1:])})
                elif s.startswith(('L', 'T')):
                    result.append({'source': 'otdb', 'id': int(s[1:])})
                else:  # 'S'
                    logger.info("found a predecessor ID I can't handle: %s",
                                s)
                    result.append({'source': 'other', 'id': int(s[1:])})
            except ValueError:
                logger.warning("found a predecessor ID that I can't parse %s",
                               s)
        return result

    def get_specification_with_predecessors(self, id, id_source, state,
                                            found_parsets):
        """Build the specification tree of a task, recursing into its predecessors.

        :param id: id of the task, interpreted according to id_source
        :param id_source: 'otdb' (id is an OTDB id), 'mom' (id is looked up in
                          the OTDB by mom id) or 'other' (not handled)
        :param state: stored under 'state' in the result when not empty
        :param found_parsets: dict otdb_id -> parset, used as cache and updated in place
        :return: dict with the keys 'otdb_id', 'predecessors', 'specification',
                 'task_type', 'task_subtype' and, when given, 'state';
                 None when id_source is 'other' or not recognised
        """
        logger.info("Processing ID %s from %s", id, id_source)
        if id_source == "other":
            return None
        elif id_source == "mom":
            otdb_id = self.otdbrpc.taskGetIDs(mom_id=id)['otdb_id']
        elif id_source == "otdb":
            otdb_id = id
        else:
            logger.warning("Error in understanding id %s", id)
            # without a known source there is no otdb_id to continue with
            return None

        logger.info("Processing OTDB ID %s", otdb_id)
        result = {"otdb_id": otdb_id, "predecessors": []}
        if state:
            result["state"] = state  # TODO should be status not state
        # else: otdbrpc.taskGetStatus not implemented and maybe not needed?

        if otdb_id in found_parsets:
            parset = found_parsets[otdb_id]
        else:
            parset = self.otdbrpc.taskGetSpecification(
                otdb_id=otdb_id)['specification']
            found_parsets[otdb_id] = parset

        logger.info("parset [%s]: %s", otdb_id, parset)
        result['specification'] = resourceIndicatorsFromParset(parset)

        key = PARSET_PREFIX + "Observation.processSubtype"
        task_type, task_subtype = convertSchedulerProcessSubtype(
            parset.get(key, ""))
        result['task_type'] = task_type
        result['task_subtype'] = task_subtype

        logger.info("Processing predecessors")
        for predecessor in self.get_predecessors(parset):
            # predecessors are processed without a state
            predecessor_result = self.get_specification_with_predecessors(
                predecessor['id'], predecessor['source'], "", found_parsets)
            if predecessor_result:
                result["predecessors"].append(predecessor_result)
        return result

    def onObservationApproved(self, main_id, modificationTime):
        """Send the specification tree of the task with status 'approved'."""
        self.createAndSendSpecifiedTask(main_id, 'approved')

    def onObservationPrescheduled(self, main_id, modificationTime):
        """Send the specification tree of the task with status 'prescheduled'."""
        self.createAndSendSpecifiedTask(main_id, 'prescheduled')

    def createAndSendSpecifiedTask(self, main_id, status):
        """Build the specification tree for OTDB task main_id and send it on the send bus.

        :param main_id: OTDB id of the task at the root of the tree
        :param status: status stored in the root node of the tree
        """
        # Construct root node of tree
        resultTree = self.get_specification_with_predecessors(
            main_id, "otdb", status, {})
        logger.info("Sending result: %s", resultTree)

        # Put result on bus
        msg = EventMessage(content=resultTree)
        self.send_bus.send(msg)
        logger.info("Result sent")
Beispiel #4
0
class ResourceAssigner():
    """Inserts tasks in the radb and claims resources for them.

    Wraps rpc clients for the radb, resource estimator, ssdb and OTDB
    services. Can be used in a 'with' statement, which opens the rpc
    connections on entry and closes them on exit.
    """

    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 re_busname=RE_BUSNAME,
                 re_servicename=RE_SERVICENAME,
                 ssdb_busname=DEFAULT_SSDB_BUSNAME,
                 ssdb_servicename=DEFAULT_SSDB_SERVICENAME,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 broker=None):
        """
        ResourceAssigner inserts/updates tasks in the radb and assigns resources to it based on incoming parset.
        :param radb_busname: busname on which the radb service listens (default: lofar.ra.command)
        :param radb_servicename: servicename of the radb service (default: RADBService)
        :param re_busname: busname on which the resource estimator service listens (default: lofar.ra.command)
        :param re_servicename: servicename of the resource estimator service (default: ResourceEstimation)
        :param ssdb_busname: busname on which the ssdb service listens (default: lofar.system)
        :param ssdb_servicename: servicename of the ssdb service (default: SSDBService)
        :param otdb_busname: busname handed to the OTDB rpc client (default: DEFAULT_OTDB_SERVICE_BUSNAME)
        :param otdb_servicename: servicename handed to the OTDB rpc client (default: DEFAULT_OTDB_SERVICENAME)
        :param broker: Valid Qpid broker host (default: None, which means localhost)
        """
        self.radbrpc = RARPC(servicename=radb_servicename, busname=radb_busname, broker=broker)
        self.rerpc = RPC(re_servicename, busname=re_busname, broker=broker, ForwardExceptions=True)
        self.ssdbrpc = SSDBRPC(servicename=ssdb_servicename, busname=ssdb_busname, broker=broker)
        self.otdbrpc = OTDBRPC(busname=otdb_busname, servicename=otdb_servicename, broker=broker) ## , ForwardExceptions=True hardcoded in RPCWrapper right now

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open rpc connections to the radb, resource estimator, OTDB and ssdb services"""
        self.radbrpc.open()
        self.rerpc.open()
        self.otdbrpc.open()
        self.ssdbrpc.open()

    def close(self):
        """Close rpc connections to the radb, resource estimator, OTDB and ssdb services.

        Every connection gets its close() call, even when closing an earlier
        one raises; the exception still propagates to the caller afterwards.
        """
        try:
            self.radbrpc.close()
        finally:
            try:
                self.rerpc.close()
            finally:
                try:
                    self.otdbrpc.close()
                finally:
                    self.ssdbrpc.close()

    def doAssignment(self, specification_tree):
        """Insert the task of the specification tree in the radb and claim its resources.

        The task is only processed when its state is 'approved' or
        'prescheduled' and its start/stop times can be parsed. Resources are
        only claimed for 'prescheduled' tasks of which all enabled data
        products are stored on CEP4. When all claims are inserted without
        conflicts the task is set to 'scheduled'. Finally the predecessor
        relations are stored in the radb.
        :param specification_tree: dict with at least the keys 'otdb_id' and
                                   'specification'; 'task_type', 'state' and
                                   'predecessors' are used when present
        """
        logger.info('doAssignment: specification_tree=%s', specification_tree)

        otdb_id = specification_tree['otdb_id']
        taskType = specification_tree.get('task_type', '').lower()
        status = specification_tree.get('state', '').lower()

        if status not in ['approved', 'prescheduled']: # cep2 accepts both, cep4 only prescheduled, see below
            logger.info('skipping specification for otdb_id=%s because status=%s', otdb_id, status)
            return

        #parse main parset...
        mainParset = parameterset(specification_tree['specification'])

        momId = mainParset.getInt('Observation.momID', -1)
        try:
            startTime = datetime.strptime(mainParset.getString('Observation.startTime'), '%Y-%m-%d %H:%M:%S')
            endTime = datetime.strptime(mainParset.getString('Observation.stopTime'), '%Y-%m-%d %H:%M:%S')
        except ValueError:
            # without start/end time there is nothing to insert in the radb
            logger.warning('cannot parse for start/end time from specification for otdb_id=%s', otdb_id)
            return

        # insert new task and specification in the radb
        # any existing specification and task with same otdb_id will be deleted automatically
        logger.info('doAssignment: insertSpecification momId=%s, otdb_id=%s, status=%s, taskType=%s, startTime=%s, endTime=%s',
                    momId, otdb_id, status, taskType, startTime, endTime)
        result = self.radbrpc.insertSpecificationAndTask(momId, otdb_id, status, taskType, startTime, endTime, str(mainParset))

        if not result['inserted']:
            logger.error('could not insert specification and task')
            return

        specificationId = result['specification_id']
        taskId = result['task_id']
        logger.info('doAssignment: inserted specification (id=%s) and task (id=%s)', specificationId, taskId)

        # do not assign resources to task for other clusters than cep4
        if not self.checkClusterIsCEP4(mainParset):
            return

        if status != 'prescheduled':
            logger.info('skipping resource assignment for CEP4 task otdb_id=%s because status=%s', otdb_id, status)
            return

        needed = self.getNeededResouces(specification_tree)
        logger.info('doAssignment: getNeededResouces=%s', needed)

        # the estimator results are keyed on the otdb_id as string
        needed_key = str(otdb_id)
        if needed_key not in needed:
            logger.error("no otdb_id %s found in estimator results %s", otdb_id, needed)
            return

        main_needed = needed[needed_key]
        if taskType not in main_needed:
            logger.error("no task type %s found in estimator results %s", taskType, main_needed)
            return

        # make sure the availability in the radb is up to date
        # TODO: this should be updated regularly
        try:
            self.updateAvailableResources('cep4')
        except Exception as e:
            logger.warning("Exception while updating available resources: %s", str(e))

        # claim the resources for this task
        # during the claim inserts the claims are automatically validated
        # and if not enough resources are available, then they are put to conflict status
        # also, if any claim is in conflict state, then the task is put to conflict status as well
        task = self.radbrpc.getTask(taskId)
        claimed, _claim_ids = self.claimResources(main_needed, task)
        if claimed:
            conflictingClaims = self.radbrpc.getResourceClaims(task_ids=taskId, status='conflict')

            if conflictingClaims:
                logger.warning('doAssignment: %s conflicting claims detected. Task cannot be scheduled. %s',
                               len(conflictingClaims), conflictingClaims)
            else:
                logger.info('doAssignment: all claims for task %s were succesfully claimed. Setting task status to scheduled', taskId)
                self.radbrpc.updateTaskAndResourceClaims(taskId, task_status='scheduled', claim_status='allocated')

        self.processPredecessors(specification_tree)

    def processPredecessors(self, specification_tree):
        """Store the predecessor relations of the specification tree in the radb.

        Walks the tree recursively. Predecessors which are not known in the
        radb are not linked, but their own predecessors are still processed.
        Exceptions are logged and not propagated.
        :param specification_tree: dict with the keys 'otdb_id' and 'predecessors'
        """
        try:
            predecessor_trees = specification_tree['predecessors']
            if not predecessor_trees:
                return

            otdb_id = specification_tree['otdb_id']
            task = self.radbrpc.getTask(otdb_id=otdb_id)

            for predecessor_tree in predecessor_trees:
                pred_otdb_id = predecessor_tree['otdb_id']
                predecessor_task = self.radbrpc.getTask(otdb_id=pred_otdb_id)
                if predecessor_task:
                    self.radbrpc.insertTaskPredecessor(task['id'], predecessor_task['id'])
                self.processPredecessors(predecessor_tree)

        except Exception as e:
            logger.error(e)

    def checkClusterIsCEP4(self, parset):
        """Check that every enabled data product in the parset is stored on CEP4.

        :param parset: parset offering getBool and getString
        :return: False as soon as an enabled data product has a
                 storageClusterName other than 'CEP4', True otherwise
        """
        # check storageClusterName for enabled DataProducts
        # if any storageClusterName is not CEP4, we do not accept this parset
        keys = ['Output_Correlated',
                'Output_IncoherentStokes',
                'Output_CoherentStokes',
                'Output_InstrumentModel',
                'Output_SkyImage',
                'Output_Pulsar']
        for key in keys:
            if not parset.getBool('Observation.DataProducts.%s.enabled' % key, False):
                continue
            if parset.getString('Observation.DataProducts.%s.storageClusterName' % key, '') != 'CEP4':
                logger.warning("storageClusterName not CEP4, rejecting specification.")
                return False

        logger.info("all enabled storageClusterName's are CEP4, accepting specification.")
        return True


    def getNeededResouces(self, specification_tree):
        """Ask the resource estimator which resources the specification tree needs.

        :param specification_tree: sent to the estimator under the key 'specification_tree'
        :return: the reply message of the estimator rpc call (timeout: 10)
        """
        replymessage, _status = self.rerpc({"specification_tree":specification_tree}, timeout=10)
        logger.info('getNeededResouces: %s', replymessage)
        return replymessage

    def updateAvailableResources(self, cluster):
        """Update the availability of the cep4 storage resource in the radb from the ssdb.

        :param cluster: name of the cluster group as known in the ssdb
        :raises StopIteration: when the cluster group or a cep4 storage resource cannot be found
        """
        # find out which resources are available
        # and what is their capacity
        # For now, only look at CEP4 storage
        # Later, also look at stations up/down for short term scheduling

        #get all active groupnames, find id for cluster group
        groupnames = self.ssdbrpc.getactivegroupnames()
        cluster_group_id = next(k for k,v in groupnames.items() if v == cluster)

        # for CEP4 cluster, do hard codes lookup of first and only node
        node_info = self.ssdbrpc.gethostsforgid(cluster_group_id)['nodes'][0]

        storage_resources = self.radbrpc.getResources(resource_types='storage', include_availability=True)
        cep4_storage_resource = next(x for x in storage_resources if 'cep4' in x['name'])
        active = node_info['statename'] == 'Active'
        total_capacity = node_info['totalspace']
        available_capacity = total_capacity - node_info['usedspace']

        logger.info("Updating resource availability of %s (id=%s) to active=%s available_capacity=%s total_capacity=%s",
                    cep4_storage_resource['name'], cep4_storage_resource['id'], active, available_capacity, total_capacity)

        self.radbrpc.updateResourceAvailability(cep4_storage_resource['id'],
                                                active=active,
                                                available_capacity=available_capacity,
                                                total_capacity=total_capacity)


    def _claimProperties(self, needed_prop_groups, rc_property_types):
        """Flatten the property groups of one needed claim into a list of claim properties.

        :param needed_prop_groups: dict group_name -> properties dict; the group
                                   'saps' holds a list of dicts with the keys
                                   'properties' and 'sap_nr' instead
        :param rc_property_types: dict property type name -> property type id
        :return: list of dicts with the keys 'type', 'value' and, for sap properties, 'sap_nr'
        """
        properties = []

        def processProperties(propertiesDict, sap_nr=None):
            for prop_type_name, prop_value in propertiesDict.items():
                if prop_type_name not in rc_property_types:
                    logger.error('claimResources: unknown prop_type:%s', prop_type_name)
                    continue
                claim_property = {'type':rc_property_types[prop_type_name], 'value':prop_value}
                if sap_nr is not None:
                    claim_property['sap_nr'] = sap_nr
                properties.append(claim_property)

        for group_name, needed_prop_group in needed_prop_groups.items():
            if group_name == 'saps':
                for sap_dict in needed_prop_group:
                    processProperties(sap_dict['properties'], sap_dict['sap_nr'])
            else:
                processProperties(needed_prop_group)

        return properties

    def claimResources(self, needed_resources, task):
        """Insert claims in the radb for the resources which the task needs.

        :param needed_resources: dict task type -> resource type name -> dict
                                 holding the int claim value and optionally
                                 the property groups of the claim
        :param task: radb task dict; the keys 'id', 'type', 'starttime' and 'endtime' are used
        :return: tuple (True when as many claims were inserted as were created, list of inserted claim ids)
        """
        # collections.Iterable is gone from python 3.10 onwards, collections.abc does not exist in python 2
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable

        logger.info('claimResources: task %s needed_resources=%s', task, needed_resources)

        # get the needed resources for the task type
        needed_resources_for_task_type = needed_resources[task['type']]

        # get db lists
        rc_property_types = {rcpt['name']:rcpt['id'] for rcpt in self.radbrpc.getResourceClaimPropertyTypes()}
        resource_types = {rt['name']:rt['id'] for rt in self.radbrpc.getResourceTypes()}
        resources = self.radbrpc.getResources()

        # loop over needed_resources -> resource_type -> claim (and props)
        # flatten the tree dict to a list of claims (with props)
        claims = []
        for resource_type_name, needed_claim_for_resource_type in needed_resources_for_task_type.items():
            if resource_type_name not in resource_types:
                logger.error('claimResources: unknown resource_type:%s', resource_type_name)
                continue

            logger.info('claimResources: processing resource_type: %s', resource_type_name)
            db_resource_type_id = resource_types[resource_type_name]
            db_resources_for_type = [r for r in resources if r['type_id'] == db_resource_type_id]

            # needed_claim_for_resource_type is a dict containing exactly one kvp of which the value is an int
            # that value is the value for the claim
            needed_claim_value = next(v for v in needed_claim_for_resource_type.values() if isinstance(v, int))

            # FIXME: right now we just pick the first resource from the 'cep4' resources.
            # estimator will deliver this info in the future
            db_cep4_resources_for_type = [r for r in db_resources_for_type if 'cep4' in r['name'].lower()]
            if not db_cep4_resources_for_type:
                continue

            db_resource = db_cep4_resources_for_type[0]
            claim = {'resource_id':db_resource['id'],
                     'starttime':task['starttime'],
                     'endtime':task['endtime'],
                     'status':'claimed',
                     'claim_size':needed_claim_value}

            #FIXME: find proper way to extend storage time with a month
            if 'storage' in db_resource['name']:
                claim['endtime'] += timedelta(days=31)

            # if the needed_claim_for_resource_type dict contains more kvp's,
            # then the subdict contains groups of properties for the claim
            if len(needed_claim_for_resource_type) > 1:
                needed_prop_groups = next(v for v in needed_claim_for_resource_type.values() if isinstance(v, Iterable))
                claim['properties'] = self._claimProperties(needed_prop_groups, rc_property_types)

            logger.info('claimResources: created claim:%s', claim)
            claims.append(claim)

        logger.info('claimResources: inserting %d claims in the radb', len(claims))
        claim_ids = self.radbrpc.insertResourceClaims(task['id'], claims, 1, 'anonymous', -1)['ids']
        logger.info('claimResources: %d claims were inserted in the radb', len(claim_ids))
        return len(claim_ids) == len(claims), claim_ids
Beispiel #5
0
class RAtoOTDBPropagator():
    """Pushes the outcome of resource assignment for a task into the OTDB.

    Holds three RPC clients (radb, otdb and MoM query) plus a translator that
    turns the RA information into the parset sent to the OTDB. Can be used as
    a context manager: the connections are opened on enter and closed on exit.
    """

    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 radb_broker=None,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 mom_busname=DEFAULT_MOMQUERY_BUSNAME,
                 mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
                 otdb_broker=None,
                 mom_broker=None,
                 broker=None):
        """
        RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.
        :param radb_busname: busname on which the radb service listens (default: lofar.ra.command)
        :param radb_servicename: servicename of the radb service (default: RADBService)
        :param radb_broker: valid Qpid broker host (default: None, which means localhost)
        :param otdb_busname: busname on which the OTDB service listens (default: lofar.otdb.command)
        :param otdb_servicename: servicename of the OTDB service (default: OTDBService)
        :param mom_busname: busname on which the MoM query service listens (default: DEFAULT_MOMQUERY_BUSNAME)
        :param mom_servicename: servicename of the MoM query service (default: DEFAULT_MOMQUERY_SERVICENAME)
        :param otdb_broker: valid Qpid broker host (default: None, which means localhost)
        :param mom_broker: valid Qpid broker host (default: None, which means localhost)
        :param broker: if specified, overrules radb_broker, otdb_broker and mom_broker. Valid Qpid broker host (default: None, which means localhost)
        """
        if broker:
            # a single broker overrules the three per-service brokers
            radb_broker = otdb_broker = mom_broker = broker

        self.radbrpc = RADBRPC(busname=radb_busname, servicename=radb_servicename, broker=radb_broker) ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.otdbrpc = OTDBRPC(busname=otdb_busname, servicename=otdb_servicename, broker=otdb_broker) ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.momrpc = MoMQueryRPC(busname=mom_busname, servicename=mom_servicename, broker=mom_broker)
        self.translator = RAtoOTDBTranslator()

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open the rpc connections to the radb, otdb and MoM query services."""
        self.radbrpc.open()
        self.otdbrpc.open()
        self.momrpc.open()

    def close(self):
        """Close the rpc connections to the radb, otdb and MoM query services.

        Every connection gets a close attempt, even when closing an earlier
        one raises; the exception still propagates to the caller afterwards.
        """
        try:
            self.radbrpc.close()
        finally:
            try:
                self.otdbrpc.close()
            finally:
                self.momrpc.close()

    def doTaskConflict(self, otdb_id):
        """Set the status of the OTDB task to 'conflict'.

        Does nothing (apart from logging a warning) when otdb_id is falsy.
        Errors raised by the OTDB rpc call are logged and not re-raised.
        :param otdb_id: id of the task in the OTDB
        """
        logger.info('doTaskConflict: otdb_id=%s', otdb_id)
        if not otdb_id:
            logger.warning('doTaskConflict no valid otdb_id: otdb_id=%s', otdb_id)
            return
        try:
            self.otdbrpc.taskSetStatus(otdb_id, 'conflict')
        except Exception as e:
            logger.error(e)

    def doTaskScheduled(self, ra_id, otdb_id, mom_id):
        """Write the RA results of a scheduled task to the OTDB.

        Fetches the RA info, builds a parset from it via the translator and
        stores it in the OTDB with status 'scheduled'. Tasks without a storage
        claim are left untouched. Any exception is logged, after which the
        OTDB task is put to 'conflict'.
        :param ra_id: id of the task in the radb
        :param otdb_id: id of the task in the OTDB
        :param mom_id: id of the task in MoM, used to look up the project name
        """
        try:
            logger.info('doTaskScheduled: ra_id=%s otdb_id=%s mom_id=%s', ra_id, otdb_id, mom_id)
            if not otdb_id:
                logger.warning('doTaskScheduled no valid otdb_id: otdb_id=%s', otdb_id)
                return
            ra_info = self.getRAinfo(ra_id)

            logger.info('RA info for ra_id=%s otdb_id=%s: %s', ra_id, otdb_id, ra_info)

            # check if this is a CEP4 task, or an old CEP2 task
            # at this moment the most simple check is to see if RA claimed (CEP4) storage
            # TODO: do proper check on cluster/storage/etc
            if not ra_info['storage']:
                logger.info("No (CEP4) storage claimed for ra_id=%s otdb_id=%s, skipping otdb specification update.", ra_id, otdb_id)
                return

            # get mom project name; fall back to 'unknown' when MoM cannot supply it
            try:
                project = self.momrpc.getProjectDetails(mom_id)
                logger.info(project)
                # collapse all whitespace in the project name to single underscores
                project_name = "_".join(project[str(mom_id)]['project_name'].split())
            except (RPCException, KeyError) as e:
                logger.error('Could not get project name from MoM for mom_id %s: %s', mom_id, e)
                logger.info('Using \'unknown\' as project name.')
                project_name = 'unknown'

            otdb_info = self.translator.CreateParset(otdb_id, ra_info, project_name)
            logger.debug("Parset info for OTDB: %s", otdb_info)
            self.setOTDBinfo(otdb_id, otdb_info, 'scheduled')
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)

    def getRAinfo(self, ra_id):
        """Collect the RA information needed to build the OTDB parset.

        :param ra_id: id of the task in the radb
        :return: dict with keys 'storage' (the claim whose resource_type_name
                 is 'storage', or an empty dict when there is none; when there
                 are several, the last one wins), 'starttime', 'endtime' and
                 'status' (copied from the radb task)
        """
        task = self.radbrpc.getTask(ra_id)
        claims = self.radbrpc.getResourceClaims(task_ids=ra_id, extended=True, include_properties=True)

        info = {"storage": {}}
        for claim in claims:
            logger.debug("Processing claim: %s", claim)
            if claim['resource_type_name'] == 'storage':
                info['storage'] = claim

        for key in ("starttime", "endtime", "status"):
            info[key] = task[key]
        return info

    def setOTDBinfo(self, otdb_id, otdb_info, otdb_status):
        """Store the specification in the OTDB and set the task status.

        Any exception is logged, after which the OTDB task is put to 'conflict'.
        :param otdb_id: id of the task in the OTDB
        :param otdb_info: parset dict; must contain the keys
                          'LOFAR.ObsSW.Observation.startTime' and
                          'LOFAR.ObsSW.Observation.stopTime'
        :param otdb_status: status to set on the OTDB task after the update
        """
        try:
            logger.info('Setting specticication for otdb_id %s: %s', otdb_id, otdb_info)
            self.otdbrpc.taskSetSpecification(otdb_id, otdb_info)
            self.otdbrpc.taskPrepareForScheduling(otdb_id,
                                                  otdb_info["LOFAR.ObsSW.Observation.startTime"],
                                                  otdb_info["LOFAR.ObsSW.Observation.stopTime"])
            logger.info('Setting status (%s) for otdb_id %s', otdb_status, otdb_id)
            self.otdbrpc.taskSetStatus(otdb_id, otdb_status)
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)
Beispiel #6
0
class ResourceAssigner():
    """Inserts tasks in the radb and claims resources for them.

    Holds four RPC clients (radb, resource estimator, ssdb and otdb). Can be
    used as a context manager: the connections are opened on enter and closed
    on exit.
    """

    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 re_busname=RE_BUSNAME,
                 re_servicename=RE_SERVICENAME,
                 ssdb_busname=DEFAULT_SSDB_BUSNAME,
                 ssdb_servicename=DEFAULT_SSDB_SERVICENAME,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 broker=None):
        """
        ResourceAssigner inserts/updates tasks in the radb and assigns resources to it based on incoming parset.
        :param radb_busname: busname on which the radb service listens (default: lofar.ra.command)
        :param radb_servicename: servicename of the radb service (default: RADBService)
        :param re_busname: busname on which the resource estimator service listens (default: lofar.ra.command)
        :param re_servicename: servicename of the resource estimator service (default: ResourceEstimation)
        :param ssdb_busname: busname on which the ssdb service listens (default: lofar.system)
        :param ssdb_servicename: servicename of the ssdb service (default: SSDBService)
        :param otdb_busname: busname on which the OTDB service listens (default: DEFAULT_OTDB_SERVICE_BUSNAME)
        :param otdb_servicename: servicename of the OTDB service (default: DEFAULT_OTDB_SERVICENAME)
        :param broker: Valid Qpid broker host (default: None, which means localhost)
        """
        self.radbrpc = RARPC(servicename=radb_servicename,
                             busname=radb_busname,
                             broker=broker)
        self.rerpc = RPC(re_servicename,
                         busname=re_busname,
                         broker=broker,
                         ForwardExceptions=True)
        self.ssdbrpc = SSDBRPC(servicename=ssdb_servicename,
                               busname=ssdb_busname,
                               broker=broker)
        self.otdbrpc = OTDBRPC(busname=otdb_busname,
                               servicename=otdb_servicename,
                               broker=broker)  ## , ForwardExceptions=True hardcoded in RPCWrapper right now

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open the rpc connections to the radb, resource estimator, otdb and ssdb services."""
        self.radbrpc.open()
        self.rerpc.open()
        self.otdbrpc.open()
        self.ssdbrpc.open()

    def close(self):
        """Close the rpc connections to the radb, resource estimator, otdb and ssdb services.

        Every connection gets a close attempt, even when closing an earlier
        one raises; the exception still propagates to the caller afterwards.
        """
        try:
            self.radbrpc.close()
        finally:
            try:
                self.rerpc.close()
            finally:
                try:
                    self.otdbrpc.close()
                finally:
                    self.ssdbrpc.close()

    def doAssignment(self, specification_tree):
        """Insert the specified task in the radb and try to claim its resources.

        The specification is skipped when its state is not 'approved' or
        'prescheduled', or when its start/stop time cannot be parsed.
        Resources are only claimed for 'prescheduled' tasks of which all
        enabled data products are stored on CEP4. When all claims are inserted
        without conflicts, the task and its claims are put to
        'scheduled'/'allocated'. Finally the predecessor relations are stored.
        :param specification_tree: dict with at least the keys 'otdb_id' and
                                   'specification'; 'task_type', 'state' and
                                   'predecessors' are read when present
        """
        logger.info('doAssignment: specification_tree=%s', specification_tree)

        otdb_id = specification_tree['otdb_id']
        taskType = specification_tree.get('task_type', '').lower()
        status = specification_tree.get('state', '').lower()

        # cep2 accepts both, cep4 only prescheduled, see below
        if status not in ('approved', 'prescheduled'):
            logger.info('skipping specification for otdb_id=%s because status=%s',
                        otdb_id, status)
            return

        #parse main parset...
        mainParset = parameterset(specification_tree['specification'])

        momId = mainParset.getInt('Observation.momID', -1)
        time_format = '%Y-%m-%d %H:%M:%S'
        try:
            startTime = datetime.strptime(mainParset.getString('Observation.startTime'), time_format)
            endTime = datetime.strptime(mainParset.getString('Observation.stopTime'), time_format)
        except ValueError:
            logger.warning('cannot parse for start/end time from specification for otdb_id=%s',
                           otdb_id)
            # without a valid start/end time there is nothing sensible to insert
            return

        # insert new task and specification in the radb
        # any existing specification and task with same otdb_id will be deleted automatically
        logger.info('doAssignment: insertSpecification momId=%s, otdb_id=%s, status=%s, taskType=%s, startTime=%s, endTime=%s',
                    momId, otdb_id, status, taskType, startTime, endTime)
        result = self.radbrpc.insertSpecificationAndTask(momId, otdb_id, status, taskType,
                                                         startTime, endTime, str(mainParset))

        if not result['inserted']:
            logger.error('could not insert specification and task')
            return

        specificationId = result['specification_id']
        taskId = result['task_id']
        logger.info('doAssignment: inserted specification (id=%s) and task (id=%s)',
                    specificationId, taskId)

        # do not assign resources to task for other clusters than cep4
        if not self.checkClusterIsCEP4(mainParset):
            return

        if status != 'prescheduled':
            logger.info('skipping resource assignment for CEP4 task otdb_id=%s because status=%s',
                        otdb_id, status)
            return

        needed = self.getNeededResouces(specification_tree)
        logger.info('doAssignment: getNeededResouces=%s', needed)

        otdb_key = str(otdb_id)
        if otdb_key not in needed:
            logger.error("no otdb_id %s found in estimator results %s", otdb_id, needed)
            return

        main_needed = needed[otdb_key]
        if taskType not in main_needed:
            logger.error("no task type %s found in estimator results %s", taskType, main_needed)
            return

        # make sure the availability in the radb is up to date
        # TODO: this should be updated regularly
        try:
            self.updateAvailableResources('cep4')
        except Exception as e:
            # best effort: a failed availability update must not block the assignment
            logger.warning("Exception while updating available resources: %s", e)

        # claim the resources for this task
        # during the claim inserts the claims are automatically validated
        # and if not enough resources are available, then they are put to conflict status
        # also, if any claim is in conflict state, then the task is put to conflict status as well
        task = self.radbrpc.getTask(taskId)
        claimed, claim_ids = self.claimResources(main_needed, task)
        if claimed:
            conflictingClaims = self.radbrpc.getResourceClaims(task_ids=taskId, status='conflict')

            if conflictingClaims:
                logger.warning('doAssignment: %s conflicting claims detected. Task cannot be scheduled. %s',
                               len(conflictingClaims), conflictingClaims)
            else:
                logger.info('doAssignment: all claims for task %s were succesfully claimed. Setting task status to scheduled',
                            taskId)
                self.radbrpc.updateTaskAndResourceClaims(taskId,
                                                         task_status='scheduled',
                                                         claim_status='allocated')
        else:
            logger.warning('doAssignment: not all claims for task %s could be inserted in the radb (inserted claim ids: %s)',
                           taskId, claim_ids)

        self.processPredecessors(specification_tree)

    def processPredecessors(self, specification_tree):
        """Recursively store the predecessor relations of the tree in the radb.

        A relation is only inserted when the predecessor task is found in the
        radb. Any exception is logged and not re-raised.
        :param specification_tree: dict with the keys 'otdb_id' and 'predecessors'
                                   (a list of trees of the same shape)
        """
        try:
            predecessor_trees = specification_tree['predecessors']

            if predecessor_trees:
                otdb_id = specification_tree['otdb_id']
                task = self.radbrpc.getTask(otdb_id=otdb_id)

                for predecessor_tree in predecessor_trees:
                    pred_otdb_id = predecessor_tree['otdb_id']
                    predecessor_task = self.radbrpc.getTask(otdb_id=pred_otdb_id)
                    if predecessor_task:
                        self.radbrpc.insertTaskPredecessor(task['id'], predecessor_task['id'])
                    self.processPredecessors(predecessor_tree)

        except Exception as e:
            logger.error(e)

    def checkClusterIsCEP4(self, parset):
        """Check that every enabled data product in the parset is stored on CEP4.

        :param parset: parset object offering getBool(key, default) and getString(key, default)
        :return: False as soon as an enabled data product has a storageClusterName
                 other than 'CEP4', True otherwise
        """
        # check storageClusterName for enabled DataProducts
        # if any storageClusterName is not CEP4, we do not accept this parset
        keys = ['Output_Correlated', 'Output_IncoherentStokes',
                'Output_CoherentStokes', 'Output_InstrumentModel',
                'Output_SkyImage', 'Output_Pulsar']
        for key in keys:
            if not parset.getBool('Observation.DataProducts.%s.enabled' % key, False):
                continue
            cluster_name = parset.getString('Observation.DataProducts.%s.storageClusterName' % key, '')
            if cluster_name != 'CEP4':
                logger.warning("storageClusterName not CEP4, rejecting specification.")
                return False

        logger.info("all enabled storageClusterName's are CEP4, accepting specification.")
        return True

    def getNeededResouces(self, specification_tree):
        """Ask the resource estimator service which resources the specification needs.

        :param specification_tree: the specification tree to estimate
        :return: the reply message of the estimator rpc call (the status that
                 comes with it is ignored)
        """
        replymessage, _ = self.rerpc({"specification_tree": specification_tree}, timeout=10)
        logger.info('getNeededResouces: %s', replymessage)
        return replymessage

    def updateAvailableResources(self, cluster):
        """Copy the current storage state of the cluster from the ssdb into the radb.

        :param cluster: name of the cluster group as known in the ssdb
        :raises StopIteration: when the cluster group or a 'cep4' storage
                               resource cannot be found
        """
        # find out which resources are available
        # and what is their capacity
        # For now, only look at CEP4 storage
        # Later, also look at stations up/down for short term scheduling

        #get all active groupnames, find id for cluster group
        groupnames = self.ssdbrpc.getactivegroupnames()
        cluster_group_id = next(k for k, v in groupnames.items() if v == cluster)

        # for CEP4 cluster, do hard coded lookup of first and only node
        node_info = self.ssdbrpc.gethostsforgid(cluster_group_id)['nodes'][0]

        storage_resources = self.radbrpc.getResources(resource_types='storage',
                                                      include_availability=True)
        cep4_storage_resource = next(x for x in storage_resources if 'cep4' in x['name'])
        active = node_info['statename'] == 'Active'
        total_capacity = node_info['totalspace']
        available_capacity = total_capacity - node_info['usedspace']

        logger.info("Updating resource availability of %s (id=%s) to active=%s available_capacity=%s total_capacity=%s",
                    cep4_storage_resource['name'], cep4_storage_resource['id'],
                    active, available_capacity, total_capacity)

        self.radbrpc.updateResourceAvailability(cep4_storage_resource['id'],
                                                active=active,
                                                available_capacity=available_capacity,
                                                total_capacity=total_capacity)

    def claimResources(self, needed_resources, task):
        """Translate the estimator results into claims and insert them in the radb.

        :param needed_resources: dict: task type -> resource type name -> dict
                                 holding one int (the claim size) and optionally
                                 one dict with groups of claim properties
        :param task: radb task dict; the keys 'id', 'type', 'starttime' and
                     'endtime' are read
        :return: tuple (True when as many claim ids came back as claims were
                 submitted, list of inserted claim ids)
        """
        # collections.Iterable was removed in Python 3.10; prefer collections.abc
        try:
            from collections.abc import Iterable
        except ImportError:  # Python 2
            from collections import Iterable

        logger.info('claimResources: task %s needed_resources=%s', task, needed_resources)

        # get the needed resources for the task type
        needed_resources_for_task_type = needed_resources[task['type']]

        # get db lists
        rc_property_types = {rcpt['name']: rcpt['id']
                             for rcpt in self.radbrpc.getResourceClaimPropertyTypes()}
        resource_types = {rt['name']: rt['id']
                          for rt in self.radbrpc.getResourceTypes()}
        resources = self.radbrpc.getResources()

        def processProperties(claim, propertiesDict, sap_nr=None):
            # append each known property (optionally tagged with its sap_nr) to the claim
            for prop_type_name, prop_value in propertiesDict.items():
                if prop_type_name not in rc_property_types:
                    logger.error('claimResources: unknown prop_type:%s', prop_type_name)
                    continue
                claim_property = {'type': rc_property_types[prop_type_name],
                                  'value': prop_value}
                if sap_nr is not None:
                    claim_property['sap_nr'] = sap_nr
                claim['properties'].append(claim_property)

        # loop over needed_resources -> resource_type -> claim (and props)
        # flatten the tree dict to a list of claims (with props)
        claims = []
        for resource_type_name, needed_claim_for_resource_type in needed_resources_for_task_type.items():
            if resource_type_name not in resource_types:
                logger.error('claimResources: unknown resource_type:%s', resource_type_name)
                continue

            logger.info('claimResources: processing resource_type: %s', resource_type_name)
            db_resource_type_id = resource_types[resource_type_name]
            db_resources_for_type = [r for r in resources
                                     if r['type_id'] == db_resource_type_id]

            # needed_claim_for_resource_type is a dict containing exactly one kvp of which the value is an int
            # that value is the value for the claim
            needed_claim_value = next(v for v in needed_claim_for_resource_type.values()
                                      if isinstance(v, int))

            # FIXME: right now we just pick the first resource from the 'cep4' resources.
            # estimator will deliver this info in the future
            db_cep4_resources_for_type = [r for r in db_resources_for_type
                                          if 'cep4' in r['name'].lower()]
            if not db_cep4_resources_for_type:
                continue

            cep4_resource = db_cep4_resources_for_type[0]
            claim = {'resource_id': cep4_resource['id'],
                     'starttime': task['starttime'],
                     'endtime': task['endtime'],
                     'status': 'claimed',
                     'claim_size': needed_claim_value}

            #FIXME: find proper way to extend storage time with a month
            if 'storage' in cep4_resource['name']:
                claim['endtime'] += timedelta(days=31)

            # if the needed_claim_for_resource_type dict contains more kvp's,
            # then the subdict contains groups of properties for the claim
            if len(needed_claim_for_resource_type) > 1:
                claim['properties'] = []
                needed_prop_groups = next(v for v in needed_claim_for_resource_type.values()
                                          if isinstance(v, Iterable))

                for group_name, needed_prop_group in needed_prop_groups.items():
                    if group_name == 'saps':
                        # the 'saps' group is a list of dicts, each with its own sap_nr
                        for sap_dict in needed_prop_group:
                            processProperties(claim, sap_dict['properties'], sap_dict['sap_nr'])
                    else:
                        processProperties(claim, needed_prop_group)

            logger.info('claimResources: created claim:%s', claim)
            claims.append(claim)

        logger.info('claimResources: inserting %d claims in the radb', len(claims))
        claim_ids = self.radbrpc.insertResourceClaims(task['id'], claims, 1, 'anonymous', -1)['ids']
        logger.info('claimResources: %d claims were inserted in the radb', len(claim_ids))
        return len(claim_ids) == len(claims), claim_ids