Ejemplo n.º 1
0
class RAtoOTDBPropagator():
    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 radb_broker=None,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 mom_busname=DEFAULT_MOMQUERY_BUSNAME,
                 mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
                 otdb_broker=None,
                 mom_broker=None,
                 broker=None):
        """
        RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.
        :param radb_busname: busname on which the radb service listens (default: lofar.ra.command)
        :param radb_servicename: servicename of the radb service (default: RADBService)
        :param radb_broker: valid Qpid broker host (default: None, which means localhost)
        :param otdb_busname: busname on which the OTDB service listens (default: lofar.otdb.command)
        :param otdb_servicename: servicename of the OTDB service (default: OTDBService)
        :param otdb_broker: valid Qpid broker host (default: None, which means localhost)
        :param broker: if specified, overrules radb_broker and otdb_broker. Valid Qpid broker host (default: None, which means localhost)
        """
        if broker:
            radb_broker = broker
            otdb_broker = broker
            mom_broker = broker

        self.radbrpc = RADBRPC(
            busname=radb_busname,
            servicename=radb_servicename,
            broker=radb_broker
        )  ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.otdbrpc = OTDBRPC(
            busname=otdb_busname,
            servicename=otdb_servicename,
            broker=otdb_broker
        )  ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.momrpc = MoMQueryRPC(busname=mom_busname,
                                  servicename=mom_servicename,
                                  broker=mom_broker)
        self.translator = RAtoOTDBTranslator()

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open rpc connections to radb service and resource estimator service"""
        self.radbrpc.open()
        self.otdbrpc.open()
        self.momrpc.open()

    def close(self):
        """Close rpc connections to radb service and resource estimator service"""
        self.radbrpc.close()
        self.otdbrpc.close()
        self.momrpc.close()

    def doTaskConflict(self, otdb_id):
        logger.info('doTaskConflict: otdb_id=%s' % (otdb_id, ))
        if not otdb_id:
            logger.warning('doTaskConflict no valid otdb_id: otdb_id=%s' %
                           (otdb_id, ))
            return
        try:
            self.otdbrpc.taskSetStatus(otdb_id, 'conflict')
        except Exception as e:
            logger.error(e)

    def doTaskScheduled(self, ra_id, otdb_id, mom_id):
        try:
            logger.info('doTaskScheduled: ra_id=%s otdb_id=%s mom_id=%s' %
                        (ra_id, otdb_id, mom_id))
            if not otdb_id:
                logger.warning('doTaskScheduled no valid otdb_id: otdb_id=%s' %
                               (otdb_id, ))
                return
            ra_info = self.getRAinfo(ra_id)

            logger.info('RA info for ra_id=%s otdb_id=%s: %s' %
                        (ra_id, otdb_id, ra_info))

            # check if this is a CEP4 task, or an old CEP2 task
            # at this moment the most simple check is to see if RA claimed (CEP4) storage
            # TODO: do proper check on cluster/storage/etc
            if not ra_info['storage']:
                logger.info(
                    "No (CEP4) storage claimed for ra_id=%s otdb_id=%s, skipping otdb specification update."
                    % (ra_id, otdb_id))
                return

            #get mom project name
            try:
                project = self.momrpc.getProjectDetails(mom_id)
                logger.info(project)
                project_name = "_".join(
                    project[str(mom_id)]['project_name'].split())
            except (RPCException, KeyError) as e:
                logger.error(
                    'Could not get project name from MoM for mom_id %s: %s' %
                    (mom_id, str(e)))
                logger.info('Using \'unknown\' as project name.')
                project_name = 'unknown'

            otdb_info = self.translator.CreateParset(otdb_id, ra_info,
                                                     project_name)
            logger.debug("Parset info for OTDB: %s" % otdb_info)
            self.setOTDBinfo(otdb_id, otdb_info, 'scheduled')
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)

    def getRAinfo(self, ra_id):
        info = {}
        info["storage"] = {}
        task = self.radbrpc.getTask(ra_id)
        claims = self.radbrpc.getResourceClaims(task_ids=ra_id,
                                                extended=True,
                                                include_properties=True)
        for claim in claims:
            logger.debug("Processing claim: %s" % claim)
            if claim['resource_type_name'] == 'storage':
                info['storage'] = claim
        info["starttime"] = task["starttime"]
        info["endtime"] = task["endtime"]
        info["status"] = task["status"]
        return info

    def setOTDBinfo(self, otdb_id, otdb_info, otdb_status):
        try:
            logger.info('Setting specticication for otdb_id %s: %s' %
                        (otdb_id, otdb_info))
            self.otdbrpc.taskSetSpecification(otdb_id, otdb_info)
            self.otdbrpc.taskPrepareForScheduling(
                otdb_id, otdb_info["LOFAR.ObsSW.Observation.startTime"],
                otdb_info["LOFAR.ObsSW.Observation.stopTime"])
            logger.info('Setting status (%s) for otdb_id %s' %
                        (otdb_status, otdb_id))
            self.otdbrpc.taskSetStatus(otdb_id, otdb_status)
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)
Ejemplo n.º 2
0
class OTDBtoRATaskStatusPropagator(OTDBBusListener):
    def __init__(self,
                 otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
                 otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT,
                 radb_busname=DEFAULT_RADB_BUSNAME,
                 radb_servicename=DEFAULT_RADB_SERVICENAME,
                 broker=None, **kwargs):
        super(OTDBtoRATaskStatusPropagator, self).__init__(busname=otdb_notification_busname,
                                                           subject=otdb_notification_subject,
                                                           broker=broker,
                                                           **kwargs)

        self.radb = RARPC(busname=radb_busname, servicename=radb_servicename, broker=broker)

    def start_listening(self, **kwargs):
        self.radb.open()
        super(OTDBtoRATaskStatusPropagator, self).start_listening(**kwargs)

    def stop_listening(self, **kwargs):
        self.radb.close()
        super(OTDBtoRATaskStatusPropagator, self).stop_listening(**kwargs)

    def _update_radb_task_status(self, otdb_id, task_status):
        logger.info("updating radb-task with otdb_id %s to status %s" % (otdb_id, task_status))
        result = self.radb.updateTaskStatusForOtdbId(otdb_id=otdb_id, status=task_status)

        if not result or 'updated' not in result or not result['updated']:
            logger.warning("could not update task with otdb_id %s to status %s" % (otdb_id, task_status))

    def onObservationPrepared(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'prepared')

    def onObservationApproved(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'approved')

    def onObservationOnHold(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'on_hold')

    def onObservationConflict(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'conflict')

    def onObservationPrescheduled(self, treeId, modificationTime):
        logger.info("not propagating prescheduled status for otdb_id %s to radb because the resource assigner takes care of this" % (treeId))

    def onObservationScheduled(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'scheduled')

    def onObservationQueued(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'queued')

    def onObservationStarted(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'active')

    def onObservationCompleting(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'completing')

    def onObservationFinished(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'finished')

    def onObservationAborted(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'aborted')
Ejemplo n.º 3
0
class OTDBtoRATaskStatusPropagator(OTDBBusListener):
    def __init__(self,
                 otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
                 otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT,
                 radb_busname=DEFAULT_RADB_BUSNAME,
                 radb_servicename=DEFAULT_RADB_SERVICENAME,
                 broker=None,
                 **kwargs):
        super(OTDBtoRATaskStatusPropagator,
              self).__init__(busname=otdb_notification_busname,
                             subject=otdb_notification_subject,
                             broker=broker,
                             **kwargs)

        self.radb = RARPC(busname=radb_busname,
                          servicename=radb_servicename,
                          broker=broker)

    def start_listening(self, **kwargs):
        self.radb.open()
        super(OTDBtoRATaskStatusPropagator, self).start_listening(**kwargs)

    def stop_listening(self, **kwargs):
        self.radb.close()
        super(OTDBtoRATaskStatusPropagator, self).stop_listening(**kwargs)

    def _update_radb_task_status(self, otdb_id, task_status):
        logger.info("updating radb-task with otdb_id %s to status %s" %
                    (otdb_id, task_status))
        result = self.radb.updateTaskStatusForOtdbId(otdb_id=otdb_id,
                                                     status=task_status)

        if not result or 'updated' not in result or not result['updated']:
            logger.warning(
                "could not update task with otdb_id %s to status %s" %
                (otdb_id, task_status))

    def onObservationPrepared(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'prepared')

    def onObservationApproved(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'approved')

    def onObservationOnHold(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'on_hold')

    def onObservationConflict(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'conflict')

    def onObservationPrescheduled(self, treeId, modificationTime):
        logger.info(
            "not propagating prescheduled status for otdb_id %s to radb because the resource assigner takes care of this"
            % (treeId))

    def onObservationScheduled(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'scheduled')

    def onObservationQueued(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'queued')

    def onObservationStarted(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'active')

    def onObservationCompleting(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'completing')

    def onObservationFinished(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'finished')

    def onObservationAborted(self, treeId, modificationTime):
        self._update_radb_task_status(treeId, 'aborted')
Ejemplo n.º 4
0
class ResourceAssigner():
    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 re_busname=RE_BUSNAME,
                 re_servicename=RE_SERVICENAME,
                 ssdb_busname=DEFAULT_SSDB_BUSNAME,
                 ssdb_servicename=DEFAULT_SSDB_SERVICENAME,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 broker=None):
        """
        ResourceAssigner inserts/updates tasks in the radb and assigns resources to it based on incoming parset.
        :param radb_busname: busname on which the radb service listens (default: lofar.ra.command)
        :param radb_servicename: servicename of the radb service (default: RADBService)
        :param re_busname: busname on which the resource estimator service listens (default: lofar.ra.command)
        :param re_servicename: servicename of the resource estimator service (default: ResourceEstimation)
        :param ssdb_busname: busname on which the ssdb service listens (default: lofar.system)
        :param ssdb_servicename: servicename of the radb service (default: SSDBService)
        :param broker: Valid Qpid broker host (default: None, which means localhost)
        """
        self.radbrpc = RARPC(servicename=radb_servicename, busname=radb_busname, broker=broker)
        self.rerpc = RPC(re_servicename, busname=re_busname, broker=broker, ForwardExceptions=True)
        self.ssdbrpc = SSDBRPC(servicename=ssdb_servicename, busname=ssdb_busname, broker=broker)
        self.otdbrpc = OTDBRPC(busname=otdb_busname, servicename=otdb_servicename, broker=broker) ## , ForwardExceptions=True hardcoded in RPCWrapper right now

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open rpc connections to radb service and resource estimator service"""
        self.radbrpc.open()
        self.rerpc.open()
        self.otdbrpc.open()
        self.ssdbrpc.open()

    def close(self):
        """Close rpc connections to radb service and resource estimator service"""
        self.radbrpc.close()
        self.rerpc.close()
        self.otdbrpc.close()
        self.ssdbrpc.close()

    def doAssignment(self, specification_tree):
        logger.info('doAssignment: specification_tree=%s' % (specification_tree))

        otdb_id = specification_tree['otdb_id']
        taskType = specification_tree.get('task_type', '').lower()
        status = specification_tree.get('state', '').lower()

        if status not in ['approved', 'prescheduled']: # cep2 accepts both, cep4 only prescheduled, see below
            logger.info('skipping specification for otdb_id=%s because status=%s', (otdb_id, status))

        #parse main parset...
        mainParset = parameterset(specification_tree['specification'])

        momId = mainParset.getInt('Observation.momID', -1)
        try:
            startTime = datetime.strptime(mainParset.getString('Observation.startTime'), '%Y-%m-%d %H:%M:%S')
            endTime = datetime.strptime(mainParset.getString('Observation.stopTime'), '%Y-%m-%d %H:%M:%S')
        except ValueError:
            logger.warning('cannot parse for start/end time from specification for otdb_id=%s', (otdb_id, ))

        # insert new task and specification in the radb
        # any existing specification and task with same otdb_id will be deleted automatically
        logger.info('doAssignment: insertSpecification momId=%s, otdb_id=%s, status=%s, taskType=%s, startTime=%s, endTime=%s' %
                    (momId, otdb_id, status, taskType, startTime, endTime))
        result = self.radbrpc.insertSpecificationAndTask(momId, otdb_id, status, taskType, startTime, endTime, str(mainParset))

        if not result['inserted']:
            logger.error('could not insert specification and task')
            return

        specificationId = result['specification_id']
        taskId = result['task_id']
        logger.info('doAssignment: inserted specification (id=%s) and task (id=%s)' % (specificationId,taskId))

        # do not assign resources to task for other clusters than cep4
        if not self.checkClusterIsCEP4(mainParset):
            return

        if status != 'prescheduled':
            logger.info('skipping resource assignment for CEP4 task otdb_id=%s because status=%s' % (otdb_id, status))
            return

        needed = self.getNeededResouces(specification_tree)
        logger.info('doAssignment: getNeededResouces=%s' % (needed,))

        if not str(otdb_id) in needed:
            logger.error("no otdb_id %s found in estimator results %s" % (otdb_id, needed))
            return

        if not taskType in needed[str(otdb_id)]:
            logger.error("no task type %s found in estimator results %s" % (taskType, needed[str(otdb_id)]))
            return

        # make sure the availability in the radb is up to date
        # TODO: this should be updated regularly
        try:
            self.updateAvailableResources('cep4')
        except Exception as e:
            logger.warning("Exception while updating available resources: %s" % str(e))

        # claim the resources for this task
        # during the claim inserts the claims are automatically validated
        # and if not enough resources are available, then they are put to conflict status
        # also, if any claim is in conflict state, then the task is put to conflict status as well
        main_needed = needed[str(otdb_id)]
        task = self.radbrpc.getTask(taskId)
        claimed, claim_ids = self.claimResources(main_needed, task)
        if claimed:
            conflictingClaims = self.radbrpc.getResourceClaims(task_ids=taskId, status='conflict')

            if conflictingClaims:
                logger.warning('doAssignment: %s conflicting claims detected. Task cannot be scheduled. %s' %
                               (len(conflictingClaims), conflictingClaims))
            else:
                logger.info('doAssignment: all claims for task %s were succesfully claimed. Setting task status to scheduled' % (taskId,))
                self.radbrpc.updateTaskAndResourceClaims(taskId, task_status='scheduled', claim_status='allocated')

        self.processPredecessors(specification_tree)

    def processPredecessors(self, specification_tree):
        try:
            predecessor_trees = specification_tree['predecessors']

            if predecessor_trees:
                otdb_id = specification_tree['otdb_id']
                task = self.radbrpc.getTask(otdb_id=otdb_id)

                for predecessor_tree in predecessor_trees:
                    pred_otdb_id = predecessor_tree['otdb_id']
                    predecessor_task = self.radbrpc.getTask(otdb_id=pred_otdb_id)
                    if predecessor_task:
                        self.radbrpc.insertTaskPredecessor(task['id'], predecessor_task['id'])
                    self.processPredecessors(predecessor_tree)

        except Exception as e:
            logger.error(e)

    def checkClusterIsCEP4(self, parset):
        # check storageClusterName for enabled DataProducts
        # if any storageClusterName is not CEP4, we do not accept this parset
        keys = ['Output_Correlated',
                'Output_IncoherentStokes',
                'Output_CoherentStokes',
                'Output_InstrumentModel',
                'Output_SkyImage',
                'Output_Pulsar']
        for key in keys:
            if parset.getBool('Observation.DataProducts.%s.enabled' % key, False):
                if parset.getString('Observation.DataProducts.%s.storageClusterName' % key, '') != 'CEP4':
                    logger.warn("storageClusterName not CEP4, rejecting specification.")
                    return False

        logger.info("all enabled storageClusterName's are CEP4, accepting specification.")
        return True


    def getNeededResouces(self, specification_tree):
        replymessage, status = self.rerpc({"specification_tree":specification_tree}, timeout=10)
        logger.info('getNeededResouces: %s' % replymessage)
        return replymessage

    def updateAvailableResources(self, cluster):
        # find out which resources are available
        # and what is their capacity
        # For now, only look at CEP4 storage
        # Later, also look at stations up/down for short term scheduling

        #get all active groupnames, find id for cluster group
        groupnames = self.ssdbrpc.getactivegroupnames()
        cluster_group_id = next(k for k,v in groupnames.items() if v == cluster)

        # for CEP4 cluster, do hard codes lookup of first and only node
        node_info = self.ssdbrpc.gethostsforgid(cluster_group_id)['nodes'][0]

        storage_resources = self.radbrpc.getResources(resource_types='storage', include_availability=True)
        cep4_storage_resource = next(x for x in storage_resources if 'cep4' in x['name'])
        active = node_info['statename'] == 'Active'
        total_capacity = node_info['totalspace']
        available_capacity = total_capacity - node_info['usedspace']

        logger.info("Updating resource availability of %s (id=%s) to active=%s available_capacity=%s total_capacity=%s" %
                    (cep4_storage_resource['name'], cep4_storage_resource['id'], active, available_capacity, total_capacity))

        self.radbrpc.updateResourceAvailability(cep4_storage_resource['id'],
                                                active=active,
                                                available_capacity=available_capacity,
                                                total_capacity=total_capacity)


    def claimResources(self, needed_resources, task):
        logger.info('claimResources: task %s needed_resources=%s' % (task, needed_resources))

        # get the needed resources for the task type
        needed_resources_for_task_type = needed_resources[task['type']]

        # get db lists
        rc_property_types = {rcpt['name']:rcpt['id'] for rcpt in self.radbrpc.getResourceClaimPropertyTypes()}
        resource_types = {rt['name']:rt['id'] for rt in self.radbrpc.getResourceTypes()}
        resources = self.radbrpc.getResources()

        # loop over needed_resources -> resource_type -> claim (and props)
        # flatten the tree dict to a list of claims (with props)
        claims = []
        for resource_type_name, needed_claim_for_resource_type in needed_resources_for_task_type.items():
            if resource_type_name in resource_types:
                logger.info('claimResources: processing resource_type: %s' % resource_type_name)
                db_resource_type_id = resource_types[resource_type_name]
                db_resources_for_type = [r for r in resources if r['type_id'] == db_resource_type_id]

                # needed_claim_for_resource_type is a dict containing exactly one kvp of which the value is an int
                # that value is the value for the claim
                needed_claim_value = next((v for k,v in needed_claim_for_resource_type.items() if isinstance(v, int)))

                # FIXME: right now we just pick the first resource from the 'cep4' resources.
                # estimator will deliver this info in the future
                db_cep4_resources_for_type = [r for r in db_resources_for_type if 'cep4' in r['name'].lower()]

                if db_cep4_resources_for_type:
                    claim = {'resource_id':db_cep4_resources_for_type[0]['id'],
                            'starttime':task['starttime'],
                            'endtime':task['endtime'],
                            'status':'claimed',
                            'claim_size':needed_claim_value}

                    #FIXME: find proper way to extend storage time with a month
                    if 'storage' in db_cep4_resources_for_type[0]['name']:
                        claim['endtime'] += timedelta(days=31)

                    # if the needed_claim_for_resource_type dict contains more kvp's,
                    # then the subdict contains groups of properties for the claim
                    if len(needed_claim_for_resource_type) > 1:
                        claim['properties'] = []
                        needed_prop_groups = next((v for k,v in needed_claim_for_resource_type.items() if isinstance(v, collections.Iterable)))

                        def processProperties(propertiesDict, sap_nr=None):
                            for prop_type_name, prop_value in propertiesDict.items():
                                if prop_type_name in rc_property_types:
                                    rc_property_type_id = rc_property_types[prop_type_name]
                                    property = {'type':rc_property_type_id, 'value':prop_value}
                                    if sap_nr is not None:
                                        property['sap_nr'] = sap_nr
                                    claim['properties'].append(property)
                                else:
                                    logger.error('claimResources: unknown prop_type:%s' % prop_type_name)

                        for group_name, needed_prop_group in needed_prop_groups.items():
                            if group_name == 'saps':
                                for sap_dict in needed_prop_group:
                                    processProperties(sap_dict['properties'], sap_dict['sap_nr'])
                            else:
                                processProperties(needed_prop_group)

                    logger.info('claimResources: created claim:%s' % claim)
                    claims.append(claim)
            else:
                logger.error('claimResources: unknown resource_type:%s' % resource_type_name)

        logger.info('claimResources: inserting %d claims in the radb' % len(claims))
        claim_ids = self.radbrpc.insertResourceClaims(task['id'], claims, 1, 'anonymous', -1)['ids']
        logger.info('claimResources: %d claims were inserted in the radb' % len(claim_ids))
        return len(claim_ids) == len(claims), claim_ids
Ejemplo n.º 5
0
class RAtoOTDBPropagator():
    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 radb_broker=None,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 mom_busname=DEFAULT_MOMQUERY_BUSNAME,
                 mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
                 otdb_broker=None,
                 mom_broker=None,
                 broker=None):
        """
        RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.
        :param radb_busname: busname on which the radb service listens (default: lofar.ra.command)
        :param radb_servicename: servicename of the radb service (default: RADBService)
        :param radb_broker: valid Qpid broker host (default: None, which means localhost)
        :param otdb_busname: busname on which the OTDB service listens (default: lofar.otdb.command)
        :param otdb_servicename: servicename of the OTDB service (default: OTDBService)
        :param otdb_broker: valid Qpid broker host (default: None, which means localhost)
        :param broker: if specified, overrules radb_broker and otdb_broker. Valid Qpid broker host (default: None, which means localhost)
        """
        if broker:
            radb_broker = broker
            otdb_broker = broker
            mom_broker  = broker

        self.radbrpc = RADBRPC(busname=radb_busname, servicename=radb_servicename, broker=radb_broker) ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.otdbrpc = OTDBRPC(busname=otdb_busname, servicename=otdb_servicename, broker=otdb_broker) ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.momrpc = MoMQueryRPC(busname=mom_busname, servicename=mom_servicename, broker=mom_broker)
        self.translator = RAtoOTDBTranslator()

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open rpc connections to radb service and resource estimator service"""
        self.radbrpc.open()
        self.otdbrpc.open()
        self.momrpc.open()

    def close(self):
        """Close rpc connections to radb service and resource estimator service"""
        self.radbrpc.close()
        self.otdbrpc.close()
        self.momrpc.close()

    def doTaskConflict(self, otdb_id):
        logger.info('doTaskConflict: otdb_id=%s' % (otdb_id,))
        if not otdb_id:
            logger.warning('doTaskConflict no valid otdb_id: otdb_id=%s' % (otdb_id,))
            return
        try:
            self.otdbrpc.taskSetStatus(otdb_id, 'conflict')
        except Exception as e:
            logger.error(e)

    def doTaskScheduled(self, ra_id, otdb_id, mom_id):
        try:
            logger.info('doTaskScheduled: ra_id=%s otdb_id=%s mom_id=%s' % (ra_id, otdb_id, mom_id))
            if not otdb_id:
                logger.warning('doTaskScheduled no valid otdb_id: otdb_id=%s' % (otdb_id,))
                return
            ra_info = self.getRAinfo(ra_id)

            logger.info('RA info for ra_id=%s otdb_id=%s: %s' % (ra_id, otdb_id, ra_info))

            # check if this is a CEP4 task, or an old CEP2 task
            # at this moment the most simple check is to see if RA claimed (CEP4) storage
            # TODO: do proper check on cluster/storage/etc
            if not ra_info['storage']:
                logger.info("No (CEP4) storage claimed for ra_id=%s otdb_id=%s, skipping otdb specification update." % (ra_id, otdb_id))
                return

            #get mom project name
            try:
                project = self.momrpc.getProjectDetails(mom_id)
                logger.info(project)
                project_name = "_".join(project[str(mom_id)]['project_name'].split())
            except (RPCException, KeyError) as e:
                logger.error('Could not get project name from MoM for mom_id %s: %s' % (mom_id, str(e)))
                logger.info('Using \'unknown\' as project name.')
                project_name = 'unknown'

            otdb_info = self.translator.CreateParset(otdb_id, ra_info, project_name)
            logger.debug("Parset info for OTDB: %s" %otdb_info)
            self.setOTDBinfo(otdb_id, otdb_info, 'scheduled')
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)

    def getRAinfo(self, ra_id):
        info = {}
        info["storage"] = {}
        task = self.radbrpc.getTask(ra_id)
        claims = self.radbrpc.getResourceClaims(task_ids=ra_id, extended=True, include_properties=True)
        for claim in claims:
            logger.debug("Processing claim: %s" % claim)
            if claim['resource_type_name'] == 'storage':
                info['storage'] = claim
        info["starttime"] = task["starttime"]
        info["endtime"] = task["endtime"]
        info["status"] = task["status"]
        return info

    def setOTDBinfo(self, otdb_id, otdb_info, otdb_status):
        try:
            logger.info('Setting specticication for otdb_id %s: %s' % (otdb_id, otdb_info))
            self.otdbrpc.taskSetSpecification(otdb_id, otdb_info)
            self.otdbrpc.taskPrepareForScheduling(otdb_id, otdb_info["LOFAR.ObsSW.Observation.startTime"], otdb_info["LOFAR.ObsSW.Observation.stopTime"])
            logger.info('Setting status (%s) for otdb_id %s' % (otdb_status, otdb_id))
            self.otdbrpc.taskSetStatus(otdb_id, otdb_status)
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)
Ejemplo n.º 6
0
class ResourceAssigner():
    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 re_busname=RE_BUSNAME,
                 re_servicename=RE_SERVICENAME,
                 ssdb_busname=DEFAULT_SSDB_BUSNAME,
                 ssdb_servicename=DEFAULT_SSDB_SERVICENAME,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 broker=None):
        """
        ResourceAssigner inserts/updates tasks in the radb and assigns resources to it based on incoming parset.
        :param radb_busname: busname on which the radb service listens (default: lofar.ra.command)
        :param radb_servicename: servicename of the radb service (default: RADBService)
        :param re_busname: busname on which the resource estimator service listens (default: lofar.ra.command)
        :param re_servicename: servicename of the resource estimator service (default: ResourceEstimation)
        :param ssdb_busname: busname on which the ssdb service listens (default: lofar.system)
        :param ssdb_servicename: servicename of the radb service (default: SSDBService)
        :param broker: Valid Qpid broker host (default: None, which means localhost)
        """
        self.radbrpc = RARPC(servicename=radb_servicename,
                             busname=radb_busname,
                             broker=broker)
        self.rerpc = RPC(re_servicename,
                         busname=re_busname,
                         broker=broker,
                         ForwardExceptions=True)
        self.ssdbrpc = SSDBRPC(servicename=ssdb_servicename,
                               busname=ssdb_busname,
                               broker=broker)
        self.otdbrpc = OTDBRPC(
            busname=otdb_busname, servicename=otdb_servicename, broker=broker
        )  ## , ForwardExceptions=True hardcoded in RPCWrapper right now

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open rpc connections to radb service and resource estimator service"""
        self.radbrpc.open()
        self.rerpc.open()
        self.otdbrpc.open()
        self.ssdbrpc.open()

    def close(self):
        """Close rpc connections to radb service and resource estimator service"""
        self.radbrpc.close()
        self.rerpc.close()
        self.otdbrpc.close()
        self.ssdbrpc.close()

    def doAssignment(self, specification_tree):
        logger.info('doAssignment: specification_tree=%s' %
                    (specification_tree))

        otdb_id = specification_tree['otdb_id']
        taskType = specification_tree.get('task_type', '').lower()
        status = specification_tree.get('state', '').lower()

        if status not in [
                'approved', 'prescheduled'
        ]:  # cep2 accepts both, cep4 only prescheduled, see below
            logger.info(
                'skipping specification for otdb_id=%s because status=%s',
                (otdb_id, status))

        #parse main parset...
        mainParset = parameterset(specification_tree['specification'])

        momId = mainParset.getInt('Observation.momID', -1)
        try:
            startTime = datetime.strptime(
                mainParset.getString('Observation.startTime'),
                '%Y-%m-%d %H:%M:%S')
            endTime = datetime.strptime(
                mainParset.getString('Observation.stopTime'),
                '%Y-%m-%d %H:%M:%S')
        except ValueError:
            logger.warning(
                'cannot parse for start/end time from specification for otdb_id=%s',
                (otdb_id, ))

        # insert new task and specification in the radb
        # any existing specification and task with same otdb_id will be deleted automatically
        logger.info(
            'doAssignment: insertSpecification momId=%s, otdb_id=%s, status=%s, taskType=%s, startTime=%s, endTime=%s'
            % (momId, otdb_id, status, taskType, startTime, endTime))
        result = self.radbrpc.insertSpecificationAndTask(
            momId, otdb_id, status, taskType, startTime, endTime,
            str(mainParset))

        if not result['inserted']:
            logger.error('could not insert specification and task')
            return

        specificationId = result['specification_id']
        taskId = result['task_id']
        logger.info(
            'doAssignment: inserted specification (id=%s) and task (id=%s)' %
            (specificationId, taskId))

        # do not assign resources to task for other clusters than cep4
        if not self.checkClusterIsCEP4(mainParset):
            return

        if status != 'prescheduled':
            logger.info(
                'skipping resource assignment for CEP4 task otdb_id=%s because status=%s'
                % (otdb_id, status))
            return

        needed = self.getNeededResouces(specification_tree)
        logger.info('doAssignment: getNeededResouces=%s' % (needed, ))

        if not str(otdb_id) in needed:
            logger.error("no otdb_id %s found in estimator results %s" %
                         (otdb_id, needed))
            return

        if not taskType in needed[str(otdb_id)]:
            logger.error("no task type %s found in estimator results %s" %
                         (taskType, needed[str(otdb_id)]))
            return

        # make sure the availability in the radb is up to date
        # TODO: this should be updated regularly
        try:
            self.updateAvailableResources('cep4')
        except Exception as e:
            logger.warning("Exception while updating available resources: %s" %
                           str(e))

        # claim the resources for this task
        # during the claim inserts the claims are automatically validated
        # and if not enough resources are available, then they are put to conflict status
        # also, if any claim is in conflict state, then the task is put to conflict status as well
        main_needed = needed[str(otdb_id)]
        task = self.radbrpc.getTask(taskId)
        claimed, claim_ids = self.claimResources(main_needed, task)
        if claimed:
            conflictingClaims = self.radbrpc.getResourceClaims(
                task_ids=taskId, status='conflict')

            if conflictingClaims:
                logger.warning(
                    'doAssignment: %s conflicting claims detected. Task cannot be scheduled. %s'
                    % (len(conflictingClaims), conflictingClaims))
            else:
                logger.info(
                    'doAssignment: all claims for task %s were succesfully claimed. Setting task status to scheduled'
                    % (taskId, ))
                self.radbrpc.updateTaskAndResourceClaims(
                    taskId, task_status='scheduled', claim_status='allocated')

        self.processPredecessors(specification_tree)

    def processPredecessors(self, specification_tree):
        try:
            predecessor_trees = specification_tree['predecessors']

            if predecessor_trees:
                otdb_id = specification_tree['otdb_id']
                task = self.radbrpc.getTask(otdb_id=otdb_id)

                for predecessor_tree in predecessor_trees:
                    pred_otdb_id = predecessor_tree['otdb_id']
                    predecessor_task = self.radbrpc.getTask(
                        otdb_id=pred_otdb_id)
                    if predecessor_task:
                        self.radbrpc.insertTaskPredecessor(
                            task['id'], predecessor_task['id'])
                    self.processPredecessors(predecessor_tree)

        except Exception as e:
            logger.error(e)

    def checkClusterIsCEP4(self, parset):
        # check storageClusterName for enabled DataProducts
        # if any storageClusterName is not CEP4, we do not accept this parset
        keys = [
            'Output_Correlated', 'Output_IncoherentStokes',
            'Output_CoherentStokes', 'Output_InstrumentModel',
            'Output_SkyImage', 'Output_Pulsar'
        ]
        for key in keys:
            if parset.getBool('Observation.DataProducts.%s.enabled' % key,
                              False):
                if parset.getString(
                        'Observation.DataProducts.%s.storageClusterName' % key,
                        '') != 'CEP4':
                    logger.warn(
                        "storageClusterName not CEP4, rejecting specification."
                    )
                    return False

        logger.info(
            "all enabled storageClusterName's are CEP4, accepting specification."
        )
        return True

    def getNeededResouces(self, specification_tree):
        replymessage, status = self.rerpc(
            {"specification_tree": specification_tree}, timeout=10)
        logger.info('getNeededResouces: %s' % replymessage)
        return replymessage

    def updateAvailableResources(self, cluster):
        # find out which resources are available
        # and what is their capacity
        # For now, only look at CEP4 storage
        # Later, also look at stations up/down for short term scheduling

        #get all active groupnames, find id for cluster group
        groupnames = self.ssdbrpc.getactivegroupnames()
        cluster_group_id = next(k for k, v in groupnames.items()
                                if v == cluster)

        # for CEP4 cluster, do hard codes lookup of first and only node
        node_info = self.ssdbrpc.gethostsforgid(cluster_group_id)['nodes'][0]

        storage_resources = self.radbrpc.getResources(
            resource_types='storage', include_availability=True)
        cep4_storage_resource = next(x for x in storage_resources
                                     if 'cep4' in x['name'])
        active = node_info['statename'] == 'Active'
        total_capacity = node_info['totalspace']
        available_capacity = total_capacity - node_info['usedspace']

        logger.info(
            "Updating resource availability of %s (id=%s) to active=%s available_capacity=%s total_capacity=%s"
            % (cep4_storage_resource['name'], cep4_storage_resource['id'],
               active, available_capacity, total_capacity))

        self.radbrpc.updateResourceAvailability(
            cep4_storage_resource['id'],
            active=active,
            available_capacity=available_capacity,
            total_capacity=total_capacity)

    def claimResources(self, needed_resources, task):
        logger.info('claimResources: task %s needed_resources=%s' %
                    (task, needed_resources))

        # get the needed resources for the task type
        needed_resources_for_task_type = needed_resources[task['type']]

        # get db lists
        rc_property_types = {
            rcpt['name']: rcpt['id']
            for rcpt in self.radbrpc.getResourceClaimPropertyTypes()
        }
        resource_types = {
            rt['name']: rt['id']
            for rt in self.radbrpc.getResourceTypes()
        }
        resources = self.radbrpc.getResources()

        # loop over needed_resources -> resource_type -> claim (and props)
        # flatten the tree dict to a list of claims (with props)
        claims = []
        for resource_type_name, needed_claim_for_resource_type in needed_resources_for_task_type.items(
        ):
            if resource_type_name in resource_types:
                logger.info('claimResources: processing resource_type: %s' %
                            resource_type_name)
                db_resource_type_id = resource_types[resource_type_name]
                db_resources_for_type = [
                    r for r in resources if r['type_id'] == db_resource_type_id
                ]

                # needed_claim_for_resource_type is a dict containing exactly one kvp of which the value is an int
                # that value is the value for the claim
                needed_claim_value = next(
                    (v for k, v in needed_claim_for_resource_type.items()
                     if isinstance(v, int)))

                # FIXME: right now we just pick the first resource from the 'cep4' resources.
                # estimator will deliver this info in the future
                db_cep4_resources_for_type = [
                    r for r in db_resources_for_type
                    if 'cep4' in r['name'].lower()
                ]

                if db_cep4_resources_for_type:
                    claim = {
                        'resource_id': db_cep4_resources_for_type[0]['id'],
                        'starttime': task['starttime'],
                        'endtime': task['endtime'],
                        'status': 'claimed',
                        'claim_size': needed_claim_value
                    }

                    #FIXME: find proper way to extend storage time with a month
                    if 'storage' in db_cep4_resources_for_type[0]['name']:
                        claim['endtime'] += timedelta(days=31)

                    # if the needed_claim_for_resource_type dict contains more kvp's,
                    # then the subdict contains groups of properties for the claim
                    if len(needed_claim_for_resource_type) > 1:
                        claim['properties'] = []
                        needed_prop_groups = next((
                            v
                            for k, v in needed_claim_for_resource_type.items()
                            if isinstance(v, collections.Iterable)))

                        def processProperties(propertiesDict, sap_nr=None):
                            for prop_type_name, prop_value in propertiesDict.items(
                            ):
                                if prop_type_name in rc_property_types:
                                    rc_property_type_id = rc_property_types[
                                        prop_type_name]
                                    property = {
                                        'type': rc_property_type_id,
                                        'value': prop_value
                                    }
                                    if sap_nr is not None:
                                        property['sap_nr'] = sap_nr
                                    claim['properties'].append(property)
                                else:
                                    logger.error(
                                        'claimResources: unknown prop_type:%s'
                                        % prop_type_name)

                        for group_name, needed_prop_group in needed_prop_groups.items(
                        ):
                            if group_name == 'saps':
                                for sap_dict in needed_prop_group:
                                    processProperties(sap_dict['properties'],
                                                      sap_dict['sap_nr'])
                            else:
                                processProperties(needed_prop_group)

                    logger.info('claimResources: created claim:%s' % claim)
                    claims.append(claim)
            else:
                logger.error('claimResources: unknown resource_type:%s' %
                             resource_type_name)

        logger.info('claimResources: inserting %d claims in the radb' %
                    len(claims))
        claim_ids = self.radbrpc.insertResourceClaims(task['id'], claims, 1,
                                                      'anonymous', -1)['ids']
        logger.info('claimResources: %d claims were inserted in the radb' %
                    len(claim_ids))
        return len(claim_ids) == len(claims), claim_ids