Example #1
0
class TaskManagementHandler(ServiceMessageHandler):
    def handle_message(self, msg):
        pass

    def __init__(self):
        super(TaskManagementHandler, self).__init__()

        self.radb = RADBRPC()
        self.otdb = OTDBRPC()
        self.obs_ctrl = ObservationControlRPCClient()

    def AbortTask(self, otdb_id):
        """aborts tasks based on otdb id
        :param otdb_id:
        :return: dict with aborted key saying if aborting was succesful and otdb_id key
        """
        if self._is_active_observation(otdb_id):
            aborted = self._abort_active_observation(otdb_id)
        else:
            aborted = self._abort_inactive_task(otdb_id)

        return {"aborted": aborted, "otdb_id": otdb_id}

    def _is_active_observation(self, otdb_id):
        task_type, task_status = self._get_task_type_and_status(otdb_id)

        return task_type == "observation" and (task_status == "running"
                                               or task_status == "queued")

    def _abort_inactive_task(self, otdb_id):
        logger.info("Aborting inactive task: %s", otdb_id)

        try:
            self.otdb.taskSetStatus(otdb_id=otdb_id, new_status="aborted")
            aborted = True
        except OTDBPRCException:
            aborted = False
        return aborted

    def _abort_active_observation(self, otdb_id):
        logger.info("Aborting active task: %s", otdb_id)

        result = self.obs_ctrl.abort_observation(otdb_id)
        aborted = result["aborted"] is True
        return aborted

    def _get_task_type_and_status(self, otdb_id):
        task = self.radb.getTask(otdb_id)
        task_type = task["type"]
        task_status = task['status']
        return task_type, task_status
Example #2
0
class PipelineControl(OTDBBusListener):
  def __init__(self, otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME, otdb_service_busname=DEFAULT_OTDB_SERVICE_BUSNAME, **kwargs):
    super(PipelineControl, self).__init__(busname=otdb_notification_busname, **kwargs)

    self.otdb_service_busname = otdb_service_busname
    self.otdbrpc = OTDBRPC(busname=otdb_service_busname)
    self.slurm = Slurm()

  def _setStatus(self, obsid, status):
    try:
        self.otdbrpc.taskSetStatus(otdb_id=obsid, new_status=status)
    except RPCTimeoutException, e:
        # We use a queue, so delivery is guaranteed. We don't care about the answer.
        pass
Example #3
0
class PipelineControl(OTDBBusListener):
    def __init__(self,
                 otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
                 otdb_service_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 **kwargs):
        super(PipelineControl,
              self).__init__(busname=otdb_notification_busname, **kwargs)

        self.otdb_service_busname = otdb_service_busname
        self.otdbrpc = OTDBRPC(busname=otdb_service_busname)
        self.slurm = Slurm()

    def _setStatus(self, obsid, status):
        try:
            self.otdbrpc.taskSetStatus(otdb_id=obsid, new_status=status)
        except RPCTimeoutException, e:
            # We use a queue, so delivery is guaranteed. We don't care about the answer.
            pass
Example #4
0
class RAtoOTDBPropagator():
    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 radb_broker=None,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 mom_busname=DEFAULT_MOMQUERY_BUSNAME,
                 mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
                 otdb_broker=None,
                 mom_broker=None,
                 broker=None):
        """
        RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.
        :param radb_busname: busname on which the radb service listens (default: lofar.ra.command)
        :param radb_servicename: servicename of the radb service (default: RADBService)
        :param radb_broker: valid Qpid broker host (default: None, which means localhost)
        :param otdb_busname: busname on which the OTDB service listens (default: lofar.otdb.command)
        :param otdb_servicename: servicename of the OTDB service (default: OTDBService)
        :param otdb_broker: valid Qpid broker host (default: None, which means localhost)
        :param broker: if specified, overrules radb_broker and otdb_broker. Valid Qpid broker host (default: None, which means localhost)
        """
        if broker:
            radb_broker = broker
            otdb_broker = broker
            mom_broker = broker

        self.radbrpc = RADBRPC(
            busname=radb_busname,
            servicename=radb_servicename,
            broker=radb_broker
        )  ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.otdbrpc = OTDBRPC(
            busname=otdb_busname,
            servicename=otdb_servicename,
            broker=otdb_broker
        )  ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.momrpc = MoMQueryRPC(busname=mom_busname,
                                  servicename=mom_servicename,
                                  broker=mom_broker)
        self.translator = RAtoOTDBTranslator()

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open rpc connections to radb service and resource estimator service"""
        self.radbrpc.open()
        self.otdbrpc.open()
        self.momrpc.open()

    def close(self):
        """Close rpc connections to radb service and resource estimator service"""
        self.radbrpc.close()
        self.otdbrpc.close()
        self.momrpc.close()

    def doTaskConflict(self, otdb_id):
        logger.info('doTaskConflict: otdb_id=%s' % (otdb_id, ))
        if not otdb_id:
            logger.warning('doTaskConflict no valid otdb_id: otdb_id=%s' %
                           (otdb_id, ))
            return
        try:
            self.otdbrpc.taskSetStatus(otdb_id, 'conflict')
        except Exception as e:
            logger.error(e)

    def doTaskScheduled(self, ra_id, otdb_id, mom_id):
        try:
            logger.info('doTaskScheduled: ra_id=%s otdb_id=%s mom_id=%s' %
                        (ra_id, otdb_id, mom_id))
            if not otdb_id:
                logger.warning('doTaskScheduled no valid otdb_id: otdb_id=%s' %
                               (otdb_id, ))
                return
            ra_info = self.getRAinfo(ra_id)

            logger.info('RA info for ra_id=%s otdb_id=%s: %s' %
                        (ra_id, otdb_id, ra_info))

            # check if this is a CEP4 task, or an old CEP2 task
            # at this moment the most simple check is to see if RA claimed (CEP4) storage
            # TODO: do proper check on cluster/storage/etc
            if not ra_info['storage']:
                logger.info(
                    "No (CEP4) storage claimed for ra_id=%s otdb_id=%s, skipping otdb specification update."
                    % (ra_id, otdb_id))
                return

            #get mom project name
            try:
                project = self.momrpc.getProjectDetails(mom_id)
                logger.info(project)
                project_name = "_".join(
                    project[str(mom_id)]['project_name'].split())
            except (RPCException, KeyError) as e:
                logger.error(
                    'Could not get project name from MoM for mom_id %s: %s' %
                    (mom_id, str(e)))
                logger.info('Using \'unknown\' as project name.')
                project_name = 'unknown'

            otdb_info = self.translator.CreateParset(otdb_id, ra_info,
                                                     project_name)
            logger.debug("Parset info for OTDB: %s" % otdb_info)
            self.setOTDBinfo(otdb_id, otdb_info, 'scheduled')
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)

    def getRAinfo(self, ra_id):
        info = {}
        info["storage"] = {}
        task = self.radbrpc.getTask(ra_id)
        claims = self.radbrpc.getResourceClaims(task_ids=ra_id,
                                                extended=True,
                                                include_properties=True)
        for claim in claims:
            logger.debug("Processing claim: %s" % claim)
            if claim['resource_type_name'] == 'storage':
                info['storage'] = claim
        info["starttime"] = task["starttime"]
        info["endtime"] = task["endtime"]
        info["status"] = task["status"]
        return info

    def setOTDBinfo(self, otdb_id, otdb_info, otdb_status):
        try:
            logger.info('Setting specticication for otdb_id %s: %s' %
                        (otdb_id, otdb_info))
            self.otdbrpc.taskSetSpecification(otdb_id, otdb_info)
            self.otdbrpc.taskPrepareForScheduling(
                otdb_id, otdb_info["LOFAR.ObsSW.Observation.startTime"],
                otdb_info["LOFAR.ObsSW.Observation.stopTime"])
            logger.info('Setting status (%s) for otdb_id %s' %
                        (otdb_status, otdb_id))
            self.otdbrpc.taskSetStatus(otdb_id, otdb_status)
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)
Example #5
0
class RAtoOTDBPropagator():
    def __init__(self,
                 radb_busname=RADB_BUSNAME,
                 radb_servicename=RADB_SERVICENAME,
                 radb_broker=None,
                 otdb_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                 otdb_servicename=DEFAULT_OTDB_SERVICENAME,
                 mom_busname=DEFAULT_MOMQUERY_BUSNAME,
                 mom_servicename=DEFAULT_MOMQUERY_SERVICENAME,
                 otdb_broker=None,
                 mom_broker=None,
                 broker=None):
        """
        RAtoOTDBPropagator updates tasks in the OTDB after the ResourceAssigner is done with them.
        :param radb_busname: busname on which the radb service listens (default: lofar.ra.command)
        :param radb_servicename: servicename of the radb service (default: RADBService)
        :param radb_broker: valid Qpid broker host (default: None, which means localhost)
        :param otdb_busname: busname on which the OTDB service listens (default: lofar.otdb.command)
        :param otdb_servicename: servicename of the OTDB service (default: OTDBService)
        :param otdb_broker: valid Qpid broker host (default: None, which means localhost)
        :param broker: if specified, overrules radb_broker and otdb_broker. Valid Qpid broker host (default: None, which means localhost)
        """
        if broker:
            radb_broker = broker
            otdb_broker = broker
            mom_broker  = broker

        self.radbrpc = RADBRPC(busname=radb_busname, servicename=radb_servicename, broker=radb_broker) ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.otdbrpc = OTDBRPC(busname=otdb_busname, servicename=otdb_servicename, broker=otdb_broker) ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.momrpc = MoMQueryRPC(busname=mom_busname, servicename=mom_servicename, broker=mom_broker)
        self.translator = RAtoOTDBTranslator()

    def __enter__(self):
        """Internal use only. (handles scope 'with')"""
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Internal use only. (handles scope 'with')"""
        self.close()

    def open(self):
        """Open rpc connections to radb service and resource estimator service"""
        self.radbrpc.open()
        self.otdbrpc.open()
        self.momrpc.open()

    def close(self):
        """Close rpc connections to radb service and resource estimator service"""
        self.radbrpc.close()
        self.otdbrpc.close()
        self.momrpc.close()

    def doTaskConflict(self, otdb_id):
        logger.info('doTaskConflict: otdb_id=%s' % (otdb_id,))
        if not otdb_id:
            logger.warning('doTaskConflict no valid otdb_id: otdb_id=%s' % (otdb_id,))
            return
        try:
            self.otdbrpc.taskSetStatus(otdb_id, 'conflict')
        except Exception as e:
            logger.error(e)

    def doTaskScheduled(self, ra_id, otdb_id, mom_id):
        try:
            logger.info('doTaskScheduled: ra_id=%s otdb_id=%s mom_id=%s' % (ra_id, otdb_id, mom_id))
            if not otdb_id:
                logger.warning('doTaskScheduled no valid otdb_id: otdb_id=%s' % (otdb_id,))
                return
            ra_info = self.getRAinfo(ra_id)

            logger.info('RA info for ra_id=%s otdb_id=%s: %s' % (ra_id, otdb_id, ra_info))

            # check if this is a CEP4 task, or an old CEP2 task
            # at this moment the most simple check is to see if RA claimed (CEP4) storage
            # TODO: do proper check on cluster/storage/etc
            if not ra_info['storage']:
                logger.info("No (CEP4) storage claimed for ra_id=%s otdb_id=%s, skipping otdb specification update." % (ra_id, otdb_id))
                return

            #get mom project name
            try:
                project = self.momrpc.getProjectDetails(mom_id)
                logger.info(project)
                project_name = "_".join(project[str(mom_id)]['project_name'].split())
            except (RPCException, KeyError) as e:
                logger.error('Could not get project name from MoM for mom_id %s: %s' % (mom_id, str(e)))
                logger.info('Using \'unknown\' as project name.')
                project_name = 'unknown'

            otdb_info = self.translator.CreateParset(otdb_id, ra_info, project_name)
            logger.debug("Parset info for OTDB: %s" %otdb_info)
            self.setOTDBinfo(otdb_id, otdb_info, 'scheduled')
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)

    def getRAinfo(self, ra_id):
        info = {}
        info["storage"] = {}
        task = self.radbrpc.getTask(ra_id)
        claims = self.radbrpc.getResourceClaims(task_ids=ra_id, extended=True, include_properties=True)
        for claim in claims:
            logger.debug("Processing claim: %s" % claim)
            if claim['resource_type_name'] == 'storage':
                info['storage'] = claim
        info["starttime"] = task["starttime"]
        info["endtime"] = task["endtime"]
        info["status"] = task["status"]
        return info

    def setOTDBinfo(self, otdb_id, otdb_info, otdb_status):
        try:
            logger.info('Setting specticication for otdb_id %s: %s' % (otdb_id, otdb_info))
            self.otdbrpc.taskSetSpecification(otdb_id, otdb_info)
            self.otdbrpc.taskPrepareForScheduling(otdb_id, otdb_info["LOFAR.ObsSW.Observation.startTime"], otdb_info["LOFAR.ObsSW.Observation.stopTime"])
            logger.info('Setting status (%s) for otdb_id %s' % (otdb_status, otdb_id))
            self.otdbrpc.taskSetStatus(otdb_id, otdb_status)
        except Exception as e:
            logger.error(e)
            self.doTaskConflict(otdb_id)