Beispiel #1
0
class RATaskSpecifiedOTDBEventMessageHandler(OTDBEventMessageHandler):
    def __init__(self, exchange=DEFAULT_BUSNAME, broker=DEFAULT_BROKER):
        """
        :param exchange: name of the exchange to listen on.
        :param broker: name of the broker to connect to.
        """
        super().__init__()
        self.otdbrpc = OTDBRPC.create(exchange=exchange, broker=broker)
        self.radbrpc = RADBRPC.create(exchange=exchange, broker=broker)
        self.momrpc = MoMQueryRPC.create(exchange=exchange, broker=broker)
        self.send_bus = ToBus(exchange=exchange, broker=broker)

    def start_handling(self):
        self.otdbrpc.open()
        self.momrpc.open()
        self.radbrpc.open()
        self.send_bus.open()

    def stop_handling(self):
        self.otdbrpc.close()
        self.momrpc.close()
        self.radbrpc.close()
        self.send_bus.close()

    # This is mainly to trigger the propagation of misc field values through read_from_mom
    # and then sending them to the RA to OTDB Service in the resource assigner.
    # Might need to be a separate service if we take on more mom-otdb-adapter function.
    def onObservationApproved(self, main_id, modification_time):
        self.createAndSendSpecifiedTask(main_id, 'approved')

    def onObservationPrescheduled(self, main_id, modification_time):
        self.createAndSendSpecifiedTask(main_id, 'prescheduled')

    def createAndSendSpecifiedTask(self, main_id, status):
        spec = Specification(self.otdbrpc, self.momrpc, self.radbrpc)
        spec.status = status
        spec.read_from_OTDB_with_predecessors(main_id, "otdb", {})
        spec.read_from_mom()
        spec.update_start_end_times()
        # spec.insert_into_radb() is still done in resource_assigner for now.
        result_tree = spec.as_dict()
        if spec.status == status:
            logger.info("Sending result: %s" % result_tree)
            # Put result on bus
            msg = EventMessage(subject=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_SUBJECT,
                               content=result_tree)
            with self.send_bus:
                self.send_bus.send(msg)
            logger.info("Result sent")
        else:
            logger.warning("Problem retrieving task %i" % main_id)
Beispiel #2
0
class RATaskSpecified(OTDBBusListener):
    def __init__(self,
                   otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
                   otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT,
                   otdb_service_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
                   otdb_service_subject=DEFAULT_OTDB_SERVICENAME,
                   notification_busname=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_BUSNAME,
                   notification_subject=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_SUBJECT,
                   broker=None, **kwargs):
        super(RATaskSpecified, self).__init__(busname=otdb_notification_busname, subject=otdb_notification_subject, **kwargs)
        self.otdbrpc  = OTDBRPC(busname=otdb_service_busname, servicename=otdb_service_subject, broker=broker) ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.send_bus = ToBus("%s/%s" % (notification_busname, notification_subject))

    def start_listening(self, **kwargs):
        self.otdbrpc.open()
        self.send_bus.open()
        super(RATaskSpecified, self).start_listening(**kwargs)

    def stop_listening(self, **kwargs):
        super(RATaskSpecified, self).stop_listening(**kwargs)
        self.send_bus.close()
        self.otdbrpc.close()

    def get_predecessors(self, parset):
        """ Extract the list of predecessor obs IDs from the given parset. """

        key = PARSET_PREFIX + "Observation.Scheduler.predecessors"
        stringlist = PyParameterValue(str(parset[key]), True).getStringVector()

        # Key contains values starting with 'S' = Scheduler, 'L'/'T' = OTDB, 'M' = MoM
        # 'S' we can probably ignore? Might be only internal in the Scheduler?
        result = []
        for s in stringlist:
            try: # Made the source a string for readability, but it's not efficient
                if s.startswith('M'):
                    result.append({'source': 'mom', 'id': int(s[1:])})
                elif s.startswith('L') or s.startswith('T'):
                    result.append({'source': 'otdb', 'id': int(s[1:])})
                else: # 'S'
                    logger.info("found a predecessor ID I can't handle: %s" % s)
                    result.append({'source': 'other', 'id': int(s[1:])})
            except ValueError:
                logger.warning("found a predecessor ID that I can't parse %s" % s)
        return result

    def get_specification_with_predecessors(self, id, id_source, state, found_parsets):
        logger.info("Processing ID %s from %s" % (id, id_source))
        if id_source == "other":
            return None
        elif id_source == "mom":
            otdb_id = self.otdbrpc.taskGetIDs( mom_id=id )['otdb_id']
        elif id_source == "otdb":
            otdb_id = id
        else:
            logger.warning("Error in understanding id %s", id)
      
        logger.info("Processing OTDB ID %s", otdb_id)
        result = {"otdb_id": otdb_id, "predecessors": []} 
        if state:
            result["state"] = state # TODO should be status not state
        else:
            pass #otdbrpc.taskGetStatus not implemented and maybe not needed?
          
        if otdb_id in found_parsets:
            parset = found_parsets[otdb_id]
        else:
            parset = self.otdbrpc.taskGetSpecification( otdb_id=otdb_id )['specification']
            found_parsets[otdb_id] = parset
          
        logger.info("parset [%s]: %s" % (otdb_id, parset))
        result['specification'] = resourceIndicatorsFromParset(parset)
      
        key = PARSET_PREFIX + "Observation.processSubtype"
        result['task_type'], result['task_subtype'] = convertSchedulerProcessSubtype(parset.get(key, ""))

        logger.info("Processing predecessors")
        predecessor_ids = self.get_predecessors(parset)
        for id in predecessor_ids:
            predecessor_result = self.get_specification_with_predecessors(id['id'], id['source'], "", found_parsets)
            if predecessor_result:
                result["predecessors"].append(predecessor_result)
        return result

    def onObservationApproved(self, main_id, modificationTime):
        self.createAndSendSpecifiedTask(main_id, 'approved')

    def onObservationPrescheduled(self, main_id, modificationTime):
        self.createAndSendSpecifiedTask(main_id, 'prescheduled')

    def createAndSendSpecifiedTask(self, main_id, status):
        # Construct root node of tree
        resultTree = self.get_specification_with_predecessors(main_id, "otdb", status, {})
        logger.info("Sending result: %s" % resultTree)

        # Put result on bus
        msg = EventMessage(content=resultTree)
        self.send_bus.send(msg)
        logger.info("Result sent")
Beispiel #3
0
class RADBPGListener(PostgresListener):
    def __init__(self,
                 busname=DEFAULT_NOTIFICATION_BUSNAME,
                 notification_prefix=DEFAULT_NOTIFICATION_PREFIX,
                 dbcreds=None,
                 broker=None):
        super(RADBPGListener, self).__init__(dbcreds.host, dbcreds.database,
                                             dbcreds.user, dbcreds.password)

        self.notification_prefix = notification_prefix
        self.event_bus = ToBus(busname, broker=broker)

        self.subscribe('task_update_with_task_view', self.onTaskUpdated)
        self.subscribe('task_insert_with_task_view', self.onTaskInserted)
        self.subscribe('task_delete', self.onTaskDeleted)

        # when the specification starttime and endtime are updated, then that effects the task as well
        # so subscribe to specification_update, and use task_view as view_for_row
        self.subscribe('specification_update_with_task_view',
                       self.onSpecificationUpdated)

        self.subscribe('resource_claim_update_with_resource_claim_view',
                       self.onResourceClaimUpdated)
        self.subscribe('resource_claim_insert_with_resource_claim_view',
                       self.onResourceClaimInserted)
        self.subscribe('resource_claim_delete', self.onResourceClaimDeleted)

        self.subscribe('resource_availability_update',
                       self.onResourceAvailabilityUpdated)
        self.subscribe('resource_capacity_update',
                       self.onResourceCapacityUpdated)

    def onTaskUpdated(self, payload=None):
        self._sendNotification('TaskUpdated', payload,
                               ['starttime', 'endtime'])

    def onTaskInserted(self, payload=None):
        self._sendNotification('TaskInserted', payload,
                               ['starttime', 'endtime'])

    def onTaskDeleted(self, payload=None):
        self._sendNotification('TaskDeleted', payload)

    def onSpecificationUpdated(self, payload=None):
        # when the specification starttime and endtime are updated, then that effects the task as well
        # so send a TaskUpdated notification
        self._sendNotification('TaskUpdated', payload,
                               ['starttime', 'endtime'])

    def onResourceClaimUpdated(self, payload=None):
        self._sendNotification('ResourceClaimUpdated', payload,
                               ['starttime', 'endtime'])

    def onResourceClaimInserted(self, payload=None):
        self._sendNotification('ResourceClaimInserted', payload,
                               ['starttime', 'endtime'])

    def onResourceClaimDeleted(self, payload=None):
        self._sendNotification('ResourceClaimDeleted', payload)

    def onResourceAvailabilityUpdated(self, payload=None):
        self._sendNotification('ResourceAvailabilityUpdated', payload)

    def onResourceCapacityUpdated(self, payload=None):
        self._sendNotification('ResourceCapacityUpdated', payload)

    def __enter__(self):
        super(RADBPGListener, self).__enter__()
        self.event_bus.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        super(RADBPGListener, self).__exit__(exc_type, exc_val, exc_tb)
        self.event_bus.close()

    def _formatTimestampsAsIso(self, fields, contentDict):
        '''convert all requested fields in the contentDict to proper isoformat datetime strings.
        In postgres we use timestamps without timezone.
        By convention we only enter utc values.
        But, if they are json encoded by postgress, they are not properly formatted with the in isoformat with a 'Z' at the end.
        So, parse the requested fields, and return them as datetime.
        '''
        try:
            for state in ('old', 'new'):
                if state in contentDict:
                    for field in fields:
                        try:
                            if contentDict[state] and field in contentDict[
                                    state]:
                                timestampStr = contentDict[state][field]
                                formatStr = '%Y-%m-%dT%H:%M:%S' if 'T' in timestampStr else '%Y-%m-%d %H:%M:%S'
                                if timestampStr.rfind('.') > -1:
                                    formatStr += '.%f'

                                timestamp = datetime.strptime(
                                    timestampStr, formatStr)

                                contentDict[state][field] = timestamp
                        except Exception as e:
                            logger.error(
                                'Could not convert field \'%s\' to datetime: %s'
                                % (field, e))

            return contentDict
        except Exception as e:
            logger.error('Could not parse payload: %s\n%s' % (contentDict, e))

    def _sendNotification(self, subject, payload, timestampFields=None):
        try:
            content = json.loads(payload)

            if 'new' in content and content[
                    'new'] and 'old' in content and content['old']:
                # check if new and old are equal.
                # however, new and old can be based on different views,
                # so, only check the values for the keys they have in common
                new_keys = set(content['new'].keys())
                old_keys = set(content['old'].keys())
                common_keys = new_keys & old_keys
                equal_valued_keys = [
                    k for k in common_keys
                    if content['new'][k] == content['old'][k]
                ]
                if len(equal_valued_keys) == len(common_keys):
                    logger.info(
                        'new and old values are equal, not sending notification. %s'
                        % (content['new']))
                    return

            if timestampFields:
                content = self._formatTimestampsAsIso(timestampFields, content)
        except Exception as e:
            logger.error('Could not parse payload: %s\n%s' % (payload, e))
            content = None

        try:
            msg = EventMessage(context=self.notification_prefix + subject,
                               content=content)
            logger.info('Sending notification %s: %s' %
                        (subject, str(content).replace('\n', ' ')))
            self.event_bus.send(msg)
        except Exception as e:
            logger.error(str(e))
Beispiel #4
0
class RATaskSpecified(OTDBBusListener):
    def __init__(
            self,
            otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME,
            otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT,
            otdb_service_busname=DEFAULT_OTDB_SERVICE_BUSNAME,
            otdb_service_subject=DEFAULT_OTDB_SERVICENAME,
            notification_busname=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_BUSNAME,
            notification_subject=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_SUBJECT,
            broker=None,
            **kwargs):
        super(RATaskSpecified,
              self).__init__(busname=otdb_notification_busname,
                             subject=otdb_notification_subject,
                             **kwargs)
        self.otdbrpc = OTDBRPC(
            busname=otdb_service_busname,
            servicename=otdb_service_subject,
            broker=broker
        )  ## , ForwardExceptions=True hardcoded in RPCWrapper right now
        self.send_bus = ToBus("%s/%s" %
                              (notification_busname, notification_subject))

    def start_listening(self, **kwargs):
        self.otdbrpc.open()
        self.send_bus.open()
        super(RATaskSpecified, self).start_listening(**kwargs)

    def stop_listening(self, **kwargs):
        super(RATaskSpecified, self).stop_listening(**kwargs)
        self.send_bus.close()
        self.otdbrpc.close()

    def get_predecessors(self, parset):
        """ Extract the list of predecessor obs IDs from the given parset. """

        key = PARSET_PREFIX + "Observation.Scheduler.predecessors"
        stringlist = PyParameterValue(str(parset[key]), True).getStringVector()

        # Key contains values starting with 'S' = Scheduler, 'L'/'T' = OTDB, 'M' = MoM
        # 'S' we can probably ignore? Might be only internal in the Scheduler?
        result = []
        for s in stringlist:
            try:  # Made the source a string for readability, but it's not efficient
                if s.startswith('M'):
                    result.append({'source': 'mom', 'id': int(s[1:])})
                elif s.startswith('L') or s.startswith('T'):
                    result.append({'source': 'otdb', 'id': int(s[1:])})
                else:  # 'S'
                    logger.info("found a predecessor ID I can't handle: %s" %
                                s)
                    result.append({'source': 'other', 'id': int(s[1:])})
            except ValueError:
                logger.warning("found a predecessor ID that I can't parse %s" %
                               s)
        return result

    def get_specification_with_predecessors(self, id, id_source, state,
                                            found_parsets):
        logger.info("Processing ID %s from %s" % (id, id_source))
        if id_source == "other":
            return None
        elif id_source == "mom":
            otdb_id = self.otdbrpc.taskGetIDs(mom_id=id)['otdb_id']
        elif id_source == "otdb":
            otdb_id = id
        else:
            logger.warning("Error in understanding id %s", id)

        logger.info("Processing OTDB ID %s", otdb_id)
        result = {"otdb_id": otdb_id, "predecessors": []}
        if state:
            result["state"] = state  # TODO should be status not state
        else:
            pass  #otdbrpc.taskGetStatus not implemented and maybe not needed?

        if otdb_id in found_parsets:
            parset = found_parsets[otdb_id]
        else:
            parset = self.otdbrpc.taskGetSpecification(
                otdb_id=otdb_id)['specification']
            found_parsets[otdb_id] = parset

        logger.info("parset [%s]: %s" % (otdb_id, parset))
        result['specification'] = resourceIndicatorsFromParset(parset)

        key = PARSET_PREFIX + "Observation.processSubtype"
        result['task_type'], result[
            'task_subtype'] = convertSchedulerProcessSubtype(
                parset.get(key, ""))

        logger.info("Processing predecessors")
        predecessor_ids = self.get_predecessors(parset)
        for id in predecessor_ids:
            predecessor_result = self.get_specification_with_predecessors(
                id['id'], id['source'], "", found_parsets)
            if predecessor_result:
                result["predecessors"].append(predecessor_result)
        return result

    def onObservationApproved(self, main_id, modificationTime):
        self.createAndSendSpecifiedTask(main_id, 'approved')

    def onObservationPrescheduled(self, main_id, modificationTime):
        self.createAndSendSpecifiedTask(main_id, 'prescheduled')

    def createAndSendSpecifiedTask(self, main_id, status):
        # Construct root node of tree
        resultTree = self.get_specification_with_predecessors(
            main_id, "otdb", status, {})
        logger.info("Sending result: %s" % resultTree)

        # Put result on bus
        msg = EventMessage(content=resultTree)
        self.send_bus.send(msg)
        logger.info("Result sent")
Beispiel #5
0
class TestPipelineControl(unittest.TestCase):
  def setUp(self):
    # Create a random bus
    self.busname = "%s-%s" % (sys.argv[0], str(uuid.uuid4())[:8])
    self.bus = ToBus(self.busname, { "create": "always", "delete": "always", "node": { "type": "topic" } })
    self.bus.open()
    self.addCleanup(self.bus.close)

    # Patch SLURM
    class MockSlurm(object):
      def __init__(self, *args, **kwargs):
        self.scheduled_jobs = {}

      def submit(self, jobName, *args, **kwargs):
        print "Schedule SLURM job '%s': %s, %s" % (jobName, args, kwargs)

        self.scheduled_jobs[jobName] = (args, kwargs)

        # Return job ID
        return "42"

      def jobid(self, jobname):
        if jobname in ["1", "2", "3"]:
          return jobname

        # "4" is an observation, so no SLURM job
        return None

    patcher = patch('lofar.mac.PipelineControl.Slurm')
    patcher.start().side_effect = MockSlurm
    self.addCleanup(patcher.stop)

    # Catch functions to prevent running executables
    patcher = patch('lofar.mac.PipelineControl.Parset.dockerImage')
    patcher.start().return_value = "lofar-pipeline:trunk"
    self.addCleanup(patcher.stop)

    # ================================
    # Setup mock otdb service
    # ================================

    class MockOTDBService(MessageHandlerInterface):
      def __init__(self, notification_bus):
        """
          notification_bus: bus to send state changes to
        """
        super(MockOTDBService, self).__init__()

        self.service2MethodMap = {
          "TaskGetSpecification": self.TaskGetSpecification,
          "TaskSetStatus": self.TaskSetStatus,
        }

        self.notification_bus = notification_bus

      def TaskGetSpecification(self, OtdbID):
        print "***** TaskGetSpecification(%s) *****" % (OtdbID,)

        if OtdbID == 1:
          predecessors = "[2,3,4]"
        elif OtdbID == 2:
          predecessors = "[3]"
        elif OtdbID == 3:
          predecessors = "[]"
        elif OtdbID == 4:
          return { "TaskSpecification": {
            "Version.number":                                                           "1",
            PARSET_PREFIX + "Observation.otdbID":                                str(OtdbID),
            PARSET_PREFIX + "Observation.Scheduler.predecessors":                "[]",
            PARSET_PREFIX + "Observation.processType":                           "Observation",
            PARSET_PREFIX + "Observation.Cluster.ProcessingCluster.clusterName": "CEP4",
            PARSET_PREFIX + "Observation.stopTime":                              "2016-01-01 01:00:00",
          } }
        else:
          raise Exception("Invalid OtdbID: %s" % OtdbID)

        return { "TaskSpecification": {
          "Version.number":                                                           "1",
          PARSET_PREFIX + "Observation.otdbID":                                str(OtdbID),
          PARSET_PREFIX + "Observation.Scheduler.predecessors":                predecessors,
          PARSET_PREFIX + "Observation.processType":                           "Pipeline",
          PARSET_PREFIX + "Observation.Cluster.ProcessingCluster.clusterName": "CEP4",
        } }

      def TaskSetStatus(self, OtdbID, NewStatus, UpdateTimestamps):
        print "***** TaskSetStatus(%s,%s) *****" % (OtdbID, NewStatus)

        # Broadcast the state change
        content = { "treeID" : OtdbID, "state" : NewStatus, "time_of_change" : datetime.datetime.utcnow() }
        msg = EventMessage(context=DEFAULT_OTDB_NOTIFICATION_SUBJECT, content=content)
        self.notification_bus.send(msg)

        return {'OtdbID':OtdbID, 'MomID':None, 'Success':True}

    service = Service(DEFAULT_OTDB_SERVICENAME,
                      MockOTDBService,
                      busname=self.busname,
                      use_service_methods=True,
                      handler_args={ "notification_bus": self.bus })
    service.start_listening()
    self.addCleanup(service.stop_listening)

    # ================================
    # Setup listener to catch result
    # of our service
    # ================================

    listener = OTDBBusListener(busname=self.busname)
    listener.start_listening()
    self.addCleanup(listener.stop_listening)

    self.trigger = MethodTrigger(listener, "onObservationQueued")

  def test_setStatus(self):
    with PipelineControl(otdb_notification_busname=self.busname, otdb_service_busname=self.busname) as pipelineControl:
      pipelineControl._setStatus(12345, "queued")

      # Wait for the status to propagate
      self.assertTrue(self.trigger.wait())
      self.assertEqual(self.trigger.args[0], 12345)

  def testNoPredecessors(self):
    """
      Request to start a simulated obsid 3, with the following predecessor tree:

        3 requires nothing
    """
    with PipelineControl(otdb_notification_busname=self.busname, otdb_service_busname=self.busname) as pipelineControl:
      # Send fake status update
      pipelineControl._setStatus(3, "scheduled")

      # Wait for message to arrive
      self.assertTrue(self.trigger.wait())

      # Verify message
      self.assertEqual(self.trigger.args[0], 3) # otdbId

      # Check if job was scheduled
      self.assertIn("3", pipelineControl.slurm.scheduled_jobs)
      self.assertIn("3-aborted", pipelineControl.slurm.scheduled_jobs)

  def testPredecessors(self):
    """
      Request to start a simulated obsid 1, with the following predecessor tree:

        1 requires 2, 3, 4
        2 requires 3
        4 is an observation
    """
    with PipelineControl(otdb_notification_busname=self.busname, otdb_service_busname=self.busname) as pipelineControl:
      # Send fake status update
      pipelineControl._setStatus(1, "scheduled")

      # Wait for message to arrive
      self.assertTrue(self.trigger.wait())

      # Verify message
      self.assertEqual(self.trigger.args[0], 1) # otdbId

      # Check if job was scheduled
      self.assertIn("1", pipelineControl.slurm.scheduled_jobs)
      self.assertIn("1-aborted", pipelineControl.slurm.scheduled_jobs)

      # Earliest start of this job > stop time of observation
      for p in pipelineControl.slurm.scheduled_jobs["1"][1]["sbatch_params"]:
        if p.startswith("--begin="):
          begin = datetime.datetime.strptime(p, "--begin=%Y-%m-%dT%H:%M:%S")
          self.assertGreater(begin, datetime.datetime(2016, 1, 1, 1, 0, 0))
          break
      else:
        self.assertTrue(False, "--begin parameter not given to SLURM job")
Beispiel #6
0
class RATaskSpecified(OTDBBusListener):
  def __init__(self, servicename, otdb_busname=None, my_busname=None, **kwargs):
    super(RATaskSpecified, self).__init__(busname=otdb_busname, subject="TaskStatus", **kwargs)

    self.parset_rpc = RPC(service="TaskSpecification", busname=otdb_busname)
    self.send_bus   = ToBus("%s/%s" % (my_busname, servicename))

  def start_listening(self, **kwargs):
    self.parset_rpc.open()
    self.send_bus.open()

    super(RATaskSpecified, self).start_listening(**kwargs)

  def stop_listening(self, **kwargs):
    super(RATaskSpecified, self).stop_listening(**kwargs)

    self.send_bus.close()
    self.parset_rpc.close()

  def onObservationPrescheduled(self, treeId, modificationTime):
    logger.info("Processing obs ID %s", treeId)

    # Request the parset
    main_obsID  = treeId
    main_parset,_ = self.parset_rpc( OtdbID=main_obsID )

    # Construct a dict of all the parsets we retrieved
    parsets = {}
    parsets[main_obsID] = main_parset

    logger.info("Processing predecessors")

    # Collect the initial set of predecessors
    request_obsIDs = set(predecessors(main_parset))

    logger.info("Processing %s", request_obsIDs)

    # Iterate recursively over all known predecessor obsIDs, and request their parsets
    while request_obsIDs:
        obsID = request_obsIDs.pop()

        if obsID in parsets:
            # Predecessor lists can overlap -- we already have this one
            continue

        logger.info("Fetching predecessor %s", obsID)

        # Request predecessor parset
        parsets[obsID],_ = self.parset_rpc( OtdbID=obsID )

        # Add the list of predecessors
        request_obsIDs = request_obsIDs.union(predecessors(parsets[obsID]))

    # Convert parsets to resource indicators
    logger.info("Extracting resource indicators")
    resourceIndicators = dict([(str(obsID), resourceIndicatorsFromParset(parset)) for (obsID,parset) in parsets.iteritems()])

    # Construct and send result message
    logger.info("Sending result")
    result = {
      "sasID": main_obsID,
      "state": "prescheduled",
      "time_of_change": modificationTime,
      "resource_indicators": resourceIndicators,
    }

    # Put result on bus
    msg = EventMessage(content=result)
    self.send_bus.send(msg)

    logger.info("Result sent")
Beispiel #7
0
class RATaskSpecified(OTDBBusListener):
    def __init__(self,
                 servicename,
                 otdb_busname=None,
                 my_busname=None,
                 **kwargs):
        super(RATaskSpecified, self).__init__(busname=otdb_busname,
                                              subject="TaskStatus",
                                              **kwargs)

        self.parset_rpc = RPC(service="TaskSpecification",
                              busname=otdb_busname)
        self.send_bus = ToBus("%s/%s" % (my_busname, servicename))

    def start_listening(self, **kwargs):
        self.parset_rpc.open()
        self.send_bus.open()

        super(RATaskSpecified, self).start_listening(**kwargs)

    def stop_listening(self, **kwargs):
        super(RATaskSpecified, self).stop_listening(**kwargs)

        self.send_bus.close()
        self.parset_rpc.close()

    def onObservationPrescheduled(self, treeId, modificationTime):
        logger.info("Processing obs ID %s", treeId)

        # Request the parset
        main_obsID = treeId
        main_parset, _ = self.parset_rpc(OtdbID=main_obsID)

        # Construct a dict of all the parsets we retrieved
        parsets = {}
        parsets[main_obsID] = main_parset

        logger.info("Processing predecessors")

        # Collect the initial set of predecessors
        request_obsIDs = set(predecessors(main_parset))

        logger.info("Processing %s", request_obsIDs)

        # Iterate recursively over all known predecessor obsIDs, and request their parsets
        while request_obsIDs:
            obsID = request_obsIDs.pop()

            if obsID in parsets:
                # Predecessor lists can overlap -- we already have this one
                continue

            logger.info("Fetching predecessor %s", obsID)

            # Request predecessor parset
            parsets[obsID], _ = self.parset_rpc(OtdbID=obsID)

            # Add the list of predecessors
            request_obsIDs = request_obsIDs.union(predecessors(parsets[obsID]))

        # Convert parsets to resource indicators
        logger.info("Extracting resource indicators")
        resourceIndicators = dict([(str(obsID),
                                    resourceIndicatorsFromParset(parset))
                                   for (obsID, parset) in parsets.iteritems()])

        # Construct and send result message
        logger.info("Sending result")
        result = {
            "sasID": main_obsID,
            "state": "prescheduled",
            "time_of_change": modificationTime,
            "resource_indicators": resourceIndicators,
        }

        # Put result on bus
        msg = EventMessage(content=result)
        self.send_bus.send(msg)

        logger.info("Result sent")
Beispiel #8
0
class CacheManager:
    def __init__(self,
                 cache_path='.du_cache.py',
                 mountpoint=CEP4_DATA_MOUNTPOINT,
                 exchange=DEFAULT_BUSNAME,
                 broker=DEFAULT_BROKER):

        self._cache_path = cache_path

        self.otdb_listener = OTDBBusListener(
            _CacheManagerOTDBEventMessageHandler,
            handler_kwargs={'cache_manager': self},
            exchange=exchange,
            broker=broker,
            num_threads=1)

        self.dm_listener = DataManagementBusListener(
            _CacheManagerDataManagementEventMessageHandler,
            handler_kwargs={'cache_manager': self},
            exchange=exchange,
            broker=broker,
            num_threads=1)

        self.event_bus = ToBus(exchange=exchange, broker=broker)

        self._updateCacheThread = None
        self._running = False

        self._cacheLock = RLock()

        self._cache = {'path_du_results': {}, 'otdb_id2path': {}}
        self._last_cache_write_timestamp = datetime.datetime(1970, 1, 1)
        self._readCacheFromDisk()

        # dict to hold threading Events per path to prevent expensive multiple du calls for each path
        self._du_threading_events = {}
        self._du_threading_events_lock = RLock()

        self.disk_usage = DiskUsage(mountpoint=mountpoint,
                                    exchange=exchange,
                                    broker=broker)

    def _sendDiskUsageChangedNotification(self,
                                          path,
                                          disk_usage,
                                          otdb_id=None):
        try:
            msg = EventMessage(subject='%s.DiskUsageChanged' %
                               DEFAULT_DM_NOTIFICATION_PREFIX,
                               content={
                                   'path':
                                   path,
                                   'disk_usage':
                                   disk_usage,
                                   'disk_usage_readable':
                                   humanreadablesize(disk_usage),
                                   'otdb_id':
                                   otdb_id
                               })
            logger.info('Sending notification with subject %s to %s: %s',
                        msg.subject, self.event_bus.exchange, msg.content)
            self.event_bus.send(msg)
        except Exception as e:
            logger.error(str(e))

    def _readCacheFromDisk(self):
        # maybe this cache on disk is slow, if so, revert to proper db solution
        try:
            if os.path.exists(self._cache_path):
                with open(self._cache_path, 'r') as file:
                    cache_from_disk = eval(file.read().strip())  #slow!
                    with self._cacheLock:
                        self._cache = cache_from_disk
                        if not isinstance(self._cache, dict):
                            self._cache = {
                                'path_du_results': {},
                                'otdb_id2path': {}
                            }
                        if 'path_du_results' not in self._cache:
                            self._cache['path_du_results'] = {}
                        if 'otdb_id2path' not in self._cache:
                            self._cache['otdb_id2path'] = {}
        except Exception as e:
            logger.error("Error while reading in du cache: %s", e)
            with self._cacheLock:
                self._cache = {'path_du_results': {}, 'otdb_id2path': {}}

    def _writeCacheToDisk(self):
        try:
            # only persist (a subset of) the cache to disk every once in a while.
            if datetime.datetime.utcnow(
            ) - self._last_cache_write_timestamp > datetime.timedelta(
                    minutes=5):
                cache_str = ''
                with self._cacheLock:
                    # Take a subset of the entire cache
                    # only the path_du_results for paths at project level (like /data/projects, /data/projects/LC9_001)
                    # Do not store path_du_results for deeper levels on disk, because that makes the disk read/write too slow,
                    # and the deeper levels can be obtained via rhb-du calls quite fast anyway.
                    # Furthermore, once a deeper level du results is stored in the memory cache, then it is also available for fast lookup.
                    # We just don't store these deep levels on disk.
                    sub_cache = {
                        path: du_result
                        for path, du_result in list(
                            self._cache['path_du_results'].items())
                        if self.getDepthToProjectsDir(path) <= 1
                        and du_result.get('found')
                    }
                    cache_str = str(sub_cache)

                tmp_path = '/tmp/tmp_storagequery_cache.py'
                with open(tmp_path, 'w') as file:
                    file.write(cache_str)
                dir_path = os.path.dirname(self._cache_path)
                if dir_path:
                    os.makedirs(dir_path, exist_ok=True)
                shutil.move(tmp_path, self._cache_path)
                self._last_cache_write_timestamp = datetime.datetime.utcnow()
        except Exception as e:
            logger.error("Error while writing du cache: %s", e)

    def _updateCache(self, du_result, send_notification=True):
        if not 'path' in du_result:
            return

        path = du_result['path']
        otdb_id = du_result.get('otdb_id')

        with self._cacheLock:
            path_cache = self._cache['path_du_results']
            otdb_id2path_cache = self._cache['otdb_id2path']

            if otdb_id is None:
                # try to look up the otdb in the path cache
                if path in path_cache:
                    otdb_id = du_result.get('otdb_id')

                # if still None, try to get the id from the path
                if otdb_id is None:
                    otdb_id = getOTDBIdFromPath(path)

            if not path in path_cache or path_cache[path][
                    'disk_usage'] != du_result['disk_usage']:
                # update the cache entry, even when no du result found,
                # cause that will save disk queries next time.
                logger.info('updating cache entry: %s', du_result)
                path_cache[path] = du_result

            if otdb_id != None:
                otdb_id2path_cache[otdb_id] = path

            if not du_result['found']:
                # even when the du for the path is not found,
                # keep a copy in the cache for fast lookup by clients
                # Make sure the size is 0
                du_result['disk_usage'] = 0
                du_result['disk_usage_readable'] = humanreadablesize(0)

            path_cache[path]['cache_timestamp'] = datetime.datetime.utcnow()
            path_cache[path]['needs_update'] = False

        self._writeCacheToDisk()

        if send_notification:
            self._sendDiskUsageChangedNotification(path,
                                                   du_result['disk_usage'],
                                                   otdb_id)

    def _invalidateCacheEntryForPath(self, path):
        with self._cacheLock:
            path_cache = self._cache['path_du_results']
            if path in path_cache:
                path_cache[path]['needs_update'] = True

    def getOtdbIdsFoundOnDisk(self):
        with self._cacheLock:
            otdb_id2path_cache = self._cache['otdb_id2path']
            return sorted(list(otdb_id2path_cache.keys()))

    def getDiskUsagesForAllOtdbIds(self, force_update=False):
        otdb_ids = self.getOtdbIdsFoundOnDisk()

        result = {}
        for otdb_id in otdb_ids:
            result[otdb_id] = self.getDiskUsageForOTDBId(
                otdb_id, force_update=force_update)

        return result

    def getDepthToProjectsDir(self, path):
        return len(
            path.replace(self.disk_usage.path_resolver.projects_path,
                         '').strip('/').split('/'))

    def _scanProjectsTree(self):
        try:

            def addSubDirectoriesToCache(directory):
                if not self._running:
                    return

                depth = self.getDepthToProjectsDir(directory)
                MAX_SCAN_DEPTH = 2
                #depth=0 : projects
                #depth=1 : projects/<project>
                #depth=2 : projects/<project>/<obs>
                #depth=3 : projects/<project>/<obs>/<sub_dir>
                if depth > MAX_SCAN_DEPTH:
                    return

                if depth < MAX_SCAN_DEPTH:
                    logger.info('tree scan: scanning \'%s\'', directory)
                    sd_result = self.disk_usage.path_resolver.getSubDirectories(
                        directory)

                    if sd_result['found']:
                        subdir_paths = [
                            os.path.join(directory, sd)
                            for sd in sd_result['sub_directories']
                        ]

                        for subdir_path in subdir_paths:
                            # recurse
                            addSubDirectoriesToCache(subdir_path)

                with self._cacheLock:
                    path_cache = self._cache['path_du_results']
                    add_empty_du_result_to_cache = not directory in path_cache

                if add_empty_du_result_to_cache:
                    logger.info(
                        'tree scan: adding \'%s\' with empty disk_usage to cache which will be du\'ed later',
                        directory)
                    empty_du_result = {
                        'found': True,
                        'disk_usage': None,
                        'path': directory,
                        'name': directory.split('/')[-1]
                    }
                    self._updateCache(empty_du_result, send_notification=False)

                with self._cacheLock:
                    path_cache = self._cache['path_du_results']
                    if directory in path_cache:
                        # mark cache entry for directory to be updated
                        path_cache[directory]['needs_update'] = True

            addSubDirectoriesToCache(
                self.disk_usage.path_resolver.projects_path)
            logger.info('tree scan complete')

        except Exception as e:
            logger.exception(str(e))

    def _updateOldEntriesInCache(self):
        logger.info('starting updating old cache entries')
        while self._running:
            try:
                now = datetime.datetime.utcnow()
                with self._cacheLock:
                    path_cache = self._cache['path_du_results']
                    old_entries = [
                        cache_entry
                        for cache_entry in list(path_cache.values()) if now -
                        cache_entry['cache_timestamp'] > MAX_CACHE_ENTRY_AGE
                    ]
                    needs_update_entries = [
                        cache_entry
                        for cache_entry in list(path_cache.values())
                        if cache_entry.get('needs_update', False)
                    ]

                updateable_entries = old_entries + needs_update_entries

                logger.info(
                    '%s old cache entries need to be updated, #age:%s #needs_update:%s',
                    len(updateable_entries), len(old_entries),
                    len(needs_update_entries))

                if updateable_entries:
                    # sort them oldest to newest, 'needs_update' paths first
                    def compareFunc(entry1, entry2):
                        if entry1.get('needs_update'
                                      ) and not entry2.get('needs_update'):
                            return -1
                        if not entry1.get('needs_update') and entry2.get(
                                'needs_update'):
                            return 1

                        #depth1 = self.getDepthToProjectsDir(entry1['path'])
                        #depth2 = self.getDepthToProjectsDir(entry2['path'])

                        #if depth1 != depth2:
                        ## lower level dirs are sorted in front of higher level dirs
                        #return depth1 - depth2

                        if entry1['cache_timestamp'] < entry2[
                                'cache_timestamp']:
                            return -1
                        if entry1['cache_timestamp'] > entry2[
                                'cache_timestamp']:
                            return 1
                        return 0

                    updateable_entries = sorted(updateable_entries,
                                                key=cmp_to_key(compareFunc))

                    cacheUpdateStart = datetime.datetime.utcnow()

                    for i, cache_entry in enumerate(updateable_entries):
                        try:
                            # it might be that the cache_entry was already updated via another way
                            # so only update it if still to old or needs_update
                            now = datetime.datetime.utcnow()
                            if now - cache_entry[
                                    'cache_timestamp'] > MAX_CACHE_ENTRY_AGE or cache_entry.get(
                                        'needs_update', False):
                                path = cache_entry.get('path')
                                if path:
                                    logger.info(
                                        '_updateOldEntriesInCache: examining entry %s/%s. timestamp:%s age:%s needs_update:%s path: \'%s\'',
                                        i, len(updateable_entries),
                                        cache_entry['cache_timestamp'],
                                        format_timedelta(
                                            now -
                                            cache_entry['cache_timestamp']),
                                        cache_entry.get('needs_update',
                                                        False), path)

                                    #du a full update from disk, which might be (really) slow.
                                    result = du_getDiskUsageForPath(path)
                                    logger.debug(
                                        'trying to update old entry in cache: %s',
                                        result)
                                    self._updateCache(result)
                        except Exception as e:
                            logger.error(str(e))

                        if not self._running:
                            logger.info('exiting _updateCacheThread')
                            return

                        if datetime.datetime.utcnow(
                        ) - cacheUpdateStart > datetime.timedelta(minutes=10):
                            # break out of cache update loop if full update takes more than 1min
                            # next loop we'll start with the oldest cache entries again
                            logger.info(
                                'skipping remaining %s old cache entries updates, they will be updated next time',
                                len(updateable_entries) - i)
                            break

                #update the CEP4 capacities in the RADB once in a while...
                self._updateCEP4CapacitiesInRADB()

                #sleep for a while, (or stop if requested)
                for i in range(60):
                    sleep(1)
                    if not self._running:
                        logger.info('exiting _updateCacheThread')
                        return

            except Exception as e:
                logger.exception(str(e))

    def _updateCEP4CapacitiesInRADB(self):
        try:
            df_result = self.disk_usage.getDiskFreeSpace()
            if df_result['found']:
                #get the total used space, and update the resource availability in the radb
                radbrpc = self.disk_usage.path_resolver.radbrpc
                storage_resources = radbrpc.getResources(
                    resource_types='storage', include_availability=True)
                cep4_storage_resource = next(x for x in storage_resources
                                             if 'CEP4' in x['name'])

                total_capacity = df_result.get('disk_size')
                used_capacity = df_result.get('disk_usage')
                available_capacity = df_result.get('disk_free')

                logger.info(
                    'updating capacities for resource \'%s\' (id=%s) in the RADB: total=%s, used=%s, available=%s',
                    cep4_storage_resource['name'], cep4_storage_resource['id'],
                    humanreadablesize(total_capacity),
                    humanreadablesize(used_capacity),
                    humanreadablesize(available_capacity))

                radbrpc.updateResourceAvailability(
                    cep4_storage_resource['id'],
                    available_capacity=available_capacity,
                    total_capacity=total_capacity)
        except Exception as e:
            logger.error('_updateCEP4CapacitiesInRADB: %s', e)

    def open(self):
        logger.info("opening storagequeryservice cache...")
        self._running = True

        self.disk_usage.open()
        self.event_bus.open()

        self._scanProjectsTree()

        self._updateCacheThread = Thread(target=self._updateOldEntriesInCache)
        self._updateCacheThread.daemon = True
        self._updateCacheThread.start()

        self.otdb_listener.start_listening()
        self.dm_listener.start_listening()
        logger.info("opened storagequeryservice cache")

    def close(self):
        logger.info("closing storagequeryservice cache...")
        self._running = False

        self.otdb_listener.stop_listening()
        self.dm_listener.stop_listening()
        self._updateCacheThread.join()

        self.event_bus.close()
        self.disk_usage.close()
        logger.info("closed storagequeryservice cache")

    def __enter__(self):
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def onObservationFinished(self, otdb_id, modificationTime):
        self._onDiskActivityForOTDBId(otdb_id)

    def onObservationAborted(self, otdb_id, modificationTime):
        self._onDiskActivityForOTDBId(otdb_id)

    def onTaskDeleted(self, otdb_id, deleted, paths, message=''):
        self._onDiskActivityForOTDBId(otdb_id)

        with self._cacheLock:
            if deleted and otdb_id != None and otdb_id in self._cache[
                    'otdb_id2path']:
                del self._cache['otdb_id2path'][otdb_id]

    def _onDiskActivityForOTDBId(self, otdb_id):
        result = self.disk_usage.getDiskUsageForOTDBId(otdb_id)
        self._updateCache(result)

        task_path = result.get('path')
        projects_path = self.disk_usage.path_resolver.projects_path

        # update all paths up the tree up to the projects_path
        # update the resource availability in the radb as well
        path = task_path
        while path:
            parent_path = '/'.join(path.split('/')[:-1])

            if projects_path.startswith(
                    parent_path) and len(parent_path) < len(projects_path):
                break

            logger.info(
                'invalidating cache entry for %s because disk usage for task %s in %s changed',
                parent_path, otdb_id, task_path)

            self._invalidateCacheEntryForPath(parent_path)

            path = parent_path

    def getDiskUsageForOTDBId(self,
                              otdb_id,
                              include_scratch_paths=True,
                              force_update=False):
        return self.getDiskUsageForTask(
            otdb_id=otdb_id,
            include_scratch_paths=include_scratch_paths,
            force_update=force_update)

    def getDiskUsageForMoMId(self,
                             mom_id,
                             include_scratch_paths=True,
                             force_update=False):
        return self.getDiskUsageForTask(
            mom_id=mom_id,
            include_scratch_paths=include_scratch_paths,
            force_update=force_update)

    def getDiskUsageForRADBId(self,
                              radb_id,
                              include_scratch_paths=True,
                              force_update=False):
        return self.getDiskUsageForTask(
            radb_id=radb_id,
            include_scratch_paths=include_scratch_paths,
            force_update=force_update)

    def getDiskUsageForTask(self,
                            radb_id=None,
                            mom_id=None,
                            otdb_id=None,
                            include_scratch_paths=True,
                            force_update=False):
        logger.info(
            "cache.getDiskUsageForTask(radb_id=%s, mom_id=%s, otdb_id=%s, include_scratch_paths=%s, force_update=%s)",
            radb_id, mom_id, otdb_id, include_scratch_paths, force_update)

        if otdb_id != None and not include_scratch_paths:
            with self._cacheLock:
                path = self._cache['otdb_id2path'].get(otdb_id)

            if path:
                logger.info('Using path from cache for otdb_id %s %s', otdb_id,
                            path)
                return self.getDiskUsageForPath(path,
                                                force_update=force_update)

        logger.info(
            "cache.getDiskUsageForTask could not find path in cache, determining path..."
        )

        path_result = self.disk_usage.path_resolver.getPathForTask(
            radb_id=radb_id,
            mom_id=mom_id,
            otdb_id=otdb_id,
            include_scratch_paths=include_scratch_paths)

        if path_result['found']:
            task_path = path_result['path']
            scratch_paths = path_result.get('scratch_paths', [])

            # get all du's in parallel over all paths
            paths = [task_path] + scratch_paths
            paths_du_result = self.getDiskUsageForPaths(
                paths, force_update=force_update)

            # split into project and subdir
            path_du_result = paths_du_result.pop(task_path)
            scratch_du_result = paths_du_result

            task_du_result = dict(path_du_result)

            # yield id's for if available, or None
            for id in ['radb_id', 'otdb_id', 'mom_id']:
                task_du_result[id] = path_result.get(id)

            if scratch_du_result:
                task_du_result['scratch_paths'] = scratch_du_result

            return task_du_result

        # still no path(s) found for otdb_id, now try from cache and ignore possible scratch paths
        if otdb_id != None:
            with self._cacheLock:
                path = self._cache['otdb_id2path'].get(otdb_id)

            if path:
                logger.info(
                    'Using path from cache for otdb_id %s %s (ignoring possible scratch/share paths)',
                    otdb_id, path)
                return self.getDiskUsageForPath(path,
                                                force_update=force_update)

        return {'found': False, 'path': path_result['path']}

    def getDiskUsageForTasks(self,
                             radb_ids=None,
                             mom_ids=None,
                             otdb_ids=None,
                             include_scratch_paths=True,
                             force_update=False):
        logger.info(
            "cache.getDiskUsageForTasks(radb_ids=%s, mom_ids=%s, otdb_ids=%s)"
            % (radb_ids, mom_ids, otdb_ids))
        tasks_result = {'radb_ids': {}, 'mom_ids': {}, 'otdb_ids': {}}
        if radb_ids is None:
            radb_ids = []
        if mom_ids is None:
            mom_ids = []
        if otdb_ids is None:
            otdb_ids = []

        with futures.ThreadPoolExecutor(
                max_workers=os.cpu_count()) as executor:

            # helper function to expand parallel p_kwarg dict into kwargs
            def parallel_getDiskUsageForTask(p_kwarg):
                return self.getDiskUsageForTask(**p_kwarg)

            parallel_kwargs = [{
                'radb_id': radb_id,
                'include_scratch_paths': include_scratch_paths,
                'force_update': force_update
            } for radb_id in radb_ids]
            parallel_kwargs += [{
                'mom_id': mom_id,
                'include_scratch_paths': include_scratch_paths,
                'force_update': force_update
            } for mom_id in mom_ids]
            parallel_kwargs += [{
                'otdb_id': otdb_id,
                'include_scratch_paths': include_scratch_paths,
                'force_update': force_update
            } for otdb_id in otdb_ids]
            results = list(
                executor.map(parallel_getDiskUsageForTask, parallel_kwargs))

            # collect results in a dict grouped by id_type
            for result in results:
                if result.get('radb_id') in radb_ids:
                    tasks_result['radb_ids'][result['radb_id']] = result
                if result.get('mom_id') in mom_ids:
                    tasks_result['mom_ids'][result['mom_id']] = result
                if result.get('otdb_id') in otdb_ids:
                    tasks_result['otdb_ids'][result['otdb_id']] = result

        logger.info(
            "cache.getDiskUsageForTasks(radb_ids=%s, mom_ids=%s, otdb_ids=%s) returning: %s"
            % (radb_ids, mom_ids, otdb_ids, tasks_result))

        return tasks_result

    def getDiskUsageForPaths(self, paths, force_update=False):
        with futures.ThreadPoolExecutor(
                max_workers=os.cpu_count()) as executor:

            # helper function to expand parallel p_arg tuple into list of args
            def parallel_getDiskUsageForPath(p_arg):
                return self.getDiskUsageForPath(*p_arg)

            parallel_args = [(path, force_update) for path in paths]
            results = list(
                executor.map(parallel_getDiskUsageForPath, parallel_args))

            return {result['path']: result for result in results}

    def getDiskUsageForPath(self, path, force_update=False):
        logger.info("cache.getDiskUsageForPath('%s', force_update=%s)", path,
                    force_update)
        needs_cache_update = False
        if not force_update:
            with self._cacheLock:
                needs_cache_update |= path not in self._cache[
                    'path_du_results']

        if needs_cache_update or force_update:
            logger.info("cache update needed for %s", path)

            # check if some other thread is already doing a du call for this path...
            with self._du_threading_events_lock:
                path_threading_event = self._du_threading_events.get(path)
                need_to_do_du_call = path_threading_event is None

                if need_to_do_du_call:
                    # no other thread is currently du'ing/updating this path
                    # so create a threading Event and store it in the dict,
                    # so other threads can wait for this event.
                    logger.info("updating the cache for %s current_thread=%s",
                                path,
                                current_thread().name)
                    path_threading_event = Event()
                    self._du_threading_events[path] = path_threading_event

            if need_to_do_du_call:
                # no other thread is currently du'ing/updating this path
                # so we need to do it here.
                result = du_getDiskUsageForPath(path)
                self._updateCache(result)

                # signal threads waiting for this same path du call
                # and do bookkeeping
                with self._du_threading_events_lock:
                    logger.info(
                        "signaling other threads that the cache was updated for %s current_thread=%s",
                        path,
                        current_thread().name)
                    path_threading_event.set()
                    del self._du_threading_events[path]
            else:
                logger.info(
                    "waiting for du call on other thread that will update the cache for %s current_thread=%s",
                    path,
                    current_thread().name)
                path_threading_event.wait()
                logger.info(
                    "another thread just updated the cache for %s current_thread=%s",
                    path,
                    current_thread().name)

        with self._cacheLock:
            if path in self._cache['path_du_results']:
                result = self._cache['path_du_results'][path]
            else:
                result = {
                    'found': False,
                    'path': path,
                    'message': 'unknown error'
                }
                if not self.disk_usage.path_resolver.pathExists(path):
                    result['message'] = 'No such path: %s' % path

        result['disk_usage_readable'] = humanreadablesize(
            result.get('disk_usage', 0))
        logger.info('cache.getDiskUsageForPath(\'%s\') result: %s', path,
                    result)
        return result

    def getDiskUsageForTaskAndSubDirectories(self,
                                             radb_id=None,
                                             mom_id=None,
                                             otdb_id=None,
                                             force_update=False):
        logger.info(
            "cache.getDiskUsageForTaskAndSubDirectories(radb_id=%s, mom_id=%s, otdb_id=%s)"
            % (radb_id, mom_id, otdb_id))
        task_du_result = self.getDiskUsageForTask(radb_id=radb_id,
                                                  mom_id=mom_id,
                                                  otdb_id=otdb_id,
                                                  force_update=force_update)
        if task_du_result['found']:
            task_sd_result = self.disk_usage.path_resolver.getSubDirectories(
                task_du_result['path'])

            if task_sd_result['found']:
                subdir_paths = [
                    os.path.join(task_du_result['path'], sd)
                    for sd in task_sd_result['sub_directories']
                ]
                subdirs_du_result = self.getDiskUsageForPaths(
                    subdir_paths, force_update=force_update)

                result = {
                    'found': True,
                    'task_directory': task_du_result,
                    'sub_directories': subdirs_du_result
                }
                logger.info(
                    "result for cache.getDiskUsageForTaskAndSubDirectories(radb_id=%s, mom_id=%s, otdb_id=%s): %s",
                    radb_id, mom_id, otdb_id, result)
                return result

        logger.warning(
            "result for cache.getDiskUsageForTaskAndSubDirectories(radb_id=%s, mom_id=%s, otdb_id=%s): %s",
            radb_id, mom_id, otdb_id, task_du_result)
        return task_du_result

    def getDiskUsageForProjectDirAndSubDirectories(self,
                                                   radb_id=None,
                                                   mom_id=None,
                                                   otdb_id=None,
                                                   project_name=None,
                                                   force_update=False):
        logger.info(
            "cache.getDiskUsageForProjectDirAndSubDirectories(radb_id=%s, mom_id=%s, otdb_id=%s)"
            % (radb_id, mom_id, otdb_id))
        path_result = self.disk_usage.path_resolver.getProjectDirAndSubDirectories(
            radb_id=radb_id,
            mom_id=mom_id,
            otdb_id=otdb_id,
            project_name=project_name)
        if path_result['found']:
            projectdir_path = path_result['path']
            subdir_paths = [
                os.path.join(path_result['path'], sd)
                for sd in path_result['sub_directories']
            ]

            # get all du's in parallel over all paths
            paths = [projectdir_path] + subdir_paths
            paths_du_result = self.getDiskUsageForPaths(
                paths, force_update=force_update)

            # split into project and subdir
            projectdir_du_result = paths_du_result.pop(projectdir_path)
            subdirs_du_result = paths_du_result

            # create total result dict
            result = {
                'found': True,
                'projectdir': projectdir_du_result,
                'sub_directories': subdirs_du_result
            }
            logger.info(
                'cache.getDiskUsageForProjectDirAndSubDirectories result: %s' %
                result)
            return result

        return path_result

    def getDiskUsageForProjectsDirAndSubDirectories(self, force_update=False):
        logger.info("cache.getDiskUsageForProjectsDirAndSubDirectories")
        projects_path = self.disk_usage.path_resolver.projects_path
        project_subdirs_result = self.disk_usage.path_resolver.getSubDirectories(
            projects_path)
        subdir_paths = [
            os.path.join(projects_path, sd)
            for sd in project_subdirs_result['sub_directories']
        ] if project_subdirs_result['found'] else []

        # get all du's in parallel over all paths
        paths = [projects_path] + subdir_paths
        paths_du_result = self.getDiskUsageForPaths(paths,
                                                    force_update=force_update)

        # split into project and subdir
        projectsdir_du_result = paths_du_result.pop(projects_path)
        subdirs_du_result = paths_du_result

        # create total result dict
        result = {
            'found': True,
            'projectdir': projectsdir_du_result,
            'sub_directories': subdirs_du_result
        }

        logger.info(
            'cache.getDiskUsageForProjectsDirAndSubDirectories result: %s' %
            result)
        return result
Beispiel #9
0
class TestPipelineControl(unittest.TestCase):
    def setUp(self):
        # Create a random bus
        self.busname = "%s-%s" % (sys.argv[0], str(uuid.uuid4())[:8])
        self.bus = ToBus(self.busname, {
            "create": "always",
            "delete": "always",
            "node": {
                "type": "topic"
            }
        })
        self.bus.open()
        self.addCleanup(self.bus.close)

        # Patch SLURM
        class MockSlurm(object):
            def __init__(self, *args, **kwargs):
                self.scheduled_jobs = {}

            def submit(self, jobName, *args, **kwargs):
                print "Schedule SLURM job '%s': %s, %s" % (jobName, args,
                                                           kwargs)

                self.scheduled_jobs[jobName] = (args, kwargs)

                # Return job ID
                return "42"

            def jobid(self, jobname):
                if jobname in ["1", "2", "3"]:
                    return jobname

                # "4" is an observation, so no SLURM job
                return None

        patcher = patch('lofar.mac.PipelineControl.Slurm')
        patcher.start().side_effect = MockSlurm
        self.addCleanup(patcher.stop)

        # Catch functions to prevent running executables
        patcher = patch('lofar.mac.PipelineControl.Parset.dockerImage')
        patcher.start().return_value = "lofar-pipeline:trunk"
        self.addCleanup(patcher.stop)

        # ================================
        # Setup mock otdb service
        # ================================

        class MockOTDBService(MessageHandlerInterface):
            def __init__(self, notification_bus):
                """
          notification_bus: bus to send state changes to
        """
                super(MockOTDBService, self).__init__()

                self.service2MethodMap = {
                    "TaskGetSpecification": self.TaskGetSpecification,
                    "TaskSetStatus": self.TaskSetStatus,
                }

                self.notification_bus = notification_bus

            def TaskGetSpecification(self, OtdbID):
                print "***** TaskGetSpecification(%s) *****" % (OtdbID, )

                if OtdbID == 1:
                    predecessors = "[2,3,4]"
                elif OtdbID == 2:
                    predecessors = "[3]"
                elif OtdbID == 3:
                    predecessors = "[]"
                elif OtdbID == 4:
                    return {
                        "TaskSpecification": {
                            "Version.number":
                            "1",
                            PARSET_PREFIX + "Observation.otdbID":
                            str(OtdbID),
                            PARSET_PREFIX + "Observation.Scheduler.predecessors":
                            "[]",
                            PARSET_PREFIX + "Observation.processType":
                            "Observation",
                            PARSET_PREFIX + "Observation.Cluster.ProcessingCluster.clusterName":
                            "CEP4",
                            PARSET_PREFIX + "Observation.stopTime":
                            "2016-01-01 01:00:00",
                        }
                    }
                else:
                    raise Exception("Invalid OtdbID: %s" % OtdbID)

                return {
                    "TaskSpecification": {
                        "Version.number":
                        "1",
                        PARSET_PREFIX + "Observation.otdbID":
                        str(OtdbID),
                        PARSET_PREFIX + "Observation.Scheduler.predecessors":
                        predecessors,
                        PARSET_PREFIX + "Observation.processType":
                        "Pipeline",
                        PARSET_PREFIX + "Observation.Cluster.ProcessingCluster.clusterName":
                        "CEP4",
                    }
                }

            def TaskSetStatus(self, OtdbID, NewStatus, UpdateTimestamps):
                print "***** TaskSetStatus(%s,%s) *****" % (OtdbID, NewStatus)

                # Broadcast the state change
                content = {
                    "treeID": OtdbID,
                    "state": NewStatus,
                    "time_of_change": datetime.datetime.utcnow()
                }
                msg = EventMessage(context=DEFAULT_OTDB_NOTIFICATION_SUBJECT,
                                   content=content)
                self.notification_bus.send(msg)

                return {'OtdbID': OtdbID, 'MomID': None, 'Success': True}

        service = Service(DEFAULT_OTDB_SERVICENAME,
                          MockOTDBService,
                          busname=self.busname,
                          use_service_methods=True,
                          handler_args={"notification_bus": self.bus})
        service.start_listening()
        self.addCleanup(service.stop_listening)

        # ================================
        # Setup listener to catch result
        # of our service
        # ================================

        listener = OTDBBusListener(busname=self.busname)
        listener.start_listening()
        self.addCleanup(listener.stop_listening)

        self.trigger = MethodTrigger(listener, "onObservationQueued")

    def test_setStatus(self):
        with PipelineControl(
                otdb_notification_busname=self.busname,
                otdb_service_busname=self.busname) as pipelineControl:
            pipelineControl._setStatus(12345, "queued")

            # Wait for the status to propagate
            self.assertTrue(self.trigger.wait())
            self.assertEqual(self.trigger.args[0], 12345)

    def testNoPredecessors(self):
        """
      Request to start a simulated obsid 3, with the following predecessor tree:

        3 requires nothing
    """
        with PipelineControl(
                otdb_notification_busname=self.busname,
                otdb_service_busname=self.busname) as pipelineControl:
            # Send fake status update
            pipelineControl._setStatus(3, "scheduled")

            # Wait for message to arrive
            self.assertTrue(self.trigger.wait())

            # Verify message
            self.assertEqual(self.trigger.args[0], 3)  # otdbId

            # Check if job was scheduled
            self.assertIn("3", pipelineControl.slurm.scheduled_jobs)
            self.assertIn("3-aborted", pipelineControl.slurm.scheduled_jobs)

    def testPredecessors(self):
        """
      Request to start a simulated obsid 1, with the following predecessor tree:

        1 requires 2, 3, 4
        2 requires 3
        4 is an observation
    """
        with PipelineControl(
                otdb_notification_busname=self.busname,
                otdb_service_busname=self.busname) as pipelineControl:
            # Send fake status update
            pipelineControl._setStatus(1, "scheduled")

            # Wait for message to arrive
            self.assertTrue(self.trigger.wait())

            # Verify message
            self.assertEqual(self.trigger.args[0], 1)  # otdbId

            # Check if job was scheduled
            self.assertIn("1", pipelineControl.slurm.scheduled_jobs)
            self.assertIn("1-aborted", pipelineControl.slurm.scheduled_jobs)

            # Earliest start of this job > stop time of observation
            for p in pipelineControl.slurm.scheduled_jobs["1"][1][
                    "sbatch_params"]:
                if p.startswith("--begin="):
                    begin = datetime.datetime.strptime(
                        p, "--begin=%Y-%m-%dT%H:%M:%S")
                    self.assertGreater(begin,
                                       datetime.datetime(2016, 1, 1, 1, 0, 0))
                    break
            else:
                self.assertTrue(False,
                                "--begin parameter not given to SLURM job")
Beispiel #10
0
class RADBPGListener(PostgresListener):
    def __init__(self,
                 exchange=DEFAULT_BUSNAME,
                 dbcreds=None,
                 broker=DEFAULT_BROKER):
        super(RADBPGListener, self).__init__(dbcreds=dbcreds)

        self.event_bus = ToBus(exchange=exchange, broker=broker)

        self.radb = RADatabase(dbcreds=dbcreds)

        self.subscribe('task_update', self.onTaskUpdated)
        self.subscribe('task_insert', self.onTaskInserted)
        self.subscribe('task_delete', self.onTaskDeleted)

        self.subscribe('task_predecessor_insert_column_task_id',
                       self.onTaskPredecessorChanged)
        self.subscribe('task_predecessor_update_column_task_id',
                       self.onTaskPredecessorChanged)
        self.subscribe('task_predecessor_delete_column_task_id',
                       self.onTaskPredecessorChanged)

        self.subscribe('task_predecessor_insert_column_predecessor_id',
                       self.onTaskSuccessorChanged)
        self.subscribe('task_predecessor_update_column_predecessor_id',
                       self.onTaskSuccessorChanged)
        self.subscribe('task_predecessor_delete_column_predecessor_id',
                       self.onTaskSuccessorChanged)

        # when the specification starttime and endtime are updated, then that effects the task as well
        self.subscribe('specification_update', self.onSpecificationUpdated)

        self.subscribe('resource_claim_update', self.onResourceClaimUpdated)
        self.subscribe('resource_claim_insert', self.onResourceClaimInserted)
        self.subscribe('resource_claim_delete', self.onResourceClaimDeleted)

        self.subscribe('resource_availability_update',
                       self.onResourceAvailabilityUpdated)
        self.subscribe('resource_capacity_update',
                       self.onResourceCapacityUpdated)

    def onTaskUpdated(self, payload=None):
        # Send notification for the given updated task
        task_id = payload
        task = self.radb.getTask(task_id)
        self._sendNotification('TaskUpdated', task)

        # The "blocked_by_ids" property of the given task's successors might have been updated due to the given task
        # status being updated. Therefore also send a notification for these successors - lazily ignoring that they
        # might not have changed.
        suc_sched_tasks = self.radb.getTasks(task_ids=task['successor_ids'],
                                             task_status='scheduled')
        for suc_sched_task in suc_sched_tasks:
            self._sendNotification('TaskUpdated', suc_sched_task)

    def onTaskInserted(self, payload=None):
        self._sendNotification('TaskInserted', self.radb.getTask(payload))

    def onTaskDeleted(self, payload=None):
        self._sendNotification('TaskDeleted', payload)

    def onTaskPredecessorChanged(self, task_id):
        logger.info('onTaskPredecessorChanged(task_id=%s)', task_id)
        self._sendNotification('TaskUpdated', self.radb.getTask(task_id))

    def onTaskSuccessorChanged(self, task_id):
        logger.info('onTaskSuccessorChanged(task_id=%s)', task_id)
        self._sendNotification('TaskUpdated', self.radb.getTask(task_id))

    def onSpecificationUpdated(self, payload=None):
        # when the specification starttime and endtime are updated, then that effects the task as well
        self._sendNotification('TaskUpdated',
                               self.radb.getTask(specification_id=payload))

    def onResourceClaimUpdated(self, payload=None):
        self._sendNotification('ResourceClaimUpdated',
                               self.radb.getResourceClaim(payload))

    def onResourceClaimInserted(self, payload=None):
        self._sendNotification('ResourceClaimInserted',
                               self.radb.getResourceClaim(payload))

    def onResourceClaimDeleted(self, payload=None):
        self._sendNotification('ResourceClaimDeleted', payload)

    def onResourceAvailabilityUpdated(self, payload=None):
        r = self.radb.getResources(resource_ids=[payload],
                                   include_availability=True)[0]
        r = {k: r[k] for k in ['id', 'active']}
        self._sendNotification('ResourceAvailabilityUpdated', r)

    def onResourceCapacityUpdated(self, payload=None):
        r = self.radb.getResources(resource_ids=[payload],
                                   include_availability=True)[0]
        r = {
            k: r[k]
            for k in
            ['id', 'total_capacity', 'available_capacity', 'used_capacity']
        }
        self._sendNotification('ResourceCapacityUpdated', r)

    def __enter__(self):
        super(RADBPGListener, self).__enter__()
        self.radb.connect()
        self.event_bus.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        super(RADBPGListener, self).__exit__(exc_type, exc_val, exc_tb)
        self.radb.disconnect()
        self.event_bus.close()

    def _formatTimestampsAsIso(self, fields, contentDict):
        '''convert all requested fields in the contentDict to proper isoformat datetime strings.
        In postgres we use timestamps without timezone.
        By convention we only enter utc values.
        But, if they are json encoded by postgress, they are not properly formatted with the in isoformat with a 'Z' at the end.
        So, parse the requested fields, and return them as datetime.
        '''
        try:
            for field in fields:
                try:
                    if field in contentDict:
                        timestampStr = contentDict[field]
                        formatStr = '%Y-%m-%dT%H:%M:%S' if 'T' in timestampStr else '%Y-%m-%d %H:%M:%S'
                        if timestampStr.rfind('.') > -1:
                            formatStr += '.%f'

                        timestamp = datetime.strptime(timestampStr, formatStr)

                        contentDict[field] = timestamp
                except Exception as e:
                    logger.error(
                        'Could not convert field \'%s\' to datetime: %s' %
                        (field, e))

            return contentDict
        except Exception as e:
            logger.error(
                'Error while convering timestamp fields \'%s\'in %s\n%s' %
                (fields, contentDict, e))

    def _sendNotification(self, subject, contentDict):
        try:
            if subject and contentDict:
                msg = EventMessage(subject="%s.%s" %
                                   (DEFAULT_RADB_NOTIFICATION_PREFIX, subject),
                                   content=contentDict)
                logger.info('Sending notification %s to %s: %s', subject,
                            self.event_bus.exchange,
                            str(contentDict).replace('\n', ' '))
                self.event_bus.send(msg)
        except Exception as e:
            logger.error(str(e))