class RATaskSpecifiedOTDBEventMessageHandler(OTDBEventMessageHandler): def __init__(self, exchange=DEFAULT_BUSNAME, broker=DEFAULT_BROKER): """ :param exchange: name of the exchange to listen on. :param broker: name of the broker to connect to. """ super().__init__() self.otdbrpc = OTDBRPC.create(exchange=exchange, broker=broker) self.radbrpc = RADBRPC.create(exchange=exchange, broker=broker) self.momrpc = MoMQueryRPC.create(exchange=exchange, broker=broker) self.send_bus = ToBus(exchange=exchange, broker=broker) def start_handling(self): self.otdbrpc.open() self.momrpc.open() self.radbrpc.open() self.send_bus.open() def stop_handling(self): self.otdbrpc.close() self.momrpc.close() self.radbrpc.close() self.send_bus.close() # This is mainly to trigger the propagation of misc field values through read_from_mom # and then sending them to the RA to OTDB Service in the resource assigner. # Might need to be a separate service if we take on more mom-otdb-adapter function. def onObservationApproved(self, main_id, modification_time): self.createAndSendSpecifiedTask(main_id, 'approved') def onObservationPrescheduled(self, main_id, modification_time): self.createAndSendSpecifiedTask(main_id, 'prescheduled') def createAndSendSpecifiedTask(self, main_id, status): spec = Specification(self.otdbrpc, self.momrpc, self.radbrpc) spec.status = status spec.read_from_OTDB_with_predecessors(main_id, "otdb", {}) spec.read_from_mom() spec.update_start_end_times() # spec.insert_into_radb() is still done in resource_assigner for now. result_tree = spec.as_dict() if spec.status == status: logger.info("Sending result: %s" % result_tree) # Put result on bus msg = EventMessage(subject=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_SUBJECT, content=result_tree) with self.send_bus: self.send_bus.send(msg) logger.info("Result sent") else: logger.warning("Problem retrieving task %i" % main_id)
class RATaskSpecified(OTDBBusListener): def __init__(self, otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME, otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT, otdb_service_busname=DEFAULT_OTDB_SERVICE_BUSNAME, otdb_service_subject=DEFAULT_OTDB_SERVICENAME, notification_busname=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_BUSNAME, notification_subject=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_SUBJECT, broker=None, **kwargs): super(RATaskSpecified, self).__init__(busname=otdb_notification_busname, subject=otdb_notification_subject, **kwargs) self.otdbrpc = OTDBRPC(busname=otdb_service_busname, servicename=otdb_service_subject, broker=broker) ## , ForwardExceptions=True hardcoded in RPCWrapper right now self.send_bus = ToBus("%s/%s" % (notification_busname, notification_subject)) def start_listening(self, **kwargs): self.otdbrpc.open() self.send_bus.open() super(RATaskSpecified, self).start_listening(**kwargs) def stop_listening(self, **kwargs): super(RATaskSpecified, self).stop_listening(**kwargs) self.send_bus.close() self.otdbrpc.close() def get_predecessors(self, parset): """ Extract the list of predecessor obs IDs from the given parset. """ key = PARSET_PREFIX + "Observation.Scheduler.predecessors" stringlist = PyParameterValue(str(parset[key]), True).getStringVector() # Key contains values starting with 'S' = Scheduler, 'L'/'T' = OTDB, 'M' = MoM # 'S' we can probably ignore? Might be only internal in the Scheduler? result = [] for s in stringlist: try: # Made the source a string for readability, but it's not efficient if s.startswith('M'): result.append({'source': 'mom', 'id': int(s[1:])}) elif s.startswith('L') or s.startswith('T'): result.append({'source': 'otdb', 'id': int(s[1:])}) else: # 'S' logger.info("found a predecessor ID I can't handle: %s" % s) result.append({'source': 'other', 'id': int(s[1:])}) except ValueError: logger.warning("found a predecessor ID that I can't parse %s" % s) return result def get_specification_with_predecessors(self, id, id_source, state, found_parsets): logger.info("Processing ID %s from %s" % (id, id_source)) if id_source == "other": return None elif id_source == "mom": otdb_id = self.otdbrpc.taskGetIDs( mom_id=id )['otdb_id'] elif id_source == "otdb": otdb_id = id else: logger.warning("Error in understanding id %s", id) logger.info("Processing OTDB ID %s", otdb_id) result = {"otdb_id": otdb_id, "predecessors": []} if state: result["state"] = state # TODO should be status not state else: pass #otdbrpc.taskGetStatus not implemented and maybe not needed? if otdb_id in found_parsets: parset = found_parsets[otdb_id] else: parset = self.otdbrpc.taskGetSpecification( otdb_id=otdb_id )['specification'] found_parsets[otdb_id] = parset logger.info("parset [%s]: %s" % (otdb_id, parset)) result['specification'] = resourceIndicatorsFromParset(parset) key = PARSET_PREFIX + "Observation.processSubtype" result['task_type'], result['task_subtype'] = convertSchedulerProcessSubtype(parset.get(key, "")) logger.info("Processing predecessors") predecessor_ids = self.get_predecessors(parset) for id in predecessor_ids: predecessor_result = self.get_specification_with_predecessors(id['id'], id['source'], "", found_parsets) if predecessor_result: result["predecessors"].append(predecessor_result) return result def onObservationApproved(self, main_id, modificationTime): self.createAndSendSpecifiedTask(main_id, 'approved') def onObservationPrescheduled(self, main_id, modificationTime): self.createAndSendSpecifiedTask(main_id, 'prescheduled') def createAndSendSpecifiedTask(self, main_id, status): # Construct root node of tree resultTree = self.get_specification_with_predecessors(main_id, "otdb", status, {}) logger.info("Sending result: %s" % resultTree) # Put result on bus msg = EventMessage(content=resultTree) self.send_bus.send(msg) logger.info("Result sent")
class RADBPGListener(PostgresListener): def __init__(self, busname=DEFAULT_NOTIFICATION_BUSNAME, notification_prefix=DEFAULT_NOTIFICATION_PREFIX, dbcreds=None, broker=None): super(RADBPGListener, self).__init__(dbcreds.host, dbcreds.database, dbcreds.user, dbcreds.password) self.notification_prefix = notification_prefix self.event_bus = ToBus(busname, broker=broker) self.subscribe('task_update_with_task_view', self.onTaskUpdated) self.subscribe('task_insert_with_task_view', self.onTaskInserted) self.subscribe('task_delete', self.onTaskDeleted) # when the specification starttime and endtime are updated, then that effects the task as well # so subscribe to specification_update, and use task_view as view_for_row self.subscribe('specification_update_with_task_view', self.onSpecificationUpdated) self.subscribe('resource_claim_update_with_resource_claim_view', self.onResourceClaimUpdated) self.subscribe('resource_claim_insert_with_resource_claim_view', self.onResourceClaimInserted) self.subscribe('resource_claim_delete', self.onResourceClaimDeleted) self.subscribe('resource_availability_update', self.onResourceAvailabilityUpdated) self.subscribe('resource_capacity_update', self.onResourceCapacityUpdated) def onTaskUpdated(self, payload=None): self._sendNotification('TaskUpdated', payload, ['starttime', 'endtime']) def onTaskInserted(self, payload=None): self._sendNotification('TaskInserted', payload, ['starttime', 'endtime']) def onTaskDeleted(self, payload=None): self._sendNotification('TaskDeleted', payload) def onSpecificationUpdated(self, payload=None): # when the specification starttime and endtime are updated, then that effects the task as well # so send a TaskUpdated notification self._sendNotification('TaskUpdated', payload, ['starttime', 'endtime']) def onResourceClaimUpdated(self, payload=None): self._sendNotification('ResourceClaimUpdated', payload, ['starttime', 'endtime']) def onResourceClaimInserted(self, payload=None): self._sendNotification('ResourceClaimInserted', payload, ['starttime', 'endtime']) def onResourceClaimDeleted(self, payload=None): self._sendNotification('ResourceClaimDeleted', payload) def onResourceAvailabilityUpdated(self, payload=None): self._sendNotification('ResourceAvailabilityUpdated', payload) def onResourceCapacityUpdated(self, payload=None): self._sendNotification('ResourceCapacityUpdated', payload) def __enter__(self): super(RADBPGListener, self).__enter__() self.event_bus.open() return self def __exit__(self, exc_type, exc_val, exc_tb): super(RADBPGListener, self).__exit__(exc_type, exc_val, exc_tb) self.event_bus.close() def _formatTimestampsAsIso(self, fields, contentDict): '''convert all requested fields in the contentDict to proper isoformat datetime strings. In postgres we use timestamps without timezone. By convention we only enter utc values. But, if they are json encoded by postgress, they are not properly formatted with the in isoformat with a 'Z' at the end. So, parse the requested fields, and return them as datetime. ''' try: for state in ('old', 'new'): if state in contentDict: for field in fields: try: if contentDict[state] and field in contentDict[ state]: timestampStr = contentDict[state][field] formatStr = '%Y-%m-%dT%H:%M:%S' if 'T' in timestampStr else '%Y-%m-%d %H:%M:%S' if timestampStr.rfind('.') > -1: formatStr += '.%f' timestamp = datetime.strptime( timestampStr, formatStr) contentDict[state][field] = timestamp except Exception as e: logger.error( 'Could not convert field \'%s\' to datetime: %s' % (field, e)) return contentDict except Exception as e: logger.error('Could not parse payload: %s\n%s' % (contentDict, e)) def _sendNotification(self, subject, payload, timestampFields=None): try: content = json.loads(payload) if 'new' in content and content[ 'new'] and 'old' in content and content['old']: # check if new and old are equal. # however, new and old can be based on different views, # so, only check the values for the keys they have in common new_keys = set(content['new'].keys()) old_keys = set(content['old'].keys()) common_keys = new_keys & old_keys equal_valued_keys = [ k for k in common_keys if content['new'][k] == content['old'][k] ] if len(equal_valued_keys) == len(common_keys): logger.info( 'new and old values are equal, not sending notification. %s' % (content['new'])) return if timestampFields: content = self._formatTimestampsAsIso(timestampFields, content) except Exception as e: logger.error('Could not parse payload: %s\n%s' % (payload, e)) content = None try: msg = EventMessage(context=self.notification_prefix + subject, content=content) logger.info('Sending notification %s: %s' % (subject, str(content).replace('\n', ' '))) self.event_bus.send(msg) except Exception as e: logger.error(str(e))
class RATaskSpecified(OTDBBusListener): def __init__( self, otdb_notification_busname=DEFAULT_OTDB_NOTIFICATION_BUSNAME, otdb_notification_subject=DEFAULT_OTDB_NOTIFICATION_SUBJECT, otdb_service_busname=DEFAULT_OTDB_SERVICE_BUSNAME, otdb_service_subject=DEFAULT_OTDB_SERVICENAME, notification_busname=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_BUSNAME, notification_subject=DEFAULT_RA_TASK_SPECIFIED_NOTIFICATION_SUBJECT, broker=None, **kwargs): super(RATaskSpecified, self).__init__(busname=otdb_notification_busname, subject=otdb_notification_subject, **kwargs) self.otdbrpc = OTDBRPC( busname=otdb_service_busname, servicename=otdb_service_subject, broker=broker ) ## , ForwardExceptions=True hardcoded in RPCWrapper right now self.send_bus = ToBus("%s/%s" % (notification_busname, notification_subject)) def start_listening(self, **kwargs): self.otdbrpc.open() self.send_bus.open() super(RATaskSpecified, self).start_listening(**kwargs) def stop_listening(self, **kwargs): super(RATaskSpecified, self).stop_listening(**kwargs) self.send_bus.close() self.otdbrpc.close() def get_predecessors(self, parset): """ Extract the list of predecessor obs IDs from the given parset. """ key = PARSET_PREFIX + "Observation.Scheduler.predecessors" stringlist = PyParameterValue(str(parset[key]), True).getStringVector() # Key contains values starting with 'S' = Scheduler, 'L'/'T' = OTDB, 'M' = MoM # 'S' we can probably ignore? Might be only internal in the Scheduler? result = [] for s in stringlist: try: # Made the source a string for readability, but it's not efficient if s.startswith('M'): result.append({'source': 'mom', 'id': int(s[1:])}) elif s.startswith('L') or s.startswith('T'): result.append({'source': 'otdb', 'id': int(s[1:])}) else: # 'S' logger.info("found a predecessor ID I can't handle: %s" % s) result.append({'source': 'other', 'id': int(s[1:])}) except ValueError: logger.warning("found a predecessor ID that I can't parse %s" % s) return result def get_specification_with_predecessors(self, id, id_source, state, found_parsets): logger.info("Processing ID %s from %s" % (id, id_source)) if id_source == "other": return None elif id_source == "mom": otdb_id = self.otdbrpc.taskGetIDs(mom_id=id)['otdb_id'] elif id_source == "otdb": otdb_id = id else: logger.warning("Error in understanding id %s", id) logger.info("Processing OTDB ID %s", otdb_id) result = {"otdb_id": otdb_id, "predecessors": []} if state: result["state"] = state # TODO should be status not state else: pass #otdbrpc.taskGetStatus not implemented and maybe not needed? if otdb_id in found_parsets: parset = found_parsets[otdb_id] else: parset = self.otdbrpc.taskGetSpecification( otdb_id=otdb_id)['specification'] found_parsets[otdb_id] = parset logger.info("parset [%s]: %s" % (otdb_id, parset)) result['specification'] = resourceIndicatorsFromParset(parset) key = PARSET_PREFIX + "Observation.processSubtype" result['task_type'], result[ 'task_subtype'] = convertSchedulerProcessSubtype( parset.get(key, "")) logger.info("Processing predecessors") predecessor_ids = self.get_predecessors(parset) for id in predecessor_ids: predecessor_result = self.get_specification_with_predecessors( id['id'], id['source'], "", found_parsets) if predecessor_result: result["predecessors"].append(predecessor_result) return result def onObservationApproved(self, main_id, modificationTime): self.createAndSendSpecifiedTask(main_id, 'approved') def onObservationPrescheduled(self, main_id, modificationTime): self.createAndSendSpecifiedTask(main_id, 'prescheduled') def createAndSendSpecifiedTask(self, main_id, status): # Construct root node of tree resultTree = self.get_specification_with_predecessors( main_id, "otdb", status, {}) logger.info("Sending result: %s" % resultTree) # Put result on bus msg = EventMessage(content=resultTree) self.send_bus.send(msg) logger.info("Result sent")
class RATaskSpecified(OTDBBusListener): def __init__(self, servicename, otdb_busname=None, my_busname=None, **kwargs): super(RATaskSpecified, self).__init__(busname=otdb_busname, subject="TaskStatus", **kwargs) self.parset_rpc = RPC(service="TaskSpecification", busname=otdb_busname) self.send_bus = ToBus("%s/%s" % (my_busname, servicename)) def start_listening(self, **kwargs): self.parset_rpc.open() self.send_bus.open() super(RATaskSpecified, self).start_listening(**kwargs) def stop_listening(self, **kwargs): super(RATaskSpecified, self).stop_listening(**kwargs) self.send_bus.close() self.parset_rpc.close() def onObservationPrescheduled(self, treeId, modificationTime): logger.info("Processing obs ID %s", treeId) # Request the parset main_obsID = treeId main_parset,_ = self.parset_rpc( OtdbID=main_obsID ) # Construct a dict of all the parsets we retrieved parsets = {} parsets[main_obsID] = main_parset logger.info("Processing predecessors") # Collect the initial set of predecessors request_obsIDs = set(predecessors(main_parset)) logger.info("Processing %s", request_obsIDs) # Iterate recursively over all known predecessor obsIDs, and request their parsets while request_obsIDs: obsID = request_obsIDs.pop() if obsID in parsets: # Predecessor lists can overlap -- we already have this one continue logger.info("Fetching predecessor %s", obsID) # Request predecessor parset parsets[obsID],_ = self.parset_rpc( OtdbID=obsID ) # Add the list of predecessors request_obsIDs = request_obsIDs.union(predecessors(parsets[obsID])) # Convert parsets to resource indicators logger.info("Extracting resource indicators") resourceIndicators = dict([(str(obsID), resourceIndicatorsFromParset(parset)) for (obsID,parset) in parsets.iteritems()]) # Construct and send result message logger.info("Sending result") result = { "sasID": main_obsID, "state": "prescheduled", "time_of_change": modificationTime, "resource_indicators": resourceIndicators, } # Put result on bus msg = EventMessage(content=result) self.send_bus.send(msg) logger.info("Result sent")
class RATaskSpecified(OTDBBusListener): def __init__(self, servicename, otdb_busname=None, my_busname=None, **kwargs): super(RATaskSpecified, self).__init__(busname=otdb_busname, subject="TaskStatus", **kwargs) self.parset_rpc = RPC(service="TaskSpecification", busname=otdb_busname) self.send_bus = ToBus("%s/%s" % (my_busname, servicename)) def start_listening(self, **kwargs): self.parset_rpc.open() self.send_bus.open() super(RATaskSpecified, self).start_listening(**kwargs) def stop_listening(self, **kwargs): super(RATaskSpecified, self).stop_listening(**kwargs) self.send_bus.close() self.parset_rpc.close() def onObservationPrescheduled(self, treeId, modificationTime): logger.info("Processing obs ID %s", treeId) # Request the parset main_obsID = treeId main_parset, _ = self.parset_rpc(OtdbID=main_obsID) # Construct a dict of all the parsets we retrieved parsets = {} parsets[main_obsID] = main_parset logger.info("Processing predecessors") # Collect the initial set of predecessors request_obsIDs = set(predecessors(main_parset)) logger.info("Processing %s", request_obsIDs) # Iterate recursively over all known predecessor obsIDs, and request their parsets while request_obsIDs: obsID = request_obsIDs.pop() if obsID in parsets: # Predecessor lists can overlap -- we already have this one continue logger.info("Fetching predecessor %s", obsID) # Request predecessor parset parsets[obsID], _ = self.parset_rpc(OtdbID=obsID) # Add the list of predecessors request_obsIDs = request_obsIDs.union(predecessors(parsets[obsID])) # Convert parsets to resource indicators logger.info("Extracting resource indicators") resourceIndicators = dict([(str(obsID), resourceIndicatorsFromParset(parset)) for (obsID, parset) in parsets.iteritems()]) # Construct and send result message logger.info("Sending result") result = { "sasID": main_obsID, "state": "prescheduled", "time_of_change": modificationTime, "resource_indicators": resourceIndicators, } # Put result on bus msg = EventMessage(content=result) self.send_bus.send(msg) logger.info("Result sent")
class CacheManager: def __init__(self, cache_path='.du_cache.py', mountpoint=CEP4_DATA_MOUNTPOINT, exchange=DEFAULT_BUSNAME, broker=DEFAULT_BROKER): self._cache_path = cache_path self.otdb_listener = OTDBBusListener( _CacheManagerOTDBEventMessageHandler, handler_kwargs={'cache_manager': self}, exchange=exchange, broker=broker, num_threads=1) self.dm_listener = DataManagementBusListener( _CacheManagerDataManagementEventMessageHandler, handler_kwargs={'cache_manager': self}, exchange=exchange, broker=broker, num_threads=1) self.event_bus = ToBus(exchange=exchange, broker=broker) self._updateCacheThread = None self._running = False self._cacheLock = RLock() self._cache = {'path_du_results': {}, 'otdb_id2path': {}} self._last_cache_write_timestamp = datetime.datetime(1970, 1, 1) self._readCacheFromDisk() # dict to hold threading Events per path to prevent expensive multiple du calls for each path self._du_threading_events = {} self._du_threading_events_lock = RLock() self.disk_usage = DiskUsage(mountpoint=mountpoint, exchange=exchange, broker=broker) def _sendDiskUsageChangedNotification(self, path, disk_usage, otdb_id=None): try: msg = EventMessage(subject='%s.DiskUsageChanged' % DEFAULT_DM_NOTIFICATION_PREFIX, content={ 'path': path, 'disk_usage': disk_usage, 'disk_usage_readable': humanreadablesize(disk_usage), 'otdb_id': otdb_id }) logger.info('Sending notification with subject %s to %s: %s', msg.subject, self.event_bus.exchange, msg.content) self.event_bus.send(msg) except Exception as e: logger.error(str(e)) def _readCacheFromDisk(self): # maybe this cache on disk is slow, if so, revert to proper db solution try: if os.path.exists(self._cache_path): with open(self._cache_path, 'r') as file: cache_from_disk = eval(file.read().strip()) #slow! with self._cacheLock: self._cache = cache_from_disk if not isinstance(self._cache, dict): self._cache = { 'path_du_results': {}, 'otdb_id2path': {} } if 'path_du_results' not in self._cache: self._cache['path_du_results'] = {} if 'otdb_id2path' not in self._cache: self._cache['otdb_id2path'] = {} except Exception as e: logger.error("Error while reading in du cache: %s", e) with self._cacheLock: self._cache = {'path_du_results': {}, 'otdb_id2path': {}} def _writeCacheToDisk(self): try: # only persist (a subset of) the cache to disk every once in a while. if datetime.datetime.utcnow( ) - self._last_cache_write_timestamp > datetime.timedelta( minutes=5): cache_str = '' with self._cacheLock: # Take a subset of the entire cache # only the path_du_results for paths at project level (like /data/projects, /data/projects/LC9_001) # Do not store path_du_results for deeper levels on disk, because that makes the disk read/write too slow, # and the deeper levels can be obtained via rhb-du calls quite fast anyway. # Furthermore, once a deeper level du results is stored in the memory cache, then it is also available for fast lookup. # We just don't store these deep levels on disk. sub_cache = { path: du_result for path, du_result in list( self._cache['path_du_results'].items()) if self.getDepthToProjectsDir(path) <= 1 and du_result.get('found') } cache_str = str(sub_cache) tmp_path = '/tmp/tmp_storagequery_cache.py' with open(tmp_path, 'w') as file: file.write(cache_str) dir_path = os.path.dirname(self._cache_path) if dir_path: os.makedirs(dir_path, exist_ok=True) shutil.move(tmp_path, self._cache_path) self._last_cache_write_timestamp = datetime.datetime.utcnow() except Exception as e: logger.error("Error while writing du cache: %s", e) def _updateCache(self, du_result, send_notification=True): if not 'path' in du_result: return path = du_result['path'] otdb_id = du_result.get('otdb_id') with self._cacheLock: path_cache = self._cache['path_du_results'] otdb_id2path_cache = self._cache['otdb_id2path'] if otdb_id is None: # try to look up the otdb in the path cache if path in path_cache: otdb_id = du_result.get('otdb_id') # if still None, try to get the id from the path if otdb_id is None: otdb_id = getOTDBIdFromPath(path) if not path in path_cache or path_cache[path][ 'disk_usage'] != du_result['disk_usage']: # update the cache entry, even when no du result found, # cause that will save disk queries next time. logger.info('updating cache entry: %s', du_result) path_cache[path] = du_result if otdb_id != None: otdb_id2path_cache[otdb_id] = path if not du_result['found']: # even when the du for the path is not found, # keep a copy in the cache for fast lookup by clients # Make sure the size is 0 du_result['disk_usage'] = 0 du_result['disk_usage_readable'] = humanreadablesize(0) path_cache[path]['cache_timestamp'] = datetime.datetime.utcnow() path_cache[path]['needs_update'] = False self._writeCacheToDisk() if send_notification: self._sendDiskUsageChangedNotification(path, du_result['disk_usage'], otdb_id) def _invalidateCacheEntryForPath(self, path): with self._cacheLock: path_cache = self._cache['path_du_results'] if path in path_cache: path_cache[path]['needs_update'] = True def getOtdbIdsFoundOnDisk(self): with self._cacheLock: otdb_id2path_cache = self._cache['otdb_id2path'] return sorted(list(otdb_id2path_cache.keys())) def getDiskUsagesForAllOtdbIds(self, force_update=False): otdb_ids = self.getOtdbIdsFoundOnDisk() result = {} for otdb_id in otdb_ids: result[otdb_id] = self.getDiskUsageForOTDBId( otdb_id, force_update=force_update) return result def getDepthToProjectsDir(self, path): return len( path.replace(self.disk_usage.path_resolver.projects_path, '').strip('/').split('/')) def _scanProjectsTree(self): try: def addSubDirectoriesToCache(directory): if not self._running: return depth = self.getDepthToProjectsDir(directory) MAX_SCAN_DEPTH = 2 #depth=0 : projects #depth=1 : projects/<project> #depth=2 : projects/<project>/<obs> #depth=3 : projects/<project>/<obs>/<sub_dir> if depth > MAX_SCAN_DEPTH: return if depth < MAX_SCAN_DEPTH: logger.info('tree scan: scanning \'%s\'', directory) sd_result = self.disk_usage.path_resolver.getSubDirectories( directory) if sd_result['found']: subdir_paths = [ os.path.join(directory, sd) for sd in sd_result['sub_directories'] ] for subdir_path in subdir_paths: # recurse addSubDirectoriesToCache(subdir_path) with self._cacheLock: path_cache = self._cache['path_du_results'] add_empty_du_result_to_cache = not directory in path_cache if add_empty_du_result_to_cache: logger.info( 'tree scan: adding \'%s\' with empty disk_usage to cache which will be du\'ed later', directory) empty_du_result = { 'found': True, 'disk_usage': None, 'path': directory, 'name': directory.split('/')[-1] } self._updateCache(empty_du_result, send_notification=False) with self._cacheLock: path_cache = self._cache['path_du_results'] if directory in path_cache: # mark cache entry for directory to be updated path_cache[directory]['needs_update'] = True addSubDirectoriesToCache( self.disk_usage.path_resolver.projects_path) logger.info('tree scan complete') except Exception as e: logger.exception(str(e)) def _updateOldEntriesInCache(self): logger.info('starting updating old cache entries') while self._running: try: now = datetime.datetime.utcnow() with self._cacheLock: path_cache = self._cache['path_du_results'] old_entries = [ cache_entry for cache_entry in list(path_cache.values()) if now - cache_entry['cache_timestamp'] > MAX_CACHE_ENTRY_AGE ] needs_update_entries = [ cache_entry for cache_entry in list(path_cache.values()) if cache_entry.get('needs_update', False) ] updateable_entries = old_entries + needs_update_entries logger.info( '%s old cache entries need to be updated, #age:%s #needs_update:%s', len(updateable_entries), len(old_entries), len(needs_update_entries)) if updateable_entries: # sort them oldest to newest, 'needs_update' paths first def compareFunc(entry1, entry2): if entry1.get('needs_update' ) and not entry2.get('needs_update'): return -1 if not entry1.get('needs_update') and entry2.get( 'needs_update'): return 1 #depth1 = self.getDepthToProjectsDir(entry1['path']) #depth2 = self.getDepthToProjectsDir(entry2['path']) #if depth1 != depth2: ## lower level dirs are sorted in front of higher level dirs #return depth1 - depth2 if entry1['cache_timestamp'] < entry2[ 'cache_timestamp']: return -1 if entry1['cache_timestamp'] > entry2[ 'cache_timestamp']: return 1 return 0 updateable_entries = sorted(updateable_entries, key=cmp_to_key(compareFunc)) cacheUpdateStart = datetime.datetime.utcnow() for i, cache_entry in enumerate(updateable_entries): try: # it might be that the cache_entry was already updated via another way # so only update it if still to old or needs_update now = datetime.datetime.utcnow() if now - cache_entry[ 'cache_timestamp'] > MAX_CACHE_ENTRY_AGE or cache_entry.get( 'needs_update', False): path = cache_entry.get('path') if path: logger.info( '_updateOldEntriesInCache: examining entry %s/%s. timestamp:%s age:%s needs_update:%s path: \'%s\'', i, len(updateable_entries), cache_entry['cache_timestamp'], format_timedelta( now - cache_entry['cache_timestamp']), cache_entry.get('needs_update', False), path) #du a full update from disk, which might be (really) slow. result = du_getDiskUsageForPath(path) logger.debug( 'trying to update old entry in cache: %s', result) self._updateCache(result) except Exception as e: logger.error(str(e)) if not self._running: logger.info('exiting _updateCacheThread') return if datetime.datetime.utcnow( ) - cacheUpdateStart > datetime.timedelta(minutes=10): # break out of cache update loop if full update takes more than 1min # next loop we'll start with the oldest cache entries again logger.info( 'skipping remaining %s old cache entries updates, they will be updated next time', len(updateable_entries) - i) break #update the CEP4 capacities in the RADB once in a while... self._updateCEP4CapacitiesInRADB() #sleep for a while, (or stop if requested) for i in range(60): sleep(1) if not self._running: logger.info('exiting _updateCacheThread') return except Exception as e: logger.exception(str(e)) def _updateCEP4CapacitiesInRADB(self): try: df_result = self.disk_usage.getDiskFreeSpace() if df_result['found']: #get the total used space, and update the resource availability in the radb radbrpc = self.disk_usage.path_resolver.radbrpc storage_resources = radbrpc.getResources( resource_types='storage', include_availability=True) cep4_storage_resource = next(x for x in storage_resources if 'CEP4' in x['name']) total_capacity = df_result.get('disk_size') used_capacity = df_result.get('disk_usage') available_capacity = df_result.get('disk_free') logger.info( 'updating capacities for resource \'%s\' (id=%s) in the RADB: total=%s, used=%s, available=%s', cep4_storage_resource['name'], cep4_storage_resource['id'], humanreadablesize(total_capacity), humanreadablesize(used_capacity), humanreadablesize(available_capacity)) radbrpc.updateResourceAvailability( cep4_storage_resource['id'], available_capacity=available_capacity, total_capacity=total_capacity) except Exception as e: logger.error('_updateCEP4CapacitiesInRADB: %s', e) def open(self): logger.info("opening storagequeryservice cache...") self._running = True self.disk_usage.open() self.event_bus.open() self._scanProjectsTree() self._updateCacheThread = Thread(target=self._updateOldEntriesInCache) self._updateCacheThread.daemon = True self._updateCacheThread.start() self.otdb_listener.start_listening() self.dm_listener.start_listening() logger.info("opened storagequeryservice cache") def close(self): logger.info("closing storagequeryservice cache...") self._running = False self.otdb_listener.stop_listening() self.dm_listener.stop_listening() self._updateCacheThread.join() self.event_bus.close() self.disk_usage.close() logger.info("closed storagequeryservice cache") def __enter__(self): self.open() return self def __exit__(self, exc_type, exc_val, exc_tb): self.close() def onObservationFinished(self, otdb_id, modificationTime): self._onDiskActivityForOTDBId(otdb_id) def onObservationAborted(self, otdb_id, modificationTime): self._onDiskActivityForOTDBId(otdb_id) def onTaskDeleted(self, otdb_id, deleted, paths, message=''): self._onDiskActivityForOTDBId(otdb_id) with self._cacheLock: if deleted and otdb_id != None and otdb_id in self._cache[ 'otdb_id2path']: del self._cache['otdb_id2path'][otdb_id] def _onDiskActivityForOTDBId(self, otdb_id): result = self.disk_usage.getDiskUsageForOTDBId(otdb_id) self._updateCache(result) task_path = result.get('path') projects_path = self.disk_usage.path_resolver.projects_path # update all paths up the tree up to the projects_path # update the resource availability in the radb as well path = task_path while path: parent_path = '/'.join(path.split('/')[:-1]) if projects_path.startswith( parent_path) and len(parent_path) < len(projects_path): break logger.info( 'invalidating cache entry for %s because disk usage for task %s in %s changed', parent_path, otdb_id, task_path) self._invalidateCacheEntryForPath(parent_path) path = parent_path def getDiskUsageForOTDBId(self, otdb_id, include_scratch_paths=True, force_update=False): return self.getDiskUsageForTask( otdb_id=otdb_id, include_scratch_paths=include_scratch_paths, force_update=force_update) def getDiskUsageForMoMId(self, mom_id, include_scratch_paths=True, force_update=False): return self.getDiskUsageForTask( mom_id=mom_id, include_scratch_paths=include_scratch_paths, force_update=force_update) def getDiskUsageForRADBId(self, radb_id, include_scratch_paths=True, force_update=False): return self.getDiskUsageForTask( radb_id=radb_id, include_scratch_paths=include_scratch_paths, force_update=force_update) def getDiskUsageForTask(self, radb_id=None, mom_id=None, otdb_id=None, include_scratch_paths=True, force_update=False): logger.info( "cache.getDiskUsageForTask(radb_id=%s, mom_id=%s, otdb_id=%s, include_scratch_paths=%s, force_update=%s)", radb_id, mom_id, otdb_id, include_scratch_paths, force_update) if otdb_id != None and not include_scratch_paths: with self._cacheLock: path = self._cache['otdb_id2path'].get(otdb_id) if path: logger.info('Using path from cache for otdb_id %s %s', otdb_id, path) return self.getDiskUsageForPath(path, force_update=force_update) logger.info( "cache.getDiskUsageForTask could not find path in cache, determining path..." ) path_result = self.disk_usage.path_resolver.getPathForTask( radb_id=radb_id, mom_id=mom_id, otdb_id=otdb_id, include_scratch_paths=include_scratch_paths) if path_result['found']: task_path = path_result['path'] scratch_paths = path_result.get('scratch_paths', []) # get all du's in parallel over all paths paths = [task_path] + scratch_paths paths_du_result = self.getDiskUsageForPaths( paths, force_update=force_update) # split into project and subdir path_du_result = paths_du_result.pop(task_path) scratch_du_result = paths_du_result task_du_result = dict(path_du_result) # yield id's for if available, or None for id in ['radb_id', 'otdb_id', 'mom_id']: task_du_result[id] = path_result.get(id) if scratch_du_result: task_du_result['scratch_paths'] = scratch_du_result return task_du_result # still no path(s) found for otdb_id, now try from cache and ignore possible scratch paths if otdb_id != None: with self._cacheLock: path = self._cache['otdb_id2path'].get(otdb_id) if path: logger.info( 'Using path from cache for otdb_id %s %s (ignoring possible scratch/share paths)', otdb_id, path) return self.getDiskUsageForPath(path, force_update=force_update) return {'found': False, 'path': path_result['path']} def getDiskUsageForTasks(self, radb_ids=None, mom_ids=None, otdb_ids=None, include_scratch_paths=True, force_update=False): logger.info( "cache.getDiskUsageForTasks(radb_ids=%s, mom_ids=%s, otdb_ids=%s)" % (radb_ids, mom_ids, otdb_ids)) tasks_result = {'radb_ids': {}, 'mom_ids': {}, 'otdb_ids': {}} if radb_ids is None: radb_ids = [] if mom_ids is None: mom_ids = [] if otdb_ids is None: otdb_ids = [] with futures.ThreadPoolExecutor( max_workers=os.cpu_count()) as executor: # helper function to expand parallel p_kwarg dict into kwargs def parallel_getDiskUsageForTask(p_kwarg): return self.getDiskUsageForTask(**p_kwarg) parallel_kwargs = [{ 'radb_id': radb_id, 'include_scratch_paths': include_scratch_paths, 'force_update': force_update } for radb_id in radb_ids] parallel_kwargs += [{ 'mom_id': mom_id, 'include_scratch_paths': include_scratch_paths, 'force_update': force_update } for mom_id in mom_ids] parallel_kwargs += [{ 'otdb_id': otdb_id, 'include_scratch_paths': include_scratch_paths, 'force_update': force_update } for otdb_id in otdb_ids] results = list( executor.map(parallel_getDiskUsageForTask, parallel_kwargs)) # collect results in a dict grouped by id_type for result in results: if result.get('radb_id') in radb_ids: tasks_result['radb_ids'][result['radb_id']] = result if result.get('mom_id') in mom_ids: tasks_result['mom_ids'][result['mom_id']] = result if result.get('otdb_id') in otdb_ids: tasks_result['otdb_ids'][result['otdb_id']] = result logger.info( "cache.getDiskUsageForTasks(radb_ids=%s, mom_ids=%s, otdb_ids=%s) returning: %s" % (radb_ids, mom_ids, otdb_ids, tasks_result)) return tasks_result def getDiskUsageForPaths(self, paths, force_update=False): with futures.ThreadPoolExecutor( max_workers=os.cpu_count()) as executor: # helper function to expand parallel p_arg tuple into list of args def parallel_getDiskUsageForPath(p_arg): return self.getDiskUsageForPath(*p_arg) parallel_args = [(path, force_update) for path in paths] results = list( executor.map(parallel_getDiskUsageForPath, parallel_args)) return {result['path']: result for result in results} def getDiskUsageForPath(self, path, force_update=False): logger.info("cache.getDiskUsageForPath('%s', force_update=%s)", path, force_update) needs_cache_update = False if not force_update: with self._cacheLock: needs_cache_update |= path not in self._cache[ 'path_du_results'] if needs_cache_update or force_update: logger.info("cache update needed for %s", path) # check if some other thread is already doing a du call for this path... with self._du_threading_events_lock: path_threading_event = self._du_threading_events.get(path) need_to_do_du_call = path_threading_event is None if need_to_do_du_call: # no other thread is currently du'ing/updating this path # so create a threading Event and store it in the dict, # so other threads can wait for this event. logger.info("updating the cache for %s current_thread=%s", path, current_thread().name) path_threading_event = Event() self._du_threading_events[path] = path_threading_event if need_to_do_du_call: # no other thread is currently du'ing/updating this path # so we need to do it here. result = du_getDiskUsageForPath(path) self._updateCache(result) # signal threads waiting for this same path du call # and do bookkeeping with self._du_threading_events_lock: logger.info( "signaling other threads that the cache was updated for %s current_thread=%s", path, current_thread().name) path_threading_event.set() del self._du_threading_events[path] else: logger.info( "waiting for du call on other thread that will update the cache for %s current_thread=%s", path, current_thread().name) path_threading_event.wait() logger.info( "another thread just updated the cache for %s current_thread=%s", path, current_thread().name) with self._cacheLock: if path in self._cache['path_du_results']: result = self._cache['path_du_results'][path] else: result = { 'found': False, 'path': path, 'message': 'unknown error' } if not self.disk_usage.path_resolver.pathExists(path): result['message'] = 'No such path: %s' % path result['disk_usage_readable'] = humanreadablesize( result.get('disk_usage', 0)) logger.info('cache.getDiskUsageForPath(\'%s\') result: %s', path, result) return result def getDiskUsageForTaskAndSubDirectories(self, radb_id=None, mom_id=None, otdb_id=None, force_update=False): logger.info( "cache.getDiskUsageForTaskAndSubDirectories(radb_id=%s, mom_id=%s, otdb_id=%s)" % (radb_id, mom_id, otdb_id)) task_du_result = self.getDiskUsageForTask(radb_id=radb_id, mom_id=mom_id, otdb_id=otdb_id, force_update=force_update) if task_du_result['found']: task_sd_result = self.disk_usage.path_resolver.getSubDirectories( task_du_result['path']) if task_sd_result['found']: subdir_paths = [ os.path.join(task_du_result['path'], sd) for sd in task_sd_result['sub_directories'] ] subdirs_du_result = self.getDiskUsageForPaths( subdir_paths, force_update=force_update) result = { 'found': True, 'task_directory': task_du_result, 'sub_directories': subdirs_du_result } logger.info( "result for cache.getDiskUsageForTaskAndSubDirectories(radb_id=%s, mom_id=%s, otdb_id=%s): %s", radb_id, mom_id, otdb_id, result) return result logger.warning( "result for cache.getDiskUsageForTaskAndSubDirectories(radb_id=%s, mom_id=%s, otdb_id=%s): %s", radb_id, mom_id, otdb_id, task_du_result) return task_du_result def getDiskUsageForProjectDirAndSubDirectories(self, radb_id=None, mom_id=None, otdb_id=None, project_name=None, force_update=False): logger.info( "cache.getDiskUsageForProjectDirAndSubDirectories(radb_id=%s, mom_id=%s, otdb_id=%s)" % (radb_id, mom_id, otdb_id)) path_result = self.disk_usage.path_resolver.getProjectDirAndSubDirectories( radb_id=radb_id, mom_id=mom_id, otdb_id=otdb_id, project_name=project_name) if path_result['found']: projectdir_path = path_result['path'] subdir_paths = [ os.path.join(path_result['path'], sd) for sd in path_result['sub_directories'] ] # get all du's in parallel over all paths paths = [projectdir_path] + subdir_paths paths_du_result = self.getDiskUsageForPaths( paths, force_update=force_update) # split into project and subdir projectdir_du_result = paths_du_result.pop(projectdir_path) subdirs_du_result = paths_du_result # create total result dict result = { 'found': True, 'projectdir': projectdir_du_result, 'sub_directories': subdirs_du_result } logger.info( 'cache.getDiskUsageForProjectDirAndSubDirectories result: %s' % result) return result return path_result def getDiskUsageForProjectsDirAndSubDirectories(self, force_update=False): logger.info("cache.getDiskUsageForProjectsDirAndSubDirectories") projects_path = self.disk_usage.path_resolver.projects_path project_subdirs_result = self.disk_usage.path_resolver.getSubDirectories( projects_path) subdir_paths = [ os.path.join(projects_path, sd) for sd in project_subdirs_result['sub_directories'] ] if project_subdirs_result['found'] else [] # get all du's in parallel over all paths paths = [projects_path] + subdir_paths paths_du_result = self.getDiskUsageForPaths(paths, force_update=force_update) # split into project and subdir projectsdir_du_result = paths_du_result.pop(projects_path) subdirs_du_result = paths_du_result # create total result dict result = { 'found': True, 'projectdir': projectsdir_du_result, 'sub_directories': subdirs_du_result } logger.info( 'cache.getDiskUsageForProjectsDirAndSubDirectories result: %s' % result) return result
class RADBPGListener(PostgresListener): def __init__(self, exchange=DEFAULT_BUSNAME, dbcreds=None, broker=DEFAULT_BROKER): super(RADBPGListener, self).__init__(dbcreds=dbcreds) self.event_bus = ToBus(exchange=exchange, broker=broker) self.radb = RADatabase(dbcreds=dbcreds) self.subscribe('task_update', self.onTaskUpdated) self.subscribe('task_insert', self.onTaskInserted) self.subscribe('task_delete', self.onTaskDeleted) self.subscribe('task_predecessor_insert_column_task_id', self.onTaskPredecessorChanged) self.subscribe('task_predecessor_update_column_task_id', self.onTaskPredecessorChanged) self.subscribe('task_predecessor_delete_column_task_id', self.onTaskPredecessorChanged) self.subscribe('task_predecessor_insert_column_predecessor_id', self.onTaskSuccessorChanged) self.subscribe('task_predecessor_update_column_predecessor_id', self.onTaskSuccessorChanged) self.subscribe('task_predecessor_delete_column_predecessor_id', self.onTaskSuccessorChanged) # when the specification starttime and endtime are updated, then that effects the task as well self.subscribe('specification_update', self.onSpecificationUpdated) self.subscribe('resource_claim_update', self.onResourceClaimUpdated) self.subscribe('resource_claim_insert', self.onResourceClaimInserted) self.subscribe('resource_claim_delete', self.onResourceClaimDeleted) self.subscribe('resource_availability_update', self.onResourceAvailabilityUpdated) self.subscribe('resource_capacity_update', self.onResourceCapacityUpdated) def onTaskUpdated(self, payload=None): # Send notification for the given updated task task_id = payload task = self.radb.getTask(task_id) self._sendNotification('TaskUpdated', task) # The "blocked_by_ids" property of the given task's successors might have been updated due to the given task # status being updated. Therefore also send a notification for these successors - lazily ignoring that they # might not have changed. suc_sched_tasks = self.radb.getTasks(task_ids=task['successor_ids'], task_status='scheduled') for suc_sched_task in suc_sched_tasks: self._sendNotification('TaskUpdated', suc_sched_task) def onTaskInserted(self, payload=None): self._sendNotification('TaskInserted', self.radb.getTask(payload)) def onTaskDeleted(self, payload=None): self._sendNotification('TaskDeleted', payload) def onTaskPredecessorChanged(self, task_id): logger.info('onTaskPredecessorChanged(task_id=%s)', task_id) self._sendNotification('TaskUpdated', self.radb.getTask(task_id)) def onTaskSuccessorChanged(self, task_id): logger.info('onTaskSuccessorChanged(task_id=%s)', task_id) self._sendNotification('TaskUpdated', self.radb.getTask(task_id)) def onSpecificationUpdated(self, payload=None): # when the specification starttime and endtime are updated, then that effects the task as well self._sendNotification('TaskUpdated', self.radb.getTask(specification_id=payload)) def onResourceClaimUpdated(self, payload=None): self._sendNotification('ResourceClaimUpdated', self.radb.getResourceClaim(payload)) def onResourceClaimInserted(self, payload=None): self._sendNotification('ResourceClaimInserted', self.radb.getResourceClaim(payload)) def onResourceClaimDeleted(self, payload=None): self._sendNotification('ResourceClaimDeleted', payload) def onResourceAvailabilityUpdated(self, payload=None): r = self.radb.getResources(resource_ids=[payload], include_availability=True)[0] r = {k: r[k] for k in ['id', 'active']} self._sendNotification('ResourceAvailabilityUpdated', r) def onResourceCapacityUpdated(self, payload=None): r = self.radb.getResources(resource_ids=[payload], include_availability=True)[0] r = { k: r[k] for k in ['id', 'total_capacity', 'available_capacity', 'used_capacity'] } self._sendNotification('ResourceCapacityUpdated', r) def __enter__(self): super(RADBPGListener, self).__enter__() self.radb.connect() self.event_bus.open() return self def __exit__(self, exc_type, exc_val, exc_tb): super(RADBPGListener, self).__exit__(exc_type, exc_val, exc_tb) self.radb.disconnect() self.event_bus.close() def _formatTimestampsAsIso(self, fields, contentDict): '''convert all requested fields in the contentDict to proper isoformat datetime strings. In postgres we use timestamps without timezone. By convention we only enter utc values. But, if they are json encoded by postgress, they are not properly formatted with the in isoformat with a 'Z' at the end. So, parse the requested fields, and return them as datetime. ''' try: for field in fields: try: if field in contentDict: timestampStr = contentDict[field] formatStr = '%Y-%m-%dT%H:%M:%S' if 'T' in timestampStr else '%Y-%m-%d %H:%M:%S' if timestampStr.rfind('.') > -1: formatStr += '.%f' timestamp = datetime.strptime(timestampStr, formatStr) contentDict[field] = timestamp except Exception as e: logger.error( 'Could not convert field \'%s\' to datetime: %s' % (field, e)) return contentDict except Exception as e: logger.error( 'Error while convering timestamp fields \'%s\'in %s\n%s' % (fields, contentDict, e)) def _sendNotification(self, subject, contentDict): try: if subject and contentDict: msg = EventMessage(subject="%s.%s" % (DEFAULT_RADB_NOTIFICATION_PREFIX, subject), content=contentDict) logger.info('Sending notification %s to %s: %s', subject, self.event_bus.exchange, str(contentDict).replace('\n', ' ')) self.event_bus.send(msg) except Exception as e: logger.error(str(e))