class TransformEventPublisher(TransformEventProcess):

    def on_start(self):
        event_type = self.CFG.get_safe('process.event_type', '')
        self.publisher = EventPublisher(event_type=event_type)

    def publish_event(self, *args, **kwargs):
        raise NotImplementedError('Method publish_event not implemented')

    def on_quit(self):
        self.publisher.close()
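# --- Usage sketch (not part of the module above) ---
# TransformEventPublisher leaves publish_event() abstract, so a concrete subclass must
# supply it. This is a minimal, hedged example assuming the pyon EventPublisher API used
# above (publish_event taking origin plus event fields as keyword arguments); the class
# name and the pass-through behavior are illustrative, not taken from this codebase.


class PassthroughEventPublisher(TransformEventPublisher):
    """Publishes whatever it is handed as an event of the configured process.event_type."""

    def publish_event(self, *args, **kwargs):
        # self.publisher was created in on_start() from the process.event_type config value
        origin = kwargs.pop('origin', self.id)
        self.publisher.publish_event(origin=origin, **kwargs)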
class SchedulerService(BaseSchedulerService):

    def __init__(self, *args, **kwargs):
        BaseSchedulerService.__init__(self, *args, **kwargs)
        self.schedule_entries = {}
        self._no_reschedule = False

    def on_start(self):
        if CFG.get_safe("process.start_mode") == "RESTART":
            self.on_system_restart()
        self.pub = EventPublisher(event_type="ResourceEvent")

    def on_quit(self):
        self.pub.close()
        # throw the killswitch on future reschedules
        self._no_reschedule = True
        # terminate any pending spawns
        self._stop_pending_timers()

    def __notify(self, task, id, index):
        log.debug("SchedulerService:__notify: - " + task.event_origin + " - Time: " + str(self.__now()) +
                  " - ID: " + id + " - Index: " + str(index))
        self.pub.publish_event(origin=task.event_origin)

    def __now(self):
        return datetime.utcnow()

    def __now_posix(self, now):
        return time.mktime(now.timetuple())

    def _expire_callback(self, id, index):
        task = self.__get_entry(id)
        self.__notify(task, id, index)
        if not self.__reschedule(id, index):
            self.__delete(id, index)

    def __calculate_next_interval(self, task, current_time):
        if task.start_time < current_time:
            next_interval = task.start_time
            while next_interval < current_time:
                next_interval = next_interval + task.interval
            return next_interval - current_time
        else:
            return (task.start_time - current_time) + task.interval

    def __get_expire_time(self, task):
        now = self.__now()
        now_posix = self.__now_posix(now)
        expires_in = []
        if type(task) == TimeOfDayTimer:
            for time_of_day in task.times_of_day:
                expire_time = datetime(now.year, now.month, now.day, time_of_day['hour'],
                                       time_of_day['minute'], time_of_day['second'])
                expires_in.append(ceil((expire_time - now).total_seconds()))
        elif type(task) == IntervalTimer and (task.end_time == -1 or ((now_posix + task.interval) <= task.end_time)):
            expires_in = [self.__calculate_next_interval(task, now_posix)]
        return expires_in

    def __get_reschedule_expire_time(self, task, index):
        expires_in = False
        now = self.__now()
        now_posix = self.__now_posix(now)
        if type(task) == TimeOfDayTimer:
            if task.expires > now_posix:
                time_of_day = task.times_of_day[index]
                tomorrow = now + timedelta(days=1)
                expire_time = datetime(tomorrow.year, tomorrow.month, tomorrow.day, time_of_day['hour'],
                                       time_of_day['minute'], time_of_day['second'])
                expires_in = ceil((expire_time - now).total_seconds())
            else:
                expires_in = False
        elif type(task) == IntervalTimer and (task.end_time == -1 or ((now_posix + task.interval) <= task.end_time)):
            if task.start_time <= now_posix:
                expires_in = task.interval
            else:
                expires_in = (task.start_time - now_posix) + task.interval
        return expires_in

    def __validate_expire_times(self, expire_times):
        for index, expire_time in enumerate(expire_times):
            if expire_time < 0:
                return False
        return True

    def __schedule(self, scheduler_entry, id=False):
        # If "id" is set, the scheduler_entry is already in the Resource Registry.
        # This can occur during a system restart.
        spawns = []
        task = scheduler_entry.entry
        expire_times = self.__get_expire_time(task)
        if not self.__validate_expire_times(expire_times):
            log.error("SchedulerService:__schedule: scheduling: expire time is less than zero")
            return False
        if not id:
            id, _ = self.clients.resource_registry.create(scheduler_entry)
        self.__create_entry(task, spawns, id)
        for index, expire_time in enumerate(expire_times):
            log.debug("SchedulerService:__schedule: scheduling: - " + task.event_origin + " - Now: " +
                      str(self.__now()) + " - Expire: " + str(expire_time) + " - ID: " + id +
                      " - Index: " + str(index))
            spawn = gevent.spawn_later(expire_time, self._expire_callback, id, index)
            spawns.append(spawn)
        return id

    def __reschedule(self, id, index):
        if self._no_reschedule:
            log.debug("SchedulerService:__reschedule: process quitting, refusing to reschedule %s", id)
            return False
        task = self.__get_entry(id)
        expire_time = self.__get_reschedule_expire_time(task, index)
        if expire_time:
            log.debug("SchedulerService:__reschedule: rescheduling: - " + task.event_origin + " - Now: " +
                      str(self.__now()) + " - Expire: " + str(expire_time) + " - ID: " + id +
                      " - Index: " + str(index))
            spawn = gevent.spawn_later(expire_time, self._expire_callback, id, index)
            self.__update_entry(id=id, index=index, spawn=spawn)
            return True
        else:
            log.debug("SchedulerService:__reschedule: timer expired. Removed from RR: - " + task.event_origin +
                      " - Now: " + str(self.__now()) + " - Expire: " + str(expire_time) + " - ID: " + id +
                      " - Index: " + str(index))
        return False

    def __create_entry(self, task, spawns, id):
        self.schedule_entries[id] = {"task": task, "spawns": spawns}

    def __update_entry(self, id, index, spawn=None, interval=None):
        if spawn is not None:
            self.schedule_entries[id]["spawns"][index] = spawn
        if interval is not None:
            self.schedule_entries[id]["task"].interval = interval

    def __get_entry_all(self, id):
        return self.schedule_entries[id]

    def __get_spawns(self, id):
        return self.schedule_entries[id]["spawns"]

    def __get_entry(self, id):
        return self.schedule_entries[id]["task"]

    def __delete(self, id, index, force=False):
        if id in self.schedule_entries:
            task = self.__get_entry(id)
            if force and type(task) == TimeOfDayTimer:
                log.debug("SchedulerService:__delete: entry deleted " + id + " - Index: " + str(index))
                del self.schedule_entries[id]
                self.clients.resource_registry.delete(id)
            elif type(task) == TimeOfDayTimer:
                task = self.__get_entry(id)
                task.times_of_day[index] = None
                # Delete the entry only when all of its timers are set to None
                are_all_timers_expired = True
                for time_of_day in task.times_of_day:
                    if time_of_day is not None:
                        are_all_timers_expired = False
                        break
                if are_all_timers_expired:
                    log.debug("SchedulerService:__delete: entry deleted " + id + " - Index: " + str(index))
                    del self.schedule_entries[id]
                    self.clients.resource_registry.delete(id)
            else:
                log.debug("SchedulerService:__delete: entry deleted " + id + " - Index: " + str(index))
                del self.schedule_entries[id]
                self.clients.resource_registry.delete(id)
            return True
        return False

    def __is_timer_valid(self, task):
        # Validate that event_origin is set
        if not task.event_origin:
            log.error("SchedulerService.__is_timer_valid: event_origin is not set")
            return False
        # Validate that the timer is set correctly
        if type(task) == IntervalTimer:
            if task.end_time != -1 and (self.__now_posix(self.__now()) >= task.end_time):
                log.error("SchedulerService.__is_timer_valid: IntervalTimer is set to an incorrect value")
                return False
        elif type(task) == TimeOfDayTimer:
            for time_of_day in task.times_of_day:
                time_of_day['hour'] = int(time_of_day['hour'])
                time_of_day['minute'] = int(time_of_day['minute'])
                time_of_day['second'] = int(time_of_day['second'])
                if ((time_of_day['hour'] < 0 or time_of_day['hour'] > 23) or
                        (time_of_day['minute'] < 0 or time_of_day['minute'] > 59) or
                        (time_of_day['second'] < 0 or time_of_day['second'] > 61)):
                    log.error("SchedulerService.__is_timer_valid: TimeOfDayTimer is set to an incorrect value")
                    return False
        else:
            return False
        return True

    def _stop_pending_timers(self):
        """
        Safely stops all pending and active timers.

        For all timers still waiting to run, calls kill on them. For active timers, let them
        exit naturally and prevent the reschedule by setting the _no_reschedule flag.
        """
        # prevent reschedules
        self._no_reschedule = True
        gls = []
        for timer_id in self.schedule_entries:
            spawns = self.__get_spawns(timer_id)
            for spawn in spawns:
                gls.append(spawn)
                # only kill spawns that haven't started yet
                if spawn._start_event is not None:
                    spawn.kill()
            log.debug("_stop_pending_timers: timer %s deleted", timer_id)
        self.schedule_entries.clear()
        # wait for running greenlets to finish up
        gevent.joinall(gls, timeout=10)
        # allow reschedules from here on out
        self._no_reschedule = False

    def on_system_restart(self):
        """
        On system restart, get timer data from the Resource Registry and restore the
        Scheduler state.
        """
        # Remove all active timers.
        # When this method is called there should not be any active timers, but when it is
        # called from a test this removes the currently active timers before restoring them
        # from the Resource Registry.
        self._stop_pending_timers()

        # Restore the timers from the Resource Registry
        scheduler_entries, _ = self.clients.resource_registry.find_resources(RT.SchedulerEntry, id_only=False)
        for scheduler_entry in scheduler_entries:
            self.__schedule(scheduler_entry, scheduler_entry._id)
            log.debug("SchedulerService:on_system_restart: timer restored: " + scheduler_entry._id)

    def create_timer(self, scheduler_entry=None):
        """
        Create a timer which will send TimerEvents as requested for a given schedule.
        The schedule request is expressed through a specific subtype of TimerSchedulerEntry.
        The task is delivered as a TimerEvent to which processes can subscribe. The creator
        defines the fields of the task. A GUID-based id prefixed by a readable process name
        is recommended for the origin. Because the delivery of the task is via the ION
        Exchange there is potential for a small deviation in precision.
        Returns a timer_id which can be used to cancel the timer.

        @param scheduler_entry    TimerSchedulerEntry
        @retval timer_id    str
        @throws BadRequest    if the timer is malformed and cannot be scheduled
        """
        status = self.__is_timer_valid(scheduler_entry.entry)
        if not status:
            raise BadRequest("create_timer: timer is not valid")
        id = self.__schedule(scheduler_entry)
        if not id:
            raise BadRequest("create_timer: timer could not be scheduled")
        return id

    def cancel_timer(self, timer_id=''):
        """
        Cancels an existing timer which has not reached its expire time.

        @param timer_id    str
        @throws BadRequest    if timer_id doesn't exist
        """
        try:
            spawns = self.__get_spawns(timer_id)
            for spawn in spawns:
                spawn.kill()
            log.debug("SchedulerService: cancel_timer: id: " + str(timer_id))
            self.__delete(id=timer_id, index=None, force=True)
        except:
            log.error("SchedulerService: cancel_timer: timer id doesn't exist: " + str(timer_id))
            raise BadRequest("cancel_timer: timer id doesn't exist: " + str(timer_id))

    def create_interval_timer(self, start_time="", interval=0, end_time="", event_origin="", event_subtype=""):
        if (end_time != -1 and (self.__now_posix(self.__now()) >= end_time)) or not event_origin:
            log.error("SchedulerService.create_interval_timer: event_origin is not set or end_time is in the past")
            raise BadRequest("create_interval_timer: event_origin is not set or end_time is in the past")
        if start_time == "now":
            start_time = self.__now_posix(self.__now())
        log.debug("SchedulerService:create_interval_timer start_time: %s interval: %s end_time: %s event_origin: %s"
                  % (start_time, interval, end_time, event_origin))
        interval_timer = IonObject("IntervalTimer", {"start_time": start_time, "interval": interval,
                                                     "end_time": end_time, "event_origin": event_origin,
                                                     "event_subtype": event_subtype})
        se = IonObject(RT.SchedulerEntry, {"entry": interval_timer})
        return self.create_timer(se)

    def create_time_of_day_timer(self, times_of_day=None, expires='', event_origin='', event_subtype=''):
        # Validate the timer
        if not event_origin:
            log.error("SchedulerService.create_time_of_day_timer: event_origin is set to an invalid value")
            raise BadRequest("create_time_of_day_timer: event_origin is not set")
        for time_of_day in times_of_day:
            time_of_day['hour'] = int(time_of_day['hour'])
            time_of_day['minute'] = int(time_of_day['minute'])
            time_of_day['second'] = int(time_of_day['second'])
            log.debug("SchedulerService:create_time_of_day_timer - hour: %d minute: %d second: %d expires: %s event_origin: %s"
                      % (time_of_day['hour'], time_of_day['minute'], time_of_day['second'], expires, event_origin))
            if ((time_of_day['hour'] < 0 or time_of_day['hour'] > 23) or
                    (time_of_day['minute'] < 0 or time_of_day['minute'] > 59) or
                    (time_of_day['second'] < 0 or time_of_day['second'] > 61)):
                log.error("SchedulerService:create_time_of_day_timer: TimeOfDayTimer is set to an invalid value")
                raise BadRequest("create_time_of_day_timer: TimeOfDayTimer is set to an invalid value")
        time_of_day_timer = IonObject("TimeOfDayTimer", {"times_of_day": times_of_day, "expires": expires,
                                                         "event_origin": event_origin,
                                                         "event_subtype": event_subtype})
        se = IonObject(RT.SchedulerEntry, {"entry": time_of_day_timer})
        return self.create_timer(se)
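# --- Usage sketch (not part of the service above) ---
# How a client might drive the scheduler: create a ten-second interval timer and listen
# for the events it fires. Note that __notify() above publishes a plain ResourceEvent with
# the timer's event_origin, so the subscription filters on that origin. The client import
# path, the origin string, and the EventSubscriber usage are assumptions based on the
# usual ION conventions, not taken from this file.

from interface.services.cei.ischeduler_service import SchedulerServiceClient
from pyon.event.event import EventSubscriber


def run_example_interval_timer():
    scheduler = SchedulerServiceClient()

    def on_timer_event(event, headers):
        print 'timer fired, origin=%s' % event.origin

    # Subscribe first so the initial firing is not missed; origin matches event_origin below
    subscriber = EventSubscriber(event_type='ResourceEvent', origin='example_timer',
                                 callback=on_timer_event)
    subscriber.start()

    # Fire every 10 seconds starting now; end_time=-1 means the timer never expires
    timer_id = scheduler.create_interval_timer(start_time='now', interval=10, end_time=-1,
                                               event_origin='example_timer')
    return scheduler, subscriber, timer_id

# Later, stop it with: scheduler.cancel_timer(timer_id); subscriber.stop()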
class SystemManagementService(BaseSystemManagementService):
    """
    Container management requests are handled by the event listener
    ion.processes.event.container_manager.ContainerManager, which must be running on each container.
    """

    def on_start(self, *a, **b):
        super(SystemManagementService, self).on_start(*a, **b)
        self.sender = EventPublisher(process=self)

    def on_quit(self, *a, **b):
        self.sender.close()

    def perform_action(self, predicate, action):
        userid = None  # get from context
        self.sender.publish_event(event_type=OT.ContainerManagementRequest, origin=userid,
                                  predicate=predicate, action=action)

    def set_log_level(self, logger='', level='', recursive=False):
        self.perform_action(ALL_CONTAINERS_INSTANCE,
                            IonObject(OT.ChangeLogLevel, logger=logger, level=level, recursive=recursive))

    def reset_policy_cache(self, headers=None, timeout=None):
        """Clears and reloads the policy caches in all of the containers.

        @throws BadRequest    None
        """
        self.perform_action(ALL_CONTAINERS_INSTANCE, IonObject(OT.ResetPolicyCache))

    def trigger_garbage_collection(self):
        """Triggers a garbage collection in all containers.

        @throws BadRequest    None
        """
        self.perform_action(ALL_CONTAINERS_INSTANCE, IonObject(OT.TriggerGarbageCollection))

    def trigger_container_snapshot(self, snapshot_id='', include_snapshots=None, exclude_snapshots=None,
                                   take_at_time='', clear_all=False, persist_snapshot=True, snapshot_kwargs=None):
        if not snapshot_id:
            snapshot_id = get_ion_ts()
        if not snapshot_kwargs:
            snapshot_kwargs = {}

        self.perform_action(ALL_CONTAINERS_INSTANCE,
                            IonObject(OT.TriggerContainerSnapshot,
                                      snapshot_id=snapshot_id,
                                      include_snapshots=include_snapshots,
                                      exclude_snapshots=exclude_snapshots,
                                      take_at_time=take_at_time,
                                      clear_all=clear_all,
                                      persist_snapshot=persist_snapshot,
                                      snapshot_kwargs=snapshot_kwargs))
        log.info("Event to trigger container snapshots sent. snapshot_id=%s" % snapshot_id)

    def start_gevent_block(self, alarm_mode=False):
        self.perform_action(ALL_CONTAINERS_INSTANCE, IonObject(OT.StartGeventBlock, alarm_mode=alarm_mode))

    def stop_gevent_block(self):
        self.perform_action(ALL_CONTAINERS_INSTANCE, IonObject(OT.StopGeventBlock))

    def prepare_system_shutdown(self, mode=''):
        self.perform_action(ALL_CONTAINERS_INSTANCE, IonObject(OT.PrepareSystemShutdown, mode=mode))
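# --- Usage sketch (not part of the service above) ---
# Driving the service from a client: raise the log level of a noisy logger everywhere,
# then snapshot every container. Each call simply publishes a ContainerManagementRequest
# event that the per-container ContainerManager listener acts on. The client import path
# follows the usual generated-interface convention and is an assumption, not taken from
# this file; the logger name is illustrative.

from interface.services.coi.isystem_management_service import SystemManagementServiceClient


def quiet_logger_and_snapshot():
    sms = SystemManagementServiceClient()
    # Broadcast a ChangeLogLevel action to all containers
    sms.set_log_level(logger='pyon.net.endpoint', level='WARN', recursive=True)
    # Snapshot all containers; snapshot_id defaults to the current ION timestamp
    sms.trigger_container_snapshot(persist_snapshot=True)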
class UploadCalibrationProcessing(ImmediateProcess):
    """
    Upload Calibration Processing Process

    This process provides the capability to ION clients and operators to process uploaded
    calibration coefficients to calibrate data products.

    The parameters that this process accepts as configuration are:
        - fuc_id: The FileUploadContext identifier (required); identifies where the file was written.
    """

    def on_start(self):
        ImmediateProcess.on_start(self)

        # necessary arguments, passed in via the configuration kwarg to schedule_process;
        # the "process" namespace is used to avoid collisions
        fuc_id = self.CFG.get_safe('process.fuc_id', None)  # FileUploadContext ID

        # Clients
        self.object_store = self.container.object_store
        self.resource_registry = self.container.resource_registry
        self.event_publisher = EventPublisher(OT.ResetQCEvent)
        self.data_product_management = DataProductManagementServiceProcessClient(process=self)
        self.create_map()

        # run process
        if fuc_id:
            self.process(fuc_id)

        # cleanup
        self.event_publisher.close()

    def process(self, fuc_id):
        # get the object (dict) containing details of the uploaded file
        fuc = self.object_store.read(fuc_id)

        if fuc['filetype'] == 'ZIP':
            raise BadRequest("ZIP format not determined by project scientists yet (2014-04-21)")
            #self.process_zip(fuc)
        else:
            self.process_csv(fuc)

    def create_map(self):
        """Creates a map from instrument property numbers to the data products of those devices."""
        self.property_map = {}
        for instrument_device in self.resource_registry.find_resources(restype=RT.InstrumentDevice)[0]:
            if instrument_device.ooi_property_number:
                self.property_map[instrument_device.ooi_property_number] = \
                    self.data_products_for_device(instrument_device)

    def data_products_for_device(self, device):
        data_products, _ = self.resource_registry.find_objects(device, PRED.hasOutputProduct, id_only=True)
        return data_products

    def dataset_for_data_product(self, data_product):
        datasets, _ = self.resource_registry.find_objects(data_product, PRED.hasDataset, id_only=True)
        return datasets[0]

    def do_something_with_the_update(self, updates):
        for property_no, calibration_update in updates.iteritems():
            # Check to see if we even have an instrument with this property number
            if property_no not in self.property_map:
                continue
            # Get the data product listings for this instrument
            data_products = self.property_map[property_no]
            # Go through each data product and update the data IF
            #  - there is a set of parameters that match those in the calibration
            for data_product in data_products:
                self.update_data_product(data_product, calibration_update)

    def update_data_product(self, data_product, calibration_update):
        parameters = [p.name for p in self.data_product_management.get_data_product_parameters(data_product)]
        dataset_updates = []
        for cal_name in calibration_update.iterkeys():
            if cal_name in parameters:
                dataset_id = self.dataset_for_data_product(data_product)
                dataset_updates.append(dataset_id)
        for dataset in dataset_updates:
            self.apply_to_dataset(dataset, calibration_update)

    def apply_to_dataset(self, dataset, calibration_update):
        cov = DatasetManagementService._get_coverage(dataset, mode='r+')
        try:
            self.set_sparse_values(cov, calibration_update)
            self.publish_calibration_event(dataset, calibration_update.keys())
        finally:
            cov.close()

    def set_sparse_values(self, cov, calibration_update):
        for calibration_name, updates in calibration_update.iteritems():
            if calibration_name not in cov.list_parameters():
                continue
            for update in updates:
                np_dict = {}
                self.check_units(cov, calibration_name, update['units'])
                start_date = self.ntp_from_iso(update['start_date'])
                np_dict[calibration_name] = ConstantOverTime(calibration_name, update['value'],
                                                             time_start=start_date)
                cov.set_parameter_values(np_dict)

    def check_units(self, cov, calibration_name, units):
        pass

    def publish_calibration_event(self, dataset, calibrations):
        publisher = EventPublisher(OT.DatasetCalibrationEvent)
        publisher.publish_event(origin=dataset, calibrations=calibrations)

    def ntp_from_iso(self, iso):
        return TimeUtils.ntp_from_iso(iso)

    def process_csv(self, fuc):
        # the CSV file is opened here
        csv_filename = fuc.get('path', None)
        if csv_filename is None:
            raise BadRequest("uploaded file has no path")

        # keep track of the number of calibrations we actually process
        nupdates = 0

        updates = {}  # keys are reference_designators; used to update the object store after parsing the CSV

        with open(csv_filename, 'rb') as csvfile:
            # eliminate blank lines
            csvfile = (row for row in csvfile if len(row.strip()) > 0)
            # eliminate commented lines
            csvfile = (row for row in csvfile if not row.startswith('#'))
            # open the CSV reader
            csv_reader = csv.reader(csvfile, delimiter=',')
            # iterate the rows returned by csv.reader
            for row in csv_reader:
                if len(row) != 6:
                    log.warn("invalid calibration line %s" % ','.join(row))
                    continue
                try:
                    ipn = row[0]           # instrument_property_number
                    name = row[1]          # calibration_name
                    value = float(row[2])  # calibration_value
                    units = row[3]
                    description = row[4]   # description
                    start_date = row[5]    # start_date TODO date object?
                    d = {
                        'value': value,
                        'units': units,
                        'description': description,
                        'start_date': start_date
                    }
                except ValueError as e:
                    continue  # TODO error message? or just skip?
                # get ipn key
                if ipn not in updates:
                    updates[ipn] = {}  # initialize empty dict
                if name not in updates[ipn]:
                    updates[ipn][name] = []  # will be a list of dicts
                updates[ipn][name].append(d)
                nupdates = nupdates + 1

        self.do_something_with_the_update(updates)
        # insert the updates into the object store
        self.update_object_store(updates)

        # update the FileUploadContext object (change status to complete)
        fuc['status'] = 'UploadCalibrationProcessing process complete - %d updates added to object store' % nupdates
        self.object_store.update_doc(fuc)

        # remove the uploaded file
        try:
            os.remove(csv_filename)
        except OSError:
            pass  # TODO take action to get this removed

    def process_zip(self, fuc):
        pass

    def update_object_store(self, updates):
        """Inserts the updates into the object store."""
        for i in updates:  # loops over the instrument_property_numbers (ipn) in the updates object
            try:  # if i exists in the object store, read it
                ipn = self.object_store.read(i)  # TODO: what will this be?
            except:  # if it does not yet exist, create it (can't use update_doc because we need to set the id)
                ipn = self.object_store.create_doc({'_type': 'CALIBRATION'}, i)  # CAUTION: this returns a tuple, not a dict like read() returns
                ipn = self.object_store.read(i)  # read it back so we have a dict as expected
            # merge everything from updates[i] into the dict destined for the object store (ipn)
            for name in updates[i]:  # loops over the calibration_names under each ipn in updates
                # TODO: if name is not initialized, will append work? if so, the same op can be used for both branches
                if name not in ipn:  # if name doesn't exist, we can just add the entire object (dict of lists)
                    ipn[name] = updates[i][name]
                else:  # if it does, we need to append to each of the lists
                    ipn[name].append(updates[i][name])  # append the list from updates
            # store the updated ipn-keyed object in the object store (overwrites the full object, including all previous data)
            self.object_store.update_doc(ipn)
            # publish a ResetQCEvent (one for each instrument_property_number, aka ipn)
            self.event_publisher.publish_event(origin=i)
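# --- Input format sketch (not part of the process above) ---
# process_csv() expects six comma-separated columns per row:
#   instrument_property_number, calibration_name, value, units, description, start_date
# and builds a nested updates dict keyed by property number, then calibration name.
# This self-contained snippet mirrors that parsing on made-up sample data; the property
# number and coefficient names are illustrative only.

import csv
from StringIO import StringIO

SAMPLE_CSV = """# ipn,name,value,units,description,start_date
12345-67890,cc_gain,1.0231,counts,post-deployment gain,2014-04-21T00:00:00Z
12345-67890,cc_offset,-0.0042,counts,post-deployment offset,2014-04-21T00:00:00Z
"""


def parse_sample_csv():
    updates = {}
    rows = (r for r in StringIO(SAMPLE_CSV) if r.strip() and not r.startswith('#'))
    for ipn, name, value, units, description, start_date in csv.reader(rows, delimiter=','):
        updates.setdefault(ipn, {}).setdefault(name, []).append({
            'value': float(value), 'units': units,
            'description': description, 'start_date': start_date})
    return updates

# parse_sample_csv()['12345-67890']['cc_gain'][0]['value'] == 1.0231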
class TransformWorker(TransformStreamListener):
    """Hosts data process transform functions: subscribes to the input stream queue, runs each
    configured transform on incoming granules, and publishes output granules and status events."""

    CACHE_LIMIT = CFG.get_safe('container.ingestion_cache', 5)

    # Status publishes after a set of granules has been processed
    STATUS_INTERVAL = 100

    def __init__(self, *args, **kwargs):
        super(TransformWorker, self).__init__(*args, **kwargs)

        # the set of data processes hosted by this worker
        self._dataprocesses = {}
        self._streamid_map = {}
        self._publisher_map = {}

        self._transforms = {}

    def on_start(self):  #pragma no cover
        #super(TransformWorker,self).on_start()
        #--------------------------------------------------------------------------------
        # Explicit on_start
        #--------------------------------------------------------------------------------

        # Skip TransformStreamListener and go to StreamProcess to avoid the subscriber being created
        # We want explicit management of the thread and subscriber object for ingestion
        #todo: check how to manage multi queue subscription (transform scenario 3)

        TransformStreamProcess.on_start(self)

        #todo: can the subscription be changed or updated when new dataprocesses are added?
        self.queue_name = self.CFG.get_safe('process.queue_name', self.id)
        self.subscriber = StreamSubscriber(process=self, exchange_name=self.queue_name, callback=self.receive_callback)
        self.thread_lock = RLock()

        self._rpc_server = self.container.proc_manager._create_listening_endpoint(from_name=self.id, process=self)
        self.add_endpoint(self._rpc_server)

        self.start_listener()

        #todo: determine and publish appropriate set of status events
        self.event_publisher = EventPublisher(OT.DataProcessStatusEvent)

    def on_quit(self):  #pragma no cover
        self.event_publisher.close()

        if self.subscriber_thread:
            self.stop_listener()

        super(TransformWorker, self).on_quit()

    def start_listener(self):
        # We use a lock here to prevent possible race conditions from starting multiple listeners and coverage clobbering
        with self.thread_lock:
            self.subscriber_thread = self._process.thread_manager.spawn(self.subscriber.listen, thread_name='%s-subscriber' % self.id)

    def stop_listener(self):
        # Avoid race conditions with coverage operations (don't start a listener at the same time as closing one)
        with self.thread_lock:
            self.subscriber.close()
            self.subscriber_thread.join(timeout=10)
            self.subscriber_thread = None

    @handle_stream_exception()
    def recv_packet(self, msg, stream_route, stream_id):
        ''' receive packet for ingestion '''
        log.debug('received granule for stream %s', stream_id)

        if msg == {}:
            log.error('Received empty message from stream: %s', stream_id)
            return
        # Message validation
        if not isinstance(msg, Granule):
            log.error('Ingestion received a message that is not a granule: %s', msg)
            return

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('received granule for stream rdt %s', rdt)
        if rdt is None:
            log.error('Invalid granule (no RDT) for stream %s', stream_id)
            return
        if not len(rdt):
            log.debug('Empty granule for stream %s', stream_id)
            return

        dp_id_list = self.retrieve_dataprocess_for_stream(stream_id)

        for dp_id in dp_id_list:

            function, argument_list, context = self.retrieve_function_and_define_args(stream_id, dp_id)

            args = []
            rdt = RecordDictionaryTool.load_from_granule(msg)

            # create the input arguments list
            #todo: this logic is tied to the example function, generalize
            #todo: how to inject params not in the granule such as stream_id, dp_id, etc?
            for func_param, record_param in argument_list.iteritems():
                args.append(rdt[record_param])
            if context:
                args.append(context)

            try:
                # run the calc
                #todo: nothing in the data process resource to specify multi-out map
                result = ''
                try:
                    result = function(*args)
                    log.debug('recv_packet result: %s', result)
                except:
                    log.error('Error running transform %s with args %s.', dp_id, args, exc_info=True)
                    raise

                out_stream_definition, output_parameter = self.retrieve_dp_output_params(dp_id)

                if out_stream_definition and output_parameter:
                    rdt_out = RecordDictionaryTool(stream_definition_id=out_stream_definition)
                    publisher = self._publisher_map.get(dp_id, '')

                    for param in rdt:
                        if param in rdt_out:
                            rdt_out[param] = rdt[param]
                    rdt_out[output_parameter] = result

                    if publisher:
                        log.debug('output rdt: %s', rdt_out)
                        publisher.publish(rdt_out.to_granule())
                    else:
                        log.error('Publisher not found for data process %s', dp_id)

                self.update_dp_metrics(dp_id)

            except ImportError:
                log.error('Error running transform')

    def retrieve_dataprocess_for_stream(self, stream_id):
        # return any data processes that apply to this stream
        dp_id_list = []
        if stream_id in self._streamid_map:
            dp_id_list = self._streamid_map[stream_id]
        else:
            dp_id_list = self.load_data_process(stream_id)
        return dp_id_list

    def retrieve_function_and_define_args(self, stream_id, dataprocess_id):
        import importlib
        argument_list = {}
        function = ''
        context = {}

        # load the details of this data process
        dataprocess_info = self._dataprocesses[dataprocess_id]

        try:
            #todo: load once into a 'set' of modules?
            # load the associated transform function
            egg_uri = dataprocess_info.get_safe('uri', '')
            if egg_uri:
                egg = self.download_egg(egg_uri)
                import pkg_resources
                pkg_resources.working_set.add_entry(egg)
            else:
                log.warning('No uri provided for module in data process definition.')

            module = importlib.import_module(dataprocess_info.get_safe('module', ''))

            function = getattr(module, dataprocess_info.get_safe('function', ''))
            arguments = dataprocess_info.get_safe('arguments', '')
            argument_list = dataprocess_info.get_safe('argument_map', {})

            if self.has_context_arg(function, argument_list):
                context = self.create_context_arg(stream_id, dataprocess_id)
        except ImportError:
            log.error('Error importing transform function for data process %s', dataprocess_id)

        log.debug('retrieve_function_and_define_args argument_list: %s', argument_list)
        return function, argument_list, context

    def retrieve_dp_output_params(self, dataprocess_id):
        dataprocess_info = self._dataprocesses[dataprocess_id]
        out_stream_definition = dataprocess_info.get_safe('out_stream_def', '')
        output_parameter = dataprocess_info.get_safe('output_param', '')
        return out_stream_definition, output_parameter

    def update_dp_metrics(self, dataprocess_id):
        # update metrics
        dataprocess_info = self._dataprocesses[dataprocess_id]
        dataprocess_info.granule_counter += 1
        if dataprocess_info.granule_counter % self.STATUS_INTERVAL == 0:
            # publish a status update event
            self.event_publisher.publish_event(origin=dataprocess_id, origin_type='DataProcess',
                                               status=DataProcessStatusType.NORMAL,
                                               description='data process status update. %s granules processed' % dataprocess_info.granule_counter)

    def load_data_process(self, stream_id=""):
        dpms_client = DataProcessManagementServiceClient()

        dataprocess_details_list = dpms_client.read_data_process_for_stream(stream_id)

        dataprocess_ids = []
        # this returns a list of data process info dicts
        for dataprocess_details in dataprocess_details_list:
            dataprocess_details = DotDict(dataprocess_details or {})
            dataprocess_id = dataprocess_details.dataprocess_id

            # set metrics attributes
            dataprocess_details.granule_counter = 0

            self._dataprocesses[dataprocess_id] = dataprocess_details
            log.debug('load_data_process dataprocess_id: %s', dataprocess_id)
            log.debug('load_data_process dataprocess_details: %s', dataprocess_details)

            # validate details
            # if no output stream info is available, log a warning but proceed: the transform may still publish an event
            if not dataprocess_details.out_stream_def or not dataprocess_details.output_param:
                log.warning('No output stream details provided for data process %s, will not publish a granule', dataprocess_id)

            # add the stream id to the map
            if 'in_stream_id' in dataprocess_details:
                if dataprocess_details['in_stream_id'] in self._streamid_map:
                    self._streamid_map[dataprocess_details['in_stream_id']].append(dataprocess_id)
                else:
                    self._streamid_map[dataprocess_details['in_stream_id']] = [dataprocess_id]

            #todo: add transform worker id
            self.event_publisher.publish_event(origin=dataprocess_id, origin_type='DataProcess',
                                               status=DataProcessStatusType.NORMAL,
                                               description='data process loaded into transform worker')

            # create a publisher for output stream
            self.create_publisher(dataprocess_id, dataprocess_details)
            dataprocess_ids.append(dataprocess_id)

        return dataprocess_ids

    def create_publisher(self, dataprocess_id, dataprocess_details):
        #todo: create correct publisher type for the transform type
        #todo: DataMonitor, Event Monitor get EventPublishers
        #todo: DataProcess, EventProcess get stream publishers
        out_stream_route = dataprocess_details.get('out_stream_route', '')
        out_stream_id = dataprocess_details.get('out_stream_id', '')
        publisher = StreamPublisher(process=self, stream_id=out_stream_id, stream_route=out_stream_route)
        self._publisher_map[dataprocess_id] = publisher

    @classmethod
    def download_egg(cls, url):
        '''
        Downloads an egg from the URL specified into the cache directory
        Returns the full path to the egg
        '''
        # Get the filename based on the URL
        filename = url.split('/')[-1]
        # Store it in the $TMPDIR
        egg_cache = gettempdir()
        path = os.path.join(egg_cache, filename)
        r = requests.get(url, stream=True)
        if r.status_code == 200:
            # Download the file using requests stream
            with open(path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
                        f.flush()
            return path
        raise IOError("Couldn't download the file at %s" % url)

    def has_context_arg(self, func, argument_map):
        import inspect
        argspec = inspect.getargspec(func)
        return argspec.args != argument_map and 'context' in argspec.args

    def create_context_arg(self, stream_id, dataprocess_id):
        context = DotDict()
        context.stream_id = stream_id
        context.dataprocess_id = dataprocess_id
        return context
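# --- Illustrative example (hypothetical; not used by the worker) -----------------
# A minimal sketch of the kind of transform function TransformWorker can host,
# assuming the data process definition supplies module/function names and an
# 'argument_map' that maps each function parameter to a parameter name in the
# incoming granule. If the function also declares a 'context' parameter,
# has_context_arg() detects it and the worker appends the DotDict built by
# create_context_arg() (carrying stream_id and dataprocess_id). The names below
# (add_arrays, 'temperature', 'temp_offset') are made up for illustration only.

def add_arrays(a, b, context=None):
    """Element-wise sum of two equal-length sequences pulled from the granule via argument_map.

    When the worker passes 'context', it is the DotDict from create_context_arg()
    and exposes context.stream_id and context.dataprocess_id.
    """
    return [x + y for x, y in zip(a, b)]

# A matching (hypothetical) data process definition would then carry roughly:
#   module='my_transforms.example', function='add_arrays',
#   argument_map={'a': 'temperature', 'b': 'temp_offset'}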
class UploadCalibrationProcessing(ImmediateProcess):
    """
    Upload Calibration Processing Process

    This process provides the capability to ION clients and operators to process uploaded calibration
    coefficients to calibrate data products.

    The parameters that this process accepts as configuration are:
        - fuc_id: The FileUploadContext identifier (required); identifies where the uploaded file was written
    """

    def on_start(self):
        ImmediateProcess.on_start(self)

        # necessary arguments, passed in via the configuration kwarg to schedule_process;
        # kept under the 'process' namespace to avoid collisions
        fuc_id = self.CFG.get_safe('process.fuc_id', None)  # FileUploadContext ID

        # Clients
        self.object_store = self.container.object_store
        self.event_publisher = EventPublisher(OT.ResetQCEvent)

        # run process
        self.process(fuc_id)

        # cleanup
        self.event_publisher.close()

    def process(self, fuc_id):
        # get the Object (dict) containing details of the uploaded file
        fuc = self.object_store.read(fuc_id)

        if fuc['filetype'] == 'ZIP':
            raise BadRequest("ZIP format not determined by project scientists yet (2014-04-21)")
            #self.process_zip(fuc)
        else:
            self.process_csv(fuc)

    def process_csv(self, fuc):
        # CSV file open here
        csv_filename = fuc.get('path', None)
        if csv_filename is None:
            raise BadRequest("uploaded file has no path")

        # keep track of the number of calibrations we actually process
        nupdates = 0

        updates = {}  # keys are reference_designators, used to update object store after parsing the CSV

        with open(csv_filename, 'rb') as csvfile:
            # eliminate blank lines
            csvfile = (row for row in csvfile if len(row.strip()) > 0)
            # eliminate commented lines
            csvfile = (row for row in csvfile if not row.startswith('#'))
            # open CSV reader
            csv_reader = csv.reader(csvfile, delimiter=',')  # skip commented lines
            # iterate the rows returned by csv.reader
            for row in csv_reader:
                if len(row) != 6:
                    log.warn("invalid calibration line %s" % ','.join(row))
                    continue
                try:
                    ipn = row[0]           # instrument_property_number
                    name = row[1]          # calibration_name
                    value = float(row[2])  # calibration_value
                    units = row[3]
                    description = row[4]   # description
                    start_date = row[5]    # start_date TODO date object?
                    d = {
                        'value': value,
                        'units': units,
                        'description': description,
                        'start_date': start_date
                    }
                except ValueError as e:
                    continue  # TODO error message? or just skip?
                # get ipn key
                if ipn not in updates:
                    updates[ipn] = {}  # initialize empty dict keyed by calibration_name
                if name not in updates[ipn]:
                    updates[ipn][name] = []  # will be a list of dicts
                updates[ipn][name].append(d)
                nupdates = nupdates + 1

        # insert the updates into object store
        self.update_object_store(updates)

        # update FileUploadContext object (change status to complete)
        fuc['status'] = 'UploadCalibrationProcessing process complete - %d updates added to object store' % nupdates
        self.object_store.update_doc(fuc)

        # remove uploaded file
        try:
            os.remove(csv_filename)
        except OSError:
            pass  # TODO take action to get this removed

    def process_zip(self, fuc):
        pass

    def update_object_store(self, updates):
        '''inserts the updates into object store'''
        for i in updates:  # loops the instrument_property_number (ipn) keys in the updates object
            try:
                # if i exists in object_store, read it
                ipn = self.object_store.read(i)  #TODO: what will this be?
            except:
                # if it does not yet exist in object_store, create it (can't use update_doc because we need to set the id)
                ipn = self.object_store.create_doc({'_type': 'CALIBRATION'}, i)  # CAUTION: this returns a tuple, not a dict like read() returns
                ipn = self.object_store.read(i)  # read so we have a dict like we expect
            # merge everything from updates[i] into the dict destined for the object_store (ipn)
            for name in updates[i]:  # loops the calibration_names under each IPN in updates
                #TODO: if name is not initialized, will append work? if so, can use the same op for both
                if name not in ipn:
                    # if name doesn't exist, we can just add the entire object (dict of lists)
                    ipn[name] = updates[i][name]
                else:
                    # if it does, we need to append to each of the lists
                    ipn[name].append(updates[i][name])  # append the list from updates
            # store updated ipn keyed object in object_store (should overwrite full object, contains all previous too)
            self.object_store.update_doc(ipn)
            # publish ResetQCEvent event (one for each instrument_property_number [AKA ipn])
            self.event_publisher.publish_event(origin=i)
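# --- Illustrative example (hypothetical; not used by the process) ----------------
# A minimal sketch of the calibration CSV layout that process_csv() expects: six
# comma-separated columns per row, in the order
#   instrument_property_number, calibration_name, value, units, description, start_date
# (blank and '#'-commented lines are skipped), plus the rough shape of the
# object-store document that update_object_store() accumulates per
# instrument_property_number. All identifiers and values below are made up.

EXAMPLE_CALIBRATION_CSV = (
    "# comments and blank lines are ignored\n"
    "IPN-0001,CC_scale_factor,1.045,counts,bench calibration,2014-04-21\n"
    "IPN-0001,CC_offset,-0.13,counts,bench calibration,2014-04-21\n"
)

def example_calibration_doc():
    """Return the approximate shape of an object-store doc keyed by an IPN (hypothetical values)."""
    return {
        '_type': 'CALIBRATION',
        'CC_scale_factor': [
            {'value': 1.045, 'units': 'counts', 'description': 'bench calibration', 'start_date': '2014-04-21'}
        ],
        'CC_offset': [
            {'value': -0.13, 'units': 'counts', 'description': 'bench calibration', 'start_date': '2014-04-21'}
        ],
    }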