class SystemManagementService(BaseSystemManagementService):
    """ container management requests are handled by the event listener
        ion.processes.event.container_manager.ContainerManager
        which must be running on each container.
    """
    def on_start(self,*a,**b):
        super(SystemManagementService,self).on_start(*a,**b)
        self.sender = EventPublisher()

    def on_quit(self,*a,**b):
        self.sender.close()

    def perform_action(self, predicate, action):
        userid = None # get from context
        self.sender.publish_event(event_type=OT.ContainerManagementRequest, origin=userid, predicate=predicate, action=action)

    def set_log_level(self, logger='', level='', recursive=False):
        self.perform_action(ALL_CONTAINERS_INSTANCE, IonObject(OT.ChangeLogLevel, logger=logger, level=level, recursive=recursive))


    def reset_policy_cache(self, headers=None, timeout=None):
        """Clears and reloads the policy caches in all of the containers.

        @throws BadRequest    None
        """
        self.perform_action(ALL_CONTAINERS_INSTANCE, IonObject(OT.ResetPolicyCache))

    def trigger_garbage_collection(self):
        """Triggers a garbage collection in all containers

        @throws BadRequest    None
        """
        self.perform_action(ALL_CONTAINERS_INSTANCE, IonObject(OT.TriggerGarbageCollection))

    def trigger_container_snapshot(self, snapshot_id='', include_snapshots=None, exclude_snapshots=None,
                                   take_at_time='', clear_all=False, persist_snapshot=True, snapshot_kwargs=None):

        if not snapshot_id:
            snapshot_id = get_ion_ts()
        if not snapshot_kwargs:
            snapshot_kwargs = {}

        self.perform_action(ALL_CONTAINERS_INSTANCE, IonObject(OT.TriggerContainerSnapshot,
                                                               snapshot_id=snapshot_id,
                                                               include_snapshots=include_snapshots,
                                                               exclude_snapshots=exclude_snapshots,
                                                               take_at_time=take_at_time,
                                                               clear_all=clear_all,
                                                               persist_snapshot=persist_snapshot,
                                                               snapshot_kwargs=snapshot_kwargs))
        log.info("Event to trigger container snapshots sent. snapshot_id=%s" % snapshot_id)

    def start_gevent_block(self, alarm_mode=False):
        self.perform_action(ALL_CONTAINERS_INSTANCE, IonObject(OT.StartGeventBlock, alarm_mode=alarm_mode))

    def stop_gevent_block(self):
        self.perform_action(ALL_CONTAINERS_INSTANCE, IonObject(OT.StopGeventBlock))

    def prepare_system_shutdown(self, mode=''):
        self.perform_action(ALL_CONTAINERS_INSTANCE, IonObject(OT.PrepareSystemShutdown, mode=mode))
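A minimal usage sketch, not taken from the original source: assuming the same imports the class above relies on (EventPublisher, IonObject, OT, ALL_CONTAINERS_INSTANCE), this is the event a set_log_level() call ends up publishing, without going through the service; the logger name and level are illustrative.

publisher = EventPublisher()
publisher.publish_event(
    event_type=OT.ContainerManagementRequest,
    origin=None,                         # the service would fill in the caller's user id here
    predicate=ALL_CONTAINERS_INSTANCE,   # address the request to every running container
    action=IonObject(OT.ChangeLogLevel, logger='pyon', level='DEBUG', recursive=True))
publisher.close()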
Example #3
class TransformEventPublisher(TransformEventProcess):

    def on_start(self):
        event_type = self.CFG.get_safe('process.event_type', '')

        self.publisher = EventPublisher(event_type=event_type)

    def publish_event(self, *args, **kwargs):
        raise NotImplementedError('Method publish_event not implemented')

    def on_quit(self):
        self.publisher.close()
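TransformEventPublisher deliberately leaves publish_event unimplemented. A minimal sketch of a concrete subclass, assuming a plain pass-through is all that is needed (the class name and behaviour below are illustrative, not part of the original code):

class PassThroughEventPublisher(TransformEventPublisher):
    """Hypothetical subclass: forwards event fields straight to the underlying publisher."""

    def publish_event(self, *args, **kwargs):
        # kwargs carries origin and any other event attributes, as in the other examples above
        self.publisher.publish_event(**kwargs)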
class SchedulerService(BaseSchedulerService):

    def __init__(self, *args, **kwargs):
        BaseSchedulerService.__init__(self, *args, **kwargs)

        self.schedule_entries = {}
        self._no_reschedule = False

    def on_start(self):
        if CFG.get_safe("process.start_mode") == "RESTART":
            self.on_system_restart()
        self.pub = EventPublisher(event_type="ResourceEvent")

    def on_quit(self):
        self.pub.close()

        # throw killswitch on future reschedules
        self._no_reschedule = True

        # terminate any pending spawns
        self._stop_pending_timers()

    def __notify(self, task, id, index):
        log.debug("SchedulerService:__notify: - " + task.event_origin + " - Time: " + str(self.__now()) + " - ID: " + id + " -Index:" + str(index))
        self.pub.publish_event(origin=task.event_origin)

    def __now(self):
        return datetime.utcnow()

    def __now_posix(self, now):
        return time.mktime(now.timetuple())

    def _expire_callback(self, id, index):
        task = self.__get_entry(id)
        self.__notify(task, id, index)
        if not self.__reschedule(id, index):
            self.__delete(id, index)

    def __calculate_next_interval(self, task, current_time):
        if task.start_time < current_time:
            next_interval = task.start_time
            while (next_interval < current_time):
                next_interval = next_interval + task.interval
            return (next_interval - current_time)
        else:
            return (task.start_time - current_time) + task.interval

    def __get_expire_time(self, task):
        now = self.__now()
        now_posix = self.__now_posix(now)
        expires_in = []
        if type(task) == TimeOfDayTimer:
            for time_of_day in task.times_of_day:
                expire_time = datetime(now.year, now.month, now.day, time_of_day['hour'], time_of_day['minute'], time_of_day['second'])
                expires_in.append(ceil((expire_time - now).total_seconds()))
        elif type(task) == IntervalTimer and (task.end_time == -1 or ((now_posix + task.interval) <= task.end_time)):
            expires_in = [(self.__calculate_next_interval(task, now_posix))]
        return expires_in

    def __get_reschedule_expire_time(self, task, index):
        expires_in = False
        now = self.__now()
        now_posix = self.__now_posix(now)
        if type(task) == TimeOfDayTimer:
            if task.expires > now_posix:
                time_of_day = task.times_of_day[index]
                tomorrow = now + timedelta(days=1)
                expire_time = datetime(tomorrow.year, tomorrow.month, tomorrow.day, time_of_day['hour'], time_of_day['minute'], time_of_day['second'])
                expires_in = (ceil((expire_time - now).total_seconds()))
            else:
                expires_in = False
        elif type(task) == IntervalTimer and (task.end_time == -1 or ((now_posix + task.interval) <= task.end_time)):
            if task.start_time <= now_posix:
                expires_in = (task.interval)
            else:
                expires_in = ((task.start_time - now_posix) + task.interval)

        return expires_in

    def __validate_expire_times(self, expire_times):
        for index, expire_time in enumerate(expire_times):
            if expire_time < 0:
                return False
        return True

    def __schedule(self, scheduler_entry, id=False):
        # if "id" is set, it means scheduler_entry is already in Resource Regsitry. This can occur during a sytsem restart
        spawns = []
        task = scheduler_entry.entry
        expire_times = self.__get_expire_time(task)
        if not self.__validate_expire_times(expire_times):
            log.error("SchedulerService:__schedule: scheduling: expire time is less than zero: ")
            return False

        if not id:
            id, _ = self.clients.resource_registry.create(scheduler_entry)
        self.__create_entry(task, spawns, id)
        for index, expire_time in enumerate(expire_times):
            log.debug("SchedulerService:__schedule: scheduling: - " + task.event_origin + " - Now: " + str(self.__now()) +
                      " - Expire: " + str(expire_time) + " - ID: " + id + " - Index:" + str(index))
            spawn = gevent.spawn_later(expire_time, self._expire_callback, id, index)
            spawns.append(spawn)
        return id

    def __reschedule(self, id, index):
        if self._no_reschedule:
            log.debug("SchedulerService:__reschedule: process quitting, refusing to reschedule %s", id)
            return False

        task = self.__get_entry(id)
        expire_time = self.__get_reschedule_expire_time(task, index)
        if expire_time:
            log.debug("SchedulerService:__reschedule: rescheduling: - " + task.event_origin + " - Now: " + str(self.__now()) +
                      " - Expire: " + str(expire_time) + " - ID: " + id + " -Index:" + str(index))
            spawn = gevent.spawn_later(expire_time, self._expire_callback, id, index)
            self.__update_entry(id=id, index=index, spawn=spawn)

            return True
        else:
            log.debug("SchedulerService:__reschedule: timer expired. Removed from RR  : - " + task.event_origin + " - Now: " + str(self.__now()) +
                      " - Expire: " + str(expire_time) + " - ID: " + id + " -Index:" + str(index))
        return False

    def __create_entry(self, task, spawns, id):
        self.schedule_entries[id] = {"task": task, "spawns": spawns}

    def __update_entry(self, id, index, spawn=None, interval=None):
        if spawn is not None:
            self.schedule_entries[id]["spawns"][index] = spawn
        if interval is not None:
            self.schedule_entries[id]["task"].interval = interval

    def __get_entry_all(self, id):
        return self.schedule_entries[id]

    def __get_spawns(self, id):
        return self.schedule_entries[id]["spawns"]

    def __get_entry(self, id):
        return self.schedule_entries[id]["task"]

    def __delete(self, id, index, force=False):
        if id in self.schedule_entries:
            task = self.__get_entry(id)
            if force and type(task) == TimeOfDayTimer:
                log.debug("SchedulerService:__delete: entry deleted " + id + " -Index:" + str(index))
                del self.schedule_entries[id]
                self.clients.resource_registry.delete(id)
            elif type(task) == TimeOfDayTimer:
                task = self.__get_entry(id)
                task.times_of_day[index] = None
                # Delete if all the timers are set to none
                are_all_timers_expired = True
                for time_of_day in task.times_of_day:
                    if time_of_day is not None:
                        are_all_timers_expired = False
                        break
                if are_all_timers_expired:
                    log.debug("SchedulerService:__delete: entry deleted " + id + " -Index:" + str(index))
                    del self.schedule_entries[id]
                    self.clients.resource_registry.delete(id)
            else:
                log.debug("SchedulerService:__delete: entry deleted " + id + " -Index:" + str(index))
                del self.schedule_entries[id]
                self.clients.resource_registry.delete(id)
            return True
        return False

    def __is_timer_valid(self, task):
        # Validate event_origin is set
        if not task.event_origin:
            log.error("SchedulerService.__is_timer_valid: event_origin is not set")
            return False
        # Validate the timer is set correctly
        if type(task) == IntervalTimer:
            if (task.end_time != -1 and (self.__now_posix(self.__now()) >= task.end_time)):
                log.error("SchedulerService.__is_timer_valid: IntervalTimer is set to incorrect value")
                return False
        elif type(task) == TimeOfDayTimer:
            for time_of_day in task.times_of_day:
                time_of_day['hour'] = int(time_of_day['hour'])
                time_of_day['minute'] = int(time_of_day['minute'])
                time_of_day['second'] = int(time_of_day['second'])
                if ((time_of_day['hour'] < 0 or time_of_day['hour'] > 23) or
                    (time_of_day['minute'] < 0 or time_of_day['minute'] > 59) or
                    (time_of_day['second'] < 0 or time_of_day['second'] > 61)):
                    log.error("SchedulerService.__is_timer_valid: TimeOfDayTimer is set to incorrect value")
                    return False
        else:
            return False

        return True

    def _stop_pending_timers(self):
        """
        Safely stops all pending and active timers.

        For all timers still waiting to run, calls kill on them. For active timers, let
        them exit naturally and prevent the reschedule by setting the _no_reschedule flag.
        """
        # prevent reschedules
        self._no_reschedule = True

        gls = []
        for timer_id in self.schedule_entries:
            spawns = self.__get_spawns(timer_id)

            for spawn in spawns:
                gls.append(spawn)
                # only kill spawns that haven't started yet
                if spawn._start_event is not None:
                    spawn.kill()

            log.debug("_stop_pending_timers: timer %s deleted", timer_id)

        self.schedule_entries.clear()

        # wait for running gls to finish up
        gevent.joinall(gls, timeout=10)

        # allow reschedules from here on out
        self._no_reschedule = False

    def on_system_restart(self):
        '''
        On system restart, get timer data from Resource Registry and restore the Scheduler state
        '''
        # Remove all active timers
        # When this method is called there should not be any active timers, but if it is called
        # from a test this removes any currently active timers so they can be restored from the Resource Registry
        self._stop_pending_timers()

        # Restore the timer from Resource Registry
        scheduler_entries, _ = self.clients.resource_registry.find_resources(RT.SchedulerEntry, id_only=False)
        for scheduler_entry in scheduler_entries:
            self.__schedule(scheduler_entry, scheduler_entry._id)
            log.debug("SchedulerService:on_system_restart: timer restored: " + scheduler_entry._id)

    def create_timer(self, scheduler_entry=None):
        """
        Create a timer which will send TimerEvents as requested for a given schedule.
        The schedule request is expressed through a specific subtype of TimerSchedulerEntry.
        The task is delivered as a TimerEvent to which processes can subscribe. The creator
        defines the fields of the task. A GUID-based id prefixed by a readable process name
        is recommended for the origin. Because the delivery of the task is via the ION Exchange
        there is potential for a small deviation in precision.
        Returns a timer_id which can be used to cancel the timer.

        @param scheduler_entry    TimerSchedulerEntry
        @retval timer_id    str
        @throws BadRequest    if the timer is malformed and cannot be scheduled
        """
        ##scheduler_entry = scheduler_entry.entry
        status = self.__is_timer_valid(scheduler_entry.entry)
        if not status:
            raise BadRequest
        id = self.__schedule(scheduler_entry)
        if not id:
            raise BadRequest
        return id

    def cancel_timer(self, timer_id=''):
        """
        Cancels an existing timer which has not reached its expire time.

        @param timer_id    str
        @throws BadRequest    if timer_id doesn't exist
        """
        try:
            spawns = self.__get_spawns(timer_id)
            for spawn in spawns:
                spawn.kill()
            log.debug("SchedulerService: cancel_timer: id: " + str(timer_id))
            self.__delete(id=timer_id, index=None, force=True)
        except:
            log.error("SchedulerService: cancel_timer: timer id doesn't exist: " + str(timer_id))
            raise BadRequest

    def create_interval_timer(self, start_time="", interval=0, end_time="", event_origin="", event_subtype=""):
        if (end_time != -1 and (self.__now_posix(self.__now()) >= end_time)) or not event_origin:
            log.error("SchedulerService.create_interval_timer: event_origin is not set")
            raise BadRequest
        if start_time == "now":
            start_time = self.__now_posix(self.__now())
        log.debug("SchedulerService:create_interval_timer start_time: %s interval: %s end_time: %s event_origin: %s" %(start_time, interval, end_time, event_origin))
        interval_timer = IonObject("IntervalTimer", {"start_time": start_time, "interval": interval, "end_time": end_time,
                                                     "event_origin": event_origin, "event_subtype": event_subtype})
        se = IonObject(RT.SchedulerEntry, {"entry": interval_timer})
        return self.create_timer(se)

    def create_time_of_day_timer(self, times_of_day=None, expires='', event_origin='', event_subtype=''):
        # Validate the timer
        if not event_origin:
            log.error("SchedulerService.create_time_of_day_timer: event_origin is set to invalid value")
            raise BadRequest
        for time_of_day in times_of_day:
            time_of_day['hour'] = int(time_of_day['hour'])
            time_of_day['minute'] = int(time_of_day['minute'])
            time_of_day['second'] = int(time_of_day['second'])
            log.debug("SchedulerService:create_time_of_day_timer - hour: %d minute: %d second: %d expires: %d event_origin: %s" %(time_of_day['hour'] , time_of_day['minute'] , time_of_day['second'], time_of_day['second'], event_origin))
            if ((time_of_day['hour'] < 0 or time_of_day['hour'] > 23) or
                (time_of_day['minute'] < 0 or time_of_day['minute'] > 59) or
                (time_of_day['second'] < 0 or time_of_day['second'] > 61)):
                log.error("SchedulerService:create_time_of_day_timer: TimeOfDayTimer is set to invalid value")
                raise BadRequest

        time_of_day_timer = IonObject("TimeOfDayTimer", {"times_of_day": times_of_day, "expires": expires,
                                                         "event_origin": event_origin, "event_subtype": event_subtype})

        se = IonObject(RT.SchedulerEntry, {"entry": time_of_day_timer})
        return self.create_timer(se)
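To make the rescheduling arithmetic easier to verify, here is the logic of __calculate_next_interval restated as a standalone function with a worked example; the function name and sample numbers are illustrative only.

def next_interval_delay(start_time, interval, current_time):
    # all values are POSIX seconds, mirroring __calculate_next_interval above
    if start_time < current_time:
        next_tick = start_time
        while next_tick < current_time:
            next_tick += interval
        return next_tick - current_time
    return (start_time - current_time) + interval

# a timer started at t=100 with a 30s interval, evaluated at t=175, next fires at t=190:
assert next_interval_delay(100, 30, 175) == 15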
class UploadCalibrationProcessing(ImmediateProcess):
    """
    Upload Calibration Processing Process

    This process gives ION clients and operators the ability to process uploaded calibration
    coefficients to calibrate data products.

    The parameters that this process accepts as configuration are:
        - fuc_id: The FileUploadContext identifier, required, stores where the file was written
    """

    def on_start(self):

        ImmediateProcess.on_start(self)

        # necessary argument, passed in via the configuration kwarg to schedule_process; the
        # "process" namespace avoids collisions
        fuc_id = self.CFG.get_safe('process.fuc_id', None)  # FileUploadContext ID

        # Clients
        self.object_store = self.container.object_store
        self.resource_registry = self.container.resource_registry
        self.event_publisher = EventPublisher(OT.ResetQCEvent)
        self.data_product_management = DataProductManagementServiceProcessClient(process=self)
        self.create_map()

        # run process
        if fuc_id:
            self.process(fuc_id)

        # cleanup
        self.event_publisher.close()



    def process(self,fuc_id):

        # get the Object (dict) containing details of the uploaded file
        fuc = self.object_store.read(fuc_id)

        if fuc['filetype'] == 'ZIP':
            raise BadRequest("ZIP format not determined by project scientists yet (2014-04-21)")
            #self.process_zip(fuc)
        else:
            self.process_csv(fuc)

    def create_map(self):
        '''
        Creates a map from OOI property numbers to the data products of the matching instrument devices
        '''
        self.property_map = {}

        for instrument_device in self.resource_registry.find_resources(restype=RT.InstrumentDevice)[0]:
            if instrument_device.ooi_property_number:
                self.property_map[instrument_device.ooi_property_number] = self.data_products_for_device(instrument_device)

    def data_products_for_device(self, device):
        data_products, _ = self.resource_registry.find_objects(device, PRED.hasOutputProduct, id_only=True)
        return data_products

    def dataset_for_data_product(self, data_product):
        datasets, _ = self.resource_registry.find_objects(data_product, PRED.hasDataset, id_only=True)
        return datasets[0]

    def do_something_with_the_update(self, updates):
        for property_no, calibration_update in updates.iteritems():
            # Check to see if we even have an instrument with this property number
            if property_no not in self.property_map:
                continue

            # Get the data product listings for this instrument
            data_products = self.property_map[property_no]
            # Go through each data product and update the data IF
            #  - There is a set of parameters that match those in the calibration

            for data_product in data_products:
                self.update_data_product(data_product, calibration_update)

    def update_data_product(self, data_product, calibration_update):
        parameters = [p.name for p in self.data_product_management.get_data_product_parameters(data_product)]

        dataset_updates = []
        for cal_name in calibration_update.iterkeys():
            if cal_name in parameters:
                dataset_id = self.dataset_for_data_product(data_product)
                dataset_updates.append(dataset_id)


        for dataset in dataset_updates:
            self.apply_to_dataset(dataset, calibration_update)

    def apply_to_dataset(self, dataset, calibration_update):
        cov = DatasetManagementService._get_coverage(dataset, mode='r+')
        try:
            self.set_sparse_values(cov, calibration_update)
            self.publish_calibration_event(dataset, calibration_update.keys())

        finally:
            cov.close()

    def set_sparse_values(self, cov, calibration_update):
        for calibration_name, updates in calibration_update.iteritems():
            if calibration_name not in cov.list_parameters():
                continue

            for update in updates:
                np_dict = {}
                self.check_units(cov, calibration_name, update['units'])
                start_date = self.ntp_from_iso(update['start_date'])
                np_dict[calibration_name] = ConstantOverTime(calibration_name, update['value'], time_start=start_date)

                cov.set_parameter_values(np_dict)


    def check_units(self, cov, calibration_name, units):
        pass

    def publish_calibration_event(self, dataset, calibrations):
        publisher = EventPublisher(OT.DatasetCalibrationEvent)
        publisher.publish_event(origin=dataset, calibrations=calibrations)

    def ntp_from_iso(self, iso):
        return TimeUtils.ntp_from_iso(iso)

    def process_csv(self, fuc):

        # CSV file open here
        csv_filename = fuc.get('path', None)
        if csv_filename is None:
            raise BadRequest("uploaded file has no path")

        # keep track of the number of calibrations we actually process
        nupdates = 0

        updates = {} # keys are reference_designators, use to update object store after parsing CSV

        with open(csv_filename, 'rb') as csvfile:
            # eliminate blank lines
            csvfile = (row for row in csvfile if len(row.strip()) > 0)
            # eliminate commented lines
            csvfile = (row for row in csvfile if not row.startswith('#'))
            # open CSV reader on the filtered rows (comments and blank lines already removed above)
            csv_reader = csv.reader(csvfile, delimiter=',')
            # iterate the rows returned by csv.reader
            for row in csv_reader:
                if len(row) != 6:
                    log.warn("invalid calibration line %s" % ','.join(row))
                    continue
                try:
                    ipn = row[0] # instrument_property_number
                    name = row[1] # calibration_name
                    value = float(row[2]) # calibration_value
                    units = row[3]
                    description = row[4] # description
                    start_date = row[5] # start_date TODO date object?
                    d = {
                        'value':value,
                        'units':units,
                        'description':description,
                        'start_date':start_date
                    }
                except ValueError as e:
                    continue #TODO error message? or just skip?
                # get ipn key
                if ipn not in updates:
                    updates[ipn] = {} # initialize dict of calibration names for this ipn
                if name not in updates[ipn]:
                    updates[ipn][name] = [] # will be a list of dicts
                updates[ipn][name].append(d)
                
                nupdates = nupdates + 1

        self.do_something_with_the_update(updates)
        # insert the updates into object store
        self.update_object_store(updates)

        # update FileUploadContext object (change status to complete)
        fuc['status'] = 'UploadCalibrationProcessing process complete - %d updates added to object store' % nupdates
        self.object_store.update_doc(fuc)

        # remove uploaded file
        try:
            os.remove(csv_filename)
        except OSError:
            pass # TODO take action to get this removed
    
    def process_zip(self,fuc):
        pass

    def update_object_store(self, updates):
        '''inserts the updates into object store'''
        for i in updates: # loops the instrument_property_number(ipn) in the updates object
            try: # if i exists in object_store, read it                           
                ipn = self.object_store.read(i) #TODO: what will this be?
            except: # if does not yet exist in object_store, create it (can't use update_doc because need to set id)
                ipn = self.object_store.create_doc({'_type':'CALIBRATION'},i) # CAUTION: this returns a tuple, not a dict like read() returns
                ipn = self.object_store.read(i) # read so we have a dict like we expect
            # merge all from updates[i] into dict destined for the object_store (ipn)
            for name in updates[i]: # loops the calibration_names under each IPN in updates
                #TODO: if name not initialized, will append work? if so, can use same op for both
                if name not in ipn: # if name doesn't exist, we can just add the entire object (dict of lists)
                    ipn[name] = updates[i][name]
                else: # if it does, we need to merge the new entries into the existing list
                    ipn[name].extend(updates[i][name]) # extend rather than append, so the list isn't nested
            # store updated ipn keyed object in object_store (should overwrite full object, contains all previous too)
            self.object_store.update_doc(ipn)
            # publish ResetQCEvent event (one for each instrument_property_number [AKA ipn])
            self.event_publisher.publish_event(origin=i)
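For reference, a standalone sketch of the per-row dictionary that process_csv() builds from the six expected CSV columns, and the shape of the resulting updates mapping; the sample values are invented for illustration.

sample_row = ['12345-00001',          # instrument_property_number (ipn)
              'CC_scale_factor',      # calibration_name
              '1.02',                 # calibration_value
              'counts',               # units
              'post-deployment cal',  # description
              '2014-01-01T00:00:00']  # start_date
ipn, name = sample_row[0], sample_row[1]
entry = {
    'value': float(sample_row[2]),
    'units': sample_row[3],
    'description': sample_row[4],
    'start_date': sample_row[5],
}
updates = {ipn: {name: [entry]}}      # same shape that is handed to update_object_store()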
class SchedulerService(BaseSchedulerService):
    def __init__(self, *args, **kwargs):
        BaseSchedulerService.__init__(self, *args, **kwargs)

        self.schedule_entries = {}
        self._no_reschedule = False

    def on_start(self):
        if CFG.get_safe("process.start_mode") == "RESTART":
            self.on_system_restart()
        self.pub = EventPublisher(event_type="ResourceEvent")

    def on_quit(self):
        self.pub.close()

        # throw killswitch on future reschedules
        self._no_reschedule = True

        # terminate any pending spawns
        self._stop_pending_timers()

    def __notify(self, task, id, index):
        log.debug("SchedulerService:__notify: - " + task.event_origin +
                  " - Time: " + str(self.__now()) + " - ID: " + id +
                  " -Index:" + str(index))
        self.pub.publish_event(origin=task.event_origin)

    def __now(self):
        return datetime.utcnow()

    def __now_posix(self, now):
        return time.mktime(now.timetuple())

    def _expire_callback(self, id, index):
        task = self.__get_entry(id)
        self.__notify(task, id, index)
        if not self.__reschedule(id, index):
            self.__delete(id, index)

    def __calculate_next_interval(self, task, current_time):
        if task.start_time < current_time:
            next_interval = task.start_time
            while (next_interval < current_time):
                next_interval = next_interval + task.interval
            return (next_interval - current_time)
        else:
            return (task.start_time - current_time) + task.interval

    def __get_expire_time(self, task):
        now = self.__now()
        now_posix = self.__now_posix(now)
        expires_in = []
        if type(task) == TimeOfDayTimer:
            for time_of_day in task.times_of_day:
                expire_time = datetime(now.year, now.month, now.day,
                                       time_of_day['hour'],
                                       time_of_day['minute'],
                                       time_of_day['second'])
                expires_in.append(ceil((expire_time - now).total_seconds()))
        elif type(task) == IntervalTimer and (task.end_time == -1 or (
            (now_posix + task.interval) <= task.end_time)):
            expires_in = [(self.__calculate_next_interval(task, now_posix))]
        return expires_in

    def __get_reschedule_expire_time(self, task, index):
        expires_in = False
        now = self.__now()
        now_posix = self.__now_posix(now)
        if type(task) == TimeOfDayTimer:
            if task.expires > now_posix:
                time_of_day = task.times_of_day[index]
                tomorrow = now + timedelta(days=1)
                expire_time = datetime(tomorrow.year, tomorrow.month,
                                       tomorrow.day, time_of_day['hour'],
                                       time_of_day['minute'],
                                       time_of_day['second'])
                expires_in = (ceil((expire_time - now).total_seconds()))
            else:
                expires_in = False
        elif type(task) == IntervalTimer and (task.end_time == -1 or (
            (now_posix + task.interval) <= task.end_time)):
            if task.start_time <= now_posix:
                expires_in = (task.interval)
            else:
                expires_in = ((task.start_time - now_posix) + task.interval)

        return expires_in

    def __validate_expire_times(self, expire_times):
        for index, expire_time in enumerate(expire_times):
            if expire_time < 0:
                return False
        return True

    def __schedule(self, scheduler_entry, id=False):
        # if "id" is set, it means scheduler_entry is already in Resource Regsitry. This can occur during a sytsem restart
        spawns = []
        task = scheduler_entry.entry
        expire_times = self.__get_expire_time(task)
        if not self.__validate_expire_times(expire_times):
            log.error(
                "SchedulerService:__schedule: scheduling: expire time is less than zero: "
            )
            return False

        if not id:
            id, _ = self.clients.resource_registry.create(scheduler_entry)
        self.__create_entry(task, spawns, id)
        for index, expire_time in enumerate(expire_times):
            log.debug("SchedulerService:__schedule: scheduling: - " +
                      task.event_origin + " - Now: " + str(self.__now()) +
                      " - Expire: " + str(expire_time) + " - ID: " + id +
                      " - Index:" + str(index))
            spawn = gevent.spawn_later(expire_time, self._expire_callback, id,
                                       index)
            spawns.append(spawn)
        return id

    def __reschedule(self, id, index):
        if self._no_reschedule:
            log.debug(
                "SchedulerService:__reschedule: process quitting, refusing to reschedule %s",
                id)
            return False

        task = self.__get_entry(id)
        expire_time = self.__get_reschedule_expire_time(task, index)
        if expire_time:
            log.debug("SchedulerService:__reschedule: rescheduling: - " +
                      task.event_origin + " - Now: " + str(self.__now()) +
                      " - Expire: " + str(expire_time) + " - ID: " + id +
                      " -Index:" + str(index))
            spawn = gevent.spawn_later(expire_time, self._expire_callback, id,
                                       index)
            self.__update_entry(id=id, index=index, spawn=spawn)

            return True
        else:
            log.debug(
                "SchedulerService:__reschedule: timer expired. Removed from RR  : - "
                + task.event_origin + " - Now: " + str(self.__now()) +
                " - Expire: " + str(expire_time) + " - ID: " + id +
                " -Index:" + str(index))
        return False

    def __create_entry(self, task, spawns, id):
        self.schedule_entries[id] = {"task": task, "spawns": spawns}

    def __update_entry(self, id, index, spawn=None, interval=None):
        if spawn is not None:
            self.schedule_entries[id]["spawns"][index] = spawn
        if interval is not None:
            self.schedule_entries[id]["task"].interval = interval

    def __get_entry_all(self, id):
        return self.schedule_entries[id]

    def __get_spawns(self, id):
        return self.schedule_entries[id]["spawns"]

    def __get_entry(self, id):
        return self.schedule_entries[id]["task"]

    def __delete(self, id, index, force=False):
        if id in self.schedule_entries:
            task = self.__get_entry(id)
            if force and type(task) == TimeOfDayTimer:
                log.debug("SchedulerService:__delete: entry deleted " + id +
                          " -Index:" + str(index))
                del self.schedule_entries[id]
                self.clients.resource_registry.delete(id)
            elif type(task) == TimeOfDayTimer:
                task = self.__get_entry(id)
                task.times_of_day[index] = None
                # Delete if all the timers are set to none
                are_all_timers_expired = True
                for time_of_day in task.times_of_day:
                    if time_of_day is not None:
                        are_all_timers_expired = False
                        break
                if are_all_timers_expired:
                    log.debug("SchedulerService:__delete: entry deleted " +
                              id + " -Index:" + str(index))
                    del self.schedule_entries[id]
                    self.clients.resource_registry.delete(id)
            else:
                log.debug("SchedulerService:__delete: entry deleted " + id +
                          " -Index:" + str(index))
                del self.schedule_entries[id]
                self.clients.resource_registry.delete(id)
            return True
        return False

    def __is_timer_valid(self, task):
        # Validate event_origin is set
        if not task.event_origin:
            log.error(
                "SchedulerService.__is_timer_valid: event_origin is not set")
            return False
        # Validate the timer is set correctly
        if type(task) == IntervalTimer:
            if (task.end_time != -1
                    and (self.__now_posix(self.__now()) >= task.end_time)):
                log.error(
                    "SchedulerService.__is_timer_valid: IntervalTimer is set to incorrect value"
                )
                return False
        elif type(task) == TimeOfDayTimer:
            for time_of_day in task.times_of_day:
                time_of_day['hour'] = int(time_of_day['hour'])
                time_of_day['minute'] = int(time_of_day['minute'])
                time_of_day['second'] = int(time_of_day['second'])
                if ((time_of_day['hour'] < 0 or time_of_day['hour'] > 23) or
                    (time_of_day['minute'] < 0 or time_of_day['minute'] > 59)
                        or
                    (time_of_day['second'] < 0 or time_of_day['second'] > 61)):
                    log.error(
                        "SchedulerService.__is_timer_valid: TimeOfDayTimer is set to incorrect value"
                    )
                    return False
        else:
            return False

        return True

    def _stop_pending_timers(self):
        """
        Safely stops all pending and active timers.

        For all timers still waiting to run, calls kill on them. For active timers, let
        them exit naturally and prevent the reschedule by setting the _no_reschedule flag.
        """
        # prevent reschedules
        self._no_reschedule = True

        gls = []
        for timer_id in self.schedule_entries:
            spawns = self.__get_spawns(timer_id)

            for spawn in spawns:
                gls.append(spawn)
                # only kill spawns that haven't started yet
                if spawn._start_event is not None:
                    spawn.kill()

            log.debug("_stop_pending_timers: timer %s deleted", timer_id)

        self.schedule_entries.clear()

        # wait for running gls to finish up
        gevent.joinall(gls, timeout=10)

        # allow reschedules from here on out
        self._no_reschedule = False

    def on_system_restart(self):
        '''
        On system restart, get timer data from Resource Registry and restore the Scheduler state
        '''
        # Remove all active timers
        # When this method is called there should not be any active timers, but if it is called
        # from a test this removes any currently active timers so they can be restored from the Resource Registry
        self._stop_pending_timers()

        # Restore the timer from Resource Registry
        scheduler_entries, _ = self.clients.resource_registry.find_resources(
            RT.SchedulerEntry, id_only=False)
        for scheduler_entry in scheduler_entries:
            self.__schedule(scheduler_entry, scheduler_entry._id)
            log.debug("SchedulerService:on_system_restart: timer restored: " +
                      scheduler_entry._id)

    def create_timer(self, scheduler_entry=None):
        """
        Create a timer which will send TimerEvents as requested for a given schedule.
        The schedule request is expressed through a specific subtype of TimerSchedulerEntry.
        The task is delivered as a TimerEvent to which processes can subscribe. The creator
        defines the fields of the task. A GUID-based id prefixed by a readable process name
        is recommended for the origin. Because the delivery of the task is via the ION Exchange
        there is potential for a small deviation in precision.
        Returns a timer_id which can be used to cancel the timer.

        @param scheduler_entry    TimerSchedulerEntry
        @retval timer_id    str
        @throws BadRequest    if the timer is malformed and cannot be scheduled
        """
        ##scheduler_entry = scheduler_entry.entry
        status = self.__is_timer_valid(scheduler_entry.entry)
        if not status:
            raise BadRequest
        id = self.__schedule(scheduler_entry)
        if not id:
            raise BadRequest
        return id

    def cancel_timer(self, timer_id=''):
        """
        Cancels an existing timer which has not reached its expire time.

        @param timer_id    str
        @throws BadRequest    if timer_id doesn't exist
        """
        try:
            spawns = self.__get_spawns(timer_id)
            for spawn in spawns:
                spawn.kill()
            log.debug("SchedulerService: cancel_timer: id: " + str(timer_id))
            self.__delete(id=timer_id, index=None, force=True)
        except:
            log.error(
                "SchedulerService: cancel_timer: timer id doesn't exist: " +
                str(timer_id))
            raise BadRequest

    def create_interval_timer(self,
                              start_time="",
                              interval=0,
                              end_time="",
                              event_origin="",
                              event_subtype=""):
        if (end_time != -1 and
            (self.__now_posix(self.__now()) >= end_time)) or not event_origin:
            log.error(
                "SchedulerService.create_interval_timer: end_time has already passed or event_origin is not set"
            )
            raise BadRequest
        if start_time == "now":
            start_time = self.__now_posix(self.__now())
        log.debug(
            "SchedulerService:create_interval_timer start_time: %s interval: %s end_time: %s event_origin: %s"
            % (start_time, interval, end_time, event_origin))
        interval_timer = IonObject(
            "IntervalTimer", {
                "start_time": start_time,
                "interval": interval,
                "end_time": end_time,
                "event_origin": event_origin,
                "event_subtype": event_subtype
            })
        se = IonObject(RT.SchedulerEntry, {"entry": interval_timer})
        return self.create_timer(se)

    def create_time_of_day_timer(self,
                                 times_of_day=None,
                                 expires='',
                                 event_origin='',
                                 event_subtype=''):
        # Validate the timer
        if not event_origin:
            log.error(
                "SchedulerService.create_time_of_day_timer: event_origin is set to invalid value"
            )
            raise BadRequest
        for time_of_day in times_of_day:
            time_of_day['hour'] = int(time_of_day['hour'])
            time_of_day['minute'] = int(time_of_day['minute'])
            time_of_day['second'] = int(time_of_day['second'])
            log.debug(
                "SchedulerService:create_time_of_day_timer - hour: %d minute: %d second: %d expires: %s event_origin: %s"
                % (time_of_day['hour'], time_of_day['minute'],
                   time_of_day['second'], expires, event_origin))
            if ((time_of_day['hour'] < 0 or time_of_day['hour'] > 23) or
                (time_of_day['minute'] < 0 or time_of_day['minute'] > 59) or
                (time_of_day['second'] < 0 or time_of_day['second'] > 61)):
                log.error(
                    "SchedulerService:create_time_of_day_timer: TimeOfDayTimer is set to invalid value"
                )
                raise BadRequest

        time_of_day_timer = IonObject(
            "TimeOfDayTimer", {
                "times_of_day": times_of_day,
                "expires": expires,
                "event_origin": event_origin,
                "event_subtype": event_subtype
            })

        se = IonObject(RT.SchedulerEntry, {"entry": time_of_day_timer})
        return self.create_timer(se)
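A standalone restatement of the TimeOfDayTimer branch of __get_expire_time, showing why __validate_expire_times() rejects entries whose time of day has already passed today; the function name and example values are illustrative.

from datetime import datetime
from math import ceil

def seconds_until_times_of_day(now, times_of_day):
    delays = []
    for tod in times_of_day:
        target = datetime(now.year, now.month, now.day,
                          tod['hour'], tod['minute'], tod['second'])
        # negative when the time of day has already passed today
        delays.append(ceil((target - now).total_seconds()))
    return delays

now = datetime(2014, 4, 21, 10, 0, 0)
delays = seconds_until_times_of_day(now, [{'hour': 10, 'minute': 30, 'second': 0},
                                          {'hour': 9, 'minute': 0, 'second': 0}])
# delays -> [1800, -3600]; the negative entry would fail __validate_expire_times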
class SystemManagementService(BaseSystemManagementService):
    """ container management requests are handled by the event listener
        ion.processes.event.container_manager.ContainerManager
        which must be running on each container.
    """
    def on_start(self, *a, **b):
        super(SystemManagementService, self).on_start(*a, **b)
        self.sender = EventPublisher(process=self)

    def on_quit(self, *a, **b):
        self.sender.close()

    def perform_action(self, predicate, action):
        userid = None  # get from context
        self.sender.publish_event(event_type=OT.ContainerManagementRequest,
                                  origin=userid,
                                  predicate=predicate,
                                  action=action)

    def set_log_level(self, logger='', level='', recursive=False):
        self.perform_action(
            ALL_CONTAINERS_INSTANCE,
            IonObject(OT.ChangeLogLevel,
                      logger=logger,
                      level=level,
                      recursive=recursive))

    def reset_policy_cache(self, headers=None, timeout=None):
        """Clears and reloads the policy caches in all of the containers.

        @throws BadRequest    None
        """
        self.perform_action(ALL_CONTAINERS_INSTANCE,
                            IonObject(OT.ResetPolicyCache))

    def trigger_garbage_collection(self):
        """Triggers a garbage collection in all containers

        @throws BadRequest    None
        """
        self.perform_action(ALL_CONTAINERS_INSTANCE,
                            IonObject(OT.TriggerGarbageCollection))

    def trigger_container_snapshot(self,
                                   snapshot_id='',
                                   include_snapshots=None,
                                   exclude_snapshots=None,
                                   take_at_time='',
                                   clear_all=False,
                                   persist_snapshot=True,
                                   snapshot_kwargs=None):

        if not snapshot_id:
            snapshot_id = get_ion_ts()
        if not snapshot_kwargs:
            snapshot_kwargs = {}

        self.perform_action(
            ALL_CONTAINERS_INSTANCE,
            IonObject(OT.TriggerContainerSnapshot,
                      snapshot_id=snapshot_id,
                      include_snapshots=include_snapshots,
                      exclude_snapshots=exclude_snapshots,
                      take_at_time=take_at_time,
                      clear_all=clear_all,
                      persist_snapshot=persist_snapshot,
                      snapshot_kwargs=snapshot_kwargs))
        log.info("Event to trigger container snapshots sent. snapshot_id=%s" %
                 snapshot_id)

    def start_gevent_block(self, alarm_mode=False):
        self.perform_action(
            ALL_CONTAINERS_INSTANCE,
            IonObject(OT.StartGeventBlock, alarm_mode=alarm_mode))

    def stop_gevent_block(self):
        self.perform_action(ALL_CONTAINERS_INSTANCE,
                            IonObject(OT.StopGeventBlock))

    def prepare_system_shutdown(self, mode=''):
        self.perform_action(ALL_CONTAINERS_INSTANCE,
                            IonObject(OT.PrepareSystemShutdown, mode=mode))
Example #10
class TransformWorker(TransformStreamListener):
    CACHE_LIMIT = CFG.get_safe('container.ingestion_cache', 5)

    # Status publishes after a set of granules has been processed
    STATUS_INTERVAL = 100

    def __init__(self, *args,**kwargs):
        super(TransformWorker, self).__init__(*args, **kwargs)

        # the set of data processes hosted by this worker
        self._dataprocesses = {}
        self._streamid_map = {}
        self._publisher_map = {}

        self._transforms = {}


    def on_start(self): #pragma no cover
        #super(TransformWorker,self).on_start()
        #--------------------------------------------------------------------------------
        # Explicit on_start
        #--------------------------------------------------------------------------------

        # Skip TransformStreamListener and go to StreamProcess to avoid the subscriber being created
        # We want explicit management of the thread and subscriber object for ingestion
        #todo: check how to manage multi queue subscription (transform scenario 3)

        TransformStreamProcess.on_start(self)

        #todo: can the subscription be changed or updated when new dataprocesses are added ?
        self.queue_name = self.CFG.get_safe('process.queue_name',self.id)
        self.subscriber = StreamSubscriber(process=self, exchange_name=self.queue_name, callback=self.receive_callback)
        self.thread_lock = RLock()

        self._rpc_server = self.container.proc_manager._create_listening_endpoint(from_name=self.id, process=self)
        self.add_endpoint(self._rpc_server)

        self.start_listener()

        #todo: determine and publish appropriate set of status events
        self.event_publisher = EventPublisher(OT.DataProcessStatusEvent)



        url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        filepath = self.download_egg(url)
        print filepath
        import pkg_resources
        pkg_resources.working_set.add_entry('ion_example-0.1-py2.7.egg')
        from ion_example.add_arrays import add_arrays


    def on_quit(self): #pragma no cover
        self.event_publisher.close()
        if self.subscriber_thread:
            self.stop_listener()
        super(TransformWorker, self).on_quit()

    def start_listener(self):
        # We use a lock here to prevent possible race conditions from starting multiple listeners and coverage clobbering
        with self.thread_lock:
            self.subscriber_thread = self._process.thread_manager.spawn(self.subscriber.listen, thread_name='%s-subscriber' % self.id)

    def stop_listener(self):
        # Avoid race conditions with coverage operations (Don't start a listener at the same time as closing one)
        with self.thread_lock:
            self.subscriber.close()
            self.subscriber_thread.join(timeout=10)
            self.subscriber_thread = None



    @handle_stream_exception()
    def recv_packet(self, msg, stream_route, stream_id):
        ''' receive packet for ingestion '''
        log.debug('received granule for stream %s', stream_id)

        if msg == {}:
            log.error('Received empty message from stream: %s', stream_id)
            return
        # Message validation
        if not isinstance(msg, Granule):
            log.error('Ingestion received a message that is not a granule: %s', msg)
            return


        rdt = RecordDictionaryTool.load_from_granule(msg)
        if rdt is None:
            log.error('Invalid granule (no RDT) for stream %s', stream_id)
            return
        if not len(rdt):
            log.debug('Empty granule for stream %s', stream_id)
            return

        dp_id_list = self.retrieve_dataprocess_for_stream(stream_id)

        for dp_id in dp_id_list:

            function, argument_list = self.retrieve_function_and_define_args(dp_id)

            args = []
            rdt = RecordDictionaryTool.load_from_granule(msg)

            #create the input arguments list
            #todo: this logic is tied to the example function, generalize
            for func_param, record_param in argument_list.iteritems():
                args.append(rdt[record_param])
            try:
                #run the calc
                #todo: nothing in the data process resource to specify multi-out map
                result = function(*args)

                out_stream_definition, output_parameter = self.retrieve_dp_output_params(dp_id)

                rdt = RecordDictionaryTool(stream_definition_id=out_stream_definition)
                publisher = self._publisher_map.get(dp_id,'')

                rdt[ output_parameter ] = result

                if publisher:
                    publisher.publish(rdt.to_granule())
                else:
                    log.error('Publisher not found for data process %s', dp_id)

                self.update_dp_metrics( dp_id )

            except ImportError:
                log.error('Error running transform')

    def retrieve_dataprocess_for_stream(self, stream_id):
        # if any data processes apply to this stream
        dp_id_list = []
        if stream_id in self._streamid_map:
            dp_id_list = self._streamid_map[stream_id]
        else:
            dp_id_list = self.load_data_process(stream_id)
        return dp_id_list


    def retrieve_function_and_define_args(self, dataprocess_id):
        import importlib
        argument_list = {}
        args = []
        #load the details of this data process
        dataprocess_info = self._dataprocesses[dataprocess_id]
        try:
            #todo: load once into a 'set' of modules?
            #load the associated transform function
            egg = self.download_egg(dataprocess_info.get_safe('uri',''))
            import pkg_resources
            pkg_resources.working_set.add_entry(egg)

            module = importlib.import_module(dataprocess_info.get_safe('module', '') )
            function = getattr(module, dataprocess_info.get_safe('function','') )
            arguments = dataprocess_info.get_safe('arguments', '')
            argument_list = dataprocess_info.get_safe('argument_map', {})
        except ImportError:
            log.error('Error running transform')

        return function, argument_list

    def retrieve_dp_output_params(self, dataprocess_id):
        dataprocess_info = self._dataprocesses[dataprocess_id]
        out_stream_definition = dataprocess_info.get_safe('out_stream_def', '')
        output_parameter = dataprocess_info.get_safe('output_param','')
        return out_stream_definition, output_parameter


    def update_dp_metrics(self, dataprocess_id):
        #update metrics
        dataprocess_info = self._dataprocesses[dataprocess_id]
        dataprocess_info.granule_counter += 1
        if dataprocess_info.granule_counter % self.STATUS_INTERVAL == 0:
            #publish a status update event
            self.event_publisher.publish_event(origin=dataprocess_id, origin_type='DataProcess', status=DataProcessStatusType.NORMAL,
                                   description='data process status update. %s granules processed'% dataprocess_info.granule_counter )


    def load_data_process(self, stream_id=""):

        dpms_client = DataProcessManagementServiceClient()

        dataprocess_details = dpms_client.read_data_process_for_stream(stream_id)
        dataprocess_details = DotDict(dataprocess_details or {})
        dataprocess_id = dataprocess_details.dataprocess_id

        #set metrics attributes
        dataprocess_details.granule_counter = 0

        self._dataprocesses[dataprocess_id] = dataprocess_details

        #add the stream id to the map
        if 'in_stream_id' in dataprocess_details:
            if dataprocess_details['in_stream_id'] in self._streamid_map:
                (self._streamid_map[ dataprocess_details['in_stream_id'] ]).append(dataprocess_id)
            else:
                self._streamid_map[ dataprocess_details['in_stream_id'] ]  = [dataprocess_id]
        #todo: add transform worker id
        self.event_publisher.publish_event(origin=dataprocess_id, origin_type='DataProcess', status=DataProcessStatusType.NORMAL,
                                           description='data process loaded into transform worker')

        #create a publisher for output stream
        self.create_publisher(dataprocess_id, dataprocess_details)

        return [dataprocess_id]


    def create_publisher(self, dataprocess_id, dataprocess_details):
        #todo: create correct publisher type for the transform type
        #todo: DataMonitor, Event Monitor get EventPublishers
        #todo: DataProcess, EventProcess get stream publishers
        out_stream_route = dataprocess_details.get('out_stream_route', '')
        out_stream_id = dataprocess_details.get('out_stream_id', '')
        publisher = StreamPublisher(process=self, stream_id=out_stream_id, stream_route=out_stream_route)

        self._publisher_map[dataprocess_id] = publisher

    @classmethod
    def download_egg(cls, url):
        '''
        Downloads an egg from the URL specified into the cache directory
        Returns the full path to the egg
        '''
        # Get the filename based on the URL
        filename = url.split('/')[-1]
        # Store it in the $TMPDIR
        egg_cache = gettempdir()
        path = os.path.join(egg_cache, filename)
        r = requests.get(url, stream=True)
        if r.status_code == 200:
            # Download the file using requests stream
            with open(path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
                        f.flush()
            return path
        raise IOError("Couldn't download the file at %s" % url)
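
# --- Hedged usage sketch (illustration, not part of the original worker code) ---
# Shows how the download_egg() classmethod above could be combined with
# pkg_resources/importlib to make a transform egg importable. The egg URL and
# module name below are hypothetical placeholders; TransformWorker refers to
# the worker class defined later in this file.
def _example_load_transform_egg():
    import importlib
    import pkg_resources
    egg_path = TransformWorker.download_egg(
        'http://example.com/eggs/transform_example-0.1-py2.7.egg')  # hypothetical URL
    pkg_resources.working_set.add_entry(egg_path)  # make the egg importable
    return importlib.import_module('transform_example.transform')  # hypothetical module
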
class UploadCalibrationProcessing(ImmediateProcess):
    """
    Upload Calibration Processing Process

    This process provides the capability to ION clients and operators to process uploaded calibration
    coefficients to calibrate data products.

    The parameters that this process accepts as configuration are:
        - fuc_id: The FileUploadContext identifier, required, stores where the file was written
    """
    def on_start(self):

        ImmediateProcess.on_start(self)

        # necessary arguments, passed in via configuration kwarg to schedule_process. process namespace to avoid collisions
        fuc_id = self.CFG.get_safe('process.fuc_id',
                                   None)  # FileUploadContext ID

        # Clients
        self.object_store = self.container.object_store
        self.resource_registry = self.container.resource_registry
        self.event_publisher = EventPublisher(OT.ResetQCEvent)
        self.data_product_management = DataProductManagementServiceProcessClient(
            process=self)
        self.create_map()

        # run process
        if fuc_id:
            self.process(fuc_id)

        # cleanup
        self.event_publisher.close()

    def process(self, fuc_id):

        # get the Object (dict) containing details of the uploaded file
        fuc = self.object_store.read(fuc_id)

        if fuc['filetype'] == 'ZIP':
            raise BadRequest(
                "ZIP format not determined by project scientists yet (2014-04-21)"
            )
            #self.process_zip(fuc)
        else:
            self.process_csv(fuc)

    def create_map(self):
        '''
        Creates a map from property numbers to datasets
        '''
        self.property_map = {}

        for instrument_device in self.resource_registry.find_resources(
                restype=RT.InstrumentDevice)[0]:
            if instrument_device.ooi_property_number:
                self.property_map[
                    instrument_device.
                    ooi_property_number] = self.data_products_for_device(
                        instrument_device)

    def data_products_for_device(self, device):
        data_products, _ = self.resource_registry.find_objects(
            device, PRED.hasOutputProduct, id_only=True)
        return data_products

    def dataset_for_data_product(self, data_product):
        datasets, _ = self.resource_registry.find_objects(data_product,
                                                          PRED.hasDataset,
                                                          id_only=True)
        return datasets[0]

    def do_something_with_the_update(self, updates):
        for property_no, calibration_update in updates.iteritems():
            # Check to see if we even have an instrument with this property number
            if property_no not in self.property_map:
                continue

            # Get the data product listings for this instrument
            data_products = self.property_map[property_no]
            # Go through each data product and update the data IF
            #  - There is a set of parameters that match those in the calibration

            for data_product in data_products:
                self.update_data_product(data_product, calibration_update)

    def update_data_product(self, data_product, calibration_update):
        parameters = [
            p.name
            for p in self.data_product_management.get_data_product_parameters(
                data_product)
        ]

        dataset_updates = []
        for cal_name in calibration_update.iterkeys():
            if cal_name in parameters:
                dataset_id = self.dataset_for_data_product(data_product)
                dataset_updates.append(dataset_id)

        for dataset in dataset_updates:
            self.apply_to_dataset(dataset, calibration_update)

    def apply_to_dataset(self, dataset, calibration_update):
        cov = DatasetManagementService._get_coverage(dataset, mode='r+')
        try:
            self.set_sparse_values(cov, calibration_update)
            self.publish_calibration_event(dataset, calibration_update.keys())

        finally:
            cov.close()

    def set_sparse_values(self, cov, calibration_update):
        for calibration_name, updates in calibration_update.iteritems():
            if calibration_name not in cov.list_parameters():
                continue

            for update in updates:
                np_dict = {}
                self.check_units(cov, calibration_name, update['units'])
                start_date = self.ntp_from_iso(update['start_date'])
                np_dict[calibration_name] = ConstantOverTime(
                    calibration_name, update['value'], time_start=start_date)

                cov.set_parameter_values(np_dict)

    def check_units(self, cov, calibration_name, units):
        pass

    def publish_calibration_event(self, dataset, calibrations):
        publisher = EventPublisher(OT.DatasetCalibrationEvent)
        publisher.publish_event(origin=dataset, calibrations=calibrations)

    def ntp_from_iso(self, iso):
        return TimeUtils.ntp_from_iso(iso)

    def process_csv(self, fuc):

        # CSV file open here
        csv_filename = fuc.get('path', None)
        if csv_filename is None:
            raise BadRequest("uploaded file has no path")

        # keep track of the number of calibrations we actually process
        nupdates = 0

        updates = {}  # keys are instrument_property_numbers; used to update the object store after parsing the CSV

        with open(csv_filename, 'rb') as csvfile:
            # eliminate blank lines
            csvfile = (row for row in csvfile if len(row.strip()) > 0)
            # eliminate commented lines
            csvfile = (row for row in csvfile if not row.startswith('#'))
            # open CSV reader
            csv_reader = csv.reader(csvfile,
                                    delimiter=',')  # skip commented lines
            # iterate the rows returned by csv.reader
            for row in csv_reader:
                if len(row) != 6:
                    log.warn("invalid calibration line %s" % ','.join(row))
                    continue
                try:
                    ipn = row[0]  # instrument_property_number
                    name = row[1]  # calibration_name
                    value = float(row[2])  # calibration_value
                    units = row[3]
                    description = row[4]  # description
                    start_date = row[5]  # start_date TODO date object?
                    d = {
                        'value': value,
                        'units': units,
                        'description': description,
                        'start_date': start_date
                    }
                except ValueError:
                    log.warn("invalid calibration value in line %s" % ','.join(row))
                    continue
                # get ipn key
                if ipn not in updates:
                    updates[ipn] = {}  # initialize empty dict for this ipn
                if name not in updates[ipn]:
                    updates[ipn][name] = []  # will be a list of dicts
                updates[ipn][name].append(d)

                nupdates = nupdates + 1

        self.do_something_with_the_update(updates)
        # insert the updates into object store
        self.update_object_store(updates)

        # update FileUploadContext object (change status to complete)
        fuc['status'] = 'UploadCalibrationProcessing process complete - %d updates added to object store' % nupdates
        self.object_store.update_doc(fuc)

        # remove uploaded file
        try:
            os.remove(csv_filename)
        except OSError:
            pass  # TODO take action to get this removed

    def process_zip(self, fuc):
        pass

    def update_object_store(self, updates):
        '''inserts the updates into object store'''
        for i in updates:  # loops the instrument_property_number(ipn) in the updates object
            try:  # if i exists in object_store, read it
                ipn = self.object_store.read(i)  #TODO: what will this be?
            except:  # if does not yet exist in object_store, create it (can't use update_doc because need to set id)
                ipn = self.object_store.create_doc(
                    {'_type': 'CALIBRATION'}, i
                )  # CAUTION: this returns a tuple, not a dict like read() returns
                ipn = self.object_store.read(
                    i)  # read so we have a dict like we expect
            # merge all from updates[i] into dict destined for the object_store (ipn)
            for name in updates[
                    i]:  # loops the calibration_names under each IPN in updates
                #TODO: could use ipn.setdefault(name, []).extend(...) to collapse both branches
                if name not in ipn:  # if name doesn't exist, we can just add the entire list of dicts
                    ipn[name] = updates[i][name]
                else:  # if it does, extend the existing list with the new entries
                    ipn[name].extend(updates[i][name])
            # store updated ipn keyed object in object_store (should overwrite full object, contains all previous too)
            self.object_store.update_doc(ipn)
            # publish ResetQCEvent event (one for each instrument_property_number [AKA ipn])
            self.event_publisher.publish_event(origin=i)
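
# --- Hedged data-shape sketch (illustration only) ---
# The `updates` dict built by process_csv() above and consumed by
# do_something_with_the_update()/update_object_store() maps an instrument
# property number to calibration names, each holding a list of update dicts.
# All keys and values below are hypothetical placeholders.
_EXAMPLE_UPDATES = {
    '12345-67890': {                  # instrument_property_number (ipn)
        'CC_scale_factor': [          # calibration_name
            {'value': 1.04,
             'units': '1',
             'description': 'post-deployment recalibration',
             'start_date': '2014-01-01T00:00:00Z'},
        ],
    },
}
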
class TransformWorker(TransformStreamListener):
    CACHE_LIMIT = CFG.get_safe('container.ingestion_cache', 5)

    # Status publishes after a set of granules has been processed
    STATUS_INTERVAL = 100

    def __init__(self, *args, **kwargs):
        super(TransformWorker, self).__init__(*args, **kwargs)

        # the set of data processes hosted by this worker
        self._dataprocesses = {}
        self._streamid_map = {}
        self._publisher_map = {}

        self._transforms = {}

    def on_start(self):  #pragma no cover
        #super(TransformWorker,self).on_start()
        #--------------------------------------------------------------------------------
        # Explicit on_start
        #--------------------------------------------------------------------------------

        # Skip TransformStreamListener and go to StreamProcess to avoid the subscriber being created
        # We want explicit management of the thread and subscriber object for ingestion
        #todo: check how to manage multi queue subscription (transform scenario 3)

        TransformStreamProcess.on_start(self)

        #todo: can the subscription be changed or updated when new dataprocesses are added ?
        self.queue_name = self.CFG.get_safe('process.queue_name', self.id)
        self.subscriber = StreamSubscriber(process=self,
                                           exchange_name=self.queue_name,
                                           callback=self.receive_callback)
        self.thread_lock = RLock()

        self._rpc_server = self.container.proc_manager._create_listening_endpoint(
            from_name=self.id, process=self)
        self.add_endpoint(self._rpc_server)

        self.start_listener()

        #todo: determine and publish appropriate set of status events
        self.event_publisher = EventPublisher(OT.DataProcessStatusEvent)

    def on_quit(self):  #pragma no cover
        self.event_publisher.close()
        if self.subscriber_thread:
            self.stop_listener()
        super(TransformWorker, self).on_quit()

    def start_listener(self):
        # We use a lock here to prevent possible race conditions from starting multiple listeners and coverage clobbering
        with self.thread_lock:
            self.subscriber_thread = self._process.thread_manager.spawn(
                self.subscriber.listen, thread_name='%s-subscriber' % self.id)

    def stop_listener(self):
        # Avoid race conditions with coverage operations (Don't start a listener at the same time as closing one)
        with self.thread_lock:
            self.subscriber.close()
            self.subscriber_thread.join(timeout=10)
            self.subscriber_thread = None

    @handle_stream_exception()
    def recv_packet(self, msg, stream_route, stream_id):
        ''' receive packet for ingestion '''
        log.debug('received granule for stream %s', stream_id)

        if msg == {}:
            log.error('Received empty message from stream: %s', stream_id)
            return
        # Message validation
        if not isinstance(msg, Granule):
            log.error('Ingestion received a message that is not a granule: %s',
                      msg)
            return

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('received granule for stream rdt %s', rdt)
        if rdt is None:
            log.error('Invalid granule (no RDT) for stream %s', stream_id)
            return
        if not len(rdt):
            log.debug('Empty granule for stream %s', stream_id)
            return

        dp_id_list = self.retrieve_dataprocess_for_stream(stream_id)

        for dp_id in dp_id_list:

            function, argument_list, context = self.retrieve_function_and_define_args(
                stream_id, dp_id)

            args = []
            rdt = RecordDictionaryTool.load_from_granule(msg)

            #create the input arguments list
            #todo: this logic is tied to the example function, generalize
            #todo: how to inject params not in the granule such as stream_id, dp_id, etc?
            for func_param, record_param in argument_list.iteritems():
                args.append(rdt[record_param])
            if context:
                args.append(context)

            try:
                #run the calc
                #todo: nothing in the data process resource to specify multi-out map
                result = ''
                try:
                    result = function(*args)
                    log.debug('recv_packet  result: %s', result)
                except:
                    log.error('Error running transform %s with args %s.',
                              dp_id,
                              args,
                              exc_info=True)
                    raise

                out_stream_definition, output_parameter = self.retrieve_dp_output_params(
                    dp_id)

                if out_stream_definition and output_parameter:
                    rdt_out = RecordDictionaryTool(
                        stream_definition_id=out_stream_definition)
                    publisher = self._publisher_map.get(dp_id, '')

                    for param in rdt:
                        if param in rdt_out:
                            rdt_out[param] = rdt[param]
                    rdt_out[output_parameter] = result

                    if publisher:
                        log.debug('output rdt: %s', rdt_out)
                        publisher.publish(rdt_out.to_granule())
                    else:
                        log.error('Publisher not found for data process %s',
                                  dp_id)

                self.update_dp_metrics(dp_id)

            except ImportError:
                log.error('Error running transform for data process %s',
                          dp_id,
                          exc_info=True)

    def retrieve_dataprocess_for_stream(self, stream_id):
        # return the data processes (if any) that apply to this stream
        dp_id_list = []
        if stream_id in self._streamid_map:
            dp_id_list = self._streamid_map[stream_id]
        else:
            dp_id_list = self.load_data_process(stream_id)
        return dp_id_list

    def retrieve_function_and_define_args(self, stream_id, dataprocess_id):
        import importlib
        argument_list = {}
        function = ''
        context = {}

        #load the details of this data process
        dataprocess_info = self._dataprocesses[dataprocess_id]

        try:
            #todo: load once into a 'set' of modules?
            #load the associated transform function
            egg_uri = dataprocess_info.get_safe('uri', '')
            if egg_uri:
                egg = self.download_egg(egg_uri)
                import pkg_resources
                pkg_resources.working_set.add_entry(egg)
            else:
                log.warning(
                    'No uri provided for module in data process definition.')

            module = importlib.import_module(
                dataprocess_info.get_safe('module', ''))

            function = getattr(module,
                               dataprocess_info.get_safe('function', ''))
            arguments = dataprocess_info.get_safe('arguments', '')
            argument_list = dataprocess_info.get_safe('argument_map', {})

            if self.has_context_arg(function, argument_list):
                context = self.create_context_arg(stream_id, dataprocess_id)

        except ImportError:
            log.error('Error loading transform function for data process %s',
                      dataprocess_id,
                      exc_info=True)
        log.debug('retrieve_function_and_define_args  argument_list: %s',
                  argument_list)
        return function, argument_list, context

    def retrieve_dp_output_params(self, dataprocess_id):
        dataprocess_info = self._dataprocesses[dataprocess_id]
        out_stream_definition = dataprocess_info.get_safe('out_stream_def', '')
        output_parameter = dataprocess_info.get_safe('output_param', '')
        return out_stream_definition, output_parameter

    def update_dp_metrics(self, dataprocess_id):
        #update metrics
        dataprocess_info = self._dataprocesses[dataprocess_id]
        dataprocess_info.granule_counter += 1
        if dataprocess_info.granule_counter % self.STATUS_INTERVAL == 0:
            #publish a status update event
            self.event_publisher.publish_event(
                origin=dataprocess_id,
                origin_type='DataProcess',
                status=DataProcessStatusType.NORMAL,
                description='data process status update. %s granules processed'
                % dataprocess_info.granule_counter)

    def load_data_process(self, stream_id=""):

        dpms_client = DataProcessManagementServiceClient()

        dataprocess_details_list = dpms_client.read_data_process_for_stream(
            stream_id)

        dataprocess_ids = []
        #this returns a list of data process info dicts
        for dataprocess_details in dataprocess_details_list:

            dataprocess_details = DotDict(dataprocess_details or {})
            dataprocess_id = dataprocess_details.dataprocess_id

            #set metrics attributes
            dataprocess_details.granule_counter = 0

            self._dataprocesses[dataprocess_id] = dataprocess_details
            log.debug('load_data_process  dataprocess_id: %s', dataprocess_id)
            log.debug('load_data_process  dataprocess_details: %s',
                      dataprocess_details)

            # validate details
            # if no output stream info is available, log a warning; the transform may still publish an event, so proceed
            if not dataprocess_details.out_stream_def or not dataprocess_details.output_param:
                log.warning(
                    'No output stream details provided for data process %s, will not publish a granule',
                    dataprocess_id)

            #add the stream id to the map
            if 'in_stream_id' in dataprocess_details:
                if dataprocess_details['in_stream_id'] in self._streamid_map:
                    (self._streamid_map[dataprocess_details['in_stream_id']]
                     ).append(dataprocess_id)
                else:
                    self._streamid_map[dataprocess_details['in_stream_id']] = [
                        dataprocess_id
                    ]
            #todo: add transform worker id
            self.event_publisher.publish_event(
                origin=dataprocess_id,
                origin_type='DataProcess',
                status=DataProcessStatusType.NORMAL,
                description='data process loaded into transform worker')

            #create a publisher for output stream
            self.create_publisher(dataprocess_id, dataprocess_details)
            dataprocess_ids.append(dataprocess_id)

        return dataprocess_ids

    def create_publisher(self, dataprocess_id, dataprocess_details):
        #todo: create correct publisher type for the transform type
        #todo: DataMonitor, Event Monitor get EventPublishers
        #todo: DataProcess, EventProcess get stream publishers
        out_stream_route = dataprocess_details.get('out_stream_route', '')
        out_stream_id = dataprocess_details.get('out_stream_id', '')
        publisher = StreamPublisher(process=self,
                                    stream_id=out_stream_id,
                                    stream_route=out_stream_route)

        self._publisher_map[dataprocess_id] = publisher

    @classmethod
    def download_egg(cls, url):
        '''
        Downloads an egg from the URL specified into the cache directory
        Returns the full path to the egg
        '''
        # Get the filename based on the URL
        filename = url.split('/')[-1]
        # Store it in the $TMPDIR
        egg_cache = gettempdir()
        path = os.path.join(egg_cache, filename)
        r = requests.get(url, stream=True)
        if r.status_code == 200:
            # Download the file using requests stream
            with open(path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
                        f.flush()
            return path
        raise IOError("Couldn't download the file at %s" % url)

    def has_context_arg(self, func, argument_map):
        import inspect
        argspec = inspect.getargspec(func)
        return argspec.args != argument_map and 'context' in argspec.args

    def create_context_arg(self, stream_id, dataprocess_id):
        context = DotDict()
        context.stream_id = stream_id
        context.dataprocess_id = dataprocess_id
        return context
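
# --- Hedged transform-function sketch (illustration only) ---
# Illustrates the kind of callable retrieve_function_and_define_args() resolves
# from a downloaded egg: positional arguments are filled from the granule via
# argument_map, and an optional trailing `context` DotDict (stream_id,
# dataprocess_id) is appended when has_context_arg() finds a 'context'
# parameter. The function and parameter names here are hypothetical.
def _example_transform(conductivity, temperature, context=None):
    # trivial stand-in calculation; real transforms live in downloaded eggs
    result = [c * 10.0 + t for c, t in zip(conductivity, temperature)]
    if context is not None:
        log.debug('transform ran for stream %s / data process %s',
                  context.stream_id, context.dataprocess_id)
    return result
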
class UploadCalibrationProcessing(ImmediateProcess):
    """
    Upload Calibration Processing Process

    This process provides the capability to ION clients and operators to process uploaded calibration
    coefficients to calibrate data products.

    The parameters that this process accepts as configuration are:
        - fuc_id: The FileUploadContext identifier, required, stores where the file was written
    """

    def on_start(self):

        ImmediateProcess.on_start(self)

        # necessary arguments, passed in via configuration kwarg to schedule_process. process namespace to avoid collisions
        fuc_id = self.CFG.get_safe('process.fuc_id',None) # FileUploadContext ID

        # Clients
        self.object_store = self.container.object_store
        self.event_publisher = EventPublisher(OT.ResetQCEvent)

        # run process
        self.process(fuc_id)

        # cleanup
        self.event_publisher.close()

    def process(self,fuc_id):

        # get the Object (dict) containing details of the uploaded file
        fuc = self.object_store.read(fuc_id)

        if fuc['filetype'] == 'ZIP':
            raise BadRequest("ZIP format not determined by project scientists yet (2014-04-21)")
            #self.process_zip(fuc)
        else:
            self.process_csv(fuc)

    def process_csv(self, fuc):

        # CSV file open here
        csv_filename = fuc.get('path', None)
        if csv_filename is None:
            raise BadRequest("uploaded file has no path")

        # keep track of the number of calibrations we actually process
        nupdates = 0

        updates = {} # keys are instrument_property_numbers; used to update the object store after parsing the CSV

        with open(csv_filename, 'rb') as csvfile:
            # eliminate blank lines
            csvfile = (row for row in csvfile if len(row.strip()) > 0)
            # eliminate commented lines
            csvfile = (row for row in csvfile if not row.startswith('#'))
            # open CSV reader
            csv_reader = csv.reader(csvfile, delimiter=',') # skip commented lines
            # iterate the rows returned by csv.reader
            for row in csv_reader:
                if len(row) != 6:
                    log.warn("invalid calibration line %s" % ','.join(row))
                    continue
                try:
                    ipn = row[0] # instrument_property_number
                    name = row[1] # calibration_name
                    value = float(row[2]) # calibration_value
                    units = row[3]
                    description = row[4] # description
                    start_date = row[5] # start_date TODO date object?
                    d = {
                        'value':value,
                        'units':units,
                        'description':description,
                        'start_date':start_date
                    }
                except ValueError:
                    log.warn("invalid calibration value in line %s" % ','.join(row))
                    continue
                # get ipn key
                if ipn not in updates:
                    updates[ipn] = {} # initialize empty dict for this ipn
                if name not in updates[ipn]:
                    updates[ipn][name] = [] # will be a list of dicts
                updates[ipn][name].append(d)
                
                nupdates = nupdates + 1

        # insert the updates into object store
        self.update_object_store(updates)

        # update FileUploadContext object (change status to complete)
        fuc['status'] = 'UploadCalibrationProcessing process complete - %d updates added to object store' % nupdates
        self.object_store.update_doc(fuc)

        # remove uploaded file
        try:
            os.remove(csv_filename)
        except OSError:
            pass # TODO take action to get this removed
    
    def process_zip(self,fuc):
        pass

    def update_object_store(self, updates):
        '''inserts the updates into object store'''
        for i in updates: # loops the instrument_property_number(ipn) in the updates object
            try: # if i exists in object_store, read it                           
                ipn = self.object_store.read(i) #TODO: what will this be?
            except: # if does not yet exist in object_store, create it (can't use update_doc because need to set id)
                ipn = self.object_store.create_doc({'_type':'CALIBRATION'},i) # CAUTION: this returns a tuple, not a dict like read() returns
                ipn = self.object_store.read(i) # read so we have a dict like we expect
            # merge all from updates[i] into dict destined for the object_store (ipn)
            for name in updates[i]: # loops the calibration_names under each IPN in updates
                #TODO: could use ipn.setdefault(name, []).extend(...) to collapse both branches
                if name not in ipn: # if name doesn't exist, we can just add the entire list of dicts
                    ipn[name] = updates[i][name]
                else: # if it does, extend the existing list with the new entries
                    ipn[name].extend(updates[i][name])
            # store updated ipn keyed object in object_store (should overwrite full object, contains all previous too)
            self.object_store.update_doc(ipn)
            # publish ResetQCEvent event (one for each instrument_property_number [AKA ipn])
            self.event_publisher.publish_event(origin=i)
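
# --- Hedged CSV-format sketch (illustration only) ---
# process_csv() above expects six comma-separated columns per row:
#   instrument_property_number, calibration_name, value, units, description, start_date
# Blank rows and rows starting with '#' are skipped before parsing. The row
# below is a hypothetical example, not taken from a real calibration upload.
_EXAMPLE_CSV_ROW = '12345-67890,CC_scale_factor,1.04,1,post-deployment recalibration,2014-01-01T00:00:00Z'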