Example #1
    def _constraints_for_new_request(cls, config):
        old_list = get_safe(config, 'new_data_check') or []
        # CBM: Fix this when the DotList crap is sorted out
        old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

        ret = {}
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)

        new_list = [x for x in curr_list if x not in old_list]

        ret['start_time'] = get_time_from_filename(new_list[0][0],
                                                   date_extraction_pattern,
                                                   date_pattern)
        ret['end_time'] = get_time_from_filename(new_list[-1][0],
                                                 date_extraction_pattern,
                                                 date_pattern)

        ret['new_files'] = new_list
        ret['bounding_box'] = {}
        ret['vars'] = []

        return ret
    def _get_data(cls, config):
        parser = get_safe(config, 'parser', None)
        ext_dset_res = get_safe(config, 'external_dataset_res', None)
        if ext_dset_res and parser:
            #CBM: Not in use yet...
#            t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
#            x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
#            y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
#            z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
#            var_lst = ext_dset_res.dataset_description.parameters['variables']

            max_rec = get_safe(config, 'max_records', 1)
            dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
            tx_yml = get_safe(config, 'taxonomy')
            ttool = TaxyTool.load(tx_yml) #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool

            cnt = cls._calc_iter_cnt(len(parser.sensor_map), max_rec)
            for x in xrange(cnt):
                rdt = RecordDictionaryTool(taxonomy=ttool)

                for name in parser.sensor_map:
                    d = parser.data_map[name][x*max_rec:(x+1)*max_rec]
                    rdt[name] = d

                g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
                yield g
        else:
            log.warn('No parser or external dataset resource found in config')
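_calc_iter_cnt (and the calculate_iteration_count helper used in later snippets) is presumably just a ceiling division that determines how many max_rec-sized chunks cover all of the records; a minimal sketch under that assumption:

def calc_iter_cnt(total_recs, max_rec):
    # Assumed behaviour: integer ceiling division, e.g. 10 records with max_rec=3 -> 4 iterations
    return (total_recs + max_rec - 1) // max_rec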
    def set_configuration(self, config):
        """
        Expect configuration to have:
        - parser module/class
        - directory, wildcard to find data files
        - optional timestamp of last granule
        - optional poll rate
        - publish info
        """
        log.warn("DRIVER: set_configuration")
        log.error("Log level: %s", log.getEffectiveLevel())
        log.debug('using configuration: %s', config)
        self.config = config
        self.max_records = get_safe(config, 'max_records', 100)
        self.stream_config = self.CFG.get('stream_config', {})
        if len(self.stream_config) >= 1:
            # Use the first stream config; multiple streams are not yet differentiated
            stream_cfg = self.stream_config.values()[0]

        stream_id = stream_cfg['stream_id']
        stream_route = IonObject(OT.StreamRoute, routing_key=stream_cfg['routing_key'], exchange_point=stream_cfg['exchange_point'])
        param_dict = stream_cfg['stream_def_dict']['parameter_dictionary']
        self.publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route)
        self.parameter_dictionary = ParameterDictionary.load(param_dict)
        self.time_field = self.parameter_dictionary.get_temporal_context()
        self.latest_granule_time = get_safe(config, 'last_time', 0)
    def _constraints_for_new_request(cls, config):
        old_list = get_safe(config, 'new_data_check') or []
        # CBM: Fix this when the DotList crap is sorted out
        old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

        ret = {}
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)

        #compare the previously read files (old_list) with the current directory contents (curr_list)
        #if the file names match (curr_file[0] == old_file[0]), compare the current file size
        #(curr_file[2]) with the position reached when the file was last read (old_file[3])
        #if there's more data now than was read last time, add the file to the list
        new_list = []
        for curr_file in curr_list:
            found = False
            for old_file in old_list:
                if curr_file[0] == old_file[0]:      #same filename: the file is still in the directory and was previously read
                    found = True
                    if curr_file[2] > old_file[3]:   #curr_file[2] is the current file size, old_file[3] is the size at the last read
                        new_list.append((curr_file[0], curr_file[1], curr_file[2], old_file[-1]))     #re-add the file if it has grown since the last read
            if not found:
                new_list.append(curr_file)

        config['set_new_data_check'] = curr_list

        ret['new_files'] = new_list
        ret['bounding_box'] = {}
        ret['vars'] = []

        return ret
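The name/size comparison above can be read as a small standalone helper; a sketch assuming each entry is a (name, mtime, size, last_read_position) tuple as in the loops above:

def find_new_files(curr_list, old_list):
    # Entries are assumed to be (name, mtime, size, last_read_position) tuples
    old_by_name = dict((old[0], old) for old in old_list)
    new_list = []
    for curr in curr_list:
        old = old_by_name.get(curr[0])
        if old is None:
            new_list.append(curr)                                    # never seen before: read from the start
        elif curr[2] > old[3]:
            new_list.append((curr[0], curr[1], curr[2], old[-1]))    # file grew: resume from the last read position
    return new_list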
    def _get_data(cls, config):
        new_flst = get_safe(config, 'constraints.new_files', [])
        hdr_cnt = get_safe(config, 'header_count', SlocumParser.DEFAULT_HEADER_SIZE)
        for f in new_flst:
            try:
                parser = SlocumParser(f[0], hdr_cnt)
                #CBM: Not in use yet...
    #            ext_dset_res = get_safe(config, 'external_dataset_res', None)
    #            t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
    #            x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
    #            y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
    #            z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
    #            var_lst = ext_dset_res.dataset_description.parameters['variables']

                max_rec = get_safe(config, 'max_records', 1)
                dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
                #tx_yml = get_safe(config, 'taxonomy')
                #ttool = TaxyTool.load(tx_yml) #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool
                pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

                cnt = calculate_iteration_count(len(parser.sensor_map), max_rec)
                for x in xrange(cnt):
                    #rdt = RecordDictionaryTool(taxonomy=ttool)
                    rdt = RecordDictionaryTool(param_dictionary=pdict)

                    for name in parser.sensor_map:
                        d = parser.data_map[name][x*max_rec:(x+1)*max_rec]
                        rdt[name] = d

                    #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
                    g = build_granule(data_producer_id=dprod_id, record_dictionary=rdt, param_dictionary=pdict)
                    yield g
            except SlocumParseException as spe:
                # TODO: Decide what to do here, raise an exception or carry on
                log.error('Error parsing data file \'{0}\': {1}'.format(f, spe))
Example #6
    def _acquire_data(cls, config, unlock_new_data_callback):
        """
        Ensures required keys (such as stream_id) are available from config, configures the publisher and then calls:
             BaseDataHandler._new_data_constraints (only if config does not contain 'constraints')
             BaseDataHandler._publish_data passing BaseDataHandler._get_data as a parameter
        @param config Dict containing configuration parameters, may include constraints, formatters, etc
        @param unlock_new_data_callback BaseDataHandler callback function to allow conditional unlocking of the BaseDataHandler._semaphore
        """
        stream_id = get_safe(config, 'stream_id')
        if not stream_id:
            raise ConfigurationError('Configuration does not contain required \'stream_id\' key')
        #TODO: Configure the publisher
        publisher = None

        constraints = get_safe(config, 'constraints')
        if not constraints:
            gevent.getcurrent().link(unlock_new_data_callback)
            constraints = cls._new_data_constraints(config)
            config['constraints'] = constraints

        cls._publish_data(publisher, config, cls._get_data(config))

        # Publish a 'TestFinished' event
        if get_safe(config,'TESTING'):
            log.debug('Publish TestingFinished event')
            pub = EventPublisher('DeviceCommonLifecycleEvent')
            pub.publish_event(origin='BaseDataHandler._acquire_data', description='TestingFinished')
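_publish_data itself is not shown in these snippets; it presumably just drains the generator returned by _get_data and publishes each granule on the supplied publisher. A hedged sketch of that loop (publisher.publish accepting a granule is an assumption):

def _publish_data(publisher, data_generator):
    # Iterate the granule generator and publish every non-empty granule
    for granule in data_generator:
        if granule is not None:
            publisher.publish(granule)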
Example #7
    def _constraints_for_new_request(cls, config):
        old_list = get_safe(config, 'new_data_check') or []
        # CBM: Fix this when the DotList crap is sorted out
        old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

        ret = {}
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)

        # Determine which files are new
        new_list = [x for x in curr_list if x not in old_list]

        if len(new_list) == 0:
            raise NoNewDataWarning()

        # The curr_list is the new new_data_check - used for the next "new data" evaluation
        config['set_new_data_check'] = curr_list

        # The new_list is the set of new files - these will be processed
        ret['new_files'] = new_list
        ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
        ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
        ret['bounding_box'] = {}
        ret['vars'] = []

        return ret
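get_time_from_filename is assumed to pull a timestamp out of a file name using the configured extraction regex and strptime pattern; a minimal sketch with hypothetical patterns:

import calendar
import re
import time

def get_time_from_filename(filename, date_extraction_pattern, date_pattern):
    # e.g. filename='data_20120514.dat', date_extraction_pattern=r'data_(\d{8})\.dat',
    # date_pattern='%Y%m%d' -> seconds since the unix epoch (UTC)
    date_string = re.search(date_extraction_pattern, filename).group(1)
    return calendar.timegm(time.strptime(date_string, date_pattern))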
Example #8
    def set_configuration(self, config):
        """
        Expect configuration to have:
        - parser module/class
        - directory, wildcard to find data files
        - optional timestamp of last granule
        - optional poll rate
        - publish info
        """
        log.warn("DRIVER: set_configuration")
        log.error("Log level: %s", log.getEffectiveLevel())
        log.debug('using configuration: %s', config)
        self.config = config
        self.max_records = get_safe(config, 'max_records', 100)
        self.stream_config = self.CFG.get('stream_config', {})
        if len(self.stream_config) >= 1:
            # Use the first stream config; multiple streams are not yet differentiated
            stream_cfg = self.stream_config.values()[0]

        stream_id = stream_cfg['stream_id']
        stream_route = IonObject(OT.StreamRoute,
                                 routing_key=stream_cfg['routing_key'],
                                 exchange_point=stream_cfg['exchange_point'])
        param_dict = stream_cfg['stream_def_dict']['parameter_dictionary']
        self.publisher = StandaloneStreamPublisher(stream_id=stream_id,
                                                   stream_route=stream_route)
        self.parameter_dictionary = ParameterDictionary.load(param_dict)
        self.time_field = self.parameter_dictionary.get_temporal_context()
        self.latest_granule_time = get_safe(config, 'last_time', 0)
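Based on the docstring above, the driver configuration passed to set_configuration is expected to look roughly like the dict below; the key names and values are illustrative, not taken from a real deployment:

example_config = {
    'parser_mod': 'mi.dataset.parser.example',   # parser module (hypothetical path)
    'parser_cls': 'ExampleParser',               # parser class (hypothetical name)
    'directory': '/tmp/dsatest',                 # where to look for data files
    'pattern': '*.dat',                          # wildcard used to find data files
    'last_time': 0,                              # optional timestamp of last granule
    'max_records': 100,                          # records per published granule
}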
Example #9
    def _get_data(cls, config):
        parser = get_safe(config, 'parser', None)
        ext_dset_res = get_safe(config, 'external_dataset_res', None)
        if ext_dset_res and parser:
            #CBM: Not in use yet...
            #            t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
            #            x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
            #            y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
            #            z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
            #            var_lst = ext_dset_res.dataset_description.parameters['variables']

            max_rec = get_safe(config, 'max_records', 1)
            dprod_id = get_safe(config, 'data_producer_id',
                                'unknown data producer')
            tx_yml = get_safe(config, 'taxonomy')
            ttool = TaxyTool.load(tx_yml)  #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool

            cnt = cls._calc_iter_cnt(len(parser.sensor_map), max_rec)
            for x in xrange(cnt):
                rdt = RecordDictionaryTool(taxonomy=ttool)

                for name in parser.sensor_map:
                    d = parser.data_map[name][x * max_rec:(x + 1) * max_rec]
                    rdt[name] = d

                g = build_granule(data_producer_id=dprod_id,
                                  taxonomy=ttool,
                                  record_dictionary=rdt)
                yield g
        else:
            log.warn('No parser or external dataset resource found in config')
    def _acquire_data(cls, config, publisher, unlock_new_data_callback):
        """
        Ensures required keys (such as stream_id) are available from config, configures the publisher and then calls:
             BaseDataHandler._new_data_constraints (only if config does not contain 'constraints')
             BaseDataHandler._publish_data passing BaseDataHandler._get_data as a parameter
        @param config Dict containing configuration parameters, may include constraints, formatters, etc
        @param unlock_new_data_callback BaseDataHandler callback function to allow conditional unlocking of the BaseDataHandler._semaphore
        """
        log.debug('start _acquire_data: config={0}'.format(config))

        cls._init_acquisition_cycle(config)

        constraints = get_safe(config, 'constraints')
        if not constraints:
            gevent.getcurrent().link(unlock_new_data_callback)
            constraints = cls._new_data_constraints(config)
            if constraints is None:
                raise InstrumentParameterException(
                    "Data constraints returned from _new_data_constraints cannot be None"
                )
            config['constraints'] = constraints

        cls._publish_data(publisher, cls._get_data(config))

        # Publish a 'TestFinished' event
        if get_safe(config, 'TESTING'):
            log.debug('Publish TestingFinished event')
            pub = EventPublisher('DeviceCommonLifecycleEvent')
            pub.publish_event(origin='BaseDataHandler._acquire_data',
                              description='TestingFinished')
    def _acquire_data(cls, config, publisher, unlock_new_data_callback):
        """
        Ensures required keys (such as stream_id) are available from config, configures the publisher and then calls:
             BaseDataHandler._new_data_constraints (only if config does not contain 'constraints')
             BaseDataHandler._publish_data passing BaseDataHandler._get_data as a parameter
        @param config Dict containing configuration parameters, may include constraints, formatters, etc
        @param unlock_new_data_callback BaseDataHandler callback function to allow conditional unlocking of the BaseDataHandler._semaphore
        """
        log.debug('start _acquire_data: config={0}'.format(config))

        cls._init_acquisition_cycle(config)

        constraints = get_safe(config,'constraints')
        if not constraints:
            gevent.getcurrent().link(unlock_new_data_callback)
            constraints = cls._new_data_constraints(config)
            if constraints is None:
                raise InstrumentParameterException("Data constraints returned from _new_data_constraints cannot be None")
            config['constraints'] = constraints

        cls._publish_data(publisher, cls._get_data(config))

        # Publish a 'TestFinished' event
        if get_safe(config,'TESTING'):
            log.debug('Publish TestingFinished event')
            pub = EventPublisher('DeviceCommonLifecycleEvent')
            pub.publish_event(origin='BaseDataHandler._acquire_data', description='TestingFinished')
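_acquire_data is written to run in a greenlet so that unlock_new_data_callback fires when the greenlet exits (via gevent.getcurrent().link(...)); a hedged sketch of how a caller might spawn it, mirroring the spawn call in execute_acquire_data further below (handler, config, and publisher are assumed to already exist in the caller):

from gevent import spawn

glet = spawn(BaseDataHandler._acquire_data, config, publisher,
             handler._unlock_new_data_callback)
glet.join()  # optionally wait for the acquisition cycle to complete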
Example #12
    def _make_management_call(self, url, method="get", data=None):
        """
        Makes a call to the Rabbit HTTP management API using the passed in HTTP method.
        """
        log.debug("Calling rabbit API management (%s): %s", method, url)

        meth = getattr(requests, method)

        try:
            mgmt_cfg_key = CFG.get_safe("container.messaging.management.server", "rabbit_manage")
            mgmt_cfg = CFG.get_safe("server." + mgmt_cfg_key)
            username = get_safe(mgmt_cfg, "username") or "guest"
            password = get_safe(mgmt_cfg, "password") or "guest"

            with gevent.timeout.Timeout(10):
                r = meth(url, auth=(username, password), data=data)
            r.raise_for_status()

            if r.content:
                content = json.loads(r.content)
            else:
                content = None

        except (gevent.timeout.Timeout, requests.exceptions.Timeout) as ex:
            raise Timeout(str(ex))
        except (requests.exceptions.ConnectionError, socket.error) as ex:
            raise ServiceUnavailable(str(ex))
        except requests.exceptions.RequestException as ex:
            # the generic base exception all requests' exceptions inherit from, raise our
            # general server error too.
            raise ServerError(str(ex))

        return content
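A hedged usage sketch of _make_management_call from inside the same class; the /api/queues path follows the standard RabbitMQ management API layout, and the host/port shown are illustrative:

# List queues via the RabbitMQ management API (the management plugin commonly listens on 15672)
url = "http://localhost:15672/api/queues"
queues = self._make_management_call(url, method="get")
for q in queues or []:
    log.debug("queue %s holds %s messages", q.get("name"), q.get("messages"))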
    def execute(input=None, context=None, config=None, params=None, state=None, fileName=None):

        stream_definition_id = params
        mpl_allowed_numerical_types = ['int32', 'int64', 'uint32', 'uint64', 'float32', 'float64']

        if stream_definition_id is None:
            log.error("Matplotlib transform: Need an output stream definition to process graphs")
            return None

        # parse the incoming data
        rdt = RecordDictionaryTool.load_from_granule(input)

        # build a list of fields/variables that need to be plotted. Use the list provided by the UI
        # since the retrieved granule might have extra fields.
        fields = rdt.fields
        resolution = "640x480"
        if config:
            if 'parameters' in config:
                fields = config['parameters']
            if 'resolution' in config:
                resolution = config['resolution']

        vardict = {}
        vardict['time'] = get_safe(rdt, 'time')
        if vardict['time'] is None:
            log.error("Matplotlib transform: Did not receive a time field to work with")
            return None

        for field in fields:
            if field == 'time':
                continue

            # only consider fields which are supposed to be numbers.
            if (rdt[field] is not None) and (rdt[field].dtype not in mpl_allowed_numerical_types):
                continue

            vardict[field] = get_safe(rdt, field)

        arrLen = len(vardict['time'])
        # init the graph_data structure for storing values
        graph_data = {}
        for varname in vardict.keys():
            graph_data[varname] = []

        # If code reached here, the graph data storage has been initialized. Just add values
        # to the list
        for varname in vardict.keys():  # psd.list_field_names():
            if vardict[varname] is None:
                # create an array of zeros to compensate for missing values
                graph_data[varname].extend([0.0]*arrLen)
            else:
                graph_data[varname].extend(vardict[varname])

        out_granule = VizTransformMatplotlibGraphsAlgorithm.render_graphs(graph_data, stream_definition_id, fileName, resolution=resolution)

        return out_granule
Example #14
    def execute_acquire_data(self, *args):
        """
        Creates a copy of self._dh_config, creates a publisher, and spawns a greenlet to perform a data acquisition cycle
        If the args[0] is a dict, any entries keyed with one of the 'PATCHABLE_CONFIG_KEYS' are used to patch the config
        Greenlet binds to BaseDataHandler._acquire_data and passes the publisher and config
        Disallows multiple "new data" (unconstrained) requests using BaseDataHandler._semaphore lock
        Called from:
                      InstrumentAgent._handler_observatory_execute_resource
                       |-->  ExternalDataAgent._handler_streaming_execute_resource

        @param args First argument can be a config dictionary
        """
        log.debug('Executing acquire_data: args = {0}'.format(args))

        # Make a copy of the config to ensure no cross-pollution
        config = self._dh_config.copy()

        # Patch the config if mods are passed in
        try:
            config_mods = args[0]
            if not isinstance(config_mods, dict):
                raise IndexError()

            log.debug('Configuration modifications provided: {0}'.format(config_mods))
            for k in self._params['PATCHABLE_CONFIG_KEYS']:
                p = get_safe(config_mods, k)
                if p is not None:
                    config[k] = p

        except IndexError:
            log.info('No configuration modifications were provided')

        # Verify that there is a stream_id member in the config
        stream_id = get_safe(config, 'stream_id')
        if not stream_id:
            raise ConfigurationError('Configuration does not contain required \'stream_id\' member')

        isNew = get_safe(config, 'constraints') is None

        if isNew and not self._semaphore.acquire(blocking=False):
            log.warn('Already acquiring new data - action not duplicated')
            return

        ndc = None
        if isNew:
            # Get the NewDataCheck attachment and add its content to the config
            ext_ds_id = get_safe(config, 'external_dataset_res_id')
            if ext_ds_id:
                ndc = self._find_new_data_check_attachment(ext_ds_id)

        config['new_data_check'] = ndc

        # Create a publisher to pass into the greenlet
        publisher = self._stream_registrar.create_publisher(stream_id=stream_id)

        # Spawn a greenlet to do the data acquisition and publishing
        g = spawn(self._acquire_data, config, publisher, self._unlock_new_data_callback, self._update_new_data_check_attachment)
        log.debug('** Spawned {0}'.format(g))
        self._glet_queue.append(g)
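Per the docstring, a dict passed as the first argument patches the working config for any key listed in PATCHABLE_CONFIG_KEYS; a hedged usage sketch (handler and the patched key names are illustrative):

# Kick off an acquisition cycle, overriding a couple of patchable config keys
handler.execute_acquire_data({'max_records': 50, 'constraints': {'count': 10}})

# Or run with the stored config unchanged
handler.execute_acquire_data()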
    def execute(self, granule):
        """Processes incoming data!!!!
        """

        rdt = RecordDictionaryTool.load_from_granule(granule)
        #todo: use only flat dicts for now, may change later...
#        rdt0 = rdt['coordinates']
#        rdt1 = rdt['data']

        pressure = get_safe(rdt, 'pres')  # psd.get_values('pressure')

        longitude = get_safe(rdt, 'lon')  # psd.get_values('longitude')
        latitude = get_safe(rdt, 'lat')   # psd.get_values('latitude')
        time = get_safe(rdt, 'time')      # psd.get_values('time')
        height = get_safe(rdt, 'height')  # psd.get_values('height')

        log.warn('Got pressure: %s' % str(pressure))


        # L1
        # 1) The algorithm input is the L0 pressure data product (p_hex) and, in the case of the SBE 37IM, the pressure range (P_rng) from metadata.
        # 2) Convert the hexadecimal string to a decimal string
        # 3) For the SBE 37IM only, convert the pressure range (P_rng) from psia to dbar SBE 37IM
        #    Convert P_rng (input from metadata) from psia to dbar
        # 4) Perform scaling operation
        #    SBE 37IM
        #    L1 pressure data product (in dbar):


        # Use the constructor to put data into a granule
        psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])
        ### Assumes the config argument for output streams is known and there is only one 'output'.
        ### the stream id is part of the metadata which much go in each stream granule - this is awkward to do at the
        ### application level like this!

        scaled_pressure = pressure

        for i in xrange(len(pressure)):
            #todo: get pressure range from metadata (if present) and include in calc
            scaled_pressure[i] = pressure[i]

        root_rdt = RecordDictionaryTool(taxonomy=self.tx)

        #todo: use only flat dicts for now, may change later...
#        data_rdt = RecordDictionaryTool(taxonomy=self.tx)
#        coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

        root_rdt['pres'] = scaled_pressure
        root_rdt['time'] = time
        root_rdt['lat'] = latitude
        root_rdt['lon'] = longitude
        root_rdt['height'] = height

#        root_rdt['coordinates'] = coord_rdt
#        root_rdt['data'] = data_rdt

        return build_granule(data_producer_id='ctd_L1_pressure', taxonomy=self.tx, record_dictionary=root_rdt)
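The numbered comments above describe the SBE 37IM psia-to-dbar scaling, but the loop currently passes the pressure through unchanged. A hedged sketch of that conversion step; the 0.689475729 factor is the standard psi-to-dbar conversion, and subtracting one atmosphere (14.7 psia) is an assumption here:

PSI_TO_DBAR = 0.689475729  # 1 psi expressed in decibars

def scale_pressure_psia_to_dbar(p_psia):
    # Remove one standard atmosphere, then convert psi -> dbar
    return (p_psia - 14.7) * PSI_TO_DBAR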
Example #16
    def get_datastore(self, ds_name, profile=DataStore.DS_PROFILE.BASIC, config=None):
        """
        Factory method to get a datastore instance from given name, profile and config.
        This is the central point to cache these instances, to decide persistent or mock
        and to force clean the store on first use.
        @param ds_name  Logical name of datastore (will be scoped with sysname)
        @param profile  One of known constants determining the use of the store
        @param config  Override config to use
        """
        assert ds_name, "Must provide ds_name"
        if ds_name in self._datastores:
            log.debug("get_datastore(): Found instance of store '%s'" % ds_name)
            return self._datastores[ds_name]

        scoped_name = ("%s_%s" % (get_sys_name(), ds_name)).lower()

        # Imports here to prevent cyclic module dependency
        from pyon.core.bootstrap import CFG

        config = config or CFG

        persistent = not bool(get_safe(config, "system.mockdb"))
        force_clean = bool(get_safe(config, "system.force_clean"))

        log.info(
            "get_datastore(): Create instance of store '%s' {persistent=%s, force_clean=%s, scoped_name=%s}"
            % (ds_name, persistent, force_clean, scoped_name)
        )

        # Persistent (CouchDB) or MockDB?
        if persistent:
            # Use inline import to prevent circular import dependency
            from pyon.datastore.couchdb.couchdb_datastore import CouchDB_DataStore

            new_ds = CouchDB_DataStore(datastore_name=scoped_name, profile=profile)
        else:
            # Use inline import to prevent circular import dependency
            from pyon.datastore.mockdb.mockdb_datastore import MockDB_DataStore

            new_ds = MockDB_DataStore(datastore_name=scoped_name)  # , profile=profile)

        # Clean the store instance
        if force_clean:
            try:
                new_ds.delete_datastore(scoped_name)
            except NotFound:
                pass

        # Create store if not existing
        if not new_ds.datastore_exists(scoped_name):
            new_ds.create_datastore(scoped_name)

        # Set a few standard datastore instance fields
        new_ds.local_name = ds_name
        new_ds.ds_profile = profile

        self._datastores[ds_name] = new_ds

        return new_ds
    def _get_data(cls, config):
        """
        Retrieves config['constraints']['count'] number of random samples of length config['constraints']['array_len']
        @param config Dict of configuration parameters - must contain ['constraints']['count'] and ['constraints']['count']
        """
        ext_dset_res = get_safe(config, 'external_dataset_res', None)

        # Get the Dataset object from the config (should have been instantiated in _init_acquisition_cycle)
        ds = get_safe(config, 'dataset_object')

        if ext_dset_res and ds:
            t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
            x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
            y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
            z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
            var_lst = ext_dset_res.dataset_description.parameters['variables']

            t_slice = get_safe(config, 'constraints.temporal_slice', (slice(0, 1)))
            #TODO: Using 'eval' here is BAD - need to find a less sketchy way to pass constraints
            if isinstance(t_slice, str):
                t_slice = eval(t_slice)

            lon = ds.variables[x_vname][:]
            lat = ds.variables[y_vname][:]
            z = ds.variables[z_vname][:]

            t_arr = ds.variables[t_vname][t_slice]
            data_arrays = {}
            for varn in var_lst:
                data_arrays[varn] = ds.variables[varn][t_slice]

            max_rec = get_safe(config, 'max_records', 1)
            #dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')

            stream_def = get_safe(config, 'stream_def')

            cnt = calculate_iteration_count(t_arr.size, max_rec)
            for x in xrange(cnt):
                ta = t_arr[x * max_rec:(x + 1) * max_rec]

                # Make a 'master' RecDict
                rdt = RecordDictionaryTool(stream_definition_id=stream_def)

                # Assign coordinate values to the RecDict
                rdt[x_vname] = lon
                rdt[y_vname] = lat
                rdt[z_vname] = z

                # Assign data values to the RecDict
                rdt[t_vname] = ta
                for key, arr in data_arrays.iteritems():
                    d = arr[x * max_rec:(x + 1) * max_rec]
                    rdt[key] = d

                g = rdt.to_granule()
                yield g

            ds.close()
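The TODO above flags the eval of the temporal_slice string as unsafe; one less sketchy alternative is to accept only 'start:stop:step' strings and build the slice by hand, as in this sketch:

def parse_slice(slice_str):
    # Accepts strings such as '0:10', '0:10:2' or ':' and returns the corresponding slice object
    parts = (slice_str.split(':') + [None, None, None])[:3]
    start, stop, step = [int(p) if p not in (None, '') else None for p in parts]
    return slice(start, stop, step)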
Example #18
    def _get_data(cls, config):
        """
        Iterable function that acquires data from a source iteratively based on constraints provided by config
        Passed into BaseDataHandler._publish_data and iterated to publish samples.
        @param config dict containing configuration parameters, may include constraints, formatters, etc
        @retval an iterable that returns well-formed Granule objects on each iteration
        """
        new_flst = get_safe(config, 'constraints.new_files', [])
        parser_mod = get_safe(config, 'parser_mod', '')
        parser_cls = get_safe(config, 'parser_cls', '')

        module = __import__(parser_mod, fromlist=[parser_cls])
        classobj = getattr(module, parser_cls)

        for f in new_flst:
            try:
                size = os.stat(f[0]).st_size
                try:
                    #find the new data check index in config
                    index = -1
                    for ndc in config['set_new_data_check']:
                        if ndc[0] == f[0]:
                            index = config['set_new_data_check'].index(ndc)
                            break
                except Exception:
                    log.error('File name not found in attachment')

                parser = classobj(f[0], f[3])

                max_rec = get_safe(config, 'max_records', 1)
                stream_def = get_safe(config, 'stream_def')
                while True:
                    particles = parser.get_records(max_count=max_rec)
                    if not particles:
                        break

                    rdt = RecordDictionaryTool(stream_definition_id=stream_def)

                    populate_rdt(rdt, particles)

                    g = rdt.to_granule()

                    # TODO: record files already read for future additions...
                    # Update new data check with the latest file position
                    if 'set_new_data_check' in config and index > -1:
                        # WRONG: should only record this after file finished parsing,
                        # but may not have another yield at that point to trigger update
                        config['set_new_data_check'][index] = (f[0], f[1],
                                                               f[2], size)

                    yield g

#                parser.close()

            except Exception as ex:
                # TODO: Decide what to do here, raise an exception or carry on
                log.error('Error parsing data file \'{0}\': {1}'.format(f, ex))
    def get_visualization_image(self, data_product_id='', visualization_parameters=None, callback=''):

        # Error check
        if not data_product_id:
            raise BadRequest("The data_product_id parameter is missing")
        if visualization_parameters == {}:
            visualization_parameters = None

        # Extract the retrieval related parameters. Definitely init all parameters first
        query = None
        if visualization_parameters:
            query = {'parameters': []}
            # Error check and damage control. Definitely need time
            if 'parameters' in visualization_parameters:
                if 'time' not in visualization_parameters['parameters']:
                    visualization_parameters['parameters'].append('time')
                query['parameters'] = visualization_parameters['parameters']

            if 'stride_time' in visualization_parameters:
                query['stride_time'] = visualization_parameters['stride_time']
            if 'start_time' in visualization_parameters:
                query['start_time'] = visualization_parameters['start_time']
            if 'end_time' in visualization_parameters:
                query['end_time'] = visualization_parameters['end_time']

        # get the dataset_id associated with the data_product. Need it to do the data retrieval
        ds_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.DataSet, True)
        if not ds_ids:
            return None

        # Ideally just need the latest granule to figure out the list of images
        #replay_granule = self.clients.data_retriever.retrieve(ds_ids[0],{'start_time':0,'end_time':2})
        retrieved_granule = self.clients.data_retriever.retrieve(ds_ids[0], query=query)

        if retrieved_granule is None:
            return None

        # send the granule through the transform to get the matplotlib graphs
        mpl_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name('graph_image_param_dict',id_only=True)
        mpl_stream_def = self.clients.pubsub_management.create_stream_definition('mpl', parameter_dictionary_id=mpl_pdict_id)
        mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute(retrieved_granule, config=visualization_parameters, params=mpl_stream_def)

        if mpl_data_granule is None:
            return None

        mpl_rdt = RecordDictionaryTool.load_from_granule(mpl_data_granule)

        ret_dict = dict()
        ret_dict['content_type'] = (get_safe(mpl_rdt, "content_type"))[0]
        ret_dict['image_name'] = (get_safe(mpl_rdt, "image_name"))[0]
        # reason for encoding as base64 string is otherwise message pack complains about the bit stream
        ret_dict['image_obj'] = base64.encodestring((get_safe(mpl_rdt, "image_obj"))[0])

        if callback == '':
            return ret_dict
        else:
            return callback + "(" + simplejson.dumps(ret_dict) + ")"
    def _get_data(cls, config):
        """
        Iterable function that acquires data from a source iteratively based on constraints provided by config
        Passed into BaseDataHandler._publish_data and iterated to publish samples.
        @param config dict containing configuration parameters, may include constraints, formatters, etc
        @retval an iterable that returns well-formed Granule objects on each iteration
        """
        new_flst = get_safe(config, "constraints.new_files", [])
        parser_mod = get_safe(config, "parser_mod", "")
        parser_cls = get_safe(config, "parser_cls", "")

        module = __import__(parser_mod, fromlist=[parser_cls])
        classobj = getattr(module, parser_cls)

        for f in new_flst:
            try:
                size = os.stat(f[0]).st_size
                try:
                    # find the new data check index in config
                    index = -1
                    for ndc in config["set_new_data_check"]:
                        if ndc[0] == f[0]:
                            index = config["set_new_data_check"].index(ndc)
                            break
                except Exception:
                    log.error("File name not found in attachment")

                parser = classobj(f[0], f[3])

                max_rec = get_safe(config, "max_records", 1)
                stream_def = get_safe(config, "stream_def")
                while True:
                    particles = parser.get_records(max_count=max_rec)
                    if not particles:
                        break

                    rdt = RecordDictionaryTool(stream_definition_id=stream_def)

                    populate_rdt(rdt, particles)

                    g = rdt.to_granule()

                    # TODO: record files already read for future additions...
                    # Update new data check with the latest file position
                    if "set_new_data_check" in config and index > -1:
                        # WRONG: should only record this after file finished parsing,
                        # but may not have another yield at that point to trigger update
                        config["set_new_data_check"][index] = (f[0], f[1], f[2], size)

                    yield g

            #                parser.close()

            except Exception as ex:
                # TODO: Decide what to do here, raise an exception or carry on
                log.error("Error parsing data file '{0}': {1}".format(f, ex))
Example #21
    def __init__(self,
                 datastore_name=None,
                 host=None,
                 port=None,
                 username=None,
                 password=None,
                 config=None,
                 newlog=None,
                 scope=None,
                 **kwargs):
        """
        @param datastore_name  Name of datastore within server. Should be scoped by caller with sysname
        @param config  A standard config dict with connection params
        @param scope  Identifier to prefix the datastore name (e.g. sysname)
        """
        global log
        if newlog:
            log = newlog

        # Connection
        self.host = host or get_safe(config, 'server.couchdb.host') or 'localhost'
        self.port = port or get_safe(config, 'server.couchdb.port') or 5984
        self.username = username or get_safe(config, 'server.couchdb.username')
        self.password = password or get_safe(config, 'server.couchdb.password')
        if self.username and self.password:
            connection_str = "http://%s:%s@%s:%s" % (
                self.username, self.password, self.host, self.port)
            log.debug(
                "Using username:password authentication to connect to datastore"
            )
        else:
            connection_str = "http://%s:%s" % (self.host, self.port)

        # TODO: Potential security risk to emit password into log.
        log.info('Connecting to CouchDB server: %s' % connection_str)
        self.server = couchdb.Server(connection_str)

        self._datastore_cache = {}

        # Datastore (couch database) handling. Scope with given scope (sysname) and make all lowercase
        self.scope = scope
        if self.scope:
            self.datastore_name = ("%s_%s" % (self.scope, datastore_name)).lower() if datastore_name else None
        else:
            self.datastore_name = datastore_name.lower() if datastore_name else None

        # Just to test existence of the datastore
        if self.datastore_name:
            try:
                ds, _ = self._get_datastore()
            except NotFound:
                self.create_datastore()
                ds, _ = self._get_datastore()
    def validate_messages(self, msgs):
        msg = msgs

        rdt = RecordDictionaryTool.load_from_granule(msg.body)

        vardict = {}
        vardict['temp'] = get_safe(rdt, 'temp')
        vardict['time'] = get_safe(rdt, 'time')
        print vardict['time']
        print vardict['temp']
Example #23
    def _new_data_constraints(cls, config):
        """
        Returns a constraints dictionary with
        @param config Dict of configuration parameters - may be used to generate the returned 'constraints' dict
        """
        #TODO: Sort out what the config needs to look like - dataset_in??
        ext_dset_res = get_safe(config, 'external_dataset_res', None)
        log.debug('ExternalDataset Resource: {0}'.format(ext_dset_res))
        if ext_dset_res:
            #TODO: Use the external dataset resource to determine what data is new (i.e. pull 'old' fingerprint from here)
            log.debug('ext_dset_res.dataset_description = {0}'.format(ext_dset_res.dataset_description))
            log.debug('ext_dset_res.update_description = {0}'.format(ext_dset_res.update_description))
#            base_fingerprint = ext_dset_res.update_description
            base_nd_check = get_safe(ext_dset_res.update_description.parameters,'new_data_check')
#            base_nd_check = '\x83\xa7content\xdc\x00\xc9\xceM\xa0\xf3\x00\xceM\xa2D\x80\xceM\xa3\x96\x00\xceM\xa4\xe7\x80\xceM\xa69\x00\xceM\xa7\x8a\x80\xceM\xa8\xdc\x00\xceM\xaa-\x80\xceM\xab\x7f\x00\xceM\xac\xd0\x80\xceM\xae"\x00\xceM\xafs\x80\xceM\xb0\xc5\x00\xceM\xb2\x16\x80\xceM\xb3h\x00\xceM\xb4\xb9\x80\xceM\xb6\x0b\x00\xceM\xb7\\\x80\xceM\xb8\xae\x00\xceM\xb9\xff\x80\xceM\xbbQ\x00\xceM\xbc\xa2\x80\xceM\xbd\xf4\x00\xceM\xbfE\x80\xceM\xc0\x97\x00\xceM\xc1\xe8\x80\xceM\xc3:\x00\xceM\xc4\x8b\x80\xceM\xc5\xdd\x00\xceM\xc7.\x80\xceM\xc8\x80\x00\xceM\xc9\xd1\x80\xceM\xcb#\x00\xceM\xcct\x80\xceM\xcd\xc6\x00\xceM\xcf\x17\x80\xceM\xd0i\x00\xceM\xd1\xba\x80\xceM\xd3\x0c\x00\xceM\xd4]\x80\xceM\xd5\xaf\x00\xceM\xd7\x00\x80\xceM\xd8R\x00\xceM\xd9\xa3\x80\xceM\xda\xf5\x00\xceM\xdcF\x80\xceM\xdd\x98\x00\xceM\xde\xe9\x80\xceM\xe0;\x00\xceM\xe1\x8c\x80\xceM\xe2\xde\x00\xceM\xe4/\x80\xceM\xe5\x81\x00\xceM\xe6\xd2\x80\xceM\xe8$\x00\xceM\xe9u\x80\xceM\xea\xc7\x00\xceM\xec\x18\x80\xceM\xedj\x00\xceM\xee\xbb\x80\xceM\xf0\r\x00\xceM\xf1^\x80\xceM\xf2\xb0\x00\xceM\xf4\x01\x80\xceM\xf5S\x00\xceM\xf6\xa4\x80\xceM\xf7\xf6\x00\xceM\xf9G\x80\xceM\xfa\x99\x00\xceM\xfb\xea\x80\xceM\xfd<\x00\xceM\xfe\x8d\x80\xceM\xff\xdf\x00\xceN\x010\x80\xceN\x02\x82\x00\xceN\x03\xd3\x80\xceN\x05%\x00\xceN\x06v\x80\xceN\x07\xc8\x00\xceN\t\x19\x80\xceN\nk\x00\xceN\x0b\xbc\x80\xceN\r\x0e\x00\xceN\x0e_\x80\xceN\x0f\xb1\x00\xceN\x11\x02\x80\xceN\x12T\x00\xceN\x13\xa5\x80\xceN\x14\xf7\x00\xceN\x16H\x80\xceN\x17\x9a\x00\xceN\x18\xeb\x80\xceN\x1a=\x00\xceN\x1b\x8e\x80\xceN\x1c\xe0\x00\xceN\x1e1\x80\xceN\x1f\x83\x00\xceN \xd4\x80\xceN"&\x00\xceN#w\x80\xceN$\xc9\x00\xceN&\x1a\x80\xceN\'l\x00\xceN(\xbd\x80\xceN*\x0f\x00\xceN+`\x80\xceN,\xb2\x00\xceN.\x03\x80\xceN/U\x00\xceN0\xa6\x80\xceN1\xf8\x00\xceN3I\x80\xceN4\x9b\x00\xceN5\xec\x80\xceN7>\x00\xceN8\x8f\x80\xceN9\xe1\x00\xceN;2\x80\xceN<\x84\x00\xceN=\xd5\x80\xceN?\'\x00\xceN@x\x80\xceNA\xca\x00\xceNC\x1b\x80\xceNDm\x00\xceNE\xbe\x80\xceNG\x10\x00\xceNHa\x80\xceNI\xb3\x00\xceNK\x04\x80\xceNLV\x00\xceNM\xa7\x80\xceNN\xf9\x00\xceNPJ\x80\xceNQ\x9c\x00\xceNR\xed\x80\xceNT?\x00\xceNU\x90\x80\xceNV\xe2\x00\xceNX3\x80\xceNY\x85\x00\xceNZ\xd6\x80\xceN\\(\x00\xceN]y\x80\xceN^\xcb\x00\xceN`\x1c\x80\xceNan\x00\xceNb\xbf\x80\xceNd\x11\x00\xceNeb\x80\xceNf\xb4\x00\xceNh\x05\x80\xceNiW\x00\xceNj\xa8\x80\xceNk\xfa\x00\xceNmK\x80\xceNn\x9d\x00\xceNo\xee\x80\xceNq@\x00\xceNr\x91\x80\xceNs\xe3\x00\xceNu4\x80\xceNv\x86\x00\xceNw\xd7\x80\xceNy)\x00\xceNzz\x80\xceN{\xcc\x00\xceN}\x1d\x80\xceN~o\x00\xceN\x7f\xc0\x80\xceN\x81\x12\x00\xceN\x82c\x80\xceN\x83\xb5\x00\xceN\x85\x06\x80\xceN\x86X\x00\xceN\x87\xa9\x80\xceN\x88\xfb\x00\xceN\x8aL\x80\xceN\x8b\x9e\x00\xceN\x8c\xef\x80\xceN\x8eA\x00\xceN\x8f\x92\x80\xceN\x90\xe4\x00\xceN\x925\x80\xceN\x93\x87\x00\xceN\x94\xd8\x80\xceN\x96*\x00\xceN\x97{\x80\xceN\x98\xcd\x00\xceN\x9a\x1e\x80\xceN\x9bp\x00\xceN\x9c\xc1\x80\xceN\x9e\x13\x00\xceN\x9fd\x80\xceN\xa0\xb6\x00\xceN\xa2\x07\x80\xceN\xa3Y\x00\xceN\xa4\xaa\x80\xceN\xa5\xfc\x00\xceN\xa7M\x80\xceN\xa8\x9f\x00\xa6header\x83\xa2nd\x01\xa5shape\x91\xcc\xc9\xa4type\xa5int32\xad__ion_array__\xc3'

#            log.warn(base_nd_check)

#
#
#
#            log.warn(old_arr)


            t_slice = slice(None)
            if base_nd_check:
                t_new_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
                # Assumes the Dataset object was placed into config during acquisition-cycle init (cf. 'dataset_object' in the earlier snippet)
                ds = get_safe(config, 'dataset_object')
                t_new_arr = ds.variables[t_new_vname][t_slice]

                new_data = msgpack.packb(t_new_arr, default=encode_ion)
                if new_data != base_nd_check:
                    #new time data has arrived, figure out what's different and build the new slice
                    first_index = -1
                    last_index = -1
                    t_old_arr = msgpack.unpackb(base_nd_check, object_hook=decode_ion)
                    # Walk the new time array and note the span of values that are not in the old array
                    for i, t_val in enumerate(t_new_arr):
                        if t_val not in t_old_arr:
                            if first_index == -1:
                                first_index = i
                            last_index = i

                    # End index is made inclusive of the last new value
                    t_slice = slice(first_index, last_index + 1)


                #TG: Get new temporal data and encode it
                #TG: Compare the old with the new, if different, decode old and sort out what's different
                #TG: Build appropriate temporal_slice

            return {
                'temporal_slice':t_slice
            }

        return None
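The old/new time comparison above can also be expressed with numpy set operations; a sketch under the assumption that both arrays are one-dimensional and the new values should be covered by a single contiguous slice:

import numpy as np

def new_time_slice(t_old_arr, t_new_arr):
    # Indices of values present in the new time array but absent from the old one
    new_idx = np.nonzero(~np.in1d(t_new_arr, t_old_arr))[0]
    if new_idx.size == 0:
        return slice(None)  # nothing new arrived
    return slice(new_idx[0], new_idx[-1] + 1)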
Example #24
    def get_datastore(self, ds_name, profile=DataStore.DS_PROFILE.BASIC, config=None):
        """
        Factory method to get a datastore instance from given name, profile and config.
        This is the central point to cache these instances, to decide persistent or mock
        and to force clean the store on first use.
        @param ds_name  Logical name of datastore (will be scoped with sysname)
        @param profile  One of known constants determining the use of the store
        @param config  Override config to use
        """
        assert ds_name, "Must provide ds_name"
        if ds_name in self._datastores:
            log.debug("get_datastore(): Found instance of store '%s'" % ds_name)
            return self._datastores[ds_name]

        scoped_name = DatastoreManager.get_scoped_name(ds_name)

        # Imports here to prevent cyclic module dependency
        from pyon.core.bootstrap import CFG
        config = config or CFG

        if self.persistent is None:
            self.persistent = not bool(get_safe(config, "system.mockdb"))
        if self.force_clean is None:
            self.force_clean = bool(get_safe(config, "system.force_clean"))

        # Create a datastore instance
        log.info("get_datastore(): Create instance of store '%s' {persistent=%s, scoped_name=%s}" % (
            ds_name, self.persistent, scoped_name))
        new_ds = DatastoreManager.get_datastore_instance(ds_name, self.persistent, profile)

        # Clean the store instance
        # TBD: Do we really want to do it here? or make it more manual?
        if self.force_clean:
            log.info("get_datastore(): Force clean store '%s'" % ds_name)
            try:
                new_ds.delete_datastore(scoped_name)
            except NotFound:
                pass

        # Create store if not existing
        if not new_ds.datastore_exists(scoped_name):
            new_ds.create_datastore(scoped_name)
        else:
            if self.persistent:
                # NOTE: This may be expensive if called more than once per container
                # If views exist and are dropped and recreated
                new_ds._define_views(profile=profile, keepviews=True)

        # Set a few standard datastore instance fields
        new_ds.local_name = ds_name
        new_ds.ds_profile = profile

        self._datastores[ds_name] = new_ds

        return new_ds
    def _constraints_for_new_request(cls, config):
        """
        Returns a constraints dictionary with
        @param config Dict of configuration parameters - may be used to generate the returned 'constraints' dict
        """
        #TODO: Sort out what the config needs to look like - dataset_in??
        ext_dset_res = get_safe(config, 'external_dataset_res', None)
        log.debug('ExternalDataset Resource: {0}'.format(ext_dset_res))
        if ext_dset_res:
            #TODO: Use the external dataset resource to determine what data is new (i.e. pull 'old' fingerprint from here)
            log.debug('ext_dset_res.dataset_description = {0}'.format(ext_dset_res.dataset_description))
            log.debug('ext_dset_res.update_description = {0}'.format(ext_dset_res.update_description))
#            base_fingerprint = ext_dset_res.update_description
            base_nd_check = get_safe(ext_dset_res.update_description.parameters,'new_data_check')
#            base_nd_check = '\x83\xa7content\xdc\x00\xc9\xceM\xa0\xf3\x00\xceM\xa2D\x80\xceM\xa3\x96\x00\xceM\xa4\xe7\x80\xceM\xa69\x00\xceM\xa7\x8a\x80\xceM\xa8\xdc\x00\xceM\xaa-\x80\xceM\xab\x7f\x00\xceM\xac\xd0\x80\xceM\xae"\x00\xceM\xafs\x80\xceM\xb0\xc5\x00\xceM\xb2\x16\x80\xceM\xb3h\x00\xceM\xb4\xb9\x80\xceM\xb6\x0b\x00\xceM\xb7\\\x80\xceM\xb8\xae\x00\xceM\xb9\xff\x80\xceM\xbbQ\x00\xceM\xbc\xa2\x80\xceM\xbd\xf4\x00\xceM\xbfE\x80\xceM\xc0\x97\x00\xceM\xc1\xe8\x80\xceM\xc3:\x00\xceM\xc4\x8b\x80\xceM\xc5\xdd\x00\xceM\xc7.\x80\xceM\xc8\x80\x00\xceM\xc9\xd1\x80\xceM\xcb#\x00\xceM\xcct\x80\xceM\xcd\xc6\x00\xceM\xcf\x17\x80\xceM\xd0i\x00\xceM\xd1\xba\x80\xceM\xd3\x0c\x00\xceM\xd4]\x80\xceM\xd5\xaf\x00\xceM\xd7\x00\x80\xceM\xd8R\x00\xceM\xd9\xa3\x80\xceM\xda\xf5\x00\xceM\xdcF\x80\xceM\xdd\x98\x00\xceM\xde\xe9\x80\xceM\xe0;\x00\xceM\xe1\x8c\x80\xceM\xe2\xde\x00\xceM\xe4/\x80\xceM\xe5\x81\x00\xceM\xe6\xd2\x80\xceM\xe8$\x00\xceM\xe9u\x80\xceM\xea\xc7\x00\xceM\xec\x18\x80\xceM\xedj\x00\xceM\xee\xbb\x80\xceM\xf0\r\x00\xceM\xf1^\x80\xceM\xf2\xb0\x00\xceM\xf4\x01\x80\xceM\xf5S\x00\xceM\xf6\xa4\x80\xceM\xf7\xf6\x00\xceM\xf9G\x80\xceM\xfa\x99\x00\xceM\xfb\xea\x80\xceM\xfd<\x00\xceM\xfe\x8d\x80\xceM\xff\xdf\x00\xceN\x010\x80\xceN\x02\x82\x00\xceN\x03\xd3\x80\xceN\x05%\x00\xceN\x06v\x80\xceN\x07\xc8\x00\xceN\t\x19\x80\xceN\nk\x00\xceN\x0b\xbc\x80\xceN\r\x0e\x00\xceN\x0e_\x80\xceN\x0f\xb1\x00\xceN\x11\x02\x80\xceN\x12T\x00\xceN\x13\xa5\x80\xceN\x14\xf7\x00\xceN\x16H\x80\xceN\x17\x9a\x00\xceN\x18\xeb\x80\xceN\x1a=\x00\xceN\x1b\x8e\x80\xceN\x1c\xe0\x00\xceN\x1e1\x80\xceN\x1f\x83\x00\xceN \xd4\x80\xceN"&\x00\xceN#w\x80\xceN$\xc9\x00\xceN&\x1a\x80\xceN\'l\x00\xceN(\xbd\x80\xceN*\x0f\x00\xceN+`\x80\xceN,\xb2\x00\xceN.\x03\x80\xceN/U\x00\xceN0\xa6\x80\xceN1\xf8\x00\xceN3I\x80\xceN4\x9b\x00\xceN5\xec\x80\xceN7>\x00\xceN8\x8f\x80\xceN9\xe1\x00\xceN;2\x80\xceN<\x84\x00\xceN=\xd5\x80\xceN?\'\x00\xceN@x\x80\xceNA\xca\x00\xceNC\x1b\x80\xceNDm\x00\xceNE\xbe\x80\xceNG\x10\x00\xceNHa\x80\xceNI\xb3\x00\xceNK\x04\x80\xceNLV\x00\xceNM\xa7\x80\xceNN\xf9\x00\xceNPJ\x80\xceNQ\x9c\x00\xceNR\xed\x80\xceNT?\x00\xceNU\x90\x80\xceNV\xe2\x00\xceNX3\x80\xceNY\x85\x00\xceNZ\xd6\x80\xceN\\(\x00\xceN]y\x80\xceN^\xcb\x00\xceN`\x1c\x80\xceNan\x00\xceNb\xbf\x80\xceNd\x11\x00\xceNeb\x80\xceNf\xb4\x00\xceNh\x05\x80\xceNiW\x00\xceNj\xa8\x80\xceNk\xfa\x00\xceNmK\x80\xceNn\x9d\x00\xceNo\xee\x80\xceNq@\x00\xceNr\x91\x80\xceNs\xe3\x00\xceNu4\x80\xceNv\x86\x00\xceNw\xd7\x80\xceNy)\x00\xceNzz\x80\xceN{\xcc\x00\xceN}\x1d\x80\xceN~o\x00\xceN\x7f\xc0\x80\xceN\x81\x12\x00\xceN\x82c\x80\xceN\x83\xb5\x00\xceN\x85\x06\x80\xceN\x86X\x00\xceN\x87\xa9\x80\xceN\x88\xfb\x00\xceN\x8aL\x80\xceN\x8b\x9e\x00\xceN\x8c\xef\x80\xceN\x8eA\x00\xceN\x8f\x92\x80\xceN\x90\xe4\x00\xceN\x925\x80\xceN\x93\x87\x00\xceN\x94\xd8\x80\xceN\x96*\x00\xceN\x97{\x80\xceN\x98\xcd\x00\xceN\x9a\x1e\x80\xceN\x9bp\x00\xceN\x9c\xc1\x80\xceN\x9e\x13\x00\xceN\x9fd\x80\xceN\xa0\xb6\x00\xceN\xa2\x07\x80\xceN\xa3Y\x00\xceN\xa4\xaa\x80\xceN\xa5\xfc\x00\xceN\xa7M\x80\xceN\xa8\x9f\x00\xa6header\x83\xa2nd\x01\xa5shape\x91\xcc\xc9\xa4type\xa5int32\xad__ion_array__\xc3'

#            log.warn(base_nd_check)

#
#
#
#            log.warn(old_arr)


            t_slice = slice(None)
            if base_nd_check:
                t_new_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
                # Assumes the Dataset object was placed into config during acquisition-cycle init (cf. 'dataset_object' in the earlier snippet)
                ds = get_safe(config, 'dataset_object')
                t_new_arr = ds.variables[t_new_vname][t_slice]

                new_data = msgpack.packb(t_new_arr, default=encode_ion)
                if new_data != base_nd_check:
                    #new time data has arrived, figure out what's different and build the new slice
                    first_index = -1
                    last_index = -1
                    t_old_arr = msgpack.unpackb(base_nd_check, object_hook=decode_ion)
                    # Walk the new time array and note the span of values that are not in the old array
                    for i, t_val in enumerate(t_new_arr):
                        if t_val not in t_old_arr:
                            if first_index == -1:
                                first_index = i
                            last_index = i

                    # End index is made inclusive of the last new value
                    t_slice = slice(first_index, last_index + 1)


                #TG: Get new temporal data and encode it
                #TG: Compare the old with the new, if different, decode old and sort out what's different
                #TG: Build appropriate temporal_slice

            return {
                'temporal_slice':t_slice
            }

        return None
    def validate_messages(self, msgs):
        msg = msgs


        rdt = RecordDictionaryTool.load_from_granule(msg.body)

        vardict = {}
        vardict['temp'] = get_safe(rdt, 'temp')
        vardict['time'] = get_safe(rdt, 'time')
        print vardict['time']
        print vardict['temp']
Example #27
    def _acquire_sample(cls, config, publisher, unlock_new_data_callback,
                        update_new_data_check_attachment):
        """
        Ensures required keys (such as stream_id) are available from config, configures the publisher and then calls:
             BaseDataHandler._constraints_for_new_request (only if config does not contain 'constraints')
             BaseDataHandler._publish_data passing BaseDataHandler._get_data as a parameter
        @param config Dict containing configuration parameters, may include constraints, formatters, etc
        @param publisher the publisher used to publish data
        @param unlock_new_data_callback BaseDataHandler callback function to allow conditional unlocking of the BaseDataHandler._semaphore
        @param update_new_data_check_attachment classmethod to update the external dataset resources file list attachment
        @throws InstrumentParameterException if the data constraints are not a dictionary
        @retval None
        """
        log.debug('start _acquire_sample: config={0}'.format(config))

        cls._init_acquisition_cycle(config)

        constraints = get_safe(config, 'constraints')
        if not constraints:
            gevent.getcurrent().link(unlock_new_data_callback)
            try:
                constraints = cls._constraints_for_new_request(config)
            except NoNewDataWarning:
                #log.info(nndw.message)
                if get_safe(config, 'TESTING'):
                    #log.debug('Publish TestingFinished event')
                    pub = EventPublisher('DeviceCommonLifecycleEvent')
                    pub.publish_event(origin='BaseDataHandler._acquire_sample',
                                      description='TestingFinished')
                return

            if constraints is None:
                raise InstrumentParameterException(
                    "Data constraints returned from _constraints_for_new_request cannot be None"
                )
            config['constraints'] = constraints
        elif isinstance(constraints, dict):
            addnl_constr = cls._constraints_for_historical_request(config)
            if addnl_constr is not None and isinstance(addnl_constr, dict):
                constraints.update(addnl_constr)
        else:
            raise InstrumentParameterException(
                'Data constraints must be of type \'dict\':  {0}'.format(
                    constraints))

        cls._publish_data(publisher, cls._get_data(config), config,
                          update_new_data_check_attachment)

        # Publish a 'TestFinished' event
        if get_safe(config, 'TESTING'):
            #log.debug('Publish TestingFinished event')
            pub = EventPublisher(OT.DeviceCommonLifecycleEvent)
            pub.publish_event(origin='BaseDataHandler._acquire_sample',
                              description='TestingFinished')
Example #28
0
    def _get_data(cls, config):
        """
        Retrieves config['constraints']['count'] number of random samples of length config['constraints']['array_len']
        @param config Dict of configuration parameters - must contain ['constraints']['count'] and ['constraints']['array_len']
        """
        count = get_safe(config, 'constraints.count',1)
        array_len = get_safe(config, 'constraints.array_len',1)

        for i in xrange(count):
            time.sleep(0.1)
            yield npr.random_sample(array_len)
    def validate_messages(self, msgs):

        cc = self.container
        assertions = self.assertTrue

        rdt = RecordDictionaryTool.load_from_granule(msgs.body)

        vardict = {}
        vardict['temp'] = get_safe(rdt, 'temp')
        vardict['time'] = get_safe(rdt, 'time')
        print vardict['time']
        print vardict['temp']
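
A quick way to exercise a generator like the one above is to drive it with a plain config dict. The sketch below drops get_safe and the sleep for brevity; names are illustrative.

import numpy.random as npr

def generate_samples(config):
    # Yield `count` random arrays of length `array_len`, as in _get_data above
    constraints = config.get('constraints', {})
    count = constraints.get('count', 1)
    array_len = constraints.get('array_len', 1)
    for _ in range(count):
        yield npr.random_sample(array_len)

config = {'constraints': {'count': 2, 'array_len': 4}}
for sample in generate_samples(config):
    print(sample.shape)   # (4,), printed twice
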
Example #30
0
    def _spawn_stream_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as a data stream process.
        Attach to subscription queue with process function.
        """
        process_instance = self._create_process_instance(process_id, name, module, cls, config)

        listen_name = get_safe(config, "process.listen_name") or name
        log.debug("Stream Process (%s) listen_name: %s", name, listen_name)
        process_instance._proc_listen_name = listen_name

        process_instance.stream_subscriber = StreamSubscriber(process=process_instance, exchange_name=listen_name, callback=process_instance.call_process)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        pub_names = self._set_publisher_endpoints(process_instance, publish_streams)

        rsvc = self._create_listening_endpoint(node=self.container.node,
                                               from_name=process_instance.id,
                                               process=process_instance)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            self._cleanup_method(process_instance.id, rsvc)
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[rsvc, process_instance.stream_subscriber],
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_stream_process for %s" % process_instance._proc_name)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        return process_instance
Example #31
0
    def __init__(
        self,
        datastore_name=None,
        host=None,
        port=None,
        username=None,
        password=None,
        config=None,
        newlog=None,
        scope=None,
        **kwargs
    ):
        """
        @param datastore_name  Name of datastore within server. Should be scoped by caller with sysname
        @param config  A standard config dict with connection params
        @param scope  Identifier to prefix the datastore name (e.g. sysname)
        """
        global log
        if newlog:
            log = newlog

        # Connection
        self.host = host or get_safe(config, "server.couchdb.host") or "localhost"
        self.port = port or get_safe(config, "server.couchdb.port") or 5984
        self.username = username or get_safe(config, "server.couchdb.username")
        self.password = password or get_safe(config, "server.couchdb.password")
        if self.username and self.password:
            connection_str = "http://%s:%s@%s:%s" % (self.username, self.password, self.host, self.port)
            log.debug("Using username:password authentication to connect to datastore")
        else:
            connection_str = "http://%s:%s" % (self.host, self.port)

        # Log the connection target without credentials to avoid leaking the password
        log.info("Connecting to CouchDB server: http://%s:%s" % (self.host, self.port))
        self.server = couchdb.Server(connection_str)

        self._datastore_cache = {}

        # Datastore (couch database) handling. Scope with given scope (sysname) and make all lowercase
        self.scope = scope
        if self.scope:
            self.datastore_name = ("%s_%s" % (self.scope, datastore_name)).lower() if datastore_name else None
        else:
            self.datastore_name = datastore_name.lower() if datastore_name else None

        # Just to test existence of the datastore
        if self.datastore_name:
            try:
                ds, _ = self._get_datastore()
            except NotFound:
                self.create_datastore()
                ds, _ = self._get_datastore()
    def execute(self, granule):
        """
        Example process to double the salinity value
        """
        # Use the PointSupplementStreamParser to pull data from a granule
        #psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=packet)
        rdt = RecordDictionaryTool.load_from_granule(granule)

        salinity = get_safe(rdt, 'salinity')

        longitude = get_safe(rdt, 'lon')
        latitude = get_safe(rdt, 'lat')
        time = get_safe(rdt, 'time')
        height = get_safe(rdt, 'height')
#        #  pull data from a granule
#        psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=granule)
#
#        longitude = psd.get_values('longitude')
#        latitude = psd.get_values('latitude')
#        height = psd.get_values('height')
#        time = psd.get_values('time')

#        salinity = psd.get_values('salinity')

        salinity *= 2.0

        print ('Doubled salinity: %s' % str(salinity))


        # Use the constructor to put data into a granule
#        psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])
#
#        for i in xrange(len(salinity)):
#            point_id = psc.add_point(time=time[i],location=(longitude[i],latitude[i],height[i]))
#            psc.add_scalar_point_coverage(point_id=point_id, coverage_id='salinity', value=salinity[i])
#
#        return psc.close_stream_granule()
        root_rdt = RecordDictionaryTool(taxonomy=self.tx)

        #data_rdt = RecordDictionaryTool(taxonomy=self.tx)
        #coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

        root_rdt['salinity'] = salinity
        root_rdt['time'] = time
        root_rdt['lat'] = latitude
        root_rdt['lon'] = longitude
        root_rdt['height'] = height

        #root_rdt['coordinates'] = coord_rdt
        #root_rdt['data'] = data_rdt

        return build_granule(data_producer_id='ctd_L2_salinity', taxonomy=self.tx, record_dictionary=root_rdt)
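
The credential handling in the CouchDB __init__ above (embed username:password in the URL only when both are present) can be isolated into a tiny helper; this is an illustrative sketch, not part of the datastore class.

def couchdb_connection_str(host='localhost', port=5984, username=None, password=None):
    # Embed credentials only when both username and password are provided
    if username and password:
        return "http://%s:%s@%s:%s" % (username, password, host, port)
    return "http://%s:%s" % (host, port)

print(couchdb_connection_str())                                   # http://localhost:5984
print(couchdb_connection_str(username='ion', password='secret'))  # http://ion:secret@localhost:5984
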
Example #33
0
    def get_server_config(cls, config=None):
        default_server = get_safe(config, "container.datastore.default_server", "postgresql")

        server_cfg = get_safe(config, "server.%s" % default_server, None)
        if not server_cfg:
            # Support tests that mock out the CFG
            pg_cfg = get_safe(config, "server.postgresql", None)
            if pg_cfg:
                server_cfg = pg_cfg
            else:
                raise BadRequest("No datastore config available!")

        return server_cfg
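
The fallback behaviour of get_server_config can be traced with a toy config dict. _get_path below is a hypothetical stand-in for get_safe, written out only to keep the sketch self-contained.

def _get_path(d, path, default=None):
    # Walk a dotted path through nested dicts, returning default on any miss
    for key in path.split('.'):
        if not isinstance(d, dict) or key not in d:
            return default
        d = d[key]
    return d

cfg = {
    'container': {'datastore': {'default_server': 'couchdb'}},
    'server': {'postgresql': {'host': 'localhost', 'port': 5432}},
}
default_server = _get_path(cfg, 'container.datastore.default_server', 'postgresql')
server_cfg = _get_path(cfg, 'server.%s' % default_server) or _get_path(cfg, 'server.postgresql')
print(server_cfg)   # falls back to the postgresql block: {'host': 'localhost', 'port': 5432}
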
Example #34
0
    def _constraints_for_new_request(cls, config):
        """
        Returns a constraints dictionary with 'array_len' and 'count' assigned random integers
        @param config Dict of configuration parameters - may be used to generate the returned 'constraints' dict
        @retval constraints dictionary
        """
        old_list = get_safe(config, 'new_data_check') or []

        ret = {}
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(
            config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)

        # Determine which files are new
        #Not exactly the prettiest method, but here goes:
        #old_list comes in as a list of lists: [[]]
        #curr_list comes in as a list of tuples: [()]
        #each needs to be a set of tuples for set.difference to work properly
        #set.difference returns a list of tuples that appear in curr_list but not old_list, providing the new
        #files that are available

        curr_set = set(tuple(x) for x in curr_list)
        old_set = set(tuple(x) for x in old_list)

        #new_list = [tuple(x) for x in curr_list if list(x) not in old_list] - removed because it wasn't working properly
        new_list = list(curr_set.difference(old_set))

        if not new_list:
            raise NoNewDataWarning()

        # The curr_list is the new new_data_check - used for the next "new data" evaluation
        config['set_new_data_check'] = curr_list

        # The new_list is the set of new files - these will be processed
        ret['new_files'] = new_list
        ret['start_time'] = get_time_from_filename(new_list[0][0],
                                                   date_extraction_pattern,
                                                   date_pattern)
        ret['end_time'] = get_time_from_filename(new_list[-1][0],
                                                 date_extraction_pattern,
                                                 date_pattern)
        ret['bounding_box'] = {}
        ret['vars'] = []

        log.debug('constraints_for_new_request: {0}'.format(ret))

        return ret
    def on_init(self):
        self.create_workflow_timeout = get_safe(self.CFG, 'create_workflow_timeout', 60)
        self.terminate_workflow_timeout = get_safe(self.CFG, 'terminate_workflow_timeout', 60)
#        self.monitor_timeout = get_safe(self.CFG, 'user_queue_monitor_timeout', 300)
#        self.monitor_queue_size = get_safe(self.CFG, 'user_queue_monitor_size', 100)
#
#        #Setup and event object for use by the queue monitoring greenlet
#        self.monitor_event = gevent.event.Event()
#        self.monitor_event.clear()
#
#
#        #Start up queue monitor
#        self._process.thread_manager.spawn(self.user_vis_queue_monitor)
        return
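
The set-difference step documented in the comments of _constraints_for_new_request above can be seen on toy data; the file names and sizes below are invented.

# Old state as a list of lists and the current listing as a list of tuples,
# matching the shapes described in the comments above
old_list = [['file_a.nc', 1024], ['file_b.nc', 2048]]
curr_list = [('file_a.nc', 1024), ('file_b.nc', 2048), ('file_c.nc', 4096)]

curr_set = set(tuple(x) for x in curr_list)
old_set = set(tuple(x) for x in old_list)

new_list = sorted(curr_set.difference(old_set))
print(new_list)   # [('file_c.nc', 4096)] -- only the newly appeared file
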
Example #36
0
    def get_server_config(cls, config=None):
        default_server = get_safe(config, "container.datastore.default_server",
                                  "postgresql")

        server_cfg = get_safe(config, "server.%s" % default_server, None)
        if not server_cfg:
            # Support tests that mock out the CFG
            pg_cfg = get_safe(config, "server.postgresql", None)
            if pg_cfg:
                server_cfg = pg_cfg
            else:
                raise BadRequest("No datastore config available!")

        return server_cfg
Example #37
0
    def validate_messages(self, results):

        cc = self.container
        assertions = self.assertTrue

        first_salinity_values = None

        for message in results:
            rdt = RecordDictionaryTool.load_from_granule(message)

            try:
                temp = get_safe(rdt, 'temp')
            #                psd = PointSupplementStreamParser(stream_definition=self.ctd_stream_def, stream_granule=message)
            #                temp = psd.get_values('temperature')
            #                log.info(psd.list_field_names())
            except KeyError as ke:
                temp = None

            if temp is not None:
                assertions(isinstance(temp, numpy.ndarray))

                log.info( 'temperature=' + str(numpy.nanmin(temp)))

                first_salinity_values = None

            else:
                #psd = PointSupplementStreamParser(stream_definition=SalinityTransform.outgoing_stream_def, stream_granule=message)
                #log.info( psd.list_field_names())

                # Test the handy info method for the names of fields in the stream def
                #assertions('salinity' in psd.list_field_names())

                # you have to know the name of the coverage in stream def
                salinity = get_safe(rdt, 'salinity')
                #salinity = psd.get_values('salinity')
                log.info( 'salinity=' + str(numpy.nanmin(salinity)))

                # Check to see if salinity has values
                assertions(salinity is not None)

                assertions(isinstance(salinity, numpy.ndarray))
                assertions(numpy.nanmin(salinity) > 0.0) # salinity should always be greater than 0

                if first_salinity_values is None:
                    first_salinity_values = salinity.tolist()
                else:
                    second_salinity_values = salinity.tolist()
                    assertions(len(first_salinity_values) == len(second_salinity_values))
                    for idx in range(0,len(first_salinity_values)):
                        assertions(first_salinity_values[idx]*2.0 == second_salinity_values[idx])
    def _acquire_sample(cls, config, publisher, unlock_new_data_callback, update_new_data_check_attachment):
        """
        Ensures required keys (such as stream_id) are available from config, configures the publisher and then calls:
             BaseDataHandler._constraints_for_new_request (only if config does not contain 'constraints')
             BaseDataHandler._publish_data passing BaseDataHandler._get_data as a parameter
        @param config Dict containing configuration parameters, may include constraints, formatters, etc
        @param publisher the publisher used to publish data
        @param unlock_new_data_callback BaseDataHandler callback function to allow conditional unlocking of the BaseDataHandler._semaphore
        @param update_new_data_check_attachment classmethod to update the external dataset resources file list attachment
        @throws InstrumentParameterException if the data constraints are not a dictionary
        @retval None
        """
        log.debug('start _acquire_sample: config={0}'.format(config))

        cls._init_acquisition_cycle(config)

        constraints = get_safe(config, 'constraints')
        if not constraints:
            gevent.getcurrent().link(unlock_new_data_callback)
            try:
                constraints = cls._constraints_for_new_request(config)
            except NoNewDataWarning:
                #log.info(nndw.message)
                if get_safe(config, 'TESTING'):
                    #log.debug('Publish TestingFinished event')
                    pub = EventPublisher('DeviceCommonLifecycleEvent')
                    pub.publish_event(origin='BaseDataHandler._acquire_sample', description='TestingFinished')
                return

            if constraints is None:
                raise InstrumentParameterException("Data constraints returned from _constraints_for_new_request cannot be None")
            config['constraints'] = constraints
        elif isinstance(constraints, dict):
            addnl_constr = cls._constraints_for_historical_request(config)
            if addnl_constr is not None and isinstance(addnl_constr, dict):
                constraints.update(addnl_constr)
        else:
            raise InstrumentParameterException('Data constraints must be of type \'dict\':  {0}'.format(constraints))

        cls._publish_data(publisher, cls._get_data(config))

        if 'set_new_data_check' in config:
            update_new_data_check_attachment(config['external_dataset_res_id'], config['set_new_data_check'])

        # Publish a 'TestFinished' event
        if get_safe(config, 'TESTING'):
            #log.debug('Publish TestingFinished event')
            pub = EventPublisher('DeviceCommonLifecycleEvent')
            pub.publish_event(origin='BaseDataHandler._acquire_sample', description='TestingFinished')
    def execute(self, granule):
        """Processes incoming data!!!!
        """

        rdt = RecordDictionaryTool.load_from_granule(granule)
        #todo: use only flat dicts for now, may change later...
#        rdt0 = rdt['coordinates']
#        rdt1 = rdt['data']

        temperature = get_safe(rdt, 'temp')

        longitude = get_safe(rdt, 'lon')
        latitude = get_safe(rdt, 'lat')
        time = get_safe(rdt, 'time')
        height = get_safe(rdt, 'height')

        log.warn('Got temperature: %s' % str(temperature))


        # The L1 temperature data product algorithm takes the L0 temperature data product and converts it into Celsius.
        # Once the hexadecimal string is converted to decimal, only scaling (dividing by a factor and adding an offset) is
        # required to produce the correct decimal representation of the data in Celsius.
        # The scaling function differs by CTD make/model as described below.
        #    SBE 37IM, Output Format 0
        #    1) Standard conversion from 5-character hex string (Thex) to decimal (tdec)
        #    2) Scaling: T [C] = (tdec / 10,000) - 10

        root_rdt = RecordDictionaryTool(param_dictionary=self.temp)

        #todo: use only flat dicts for now, may change later...
#        data_rdt = RecordDictionaryTool(taxonomy=self.tx)
#        coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

        scaled_temperature = temperature

        for i in xrange(len(temperature)):
            scaled_temperature[i] = ( temperature[i] / 10000.0) - 10

        root_rdt['temp'] = scaled_temperature
        root_rdt['time'] = time
        root_rdt['lat'] = latitude
        root_rdt['lon'] = longitude
        root_rdt['height'] = height

        #todo: use only flat dicts for now, may change later...
#        root_rdt['coordinates'] = coord_rdt
#        root_rdt['data'] = data_rdt

        return build_granule(data_producer_id='ctd_L1_temperature', param_dictionary=self.temp, record_dictionary=root_rdt)
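
The per-element loop in the L1 transform above can be written as one vectorized numpy expression. This is only an illustrative sketch; the raw counts are invented and simply exercise the scaling formula from the comment (T [C] = tdec / 10000 - 10).

import numpy as np

tdec = np.array([280000.0, 281500.0, 283250.0])   # invented raw counts
temp_celsius = tdec / 10000.0 - 10.0
print(temp_celsius)   # approximately [18.    18.15  18.325]
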
Example #40
0
    def _constraints_for_new_request(cls, config):
        """
        Returns a constraints dictionary with
        @param config Dict of configuration parameters - may be used to generate the returned 'constraints' dict
        @retval dict that contains the constraints for retrieval of new data from the external dataset or None
        """
        #TODO: Sort out what the config needs to look like - dataset_in??
        ext_dset_res = get_safe(config, 'external_dataset_res', None)
        #log.debug('ExternalDataset Resource: {0}'.format(ext_dset_res))
        if ext_dset_res:
            #TODO: Use the external dataset resource to determine what data is new (i.e. pull 'old' fingerprint from here)
            #log.debug('ext_dset_res.dataset_description = {0}'.format(ext_dset_res.dataset_description))
            #log.debug('ext_dset_res.update_description = {0}'.format(ext_dset_res.update_description))

            # Get the Dataset object from the config (should have been instantiated in _init_acquisition_cycle)
            ds = get_safe(config, 'dataset_object')

            base_nd_check = get_safe(
                ext_dset_res.update_description.parameters, 'new_data_check')

            t_slice = slice(None)
            if base_nd_check:
                t_new_vname = ext_dset_res.dataset_description.parameters[
                    'temporal_dimension']
                t_new_arr = ds.variables[t_new_vname][t_slice]

                new_data = msgpack.packb(t_new_arr, default=encode_ion)
                if new_data != base_nd_check:
                    #new time data has arrived, figure out what's different and build the new slice
                    first_index = -1
                    last_index = -1
                    t_old_arr = msgpack.unpackb(base_nd_check,
                                                object_hook=decode_ion)
                    # Find the first and last positions in the new time array
                    # whose values are not present in the old array
                    for new_val in t_new_arr:
                        if new_val not in t_old_arr:
                            idx = np.nonzero(t_new_arr == new_val)[0][0]
                            if first_index == -1:
                                first_index = idx
                            last_index = idx

                    t_slice = slice(first_index, last_index)

            return {'temporal_slice': t_slice}

        return None
Example #41
0
    def _constraints_for_new_request(cls, config):
#        """
#        Returns a constraints dictionary with 'array_len' and 'count' assigned random integers
#        @param config Dict of configuration parameters - may be used to generate the returned 'constraints' dict
#        """
#        # Make sure the array_len is at least 1 larger than max_rec - so chunking is always seen
#        max_rec = get_safe(config, 'max_records', 1)
#        return {'array_len':npr.randint(max_rec+1,max_rec+10,1)[0],}

        old_list = get_safe(config, 'new_data_check') or []

        ret = {}
        base_url = get_safe(config,'ds_params.base_url')
        list_pattern = get_safe(config,'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)

        # Determine which files are new
        #Not exactly the prettiest method, but here goes:
        #old_list comes in as a list of lists: [[]]
        #curr_list comes in as a list of tuples: [()]
        #each needs to be a set of tuples for set.difference to work properly
        #set.difference returns a list of tuples that appear in curr_list but not old_list, providing the new
        #files that are available

        curr_set = set(tuple(x) for x in curr_list)
        old_set = set(tuple(x) for x in old_list)

        #new_list = [tuple(x) for x in curr_list if list(x) not in old_list] - removed because it wasn't working properly
        new_list = list(curr_set.difference(old_set))

        if not new_list:
            raise NoNewDataWarning()

        # The curr_list is the new new_data_check - used for the next "new data" evaluation
        config['set_new_data_check'] = curr_list

        # The new_list is the set of new files - these will be processed
        ret['new_files'] = new_list
        ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
        ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
        ret['bounding_box'] = {}
        ret['vars'] = []

        log.debug('constraints_for_new_request: {0}'.format(ret))

        return ret
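
get_time_from_filename is an external helper, so the sketch below only shows one plausible way such an extraction could work: a regular expression pulls the date portion out of the filename and strptime parses it into epoch seconds. The patterns and filename are invented.

import calendar
import re
from datetime import datetime

def time_from_filename(filename, date_extraction_pattern, date_pattern):
    # Extract the date groups from the filename, then parse them to epoch seconds
    match = re.search(date_extraction_pattern, filename)
    if match is None:
        raise ValueError('filename %r does not match %r' % (filename, date_extraction_pattern))
    dt = datetime.strptime(''.join(match.groups()), date_pattern)
    return calendar.timegm(dt.timetuple())

print(time_from_filename('ctd_20120514.dat', r'ctd_(\d{4})(\d{2})(\d{2})\.dat', '%Y%m%d'))
# prints the POSIX timestamp for 2012-05-14 00:00 UTC
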
Example #42
0
    def _spawn_stream_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as a data stream process.
        Attach to subscription queue with process function.
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)

        listen_name = get_safe(config, "process.listen_name") or name
        service_instance._proc_listen_name = listen_name

        service_instance.stream_subscriber_registrar = StreamSubscriberRegistrar(
            process=service_instance, container=self.container)
        sub = service_instance.stream_subscriber_registrar.create_subscriber(
            exchange_name=listen_name)

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        self._set_publisher_endpoints(service_instance, publish_streams)

        rsvc = ProcessRPCServer(node=self.container.node,
                                from_name=service_instance.id,
                                service=service_instance,
                                process=service_instance)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        cleanup = lambda _: self._cleanup_method(service_instance.id, rsvc)

        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc, sub],
                                   proc_name=service_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(
            proc, "_spawn_stream_process for %s" % service_instance._proc_name)

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        self._service_init(service_instance)
        self._service_start(service_instance)

        proc.start_listeners()

        return service_instance
    def execute(input=None, context=None, config=None, params=None, state=None):
        log.debug('Matplotlib transform: Received Viz Data Packet')
        stream_definition_id = params

        # parse the incoming data
        rdt = RecordDictionaryTool.load_from_granule(input)

        # Build a list of fields/variables that need to be plotted. Use the list provided by the UI,
        # since the retrieved granule might have extra fields. Why? Bugs, baby, bugs!
        fields = []
        if config:
            if config.get('parameters'):
                fields = config['parameters']
        else:
            fields = rdt.fields

        vardict = {}
        vardict['time'] = get_safe(rdt, 'time')
        if vardict['time'] is None:
            log.error("Matplotlib transform: Did not receive a time field to work with")
            return None

        for field in fields:
            if field == 'time':
                continue

            vardict[field] = get_safe(rdt, field)


        arrLen = len(vardict['time'])
        # init the graph_data structure for storing values
        graph_data = {}
        for varname in vardict.keys():
            graph_data[varname] = []

        # If code reached here, the graph data storage has been initialized. Just add values
        # to the list
        for varname in vardict.keys():  # psd.list_field_names():
            if vardict[varname] is None:
                # create an array of zeros to compensate for missing values
                graph_data[varname].extend([0.0]*arrLen)
            else:
                graph_data[varname].extend(vardict[varname])

        out_granule = VizTransformMatplotlibGraphsAlgorithm.render_graphs(graph_data, stream_definition_id)

        return out_granule
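
The zero-padding of missing variables done by the Matplotlib transform above, reduced to a standalone snippet with made-up values:

vardict = {'time': [0.0, 1.0, 2.0], 'salinity': [33.1, 33.2, 33.0], 'temp': None}
arr_len = len(vardict['time'])

graph_data = dict((name, []) for name in vardict)
for name, values in vardict.items():
    if values is None:
        # pad missing variables with zeros to match the time axis length
        graph_data[name].extend([0.0] * arr_len)
    else:
        graph_data[name].extend(values)

print(graph_data['temp'])   # [0.0, 0.0, 0.0]
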
Example #44
0
    def execute_resource(self, resource_id='', command=None):
        """Execute command on the resource represented by agent.
        """
        res_type = self._get_resource_type(resource_id)
        if self._has_agent(res_type):
            rac = ResourceAgentClient(resource_id=resource_id)
            return rac.execute_resource(resource_id=resource_id,
                                        command=command)

        cmd_res = None
        res_interface = self._get_type_interface(res_type)

        target = get_safe(res_interface,
                          "commands.%s.execute" % command.command, None)
        if target:
            res = self._call_execute(target, resource_id, res_type,
                                     command.args, command.kwargs)
            cmd_res = AgentCommandResult(command_id=command.command_id,
                                         command=command.command,
                                         ts_execute=get_ion_ts(),
                                         status=0)
        else:
            log.warn("execute_resource(): command %s not defined",
                     command.command)

        return cmd_res
Example #45
0
    def _stop_driver(self):
        """
        Unload the DataHandler instance
        Called from:
                    InstrumentAgent._handler_inactive_reset,
                    InstrumentAgent._handler_idle_reset,
                    InstrumentAgent._handler_stopped_reset,
                    InstrumentAgent._handler_observatory_reset
        @retval None.
        """
        dvr_mod = get_safe(self._dvr_config, 'dvr_mod', None)
        dvr_cls = get_safe(self._dvr_config, 'dvr_cls', None)

        self._dvr_client = None
        log.info('ExternalDatasetAgent \'{0}\' unloaded DataHandler \'{1}.{2}\''.format(self._proc_name,dvr_mod,dvr_cls))
        return None
Example #46
0
    def _spawn_simple_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as simple process.
        No attachments.
        """
        process_instance = self._create_process_instance(
            process_id, name, module, cls, config)
        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        pub_names = self._set_publisher_endpoints(process_instance,
                                                  publish_streams)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[],
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(
            proc, "_spawn_simple_process for %s" % process_instance.id)

        self._process_init(process_instance)
        self._process_start(process_instance)

        return process_instance
Example #47
0
    def _spawn_service_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as a service worker.
        Attach to service queue with service definition, attach to service pid
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)
        self._service_init(service_instance)

        self._service_start(service_instance)

        listen_name = get_safe(config,
                               "process.listen_name") or service_instance.name
        log.debug("Service Process (%s) listen_name: %s", name, listen_name)

        self._set_service_endpoint(service_instance, listen_name)
        self._set_service_endpoint(service_instance, service_instance.id)

        # Directory registration
        self.container.directory.register_safe("/Services",
                                               listen_name,
                                               interface=service_instance.name)
        self.container.directory.register_safe("/Services/%s" % listen_name,
                                               service_instance.id)

        return service_instance
    def on_init(self):
        self.create_workflow_timeout = get_safe(self.CFG,
                                                'create_workflow_timeout', 60)
        self.terminate_workflow_timeout = get_safe(
            self.CFG, 'terminate_workflow_timeout', 60)
#        self.monitor_timeout = get_safe(self.CFG, 'user_queue_monitor_timeout', 300)
#        self.monitor_queue_size = get_safe(self.CFG, 'user_queue_monitor_size', 100)
#
#        #Setup and event object for use by the queue monitoring greenlet
#        self.monitor_event = gevent.event.Event()
#        self.monitor_event.clear()
#
#
#        #Start up queue monitor
#        self._process.thread_manager.spawn(self.user_vis_queue_monitor)
        return
Example #49
0
    def _spawn_standalone_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as standalone process.
        Attach to service pid.
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)
        self._service_init(service_instance)

        self._service_start(service_instance)

        rsvc = ProcessRPCServer(node=self.container.node,
                                from_name=service_instance.id,
                                service=service_instance,
                                process=service_instance)

        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc],
                                   proc_name=service_instance._proc_name)
        self.proc_sup.ensure_ready(
            proc, "_spawn_standalone_process for %s" % service_instance.id)

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        # Add publishers if any...
        publish_streams = get_safe(config, "process.publish_streams")
        self._set_publisher_endpoints(service_instance, publish_streams)

        return service_instance
Example #50
0
    def validate_highcharts_transform_results(self, results):

        assertions = self.assertTrue

        # if it's just one granule, wrap it up in a list so we can use the following for loop for a couple of cases
        if isinstance(results, Granule):
            results = [results]

        for g in results:

            if isinstance(g,Granule):

                rdt = RecordDictionaryTool.load_from_granule(g)
                hc_data_arr = get_safe(rdt, 'hc_data')

                if hc_data_arr is None:
                    log.debug("hc_data in granule is None")
                    continue

                assertions(len(hc_data_arr) >= 0) # Need to come up with a better check

                hc_data = hc_data_arr[0]
                assertions(len(hc_data) >= 0)

                assertions(len(hc_data[0]["name"]) >= 0)
                assertions(len(hc_data[0]["data"]) >= 0)
Example #51
0
    def execute_resource(self, resource_id='', command=None):
        """Execute command on the resource represented by agent.
        @param resource_id The id of the resource agent.
        @param command An AgentCommand containing the command.
        @retval result An AgentCommandResult containing the result.
        @throws BadRequest if the command was malformed.
        @throws NotFound if the command is not available in current state.
        @throws ResourceError if the resource produced an error during execution.

        @param resource_id    str
        @param command    AgentCommand
        @retval result    AgentCommandResult
        @throws BadRequest    if the command was malformed.
        @throws NotFound    if the command is not implemented in the agent.
        @throws ResourceError    if the resource produced an error.
        """
        res_type = self._get_resource_type(resource_id)
        if self._has_agent(res_type):
            rac = ResourceAgentClient(resource_id=resource_id)
            return rac.execute_resource(resource_id=resource_id, command=command)

        cmd_res = None
        res_interface = self._get_type_interface(res_type)

        target = get_safe(res_interface, "commands.%s.execute" % command.command, None)
        if target:
            res = self._call_execute(target, resource_id, res_type, command.args, command.kwargs)
            cmd_res = AgentCommandResult(command_id=command.command_id,
                command=command.command,
                ts_execute=get_ion_ts(),
                status=0)
        else:
            log.warn("execute_resource(): command %s not defined", command.command)

        return cmd_res
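
The dotted 'commands.<name>.execute' lookup used above can be shown against a toy resource-interface dict; the structure and target string below are invented, and plain dict access replaces get_safe.

res_interface = {
    'commands': {
        'restart': {'execute': 'handlers.restart_device'},
    }
}

def lookup_execute_target(res_interface, command_name):
    # Equivalent of get_safe(res_interface, "commands.%s.execute" % command_name)
    return res_interface.get('commands', {}).get(command_name, {}).get('execute')

print(lookup_execute_target(res_interface, 'restart'))   # handlers.restart_device
print(lookup_execute_target(res_interface, 'reboot'))    # None -> command not defined
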
Example #52
0
    def _get_data(cls, config):
        parser = get_safe(config, 'parser')
        if parser:
            log.warn('Header Info:\n{0}'.format(parser.header_map))
            log.warn('Tables Available: {0}'.format(parser.table_map.keys()))

        return []
    def get_resource(self, resource_id='', params=None):
        """Return the value of the given resource parameter.
        @param resource_id The id of the resource agent.
        @param params A list of parameters names to query.
        @retval A dict of parameter name-value pairs.
        @throws BadRequest if the command was malformed.
        @throws NotFound if the resource does not support the parameter.

        @param resource_id    str
        @param params    list
        @retval result    AgentCommandResult
        @throws NotFound    if the parameter does not exist.
        """
        res_type = self._get_resource_type(resource_id)
        if self._has_agent(res_type):
            rac = ResourceAgentClient(resource_id=resource_id)
            return rac.get_resource(resource_id=resource_id, params=params)

        res_interface = self._get_type_interface(res_type)

        get_result = {}
        for param in params:
            getter = get_safe(res_interface, "params.%s.get" % param, None)
            if getter:
                get_res = self._call_getter(getter, resource_id, res_type)
                get_result[param] = get_res
            else:
                get_result[param] = None

        return get_result
Example #54
0
    def _create_driver_plugin(self):
        try:
            # Ensure the egg cache directory exists. ooi.reflections will fail
            # somewhat silently when this directory doesn't exist.
            if not os.path.isdir(EGG_CACHE_DIR):
                os.makedirs(EGG_CACHE_DIR)

            log.debug("getting plugin config")
            uri = get_safe(self._dvr_config, 'dvr_egg')
            module_name = self._dvr_config['dvr_mod']
            class_name = self._dvr_config['dvr_cls']
            config = self._dvr_config['startup_config']
        except:
            log.error('error in configuration', exc_info=True)
            raise

        egg_name = None
        egg_repo = None
        memento = self._get_state(DSA_STATE_KEY)

        log.warn("Get driver object: %s, %s, %s, %s", class_name, module_name,
                 egg_name, egg_repo)
        if uri:
            egg_name = uri.split('/')[-1] if uri.startswith('http') else uri
            egg_repo = uri[0:len(uri) - len(egg_name) -
                           1] if uri.startswith('http') else None

        log.info("instantiate driver plugin %s.%s", module_name, class_name)
        params = [
            config, memento, self.publish_callback,
            self.persist_state_callback, self.exception_callback
        ]
        return EGG_CACHE.get_object(class_name, module_name, egg_name,
                                    egg_repo, params)
    def set_resource(self, resource_id='', params=None):
        """Set the value of the given resource parameters.
        @param resource_id The id of the resource agent.
        @param params A dict of resource parameter name-value pairs.
        @throws BadRequest if the command was malformed.
        @throws NotFound if a parameter is not supported by the resource.
        @throws ResourceError if the resource encountered an error while setting
        the parameters.

        @param resource_id    str
        @param params    dict
        @throws BadRequest    if the command was malformed.
        @throws NotFound    if the parameter does not exist.
        @throws ResourceError    if the resource failed while trying to set the parameter.
        """
        res_type = self._get_resource_type(resource_id)
        if self._has_agent(res_type):
            rac = ResourceAgentClient(resource_id=resource_id)
            return rac.set_resource(resource_id=resource_id, params=params)

        res_interface = self._get_type_interface(res_type)

        for param in params:
            setter = get_safe(res_interface, "params.%s.set" % param, None)
            if setter:
                self._call_setter(setter, params[param], resource_id, res_type)
            else:
                log.warn("set_resource(): param %s not defined", param)
Example #56
0
    def _validate_driver_config(self):
        """
        Verify the agent configuration contains a driver config. Called by the
        uninitialize_initialize handler in the IA class.
        """
        log.debug("Driver Config: %s", self._dvr_config)
        out = True

        for key in ('startup_config', 'dvr_mod', 'dvr_cls'):
            if key not in self._dvr_config:
                log.error('missing key: %s', key)
                out = False

        for key in ('stream_config', ):
            if key not in self.CFG:
                log.error('missing key: %s', key)
                out = False

        max_records = get_safe(self._dvr_config, 'max_records', 100)
        if max_records < 1:
            log.error(
                'max_records=%d, must be at least 1 or unset (default 100)',
                max_records)
            out = False

        return out
    def __init__(self, sysname=None, orgname=None, config=None):
        self.orgname = orgname or get_safe(config, 'system.root_org', 'ION')
        sysname = sysname or get_default_sysname()
        self.datastore_name = "resources"
        self.datastore = DatastoreFactory.get_datastore(datastore_name=self.datastore_name, config=config,
                                                        scope=sysname, profile=DataStore.DS_PROFILE.DIRECTORY,
                                                        variant=DatastoreFactory.DS_BASE)
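
The checks in _validate_driver_config above amount to required-key validation on two config dicts. A minimal sketch on plain dicts (printing instead of log.error):

def validate_driver_config(dvr_config, agent_cfg):
    ok = True
    for key in ('startup_config', 'dvr_mod', 'dvr_cls'):
        if key not in dvr_config:
            print('missing driver config key: %s' % key)
            ok = False
    if 'stream_config' not in agent_cfg:
        print('missing agent config key: stream_config')
        ok = False
    if dvr_config.get('max_records', 100) < 1:
        print('max_records must be at least 1 or unset (default 100)')
        ok = False
    return ok

print(validate_driver_config({'dvr_mod': 'm', 'dvr_cls': 'C'}, {}))   # two messages, then False
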
Example #58
0
    def validate_mpl_graphs_transform_results(self, results):

        cc = self.container
        assertions = self.assertTrue

        # if it's just one granule, wrap it up in a list so we can use the following for loop for a couple of cases
        if isinstance(results, Granule):
            results = [results]

        found_data = False
        for g in results:
            if isinstance(g, Granule):
                rdt = RecordDictionaryTool.load_from_granule(g)

                graphs = get_safe(rdt, 'matplotlib_graphs')

                if graphs is None:
                    continue

                for graph in graphs[0]:

                    # At this point only dictionaries containing image data should be passed
                    # For some reason non dictionary values are filtering through.
                    if not isinstance(graph, dict):
                        continue

                    assertions(
                        graph['viz_product_type'] == 'matplotlib_graphs')
                    # check to see if the list (numpy array) contains actual images
                    assertions(
                        imghdr.what(graph['image_name'], h=graph['image_obj'])
                        == 'png')
                    found_data = True
        return found_data
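
The assertion above relies on imghdr.what(name, h=bytes) recognising the image payload; for PNG, imghdr only checks the 8-byte signature at the start of the buffer. A tiny self-contained check with dummy bytes:

import imghdr

png_header = b'\x89PNG\r\n\x1a\n' + b'\x00' * 16   # PNG signature plus filler bytes
print(imghdr.what('graph.png', h=png_header))      # 'png'
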
Example #59
0
    def _spawn_agent_process(self, process_id, name, module, cls, config):
        """
        Spawn a process acting as agent process.
        Attach to service pid.
        """
        service_instance = self._create_service_instance(
            process_id, name, module, cls, config)
        if not isinstance(service_instance, ResourceAgent):
            raise ContainerConfigError(
                "Agent process must extend ResourceAgent")

        # Set the resource ID if we get it through the config
        resource_id = get_safe(service_instance.CFG, "agent.resource_id")
        if resource_id:
            service_instance.resource_id = resource_id

        rsvc = ProcessRPCServer(node=self.container.node,
                                from_name=service_instance.id,
                                service=service_instance,
                                process=service_instance)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        cleanup = lambda _: self._cleanup_method(service_instance.id, rsvc)

        proc = self.proc_sup.spawn(name=service_instance.id,
                                   service=service_instance,
                                   listeners=[rsvc],
                                   proc_name=service_instance._proc_name,
                                   cleanup_method=cleanup)
        self.proc_sup.ensure_ready(
            proc, "_spawn_agent_process for %s" % service_instance.id)

        # map gproc to service_instance
        self._spawned_proc_to_process[proc.proc] = service_instance

        # set service's reference to process
        service_instance._process = proc

        # Now call the on_init of the agent.
        self._service_init(service_instance)

        if not service_instance.resource_id:
            log.warn("New agent pid=%s has no resource_id set" % process_id)

        self._service_start(service_instance)

        proc.start_listeners()

        if service_instance.resource_id:
            # look to load any existing policies for this resource
            if self._is_policy_management_service_available(
            ) and self.container.governance_controller:
                self.container.governance_controller.update_resource_access_policy(
                    service_instance.resource_id)
        else:
            log.warn("Agent process id=%s does not define resource_id!!" %
                     service_instance.id)

        return service_instance
Example #60
0
    def _init_acquisition_cycle(cls, config):
        ext_dset_res = get_safe(config, 'external_dataset_res', None)
        if ext_dset_res:
            ds_url = ext_dset_res.dataset_description.parameters[
                'dataset_path']
            log.debug('Instantiate a RuvParser for dataset: \'{0}\''.format(
                ds_url))
            config['parser'] = RuvParser(ds_url)