def _constraints_for_new_request(cls, config):
    old_list = get_safe(config, 'new_data_check') or []
    # CBM: Fix this when the DotList crap is sorted out
    old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    new_list = [x for x in curr_list if x not in old_list]

    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['new_files'] = new_list
    ret['bounding_box'] = {}
    ret['vars'] = []

    return ret
def _get_data(cls, config):
    parser = get_safe(config, 'parser', None)
    ext_dset_res = get_safe(config, 'external_dataset_res', None)
    if ext_dset_res and parser:
        #CBM: Not in use yet...
        # t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
        # x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
        # y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
        # z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
        # var_lst = ext_dset_res.dataset_description.parameters['variables']

        max_rec = get_safe(config, 'max_records', 1)
        dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
        tx_yml = get_safe(config, 'taxonomy')
        ttool = TaxyTool.load(tx_yml)  #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool

        cnt = cls._calc_iter_cnt(len(parser.sensor_map), max_rec)
        for x in xrange(cnt):
            rdt = RecordDictionaryTool(taxonomy=ttool)
            for name in parser.sensor_map:
                d = parser.data_map[name][x*max_rec:(x+1)*max_rec]
                rdt[name] = d

            g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
            yield g
    else:
        log.warn('No parser object found in config')
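# The chunking above relies on an iteration-count helper (cls._calc_iter_cnt here,
# calculate_iteration_count in later snippets). A minimal sketch of that calculation is shown
# below, assuming the helper simply computes how many max_rec-sized chunks are needed to cover
# total_recs (ceiling division); the project's real helper may differ in detail.
def _calc_iter_cnt_sketch(total_recs, max_rec):
    # Integer ceiling division: e.g. 10 records in chunks of 3 -> 4 iterations
    return (total_recs + max_rec - 1) // max_rec

# Example: 10 records published 3 at a time yields slices [0:3], [3:6], [6:9], [9:12]
assert _calc_iter_cnt_sketch(10, 3) == 4
assert _calc_iter_cnt_sketch(9, 3) == 3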
def set_configuration(self, config):
    """
    expect configuration to have:
    - parser module/class
    - directory, wildcard to find data files
    - optional timestamp of last granule
    - optional poll rate
    - publish info
    """
    log.warn("DRIVER: set_configuration")
    log.error("Log level: %s", log.getEffectiveLevel())
    log.debug('using configuration: %s', config)
    self.config = config
    self.max_records = get_safe(config, 'max_records', 100)
    self.stream_config = self.CFG.get('stream_config', {})
    if len(self.stream_config) >= 1:
        # Single- and multi-stream configurations are currently treated the same:
        # only the first configured stream is used
        stream_cfg = self.stream_config.values()[0]

    stream_id = stream_cfg['stream_id']
    stream_route = IonObject(OT.StreamRoute, routing_key=stream_cfg['routing_key'],
                             exchange_point=stream_cfg['exchange_point'])
    param_dict = stream_cfg['stream_def_dict']['parameter_dictionary']
    self.publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route)
    self.parameter_dictionary = ParameterDictionary.load(param_dict)
    self.time_field = self.parameter_dictionary.get_temporal_context()
    self.latest_granule_time = get_safe(config, 'last_time', 0)
def _constraints_for_new_request(cls, config):
    old_list = get_safe(config, 'new_data_check') or []
    # CBM: Fix this when the DotList crap is sorted out
    old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Compare the last read files (old_list) with the current directory contents (curr_list).
    # If the file names match (curr_file[0] == old_file[0]), compare the current file size
    # (curr_file[2]) with the file position recorded when the file was last read (old_file[3]).
    # If there is more data now than was read last time, add the file to the list.
    new_list = []
    for curr_file in curr_list:
        found = False
        for old_file in old_list:
            if curr_file[0] == old_file[0]:
                # Same filename: the file is still in the directory and was previously read
                found = True
                if curr_file[2] > old_file[3]:
                    # The file has grown past the last read position, so pick it up again
                    new_list.append((curr_file[0], curr_file[1], curr_file[2], old_file[-1]))
        if not found:
            new_list.append(curr_file)

    config['set_new_data_check'] = curr_list

    ret['new_files'] = new_list
    ret['bounding_box'] = {}
    ret['vars'] = []

    return ret
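# A minimal, standalone sketch of the "file has grown" check above, using plain tuples in place
# of the directory-listing entries. The tuple layout (name, timestamp, size, position) is an
# assumption for illustration only; the real list_file_info() entries may differ.
def find_grown_or_new_files(curr_list, old_list):
    new_list = []
    for curr_file in curr_list:
        found = False
        for old_file in old_list:
            if curr_file[0] == old_file[0]:
                found = True
                if curr_file[2] > old_file[3]:
                    # Same file, but larger than the last read position - re-read it
                    new_list.append((curr_file[0], curr_file[1], curr_file[2], old_file[-1]))
        if not found:
            new_list.append(curr_file)
    return new_list

# Example: 'a.dat' grew from 100 to 150 bytes, 'b.dat' is unchanged, 'c.dat' is brand new
old = [('a.dat', 1, 100, 100), ('b.dat', 1, 80, 80)]
curr = [('a.dat', 2, 150, 150), ('b.dat', 1, 80, 80), ('c.dat', 2, 40, 40)]
assert find_grown_or_new_files(curr, old) == [('a.dat', 2, 150, 100), ('c.dat', 2, 40, 40)]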
def _get_data(cls, config):
    new_flst = get_safe(config, 'constraints.new_files', [])
    hdr_cnt = get_safe(config, 'header_count', SlocumParser.DEFAULT_HEADER_SIZE)
    for f in new_flst:
        try:
            parser = SlocumParser(f[0], hdr_cnt)
            #CBM: Not in use yet...
            # ext_dset_res = get_safe(config, 'external_dataset_res', None)
            # t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
            # x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
            # y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
            # z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
            # var_lst = ext_dset_res.dataset_description.parameters['variables']

            max_rec = get_safe(config, 'max_records', 1)
            dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
            #tx_yml = get_safe(config, 'taxonomy')
            #ttool = TaxyTool.load(tx_yml) #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool
            pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

            cnt = calculate_iteration_count(len(parser.sensor_map), max_rec)
            for x in xrange(cnt):
                #rdt = RecordDictionaryTool(taxonomy=ttool)
                rdt = RecordDictionaryTool(param_dictionary=pdict)
                for name in parser.sensor_map:
                    d = parser.data_map[name][x*max_rec:(x+1)*max_rec]
                    rdt[name] = d

                #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
                g = build_granule(data_producer_id=dprod_id, record_dictionary=rdt, param_dictionary=pdict)
                yield g
        except SlocumParseException as spe:
            # TODO: Decide what to do here, raise an exception or carry on
            log.error('Error parsing data file: \'{0}\''.format(f))
def _acquire_data(cls, config, unlock_new_data_callback):
    """
    Ensures required keys (such as stream_id) are available from config, configures the publisher and then calls:
         BaseDataHandler._new_data_constraints (only if config does not contain 'constraints')
         BaseDataHandler._publish_data passing BaseDataHandler._get_data as a parameter
    @param config Dict containing configuration parameters, may include constraints, formatters, etc
    @param unlock_new_data_callback BaseDataHandler callback function to allow conditional unlocking of the BaseDataHandler._semaphore
    """
    stream_id = get_safe(config, 'stream_id')
    if not stream_id:
        raise ConfigurationError('Configuration does not contain required \'stream_id\' key')
    #TODO: Configure the publisher
    publisher = None

    constraints = get_safe(config, 'constraints')
    if not constraints:
        gevent.getcurrent().link(unlock_new_data_callback)
        constraints = cls._new_data_constraints(config)
        config['constraints'] = constraints

    cls._publish_data(publisher, config, cls._get_data(config))

    # Publish a 'TestFinished' event
    if get_safe(config, 'TESTING'):
        log.debug('Publish TestingFinished event')
        pub = EventPublisher('DeviceCommonLifecycleEvent')
        pub.publish_event(origin='BaseDataHandler._acquire_data', description='TestingFinished')
def _constraints_for_new_request(cls, config):
    old_list = get_safe(config, 'new_data_check') or []
    # CBM: Fix this when the DotList crap is sorted out
    old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Determine which files are new
    new_list = [x for x in curr_list if x not in old_list]

    if len(new_list) == 0:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config['set_new_data_check'] = curr_list

    # The new_list is the set of new files - these will be processed
    ret['new_files'] = new_list
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['bounding_box'] = {}
    ret['vars'] = []

    return ret
def _acquire_data(cls, config, publisher, unlock_new_data_callback):
    """
    Ensures required keys (such as stream_id) are available from config, configures the publisher and then calls:
         BaseDataHandler._new_data_constraints (only if config does not contain 'constraints')
         BaseDataHandler._publish_data passing BaseDataHandler._get_data as a parameter
    @param config Dict containing configuration parameters, may include constraints, formatters, etc
    @param unlock_new_data_callback BaseDataHandler callback function to allow conditional unlocking of the BaseDataHandler._semaphore
    """
    log.debug('start _acquire_data: config={0}'.format(config))
    cls._init_acquisition_cycle(config)

    constraints = get_safe(config, 'constraints')
    if not constraints:
        gevent.getcurrent().link(unlock_new_data_callback)
        constraints = cls._new_data_constraints(config)
        if constraints is None:
            raise InstrumentParameterException("Data constraints returned from _new_data_constraints cannot be None")
        config['constraints'] = constraints

    cls._publish_data(publisher, cls._get_data(config))

    # Publish a 'TestFinished' event
    if get_safe(config, 'TESTING'):
        log.debug('Publish TestingFinished event')
        pub = EventPublisher('DeviceCommonLifecycleEvent')
        pub.publish_event(origin='BaseDataHandler._acquire_data', description='TestingFinished')
def _make_management_call(self, url, method="get", data=None):
    """
    Makes a call to the Rabbit HTTP management API using the passed in HTTP method.
    """
    log.debug("Calling rabbit API management (%s): %s", method, url)

    meth = getattr(requests, method)

    try:
        mgmt_cfg_key = CFG.get_safe("container.messaging.management.server", "rabbit_manage")
        mgmt_cfg = CFG.get_safe("server." + mgmt_cfg_key)
        username = get_safe(mgmt_cfg, "username") or "guest"
        password = get_safe(mgmt_cfg, "password") or "guest"

        with gevent.timeout.Timeout(10):
            r = meth(url, auth=(username, password), data=data)
        r.raise_for_status()

        if not r.content == "":
            content = json.loads(r.content)
        else:
            content = None

    except gevent.timeout.Timeout as ex:
        raise Timeout(str(ex))
    except requests.exceptions.Timeout as ex:
        raise Timeout(str(ex))
    except (requests.exceptions.ConnectionError, socket.error) as ex:
        raise ServiceUnavailable(str(ex))
    except requests.exceptions.RequestException as ex:
        # the generic base exception all requests' exceptions inherit from, raise our
        # general server error too.
        raise ServerError(str(ex))

    return content
def execute(input=None, context=None, config=None, params=None, state=None, fileName=None):
    stream_definition_id = params
    mpl_allowed_numerical_types = ['int32', 'int64', 'uint32', 'uint64', 'float32', 'float64']

    if stream_definition_id is None:
        log.error("Matplotlib transform: Need an output stream definition to process graphs")
        return None

    # parse the incoming data
    rdt = RecordDictionaryTool.load_from_granule(input)

    # build a list of fields/variables that need to be plotted. Use the list provided by the UI
    # since the retrieved granule might have extra fields.
    fields = rdt.fields
    resolution = "640x480"
    if config:
        if 'parameters' in config:
            fields = config['parameters']
        if 'resolution' in config:
            resolution = config['resolution']

    vardict = {}
    vardict['time'] = get_safe(rdt, 'time')
    if vardict['time'] is None:
        print "Matplotlib transform: Did not receive a time field to work with"
        log.error("Matplotlib transform: Did not receive a time field to work with")
        return None

    for field in fields:
        if field == 'time':
            continue
        # only consider fields which are supposed to be numbers.
        if (rdt[field] is not None) and (rdt[field].dtype not in mpl_allowed_numerical_types):
            continue
        vardict[field] = get_safe(rdt, field)

    arrLen = len(vardict['time'])
    # init the graph_data structure for storing values
    graph_data = {}
    for varname in vardict.keys():
        graph_data[varname] = []

    # If code reached here, the graph data storage has been initialized. Just add values to the list
    for varname in vardict.keys():  # psd.list_field_names():
        if vardict[varname] is None:
            # create an array of zeros to compensate for missing values
            graph_data[varname].extend([0.0] * arrLen)
        else:
            graph_data[varname].extend(vardict[varname])

    out_granule = VizTransformMatplotlibGraphsAlgorithm.render_graphs(graph_data, stream_definition_id, fileName,
                                                                      resolution=resolution)
    return out_granule
def execute_acquire_data(self, *args):
    """
    Creates a copy of self._dh_config, creates a publisher, and spawns a greenlet to perform a data acquisition cycle
    If the args[0] is a dict, any entries keyed with one of the 'PATCHABLE_CONFIG_KEYS' are used to patch the config
    Greenlet binds to BaseDataHandler._acquire_data and passes the publisher and config
    Disallows multiple "new data" (unconstrained) requests using BaseDataHandler._semaphore lock
    Called from:
        InstrumentAgent._handler_observatory_execute_resource
         |--> ExternalDataAgent._handler_streaming_execute_resource
    @parameter args First argument can be a config dictionary
    """
    log.debug('Executing acquire_data: args = {0}'.format(args))

    # Make a copy of the config to ensure no cross-pollution
    config = self._dh_config.copy()

    # Patch the config if mods are passed in
    try:
        config_mods = args[0]
        if not isinstance(config_mods, dict):
            raise IndexError()
        log.debug('Configuration modifications provided: {0}'.format(config_mods))
        for k in self._params['PATCHABLE_CONFIG_KEYS']:
            p = get_safe(config_mods, k)
            if p is not None:
                config[k] = p
    except IndexError:
        log.info('No configuration modifications were provided')

    # Verify that there is a stream_id member in the config
    stream_id = get_safe(config, 'stream_id')
    if not stream_id:
        raise ConfigurationError('Configuration does not contain required \'stream_id\' member')

    isNew = get_safe(config, 'constraints') is None

    if isNew and not self._semaphore.acquire(blocking=False):
        log.warn('Already acquiring new data - action not duplicated')
        return

    ndc = None
    if isNew:
        # Get the NewDataCheck attachment and add its content to the config
        ext_ds_id = get_safe(config, 'external_dataset_res_id')
        if ext_ds_id:
            ndc = self._find_new_data_check_attachment(ext_ds_id)

    config['new_data_check'] = ndc

    # Create a publisher to pass into the greenlet
    publisher = self._stream_registrar.create_publisher(stream_id=stream_id)

    # Spawn a greenlet to do the data acquisition and publishing
    g = spawn(self._acquire_data, config, publisher, self._unlock_new_data_callback,
              self._update_new_data_check_attachment)
    log.debug('** Spawned {0}'.format(g))
    self._glet_queue.append(g)
def execute(self, granule):
    """Processes incoming data!!!!
    """
    rdt = RecordDictionaryTool.load_from_granule(granule)
    #todo: use only flat dicts for now, may change later...
    # rdt0 = rdt['coordinates']
    # rdt1 = rdt['data']

    pressure = get_safe(rdt, 'pres')  #psd.get_values('conductivity')
    longitude = get_safe(rdt, 'lon')  # psd.get_values('longitude')
    latitude = get_safe(rdt, 'lat')  #psd.get_values('latitude')
    time = get_safe(rdt, 'time')  # psd.get_values('time')
    height = get_safe(rdt, 'height')  # psd.get_values('time')

    log.warn('Got pressure: %s' % str(pressure))

    # L1
    # 1) The algorithm input is the L0 pressure data product (p_hex) and, in the case of the SBE 37IM,
    #    the pressure range (P_rng) from metadata.
    # 2) Convert the hexadecimal string to a decimal string
    # 3) For the SBE 37IM only, convert the pressure range (P_rng) from psia to dbar
    #    Convert P_rng (input from metadata) from psia to dbar
    # 4) Perform scaling operation
    #    SBE 37IM
    #    L1 pressure data product (in dbar):

    # Use the constructor to put data into a granule
    psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])
    ### Assumes the config argument for output streams is known and there is only one 'output'.
    ### the stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
    ### application level like this!

    scaled_pressure = pressure

    for i in xrange(len(pressure)):
        #todo: get pressure range from metadata (if present) and include in calc
        scaled_pressure[i] = (pressure[i])

    root_rdt = RecordDictionaryTool(taxonomy=self.tx)

    #todo: use only flat dicts for now, may change later...
    # data_rdt = RecordDictionaryTool(taxonomy=self.tx)
    # coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

    root_rdt['pres'] = scaled_pressure
    root_rdt['time'] = time
    root_rdt['lat'] = latitude
    root_rdt['lon'] = longitude
    root_rdt['height'] = height

    # root_rdt['coordinates'] = coord_rdt
    # root_rdt['data'] = data_rdt

    return build_granule(data_producer_id='ctd_L1_pressure', taxonomy=self.tx, record_dictionary=root_rdt)
    # NOTE: unreachable leftover from the PointSupplementConstructor approach above
    #return psc.close_stream_granule()
def get_datastore(self, ds_name, profile=DataStore.DS_PROFILE.BASIC, config=None):
    """
    Factory method to get a datastore instance from given name, profile and config.
    This is the central point to cache these instances, to decide persistent or mock
    and to force clean the store on first use.
    @param ds_name  Logical name of datastore (will be scoped with sysname)
    @param profile  One of known constants determining the use of the store
    @param config  Override config to use
    """
    assert ds_name, "Must provide ds_name"
    if ds_name in self._datastores:
        log.debug("get_datastore(): Found instance of store '%s'" % ds_name)
        return self._datastores[ds_name]

    scoped_name = ("%s_%s" % (get_sys_name(), ds_name)).lower()

    # Imports here to prevent cyclic module dependency
    from pyon.core.bootstrap import CFG
    config = config or CFG

    persistent = not bool(get_safe(config, "system.mockdb"))
    force_clean = bool(get_safe(config, "system.force_clean"))

    log.info("get_datastore(): Create instance of store '%s' {persistent=%s, force_clean=%s, scoped_name=%s}" % (
        ds_name, persistent, force_clean, scoped_name))

    # Persistent (CouchDB) or MockDB?
    if persistent:
        # Use inline import to prevent circular import dependency
        from pyon.datastore.couchdb.couchdb_datastore import CouchDB_DataStore
        new_ds = CouchDB_DataStore(datastore_name=scoped_name, profile=profile)
    else:
        # Use inline import to prevent circular import dependency
        from pyon.datastore.mockdb.mockdb_datastore import MockDB_DataStore
        new_ds = MockDB_DataStore(datastore_name=scoped_name)  # , profile=profile)

    # Clean the store instance
    if force_clean:
        try:
            new_ds.delete_datastore(scoped_name)
        except NotFound:
            pass

    # Create store if not existing
    if not new_ds.datastore_exists(scoped_name):
        new_ds.create_datastore(scoped_name)

    # Set a few standard datastore instance fields
    new_ds.local_name = ds_name
    new_ds.ds_profile = profile

    self._datastores[ds_name] = new_ds

    return new_ds
def _get_data(cls, config):
    """
    Iterable that pulls data from the external NetCDF dataset based on the temporal slice in
    config['constraints'] and yields Granule objects of at most config['max_records'] records each
    @param config Dict of configuration parameters - must contain the 'dataset_object' and
    'external_dataset_res' entries set up during _init_acquisition_cycle
    """
    ext_dset_res = get_safe(config, 'external_dataset_res', None)

    # Get the Dataset object from the config (should have been instantiated in _init_acquisition_cycle)
    ds = get_safe(config, 'dataset_object')

    if ext_dset_res and ds:
        t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
        x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
        y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
        z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
        var_lst = ext_dset_res.dataset_description.parameters['variables']

        t_slice = get_safe(config, 'constraints.temporal_slice', (slice(0, 1)))
        #TODO: Using 'eval' here is BAD - need to find a less sketchy way to pass constraints
        if isinstance(t_slice, str):
            t_slice = eval(t_slice)

        lon = ds.variables[x_vname][:]
        lat = ds.variables[y_vname][:]
        z = ds.variables[z_vname][:]

        t_arr = ds.variables[t_vname][t_slice]
        data_arrays = {}
        for varn in var_lst:
            data_arrays[varn] = ds.variables[varn][t_slice]

        max_rec = get_safe(config, 'max_records', 1)
        #dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')

        stream_def = get_safe(config, 'stream_def')

        cnt = calculate_iteration_count(t_arr.size, max_rec)
        for x in xrange(cnt):
            ta = t_arr[x * max_rec:(x + 1) * max_rec]

            # Make a 'master' RecDict
            rdt = RecordDictionaryTool(stream_definition_id=stream_def)

            # Assign coordinate values to the RecDict
            rdt[x_vname] = lon
            rdt[y_vname] = lat
            rdt[z_vname] = z

            # Assign data values to the RecDict
            rdt[t_vname] = ta
            for key, arr in data_arrays.iteritems():
                d = arr[x * max_rec:(x + 1) * max_rec]
                rdt[key] = d

            g = rdt.to_granule()
            yield g

        ds.close()
def _get_data(cls, config):
    """
    Iterable function that acquires data from a source iteratively based on constraints provided by config
    Passed into BaseDataHandler._publish_data and iterated to publish samples.
    @param config dict containing configuration parameters, may include constraints, formatters, etc
    @retval an iterable that returns well-formed Granule objects on each iteration
    """
    new_flst = get_safe(config, 'constraints.new_files', [])
    parser_mod = get_safe(config, 'parser_mod', '')
    parser_cls = get_safe(config, 'parser_cls', '')

    module = __import__(parser_mod, fromlist=[parser_cls])
    classobj = getattr(module, parser_cls)

    for f in new_flst:
        try:
            size = os.stat(f[0]).st_size
            try:
                # find the new data check index in config
                index = -1
                for ndc in config['set_new_data_check']:
                    if ndc[0] == f[0]:
                        index = config['set_new_data_check'].index(ndc)
                        break
            except:
                log.error('File name not found in attachment')

            parser = classobj(f[0], f[3])

            max_rec = get_safe(config, 'max_records', 1)
            stream_def = get_safe(config, 'stream_def')
            while True:
                particles = parser.get_records(max_count=max_rec)
                if not particles:
                    break

                rdt = RecordDictionaryTool(stream_definition_id=stream_def)
                populate_rdt(rdt, particles)
                g = rdt.to_granule()

                # TODO: record files already read for future additions...
                #    update new data check with the latest file position
                if 'set_new_data_check' in config and index > -1:
                    # WRONG: should only record this after file finished parsing,
                    # but may not have another yield at that point to trigger update
                    config['set_new_data_check'][index] = (f[0], f[1], f[2], size)

                yield g

            # parser.close()
        except Exception as ex:
            # TODO: Decide what to do here, raise an exception or carry on
            log.error('Error parsing data file \'{0}\': {1}'.format(f, ex))
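# The parser class above is resolved dynamically from dotted-path configuration via
# __import__(parser_mod, fromlist=[parser_cls]) plus getattr. A minimal, self-contained sketch
# of that pattern is shown below; the module and class names are standard-library examples,
# not the project's real parser configuration.
def load_class(module_path, class_name):
    # Import the module by name and pull the class attribute off it
    module = __import__(module_path, fromlist=[class_name])
    return getattr(module, class_name)

# Example: resolve collections.OrderedDict from strings, as a stand-in for parser_mod/parser_cls
OrderedDictClass = load_class('collections', 'OrderedDict')
assert OrderedDictClass().__class__.__name__ == 'OrderedDict'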
def get_visualization_image(self, data_product_id='', visualization_parameters=None, callback=''):
    # Error check
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")
    if visualization_parameters == {}:
        visualization_parameters = None

    # Extract the retrieval related parameters. Definitely init all parameters first
    query = None
    if visualization_parameters:
        query = {'parameters': []}
        # Error check and damage control. Definitely need time
        if 'parameters' in visualization_parameters:
            if 'time' not in visualization_parameters['parameters']:
                visualization_parameters['parameters'].append('time')
            query['parameters'] = visualization_parameters['parameters']
        if 'stride_time' in visualization_parameters:
            query['stride_time'] = visualization_parameters['stride_time']
        if 'start_time' in visualization_parameters:
            query['start_time'] = visualization_parameters['start_time']
        if 'end_time' in visualization_parameters:
            query['end_time'] = visualization_parameters['end_time']

    # get the dataset_id associated with the data_product. Need it to do the data retrieval
    ds_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.DataSet, True)
    if ds_ids is None or not ds_ids:
        return None

    # Ideally just need the latest granule to figure out the list of images
    #replay_granule = self.clients.data_retriever.retrieve(ds_ids[0],{'start_time':0,'end_time':2})
    retrieved_granule = self.clients.data_retriever.retrieve(ds_ids[0], query=query)
    if retrieved_granule is None:
        return None

    # send the granule through the transform to get the matplotlib graphs
    mpl_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name('graph_image_param_dict',
                                                                                     id_only=True)
    mpl_stream_def = self.clients.pubsub_management.create_stream_definition('mpl', parameter_dictionary_id=mpl_pdict_id)
    mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute(retrieved_granule,
                                                                     config=visualization_parameters,
                                                                     params=mpl_stream_def)
    if mpl_data_granule is None:
        return None

    mpl_rdt = RecordDictionaryTool.load_from_granule(mpl_data_granule)

    ret_dict = dict()
    ret_dict['content_type'] = (get_safe(mpl_rdt, "content_type"))[0]
    ret_dict['image_name'] = (get_safe(mpl_rdt, "image_name"))[0]
    # reason for encoding as base64 string is otherwise message pack complains about the bit stream
    ret_dict['image_obj'] = base64.encodestring((get_safe(mpl_rdt, "image_obj"))[0])

    if callback == '':
        return ret_dict
    else:
        return callback + "(" + simplejson.dumps(ret_dict) + ")"
def __init__(self, datastore_name=None, host=None, port=None, username=None, password=None,
             config=None, newlog=None, scope=None, **kwargs):
    """
    @param datastore_name  Name of datastore within server. Should be scoped by caller with sysname
    @param config  A standard config dict with connection params
    @param scope  Identifier to prefix the datastore name (e.g. sysname)
    """
    global log
    if newlog:
        log = newlog

    # Connection
    self.host = host or get_safe(config, 'server.couchdb.host') or 'localhost'
    self.port = port or get_safe(config, 'server.couchdb.port') or 5984
    self.username = username or get_safe(config, 'server.couchdb.username')
    self.password = password or get_safe(config, 'server.couchdb.password')
    if self.username and self.password:
        connection_str = "http://%s:%s@%s:%s" % (self.username, self.password, self.host, self.port)
        log.debug("Using username:password authentication to connect to datastore")
    else:
        connection_str = "http://%s:%s" % (self.host, self.port)

    # TODO: Potential security risk to emit password into log.
    log.info('Connecting to CouchDB server: %s' % connection_str)
    self.server = couchdb.Server(connection_str)

    self._datastore_cache = {}

    # Datastore (couch database) handling. Scope with given scope (sysname) and make all lowercase
    self.scope = scope
    if self.scope:
        self.datastore_name = ("%s_%s" % (self.scope, datastore_name)).lower() if datastore_name else None
    else:
        self.datastore_name = datastore_name.lower() if datastore_name else None

    # Just to test existence of the datastore
    if self.datastore_name:
        try:
            ds, _ = self._get_datastore()
        except NotFound:
            self.create_datastore()
            ds, _ = self._get_datastore()
def validate_messages(self, msgs):
    msg = msgs
    rdt = RecordDictionaryTool.load_from_granule(msg.body)

    vardict = {}
    vardict['temp'] = get_safe(rdt, 'temp')
    vardict['time'] = get_safe(rdt, 'time')
    print vardict['time']
    print vardict['temp']
def _new_data_constraints(cls, config): """ Returns a constraints dictionary with @param config Dict of configuration parameters - may be used to generate the returned 'constraints' dict """ #TODO: Sort out what the config needs to look like - dataset_in?? ext_dset_res = get_safe(config, 'external_dataset_res', None) log.debug('ExternalDataset Resource: {0}'.format(ext_dset_res)) if ext_dset_res: #TODO: Use the external dataset resource to determine what data is new (i.e. pull 'old' fingerprint from here) log.debug('ext_dset_res.dataset_description = {0}'.format(ext_dset_res.dataset_description)) log.debug('ext_dset_res.update_description = {0}'.format(ext_dset_res.update_description)) # base_fingerprint = ext_dset_res.update_description base_nd_check = get_safe(ext_dset_res.update_description.parameters,'new_data_check') # base_nd_check = '\x83\xa7content\xdc\x00\xc9\xceM\xa0\xf3\x00\xceM\xa2D\x80\xceM\xa3\x96\x00\xceM\xa4\xe7\x80\xceM\xa69\x00\xceM\xa7\x8a\x80\xceM\xa8\xdc\x00\xceM\xaa-\x80\xceM\xab\x7f\x00\xceM\xac\xd0\x80\xceM\xae"\x00\xceM\xafs\x80\xceM\xb0\xc5\x00\xceM\xb2\x16\x80\xceM\xb3h\x00\xceM\xb4\xb9\x80\xceM\xb6\x0b\x00\xceM\xb7\\\x80\xceM\xb8\xae\x00\xceM\xb9\xff\x80\xceM\xbbQ\x00\xceM\xbc\xa2\x80\xceM\xbd\xf4\x00\xceM\xbfE\x80\xceM\xc0\x97\x00\xceM\xc1\xe8\x80\xceM\xc3:\x00\xceM\xc4\x8b\x80\xceM\xc5\xdd\x00\xceM\xc7.\x80\xceM\xc8\x80\x00\xceM\xc9\xd1\x80\xceM\xcb#\x00\xceM\xcct\x80\xceM\xcd\xc6\x00\xceM\xcf\x17\x80\xceM\xd0i\x00\xceM\xd1\xba\x80\xceM\xd3\x0c\x00\xceM\xd4]\x80\xceM\xd5\xaf\x00\xceM\xd7\x00\x80\xceM\xd8R\x00\xceM\xd9\xa3\x80\xceM\xda\xf5\x00\xceM\xdcF\x80\xceM\xdd\x98\x00\xceM\xde\xe9\x80\xceM\xe0;\x00\xceM\xe1\x8c\x80\xceM\xe2\xde\x00\xceM\xe4/\x80\xceM\xe5\x81\x00\xceM\xe6\xd2\x80\xceM\xe8$\x00\xceM\xe9u\x80\xceM\xea\xc7\x00\xceM\xec\x18\x80\xceM\xedj\x00\xceM\xee\xbb\x80\xceM\xf0\r\x00\xceM\xf1^\x80\xceM\xf2\xb0\x00\xceM\xf4\x01\x80\xceM\xf5S\x00\xceM\xf6\xa4\x80\xceM\xf7\xf6\x00\xceM\xf9G\x80\xceM\xfa\x99\x00\xceM\xfb\xea\x80\xceM\xfd<\x00\xceM\xfe\x8d\x80\xceM\xff\xdf\x00\xceN\x010\x80\xceN\x02\x82\x00\xceN\x03\xd3\x80\xceN\x05%\x00\xceN\x06v\x80\xceN\x07\xc8\x00\xceN\t\x19\x80\xceN\nk\x00\xceN\x0b\xbc\x80\xceN\r\x0e\x00\xceN\x0e_\x80\xceN\x0f\xb1\x00\xceN\x11\x02\x80\xceN\x12T\x00\xceN\x13\xa5\x80\xceN\x14\xf7\x00\xceN\x16H\x80\xceN\x17\x9a\x00\xceN\x18\xeb\x80\xceN\x1a=\x00\xceN\x1b\x8e\x80\xceN\x1c\xe0\x00\xceN\x1e1\x80\xceN\x1f\x83\x00\xceN 
\xd4\x80\xceN"&\x00\xceN#w\x80\xceN$\xc9\x00\xceN&\x1a\x80\xceN\'l\x00\xceN(\xbd\x80\xceN*\x0f\x00\xceN+`\x80\xceN,\xb2\x00\xceN.\x03\x80\xceN/U\x00\xceN0\xa6\x80\xceN1\xf8\x00\xceN3I\x80\xceN4\x9b\x00\xceN5\xec\x80\xceN7>\x00\xceN8\x8f\x80\xceN9\xe1\x00\xceN;2\x80\xceN<\x84\x00\xceN=\xd5\x80\xceN?\'\x00\xceN@x\x80\xceNA\xca\x00\xceNC\x1b\x80\xceNDm\x00\xceNE\xbe\x80\xceNG\x10\x00\xceNHa\x80\xceNI\xb3\x00\xceNK\x04\x80\xceNLV\x00\xceNM\xa7\x80\xceNN\xf9\x00\xceNPJ\x80\xceNQ\x9c\x00\xceNR\xed\x80\xceNT?\x00\xceNU\x90\x80\xceNV\xe2\x00\xceNX3\x80\xceNY\x85\x00\xceNZ\xd6\x80\xceN\\(\x00\xceN]y\x80\xceN^\xcb\x00\xceN`\x1c\x80\xceNan\x00\xceNb\xbf\x80\xceNd\x11\x00\xceNeb\x80\xceNf\xb4\x00\xceNh\x05\x80\xceNiW\x00\xceNj\xa8\x80\xceNk\xfa\x00\xceNmK\x80\xceNn\x9d\x00\xceNo\xee\x80\xceNq@\x00\xceNr\x91\x80\xceNs\xe3\x00\xceNu4\x80\xceNv\x86\x00\xceNw\xd7\x80\xceNy)\x00\xceNzz\x80\xceN{\xcc\x00\xceN}\x1d\x80\xceN~o\x00\xceN\x7f\xc0\x80\xceN\x81\x12\x00\xceN\x82c\x80\xceN\x83\xb5\x00\xceN\x85\x06\x80\xceN\x86X\x00\xceN\x87\xa9\x80\xceN\x88\xfb\x00\xceN\x8aL\x80\xceN\x8b\x9e\x00\xceN\x8c\xef\x80\xceN\x8eA\x00\xceN\x8f\x92\x80\xceN\x90\xe4\x00\xceN\x925\x80\xceN\x93\x87\x00\xceN\x94\xd8\x80\xceN\x96*\x00\xceN\x97{\x80\xceN\x98\xcd\x00\xceN\x9a\x1e\x80\xceN\x9bp\x00\xceN\x9c\xc1\x80\xceN\x9e\x13\x00\xceN\x9fd\x80\xceN\xa0\xb6\x00\xceN\xa2\x07\x80\xceN\xa3Y\x00\xceN\xa4\xaa\x80\xceN\xa5\xfc\x00\xceN\xa7M\x80\xceN\xa8\x9f\x00\xa6header\x83\xa2nd\x01\xa5shape\x91\xcc\xc9\xa4type\xa5int32\xad__ion_array__\xc3' # log.warn(base_nd_check) # # # # log.warn(old_arr) t_slice = slice(None) if base_nd_check: t_new_vname = ext_dset_res.dataset_description.parameters['temporal_dimension'] t_new_arr = ds.variables[t_new_vname][t_slice] new_data = msgpack.packb(t_new_arr, default=encode_ion) if new_data != base_nd_check: #new time data has arrived, figure out what's different and build the new slice first_index = -1 last_index = -1 t_old_arr = msgpack.unpackb(base_nd_check, object_hook=decode_ion) for old_data in t_old_arr: if not old_data in t_new_arr: if first_index == -1: first_index = np.nonzero(t_new_arr == old_data)[0][0] last_index = np.nonzero(t_new_arr == old_data)[0][0] else: last_index = np.nonzero(t_new_arr == old_data)[0][0] t_slice = slice(first_index, last_index) #TG: Get new temporal data and encode it #TG: Compare the old with the new, if different, decode old and sort out what's different #TG: Build appropriate temproral_slice return { 'temporal_slice':t_slice } return None
def get_datastore(self, ds_name, profile=DataStore.DS_PROFILE.BASIC, config=None):
    """
    Factory method to get a datastore instance from given name, profile and config.
    This is the central point to cache these instances, to decide persistent or mock
    and to force clean the store on first use.
    @param ds_name  Logical name of datastore (will be scoped with sysname)
    @param profile  One of known constants determining the use of the store
    @param config  Override config to use
    """
    assert ds_name, "Must provide ds_name"
    if ds_name in self._datastores:
        log.debug("get_datastore(): Found instance of store '%s'" % ds_name)
        return self._datastores[ds_name]

    scoped_name = DatastoreManager.get_scoped_name(ds_name)

    # Imports here to prevent cyclic module dependency
    from pyon.core.bootstrap import CFG
    config = config or CFG

    if self.persistent is None:
        self.persistent = not bool(get_safe(config, "system.mockdb"))
    if self.force_clean is None:
        self.force_clean = bool(get_safe(config, "system.force_clean"))

    # Create a datastore instance
    log.info("get_datastore(): Create instance of store '%s' {persistent=%s, scoped_name=%s}" % (
        ds_name, self.persistent, scoped_name))
    new_ds = DatastoreManager.get_datastore_instance(ds_name, self.persistent, profile)

    # Clean the store instance
    # TBD: Do we really want to do it here? or make it more manual?
    if self.force_clean:
        log.info("get_datastore(): Force clean store '%s'" % ds_name)
        try:
            new_ds.delete_datastore(scoped_name)
        except NotFound:
            pass

    # Create store if not existing
    if not new_ds.datastore_exists(scoped_name):
        new_ds.create_datastore(scoped_name)
    else:
        if self.persistent:
            # NOTE: This may be expensive if called more than once per container
            # If views exist and are dropped and recreated
            new_ds._define_views(profile=profile, keepviews=True)

    # Set a few standard datastore instance fields
    new_ds.local_name = ds_name
    new_ds.ds_profile = profile

    self._datastores[ds_name] = new_ds

    return new_ds
def _constraints_for_new_request(cls, config): """ Returns a constraints dictionary with @param config Dict of configuration parameters - may be used to generate the returned 'constraints' dict """ #TODO: Sort out what the config needs to look like - dataset_in?? ext_dset_res = get_safe(config, 'external_dataset_res', None) log.debug('ExternalDataset Resource: {0}'.format(ext_dset_res)) if ext_dset_res: #TODO: Use the external dataset resource to determine what data is new (i.e. pull 'old' fingerprint from here) log.debug('ext_dset_res.dataset_description = {0}'.format(ext_dset_res.dataset_description)) log.debug('ext_dset_res.update_description = {0}'.format(ext_dset_res.update_description)) # base_fingerprint = ext_dset_res.update_description base_nd_check = get_safe(ext_dset_res.update_description.parameters,'new_data_check') # base_nd_check = '\x83\xa7content\xdc\x00\xc9\xceM\xa0\xf3\x00\xceM\xa2D\x80\xceM\xa3\x96\x00\xceM\xa4\xe7\x80\xceM\xa69\x00\xceM\xa7\x8a\x80\xceM\xa8\xdc\x00\xceM\xaa-\x80\xceM\xab\x7f\x00\xceM\xac\xd0\x80\xceM\xae"\x00\xceM\xafs\x80\xceM\xb0\xc5\x00\xceM\xb2\x16\x80\xceM\xb3h\x00\xceM\xb4\xb9\x80\xceM\xb6\x0b\x00\xceM\xb7\\\x80\xceM\xb8\xae\x00\xceM\xb9\xff\x80\xceM\xbbQ\x00\xceM\xbc\xa2\x80\xceM\xbd\xf4\x00\xceM\xbfE\x80\xceM\xc0\x97\x00\xceM\xc1\xe8\x80\xceM\xc3:\x00\xceM\xc4\x8b\x80\xceM\xc5\xdd\x00\xceM\xc7.\x80\xceM\xc8\x80\x00\xceM\xc9\xd1\x80\xceM\xcb#\x00\xceM\xcct\x80\xceM\xcd\xc6\x00\xceM\xcf\x17\x80\xceM\xd0i\x00\xceM\xd1\xba\x80\xceM\xd3\x0c\x00\xceM\xd4]\x80\xceM\xd5\xaf\x00\xceM\xd7\x00\x80\xceM\xd8R\x00\xceM\xd9\xa3\x80\xceM\xda\xf5\x00\xceM\xdcF\x80\xceM\xdd\x98\x00\xceM\xde\xe9\x80\xceM\xe0;\x00\xceM\xe1\x8c\x80\xceM\xe2\xde\x00\xceM\xe4/\x80\xceM\xe5\x81\x00\xceM\xe6\xd2\x80\xceM\xe8$\x00\xceM\xe9u\x80\xceM\xea\xc7\x00\xceM\xec\x18\x80\xceM\xedj\x00\xceM\xee\xbb\x80\xceM\xf0\r\x00\xceM\xf1^\x80\xceM\xf2\xb0\x00\xceM\xf4\x01\x80\xceM\xf5S\x00\xceM\xf6\xa4\x80\xceM\xf7\xf6\x00\xceM\xf9G\x80\xceM\xfa\x99\x00\xceM\xfb\xea\x80\xceM\xfd<\x00\xceM\xfe\x8d\x80\xceM\xff\xdf\x00\xceN\x010\x80\xceN\x02\x82\x00\xceN\x03\xd3\x80\xceN\x05%\x00\xceN\x06v\x80\xceN\x07\xc8\x00\xceN\t\x19\x80\xceN\nk\x00\xceN\x0b\xbc\x80\xceN\r\x0e\x00\xceN\x0e_\x80\xceN\x0f\xb1\x00\xceN\x11\x02\x80\xceN\x12T\x00\xceN\x13\xa5\x80\xceN\x14\xf7\x00\xceN\x16H\x80\xceN\x17\x9a\x00\xceN\x18\xeb\x80\xceN\x1a=\x00\xceN\x1b\x8e\x80\xceN\x1c\xe0\x00\xceN\x1e1\x80\xceN\x1f\x83\x00\xceN 
\xd4\x80\xceN"&\x00\xceN#w\x80\xceN$\xc9\x00\xceN&\x1a\x80\xceN\'l\x00\xceN(\xbd\x80\xceN*\x0f\x00\xceN+`\x80\xceN,\xb2\x00\xceN.\x03\x80\xceN/U\x00\xceN0\xa6\x80\xceN1\xf8\x00\xceN3I\x80\xceN4\x9b\x00\xceN5\xec\x80\xceN7>\x00\xceN8\x8f\x80\xceN9\xe1\x00\xceN;2\x80\xceN<\x84\x00\xceN=\xd5\x80\xceN?\'\x00\xceN@x\x80\xceNA\xca\x00\xceNC\x1b\x80\xceNDm\x00\xceNE\xbe\x80\xceNG\x10\x00\xceNHa\x80\xceNI\xb3\x00\xceNK\x04\x80\xceNLV\x00\xceNM\xa7\x80\xceNN\xf9\x00\xceNPJ\x80\xceNQ\x9c\x00\xceNR\xed\x80\xceNT?\x00\xceNU\x90\x80\xceNV\xe2\x00\xceNX3\x80\xceNY\x85\x00\xceNZ\xd6\x80\xceN\\(\x00\xceN]y\x80\xceN^\xcb\x00\xceN`\x1c\x80\xceNan\x00\xceNb\xbf\x80\xceNd\x11\x00\xceNeb\x80\xceNf\xb4\x00\xceNh\x05\x80\xceNiW\x00\xceNj\xa8\x80\xceNk\xfa\x00\xceNmK\x80\xceNn\x9d\x00\xceNo\xee\x80\xceNq@\x00\xceNr\x91\x80\xceNs\xe3\x00\xceNu4\x80\xceNv\x86\x00\xceNw\xd7\x80\xceNy)\x00\xceNzz\x80\xceN{\xcc\x00\xceN}\x1d\x80\xceN~o\x00\xceN\x7f\xc0\x80\xceN\x81\x12\x00\xceN\x82c\x80\xceN\x83\xb5\x00\xceN\x85\x06\x80\xceN\x86X\x00\xceN\x87\xa9\x80\xceN\x88\xfb\x00\xceN\x8aL\x80\xceN\x8b\x9e\x00\xceN\x8c\xef\x80\xceN\x8eA\x00\xceN\x8f\x92\x80\xceN\x90\xe4\x00\xceN\x925\x80\xceN\x93\x87\x00\xceN\x94\xd8\x80\xceN\x96*\x00\xceN\x97{\x80\xceN\x98\xcd\x00\xceN\x9a\x1e\x80\xceN\x9bp\x00\xceN\x9c\xc1\x80\xceN\x9e\x13\x00\xceN\x9fd\x80\xceN\xa0\xb6\x00\xceN\xa2\x07\x80\xceN\xa3Y\x00\xceN\xa4\xaa\x80\xceN\xa5\xfc\x00\xceN\xa7M\x80\xceN\xa8\x9f\x00\xa6header\x83\xa2nd\x01\xa5shape\x91\xcc\xc9\xa4type\xa5int32\xad__ion_array__\xc3' # log.warn(base_nd_check) # # # # log.warn(old_arr) t_slice = slice(None) if base_nd_check: t_new_vname = ext_dset_res.dataset_description.parameters['temporal_dimension'] t_new_arr = ds.variables[t_new_vname][t_slice] new_data = msgpack.packb(t_new_arr, default=encode_ion) if new_data != base_nd_check: #new time data has arrived, figure out what's different and build the new slice first_index = -1 last_index = -1 t_old_arr = msgpack.unpackb(base_nd_check, object_hook=decode_ion) for old_data in t_old_arr: if not old_data in t_new_arr: if first_index == -1: first_index = np.nonzero(t_new_arr == old_data)[0][0] last_index = np.nonzero(t_new_arr == old_data)[0][0] else: last_index = np.nonzero(t_new_arr == old_data)[0][0] t_slice = slice(first_index, last_index) #TG: Get new temporal data and encode it #TG: Compare the old with the new, if different, decode old and sort out what's different #TG: Build appropriate temproral_slice return { 'temporal_slice':t_slice } return None
def _acquire_sample(cls, config, publisher, unlock_new_data_callback, update_new_data_check_attachment):
    """
    Ensures required keys (such as stream_id) are available from config, configures the publisher and then calls:
         BaseDataHandler._constraints_for_new_request (only if config does not contain 'constraints')
         BaseDataHandler._publish_data passing BaseDataHandler._get_data as a parameter
    @param config Dict containing configuration parameters, may include constraints, formatters, etc
    @param publisher the publisher used to publish data
    @param unlock_new_data_callback BaseDataHandler callback function to allow conditional unlocking of the BaseDataHandler._semaphore
    @param update_new_data_check_attachment classmethod to update the external dataset resources file list attachment
    @throws InstrumentParameterException if the data constraints are not a dictionary
    @retval None
    """
    log.debug('start _acquire_sample: config={0}'.format(config))

    cls._init_acquisition_cycle(config)

    constraints = get_safe(config, 'constraints')
    if not constraints:
        gevent.getcurrent().link(unlock_new_data_callback)
        try:
            constraints = cls._constraints_for_new_request(config)
        except NoNewDataWarning:
            #log.info(nndw.message)
            if get_safe(config, 'TESTING'):
                #log.debug('Publish TestingFinished event')
                pub = EventPublisher('DeviceCommonLifecycleEvent')
                pub.publish_event(origin='BaseDataHandler._acquire_sample', description='TestingFinished')
            return

        if constraints is None:
            raise InstrumentParameterException("Data constraints returned from _constraints_for_new_request cannot be None")
        config['constraints'] = constraints
    elif isinstance(constraints, dict):
        addnl_constr = cls._constraints_for_historical_request(config)
        if addnl_constr is not None and isinstance(addnl_constr, dict):
            constraints.update(addnl_constr)
    else:
        raise InstrumentParameterException('Data constraints must be of type \'dict\': {0}'.format(constraints))

    cls._publish_data(publisher, cls._get_data(config), config, update_new_data_check_attachment)

    # Publish a 'TestFinished' event
    if get_safe(config, 'TESTING'):
        #log.debug('Publish TestingFinished event')
        pub = EventPublisher(OT.DeviceCommonLifecycleEvent)
        pub.publish_event(origin='BaseDataHandler._acquire_sample', description='TestingFinished')
def _get_data(cls, config):
    """
    Retrieves config['constraints']['count'] number of random samples of length config['constraints']['array_len']
    @param config Dict of configuration parameters - must contain ['constraints']['count'] and ['constraints']['array_len']
    """
    count = get_safe(config, 'constraints.count', 1)
    array_len = get_safe(config, 'constraints.array_len', 1)

    for i in xrange(count):
        time.sleep(0.1)
        yield npr.random_sample(array_len)
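# A minimal, standalone sketch of how the generator above behaves, using numpy.random directly
# instead of the handler's config/get_safe plumbing. The names (count, array_len) mirror the
# constraint keys in the snippet; everything else here is illustrative only.
import numpy.random as npr

def random_sample_stream(count, array_len):
    for _ in xrange(count):
        yield npr.random_sample(array_len)

# Example: three chunks of five random samples each
chunks = list(random_sample_stream(3, 5))
assert len(chunks) == 3 and all(len(c) == 5 for c in chunks)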
def validate_messages(self, msgs):
    cc = self.container
    assertions = self.assertTrue

    rdt = RecordDictionaryTool.load_from_granule(msgs.body)

    vardict = {}
    vardict['temp'] = get_safe(rdt, 'temp')
    vardict['time'] = get_safe(rdt, 'time')
    print vardict['time']
    print vardict['temp']
def _spawn_stream_process(self, process_id, name, module, cls, config):
    """
    Spawn a process acting as a data stream process.
    Attach to subscription queue with process function.
    """
    process_instance = self._create_process_instance(process_id, name, module, cls, config)

    listen_name = get_safe(config, "process.listen_name") or name
    log.debug("Stream Process (%s) listen_name: %s", name, listen_name)
    process_instance._proc_listen_name = listen_name

    process_instance.stream_subscriber = StreamSubscriber(process=process_instance, exchange_name=listen_name,
                                                          callback=process_instance.call_process)

    # Add publishers if any...
    publish_streams = get_safe(config, "process.publish_streams")
    pub_names = self._set_publisher_endpoints(process_instance, publish_streams)

    rsvc = self._create_listening_endpoint(node=self.container.node,
                                           from_name=process_instance.id,
                                           process=process_instance)

    # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
    def cleanup(*args):
        self._cleanup_method(process_instance.id, rsvc)
        for name in pub_names:
            p = getattr(process_instance, name)
            p.close()

    proc = self.proc_sup.spawn(name=process_instance.id,
                               service=process_instance,
                               listeners=[rsvc, process_instance.stream_subscriber],
                               proc_name=process_instance._proc_name,
                               cleanup_method=cleanup)
    proc.proc._glname = "ION Proc %s" % process_instance._proc_name
    self.proc_sup.ensure_ready(proc, "_spawn_stream_process for %s" % process_instance._proc_name)

    # map gproc to process_instance
    self._spawned_proc_to_process[proc.proc] = process_instance

    # set service's reference to process
    process_instance._process = proc

    self._process_init(process_instance)
    self._process_start(process_instance)

    try:
        proc.start_listeners()
    except IonProcessError:
        self._process_quit(process_instance)
        self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
        raise

    return process_instance
def execute(self, granule):
    """
    Example process to double the salinity value
    """
    # Use the PointSupplementStreamParser to pull data from a granule
    #psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=packet)
    rdt = RecordDictionaryTool.load_from_granule(granule)

    salinity = get_safe(rdt, 'salinity')
    longitude = get_safe(rdt, 'lon')
    latitude = get_safe(rdt, 'lat')
    time = get_safe(rdt, 'time')
    height = get_safe(rdt, 'height')

    # # pull data from a granule
    # psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=granule)
    #
    # longitude = psd.get_values('longitude')
    # latitude = psd.get_values('latitude')
    # height = psd.get_values('height')
    # time = psd.get_values('time')
    # salinity = psd.get_values('salinity')

    salinity *= 2.0

    print ('Doubled salinity: %s' % str(salinity))

    # Use the constructor to put data into a granule
    # psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])
    #
    # for i in xrange(len(salinity)):
    #     point_id = psc.add_point(time=time[i], location=(longitude[i], latitude[i], height[i]))
    #     psc.add_scalar_point_coverage(point_id=point_id, coverage_id='salinity', value=salinity[i])
    #
    # return psc.close_stream_granule()

    root_rdt = RecordDictionaryTool(taxonomy=self.tx)

    #data_rdt = RecordDictionaryTool(taxonomy=self.tx)
    #coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

    root_rdt['salinity'] = salinity
    root_rdt['time'] = time
    root_rdt['lat'] = latitude
    root_rdt['lon'] = longitude
    root_rdt['height'] = height

    #root_rdt['coordinates'] = coord_rdt
    #root_rdt['data'] = data_rdt

    return build_granule(data_producer_id='ctd_L2_salinity', taxonomy=self.tx, record_dictionary=root_rdt)
def get_server_config(cls, config=None):
    default_server = get_safe(config, "container.datastore.default_server", "postgresql")

    server_cfg = get_safe(config, "server.%s" % default_server, None)
    if not server_cfg:
        # Support tests that mock out the CFG
        pg_cfg = get_safe(config, "server.postgresql", None)
        if pg_cfg:
            server_cfg = pg_cfg
        else:
            raise BadRequest("No datastore config available!")

    return server_cfg
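# Several snippets above read nested configuration with get_safe(config, 'a.b.c', default).
# The sketch below shows the dotted-path lookup behaviour that usage implies; it is a
# simplified stand-in for illustration, not the project's actual get_safe implementation.
def get_safe_sketch(dict_instance, keypath, default=None):
    value = dict_instance
    for key in keypath.split('.'):
        if not isinstance(value, dict) or key not in value:
            return default
        value = value[key]
    return value

# Example: nested lookup with and without a fallback default
cfg = {'server': {'postgresql': {'host': 'localhost', 'port': 5432}}}
assert get_safe_sketch(cfg, 'server.postgresql.port') == 5432
assert get_safe_sketch(cfg, 'server.mysql.port', 3306) == 3306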
def _constraints_for_new_request(cls, config):
    """
    Returns a constraints dictionary describing the new files found at the external data source
    ('new_files', 'start_time', 'end_time', 'bounding_box', 'vars')
    @param config Dict of configuration parameters - may be used to generate the returned 'constraints' dict
    @retval constraints dictionary
    """
    old_list = get_safe(config, 'new_data_check') or []

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Determine which files are new
    # Not exactly the prettiest method, but here goes:
    #   old_list comes in as a list of lists: [[]]
    #   curr_list comes in as a list of tuples: [()]
    #   each needs to be a set of tuples for set.difference to work properly
    #   set.difference returns the tuples that appear in curr_list but not old_list,
    #   providing the new files that are available
    curr_set = set(tuple(x) for x in curr_list)
    old_set = set(tuple(x) for x in old_list)
    #new_list = [tuple(x) for x in curr_list if list(x) not in old_list] - removed because it wasn't working properly
    new_list = list(curr_set.difference(old_set))

    if len(new_list) == 0:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config['set_new_data_check'] = curr_list

    # The new_list is the set of new files - these will be processed
    ret['new_files'] = new_list
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['bounding_box'] = {}
    ret['vars'] = []

    log.debug('constraints_for_new_request: {0}'.format(ret))

    return ret
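# A small standalone illustration of the set-difference step above: old_list arrives as a list
# of lists, curr_list as a list of tuples, so both are normalized to sets of tuples before
# differencing. The file entries here are made up purely for the example.
old_list = [['a.dat', 100], ['b.dat', 200]]
curr_list = [('a.dat', 100), ('b.dat', 200), ('c.dat', 300)]

curr_set = set(tuple(x) for x in curr_list)
old_set = set(tuple(x) for x in old_list)
new_files = list(curr_set.difference(old_set))

# Only the entry not present in the previous listing survives the difference
assert new_files == [('c.dat', 300)]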
def on_init(self):
    self.create_workflow_timeout = get_safe(self.CFG, 'create_workflow_timeout', 60)
    self.terminate_workflow_timeout = get_safe(self.CFG, 'terminate_workflow_timeout', 60)
    # self.monitor_timeout = get_safe(self.CFG, 'user_queue_monitor_timeout', 300)
    # self.monitor_queue_size = get_safe(self.CFG, 'user_queue_monitor_size', 100)
    #
    # #Setup and event object for use by the queue monitoring greenlet
    # self.monitor_event = gevent.event.Event()
    # self.monitor_event.clear()
    #
    # #Start up queue monitor
    # self._process.thread_manager.spawn(self.user_vis_queue_monitor)
    return
def validate_messages(self, results):
    cc = self.container
    assertions = self.assertTrue

    first_salinity_values = None

    for message in results:
        rdt = RecordDictionaryTool.load_from_granule(message)

        try:
            temp = get_safe(rdt, 'temp')
            # psd = PointSupplementStreamParser(stream_definition=self.ctd_stream_def, stream_granule=message)
            # temp = psd.get_values('temperature')
            # log.info(psd.list_field_names())
        except KeyError:
            temp = None

        if temp is not None:
            assertions(isinstance(temp, numpy.ndarray))

            log.info('temperature=' + str(numpy.nanmin(temp)))

            first_salinity_values = None
        else:
            #psd = PointSupplementStreamParser(stream_definition=SalinityTransform.outgoing_stream_def, stream_granule=message)
            #log.info(psd.list_field_names())

            # Test the handy info method for the names of fields in the stream def
            #assertions('salinity' in psd.list_field_names())

            # you have to know the name of the coverage in stream def
            salinity = get_safe(rdt, 'salinity')
            #salinity = psd.get_values('salinity')

            log.info('salinity=' + str(numpy.nanmin(salinity)))

            # Check to see if salinity has values
            assertions(salinity is not None)

            assertions(isinstance(salinity, numpy.ndarray))
            assertions(numpy.nanmin(salinity) > 0.0)  # salinity should always be greater than 0

            if first_salinity_values is None:
                first_salinity_values = salinity.tolist()
            else:
                second_salinity_values = salinity.tolist()
                assertions(len(first_salinity_values) == len(second_salinity_values))
                for idx in range(0, len(first_salinity_values)):
                    assertions(first_salinity_values[idx] * 2.0 == second_salinity_values[idx])
def _acquire_sample(cls, config, publisher, unlock_new_data_callback, update_new_data_check_attachment):
    """
    Ensures required keys (such as stream_id) are available from config, configures the publisher and then calls:
         BaseDataHandler._constraints_for_new_request (only if config does not contain 'constraints')
         BaseDataHandler._publish_data passing BaseDataHandler._get_data as a parameter
    @param config Dict containing configuration parameters, may include constraints, formatters, etc
    @param publisher the publisher used to publish data
    @param unlock_new_data_callback BaseDataHandler callback function to allow conditional unlocking of the BaseDataHandler._semaphore
    @param update_new_data_check_attachment classmethod to update the external dataset resources file list attachment
    @throws InstrumentParameterException if the data constraints are not a dictionary
    @retval None
    """
    log.debug('start _acquire_sample: config={0}'.format(config))

    cls._init_acquisition_cycle(config)

    constraints = get_safe(config, 'constraints')
    if not constraints:
        gevent.getcurrent().link(unlock_new_data_callback)
        try:
            constraints = cls._constraints_for_new_request(config)
        except NoNewDataWarning:
            #log.info(nndw.message)
            if get_safe(config, 'TESTING'):
                #log.debug('Publish TestingFinished event')
                pub = EventPublisher('DeviceCommonLifecycleEvent')
                pub.publish_event(origin='BaseDataHandler._acquire_sample', description='TestingFinished')
            return

        if constraints is None:
            raise InstrumentParameterException("Data constraints returned from _constraints_for_new_request cannot be None")
        config['constraints'] = constraints
    elif isinstance(constraints, dict):
        addnl_constr = cls._constraints_for_historical_request(config)
        if addnl_constr is not None and isinstance(addnl_constr, dict):
            constraints.update(addnl_constr)
    else:
        raise InstrumentParameterException('Data constraints must be of type \'dict\': {0}'.format(constraints))

    cls._publish_data(publisher, cls._get_data(config))

    if 'set_new_data_check' in config:
        update_new_data_check_attachment(config['external_dataset_res_id'], config['set_new_data_check'])

    # Publish a 'TestFinished' event
    if get_safe(config, 'TESTING'):
        #log.debug('Publish TestingFinished event')
        pub = EventPublisher('DeviceCommonLifecycleEvent')
        pub.publish_event(origin='BaseDataHandler._acquire_sample', description='TestingFinished')
def execute(self, granule):
    """Processes incoming data!!!!
    """
    rdt = RecordDictionaryTool.load_from_granule(granule)
    #todo: use only flat dicts for now, may change later...
    # rdt0 = rdt['coordinates']
    # rdt1 = rdt['data']

    temperature = get_safe(rdt, 'temp')
    longitude = get_safe(rdt, 'lon')
    latitude = get_safe(rdt, 'lat')
    time = get_safe(rdt, 'time')
    height = get_safe(rdt, 'height')

    log.warn('Got temperature: %s' % str(temperature))

    # The L1 temperature data product algorithm takes the L0 temperature data product and converts it into Celsius.
    # Once the hexadecimal string is converted to decimal, only scaling (dividing by a factor and adding an offset) is
    # required to produce the correct decimal representation of the data in Celsius.
    # The scaling function differs by CTD make/model as described below.
    #    SBE 37IM, Output Format 0
    #    1) Standard conversion from 5-character hex string (Thex) to decimal (tdec)
    #    2) Scaling: T [C] = (tdec / 10,000) - 10
    root_rdt = RecordDictionaryTool(param_dictionary=self.temp)
    #todo: use only flat dicts for now, may change later...
    # data_rdt = RecordDictionaryTool(taxonomy=self.tx)
    # coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

    scaled_temperature = temperature
    for i in xrange(len(temperature)):
        scaled_temperature[i] = (temperature[i] / 10000.0) - 10

    root_rdt['temp'] = scaled_temperature
    root_rdt['time'] = time
    root_rdt['lat'] = latitude
    root_rdt['lon'] = longitude
    root_rdt['height'] = height

    #todo: use only flat dicts for now, may change later...
    # root_rdt['coordinates'] = coord_rdt
    # root_rdt['data'] = data_rdt

    return build_granule(data_producer_id='ctd_L1_temperature', param_dictionary=self.temp, record_dictionary=root_rdt)
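The scaling step above can also be expressed as a single vectorized operation; the following is a minimal sketch assuming the L0 values arrive as a numeric numpy array. The function name is illustrative and not part of the codebase.

import numpy as np

def scale_sbe37im_temperature(tdec):
    # T [C] = (tdec / 10,000) - 10, applied element-wise without mutating the input
    return np.asarray(tdec, dtype=float) / 10000.0 - 10.0

# scale_sbe37im_temperature([280000, 281500]) -> array([ 18.  ,  18.15])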
def _constraints_for_new_request(cls, config):
    """
    Returns a constraints dictionary containing the temporal slice of new data, or None if the external dataset resource is unavailable
    @param config Dict of configuration parameters - may be used to generate the returned 'constraints' dict
    @retval dict that contains the constraints for retrieval of new data from the external dataset or None
    """
    #TODO: Sort out what the config needs to look like - dataset_in??
    ext_dset_res = get_safe(config, 'external_dataset_res', None)
    #log.debug('ExternalDataset Resource: {0}'.format(ext_dset_res))
    if ext_dset_res:
        #TODO: Use the external dataset resource to determine what data is new (i.e. pull 'old' fingerprint from here)
        #log.debug('ext_dset_res.dataset_description = {0}'.format(ext_dset_res.dataset_description))
        #log.debug('ext_dset_res.update_description = {0}'.format(ext_dset_res.update_description))

        # Get the Dataset object from the config (should have been instantiated in _init_acquisition_cycle)
        ds = get_safe(config, 'dataset_object')

        base_nd_check = get_safe(ext_dset_res.update_description.parameters, 'new_data_check')
        t_slice = slice(None)
        if base_nd_check:
            t_new_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
            t_new_arr = ds.variables[t_new_vname][t_slice]

            new_data = msgpack.packb(t_new_arr, default=encode_ion)
            if new_data != base_nd_check:
                # New time data has arrived - figure out what's different and build the new slice
                first_index = -1
                last_index = -1
                t_old_arr = msgpack.unpackb(base_nd_check, object_hook=decode_ion)
                for t_val in t_new_arr:
                    if t_val not in t_old_arr:
                        if first_index == -1:
                            first_index = np.nonzero(t_new_arr == t_val)[0][0]
                            last_index = np.nonzero(t_new_arr == t_val)[0][0]
                        else:
                            last_index = np.nonzero(t_new_arr == t_val)[0][0]

                t_slice = slice(first_index, last_index)

        return {'temporal_slice': t_slice}

    return None
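A standalone sketch of the "find the slice of new time values" step above, assuming the time axis only grows at the end (the common append-only case). The helper name is illustrative, and the returned slice includes the last new index, unlike the in-place logic above.

import numpy as np

def new_time_slice(t_old_arr, t_new_arr):
    # True where a value of t_new_arr does not appear in t_old_arr
    mask = ~np.in1d(t_new_arr, t_old_arr)
    new_idx = np.nonzero(mask)[0]
    if new_idx.size == 0:
        return slice(None)
    return slice(new_idx[0], new_idx[-1] + 1)

# new_time_slice([0, 1, 2], [0, 1, 2, 3, 4]) -> slice(3, 5)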
def _constraints_for_new_request(cls, config):
    # """
    # Returns a constraints dictionary with 'array_len' and 'count' assigned random integers
    # @param config Dict of configuration parameters - may be used to generate the returned 'constraints' dict
    # """
    # # Make sure the array_len is at least 1 larger than max_rec - so chunking is always seen
    # max_rec = get_safe(config, 'max_records', 1)
    # return {'array_len':npr.randint(max_rec+1,max_rec+10,1)[0],}

    old_list = get_safe(config, 'new_data_check') or []

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Determine which files are new.
    # Not exactly the prettiest method, but here goes:
    # old_list comes in as a list of lists: [[]]
    # curr_list comes in as a list of tuples: [()]
    # Each needs to be a set of tuples for set.difference to work properly.
    # set.difference returns the tuples that appear in curr_list but not in old_list,
    # providing the new files that are available.
    curr_set = set(tuple(x) for x in curr_list)
    old_set = set(tuple(x) for x in old_list)
    # new_list = [tuple(x) for x in curr_list if list(x) not in old_list] - removed because it wasn't working properly
    # Sort by filename so the start_time/end_time lookups below are deterministic (a set has no order)
    new_list = sorted(curr_set.difference(old_set))

    if len(new_list) == 0:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config['set_new_data_check'] = curr_list

    # The new_list is the set of new files - these will be processed
    ret['new_files'] = new_list
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['bounding_box'] = {}
    ret['vars'] = []

    log.debug('constraints_for_new_request: {0}'.format(ret))

    return ret
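A tiny illustration of the set-difference step above, using made-up file entries; old_list arrives as lists of lists while curr_list holds tuples, so both are normalized to sets of tuples before differencing.

old_list = [['fileA.dat', 'mtime', 1000], ['fileB.dat', 'mtime', 1001]]
curr_list = [('fileA.dat', 'mtime', 1000), ('fileB.dat', 'mtime', 1001), ('fileC.dat', 'mtime', 1002)]
new_files = sorted(set(tuple(x) for x in curr_list) - set(tuple(x) for x in old_list))
# new_files -> [('fileC.dat', 'mtime', 1002)]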
def _spawn_stream_process(self, process_id, name, module, cls, config):
    """
    Spawn a process acting as a data stream process.
    Attach to subscription queue with process function.
    """
    service_instance = self._create_service_instance(process_id, name, module, cls, config)

    listen_name = get_safe(config, "process.listen_name") or name
    service_instance._proc_listen_name = listen_name

    service_instance.stream_subscriber_registrar = StreamSubscriberRegistrar(process=service_instance, container=self.container)
    sub = service_instance.stream_subscriber_registrar.create_subscriber(exchange_name=listen_name)

    # Add publishers if any...
    publish_streams = get_safe(config, "process.publish_streams")
    self._set_publisher_endpoints(service_instance, publish_streams)

    rsvc = ProcessRPCServer(node=self.container.node,
                            from_name=service_instance.id,
                            service=service_instance,
                            process=service_instance)

    # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
    cleanup = lambda _: self._cleanup_method(service_instance.id, rsvc)

    proc = self.proc_sup.spawn(name=service_instance.id,
                               service=service_instance,
                               listeners=[rsvc, sub],
                               proc_name=service_instance._proc_name,
                               cleanup_method=cleanup)
    self.proc_sup.ensure_ready(proc, "_spawn_stream_process for %s" % service_instance._proc_name)

    # map gproc to service_instance
    self._spawned_proc_to_process[proc.proc] = service_instance

    # set service's reference to process
    service_instance._process = proc

    self._service_init(service_instance)
    self._service_start(service_instance)

    proc.start_listeners()

    return service_instance
def execute(input=None, context=None, config=None, params=None, state=None):
    log.debug('Matplotlib transform: Received Viz Data Packet')

    stream_definition_id = params

    # Parse the incoming data
    rdt = RecordDictionaryTool.load_from_granule(input)

    # Build a list of fields/variables that need to be plotted. Use the list provided by the UI
    # since the retrieved granule might have extra fields. Why ? Ans : Bugs, baby, bugs !
    fields = []
    if config and config['parameters']:
        fields = config['parameters']
    else:
        fields = rdt.fields

    vardict = {}
    vardict['time'] = get_safe(rdt, 'time')
    if vardict['time'] is None:
        log.error("Matplotlib transform: Did not receive a time field to work with")
        return None

    for field in fields:
        if field == 'time':
            continue
        vardict[field] = get_safe(rdt, field)

    arrLen = len(vardict['time'])

    # Initialize the graph_data structure for storing values
    graph_data = {}
    for varname in vardict.keys():
        graph_data[varname] = []

    # The graph data storage has been initialized; now add the values to each list
    for varname in vardict.keys():  # psd.list_field_names():
        if vardict[varname] is None:
            # Create an array of zeros to compensate for missing values
            graph_data[varname].extend([0.0] * arrLen)
        else:
            graph_data[varname].extend(vardict[varname])

    out_granule = VizTransformMatplotlibGraphsAlgorithm.render_graphs(graph_data, stream_definition_id)

    return out_granule
def execute_resource(self, resource_id='', command=None):
    """Execute command on the resource represented by agent.
    """
    res_type = self._get_resource_type(resource_id)
    if self._has_agent(res_type):
        rac = ResourceAgentClient(resource_id=resource_id)
        return rac.execute_resource(resource_id=resource_id, command=command)

    cmd_res = None
    res_interface = self._get_type_interface(res_type)

    target = get_safe(res_interface, "commands.%s.execute" % command.command, None)
    if target:
        res = self._call_execute(target, resource_id, res_type, command.args, command.kwargs)
        cmd_res = AgentCommandResult(command_id=command.command_id,
                                     command=command.command,
                                     ts_execute=get_ion_ts(),
                                     status=0)
    else:
        log.warn("execute_resource(): command %s not defined", command.command)

    return cmd_res
def _stop_driver(self):
    """
    Unload the DataHandler instance
    Called from:
        InstrumentAgent._handler_inactive_reset,
        InstrumentAgent._handler_idle_reset,
        InstrumentAgent._handler_stopped_reset,
        InstrumentAgent._handler_observatory_reset
    @retval None.
    """
    dvr_mod = get_safe(self._dvr_config, 'dvr_mod', None)
    dvr_cls = get_safe(self._dvr_config, 'dvr_cls', None)

    self._dvr_client = None
    log.info('ExternalDatasetAgent \'{0}\' unloaded DataHandler \'{1}.{2}\''.format(self._proc_name, dvr_mod, dvr_cls))
    return None
def _spawn_simple_process(self, process_id, name, module, cls, config):
    """
    Spawn a process acting as simple process.
    No attachments.
    """
    process_instance = self._create_process_instance(process_id, name, module, cls, config)

    # Add publishers if any...
    publish_streams = get_safe(config, "process.publish_streams")
    pub_names = self._set_publisher_endpoints(process_instance, publish_streams)

    # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
    def cleanup(*args):
        for pub_name in pub_names:
            p = getattr(process_instance, pub_name)
            p.close()

    proc = self.proc_sup.spawn(name=process_instance.id,
                               service=process_instance,
                               listeners=[],
                               proc_name=process_instance._proc_name,
                               cleanup_method=cleanup)
    self.proc_sup.ensure_ready(proc, "_spawn_simple_process for %s" % process_instance.id)

    self._process_init(process_instance)
    self._process_start(process_instance)

    return process_instance
def _spawn_service_process(self, process_id, name, module, cls, config):
    """
    Spawn a process acting as a service worker.
    Attach to service queue with service definition, attach to service pid
    """
    service_instance = self._create_service_instance(process_id, name, module, cls, config)

    self._service_init(service_instance)
    self._service_start(service_instance)

    listen_name = get_safe(config, "process.listen_name") or service_instance.name
    log.debug("Service Process (%s) listen_name: %s", name, listen_name)
    self._set_service_endpoint(service_instance, listen_name)
    self._set_service_endpoint(service_instance, service_instance.id)

    # Directory registration
    self.container.directory.register_safe("/Services", listen_name, interface=service_instance.name)
    self.container.directory.register_safe("/Services/%s" % listen_name, service_instance.id)

    return service_instance
def on_init(self):
    self.create_workflow_timeout = get_safe(self.CFG, 'create_workflow_timeout', 60)
    self.terminate_workflow_timeout = get_safe(self.CFG, 'terminate_workflow_timeout', 60)
    # self.monitor_timeout = get_safe(self.CFG, 'user_queue_monitor_timeout', 300)
    # self.monitor_queue_size = get_safe(self.CFG, 'user_queue_monitor_size', 100)
    #
    # # Set up an event object for use by the queue monitoring greenlet
    # self.monitor_event = gevent.event.Event()
    # self.monitor_event.clear()
    #
    # # Start up the queue monitor
    # self._process.thread_manager.spawn(self.user_vis_queue_monitor)
    return
def _spawn_standalone_process(self, process_id, name, module, cls, config):
    """
    Spawn a process acting as standalone process.
    Attach to service pid.
    """
    service_instance = self._create_service_instance(process_id, name, module, cls, config)
    self._service_init(service_instance)
    self._service_start(service_instance)

    rsvc = ProcessRPCServer(node=self.container.node,
                            from_name=service_instance.id,
                            service=service_instance,
                            process=service_instance)

    proc = self.proc_sup.spawn(name=service_instance.id,
                               service=service_instance,
                               listeners=[rsvc],
                               proc_name=service_instance._proc_name)
    self.proc_sup.ensure_ready(proc, "_spawn_standalone_process for %s" % service_instance.id)

    # map gproc to service_instance
    self._spawned_proc_to_process[proc.proc] = service_instance

    # set service's reference to process
    service_instance._process = proc

    # Add publishers if any...
    publish_streams = get_safe(config, "process.publish_streams")
    self._set_publisher_endpoints(service_instance, publish_streams)

    return service_instance
def validate_highcharts_transform_results(self, results):
    assertions = self.assertTrue

    # If it's just one granule, wrap it in a list so the same loop handles both cases
    if isinstance(results, Granule):
        results = [results]

    for g in results:
        if isinstance(g, Granule):
            rdt = RecordDictionaryTool.load_from_granule(g)

            hc_data_arr = get_safe(rdt, 'hc_data')
            if hc_data_arr is None:
                log.debug("hc_data in granule is None")
                continue

            assertions(len(hc_data_arr) >= 0)  # Need to come up with a better check

            hc_data = hc_data_arr[0]
            assertions(len(hc_data) >= 0)
            assertions(len(hc_data[0]["name"]) >= 0)
            assertions(len(hc_data[0]["data"]) >= 0)
def execute_resource(self, resource_id='', command=None):
    """Execute command on the resource represented by agent.
    @param resource_id str The id of the resource agent.
    @param command AgentCommand An AgentCommand containing the command.
    @retval result AgentCommandResult An AgentCommandResult containing the result.
    @throws BadRequest if the command was malformed.
    @throws NotFound if the command is not available in the current state or not implemented by the agent.
    @throws ResourceError if the resource produced an error during execution.
    """
    res_type = self._get_resource_type(resource_id)
    if self._has_agent(res_type):
        rac = ResourceAgentClient(resource_id=resource_id)
        return rac.execute_resource(resource_id=resource_id, command=command)

    cmd_res = None
    res_interface = self._get_type_interface(res_type)

    target = get_safe(res_interface, "commands.%s.execute" % command.command, None)
    if target:
        res = self._call_execute(target, resource_id, res_type, command.args, command.kwargs)
        cmd_res = AgentCommandResult(command_id=command.command_id,
                                     command=command.command,
                                     ts_execute=get_ion_ts(),
                                     status=0)
    else:
        log.warn("execute_resource(): command %s not defined", command.command)

    return cmd_res
def _get_data(cls, config):
    parser = get_safe(config, 'parser')
    if parser:
        log.warn('Header Info:\n{0}'.format(parser.header_map))
        log.warn('Tables Available: {0}'.format(parser.table_map.keys()))
    return []
def get_resource(self, resource_id='', params=None):
    """Return the value of the given resource parameters.
    @param resource_id str The id of the resource agent.
    @param params list A list of parameter names to query.
    @retval result A dict of parameter name-value pairs.
    @throws BadRequest if the command was malformed.
    @throws NotFound if the resource does not support the parameter or the parameter does not exist.
    """
    res_type = self._get_resource_type(resource_id)
    if self._has_agent(res_type):
        rac = ResourceAgentClient(resource_id=resource_id)
        return rac.get_resource(resource_id=resource_id, params=params)

    res_interface = self._get_type_interface(res_type)

    get_result = {}
    for param in params:
        getter = get_safe(res_interface, "params.%s.get" % param, None)
        if getter:
            get_res = self._call_getter(getter, resource_id, res_type)
            get_result[param] = get_res  # key by the parameter name, not the literal string 'param'
        else:
            get_result[param] = None

    return get_result
def _create_driver_plugin(self):
    try:
        # Ensure the egg cache directory exists. ooi.reflections will fail
        # somewhat silently when this directory doesn't exist.
        if not os.path.isdir(EGG_CACHE_DIR):
            os.makedirs(EGG_CACHE_DIR)

        log.debug("getting plugin config")
        uri = get_safe(self._dvr_config, 'dvr_egg')
        module_name = self._dvr_config['dvr_mod']
        class_name = self._dvr_config['dvr_cls']
        config = self._dvr_config['startup_config']
    except:
        log.error('error in configuration', exc_info=True)
        raise

    egg_name = None
    egg_repo = None
    memento = self._get_state(DSA_STATE_KEY)

    log.warn("Get driver object: %s, %s, %s, %s", class_name, module_name, egg_name, egg_repo)
    if uri:
        egg_name = uri.split('/')[-1] if uri.startswith('http') else uri
        egg_repo = uri[0:len(uri) - len(egg_name) - 1] if uri.startswith('http') else None

    log.info("instantiate driver plugin %s.%s", module_name, class_name)
    params = [config, memento, self.publish_callback, self.persist_state_callback, self.exception_callback]
    return EGG_CACHE.get_object(class_name, module_name, egg_name, egg_repo, params)
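To make the URI handling above concrete, here is how an http egg URI versus a bare egg filename would be split; the URL and egg name are made-up examples.

uri = 'http://example.org/releases/dataset_driver-0.1.0-py2.7.egg'  # hypothetical repository URL
egg_name = uri.split('/')[-1] if uri.startswith('http') else uri
egg_repo = uri[0:len(uri) - len(egg_name) - 1] if uri.startswith('http') else None
# egg_name -> 'dataset_driver-0.1.0-py2.7.egg'
# egg_repo -> 'http://example.org/releases'

uri = 'dataset_driver-0.1.0-py2.7.egg'  # bare egg name: used as-is, egg_repo stays None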
def set_resource(self, resource_id='', params=None):
    """Set the value of the given resource parameters.
    @param resource_id str The id of the resource agent.
    @param params dict A dict of resource parameter name-value pairs.
    @throws BadRequest if the command was malformed.
    @throws NotFound if a parameter is not supported by the resource or does not exist.
    @throws ResourceError if the resource encountered an error while setting the parameters.
    """
    res_type = self._get_resource_type(resource_id)
    if self._has_agent(res_type):
        rac = ResourceAgentClient(resource_id=resource_id)
        return rac.set_resource(resource_id=resource_id, params=params)

    res_interface = self._get_type_interface(res_type)

    for param in params:
        setter = get_safe(res_interface, "params.%s.set" % param, None)
        if setter:
            self._call_setter(setter, params[param], resource_id, res_type)
        else:
            log.warn("set_resource(): param %s not defined", param)
def _validate_driver_config(self):
    """
    Verify the agent configuration contains a driver config.
    Called by the uninitialize_initialize handler in the IA class.
    """
    log.debug("Driver Config: %s", self._dvr_config)
    out = True

    for key in ('startup_config', 'dvr_mod', 'dvr_cls'):
        if key not in self._dvr_config:
            log.error('missing key: %s', key)
            out = False

    for key in ('stream_config',):
        if key not in self.CFG:
            log.error('missing key: %s', key)
            out = False

    max_records = get_safe(self._dvr_config, 'max_records', 100)
    if max_records < 1:
        log.error('max_records=%d, must be at least 1 or unset (default 100)', max_records)
        out = False

    return out
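A minimal sketch of a driver config that would pass the checks above; the module and class names are placeholders, not references to actual handlers.

dvr_config = {
    'dvr_mod': 'some.data.handler.module',   # placeholder module path
    'dvr_cls': 'SomeDataHandler',            # placeholder class name
    'startup_config': {},
    'max_records': 50,                       # optional; must be >= 1 when present
}
# The agent's CFG must additionally contain a 'stream_config' entry.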
def __init__(self, sysname=None, orgname=None, config=None):
    self.orgname = orgname or get_safe(config, 'system.root_org', 'ION')
    sysname = sysname or get_default_sysname()
    self.datastore_name = "resources"
    self.datastore = DatastoreFactory.get_datastore(datastore_name=self.datastore_name,
                                                    config=config,
                                                    scope=sysname,
                                                    profile=DataStore.DS_PROFILE.DIRECTORY,
                                                    variant=DatastoreFactory.DS_BASE)
def validate_mpl_graphs_transform_results(self, results):
    cc = self.container
    assertions = self.assertTrue

    # If it's just one granule, wrap it in a list so the same loop handles both cases
    if isinstance(results, Granule):
        results = [results]

    found_data = False
    for g in results:
        if isinstance(g, Granule):
            rdt = RecordDictionaryTool.load_from_granule(g)
            graphs = get_safe(rdt, 'matplotlib_graphs')
            if graphs is None:
                continue

            for graph in graphs[0]:
                # At this point only dictionaries containing image data should be passed.
                # For some reason non-dictionary values are filtering through.
                if not isinstance(graph, dict):
                    continue

                assertions(graph['viz_product_type'] == 'matplotlib_graphs')
                # Check to see if the list (numpy array) contains actual images
                assertions(imghdr.what(graph['image_name'], h=graph['image_obj']) == 'png')
                found_data = True

    return found_data
def _spawn_agent_process(self, process_id, name, module, cls, config):
    """
    Spawn a process acting as agent process.
    Attach to service pid.
    """
    service_instance = self._create_service_instance(process_id, name, module, cls, config)
    if not isinstance(service_instance, ResourceAgent):
        raise ContainerConfigError("Agent process must extend ResourceAgent")

    # Set the resource ID if we get it through the config
    resource_id = get_safe(service_instance.CFG, "agent.resource_id")
    if resource_id:
        service_instance.resource_id = resource_id

    rsvc = ProcessRPCServer(node=self.container.node,
                            from_name=service_instance.id,
                            service=service_instance,
                            process=service_instance)

    # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
    cleanup = lambda _: self._cleanup_method(service_instance.id, rsvc)

    proc = self.proc_sup.spawn(name=service_instance.id,
                               service=service_instance,
                               listeners=[rsvc],
                               proc_name=service_instance._proc_name,
                               cleanup_method=cleanup)
    self.proc_sup.ensure_ready(proc, "_spawn_agent_process for %s" % service_instance.id)

    # map gproc to service_instance
    self._spawned_proc_to_process[proc.proc] = service_instance

    # set service's reference to process
    service_instance._process = proc

    # Now call the on_init of the agent.
    self._service_init(service_instance)

    if not service_instance.resource_id:
        log.warn("New agent pid=%s has no resource_id set" % process_id)

    self._service_start(service_instance)
    proc.start_listeners()

    if service_instance.resource_id:
        # Look to load any existing policies for this resource
        if self._is_policy_management_service_available() and self.container.governance_controller:
            self.container.governance_controller.update_resource_access_policy(service_instance.resource_id)
    else:
        log.warn("Agent process id=%s does not define resource_id!!" % service_instance.id)

    return service_instance
def _init_acquisition_cycle(cls, config):
    ext_dset_res = get_safe(config, 'external_dataset_res', None)
    if ext_dset_res:
        ds_url = ext_dset_res.dataset_description.parameters['dataset_path']
        log.debug('Instantiate a RuvParser for dataset: \'{0}\''.format(ds_url))
        config['parser'] = RuvParser(ds_url)