def process(self, packet):
    log.debug('(%s): Received Viz Data Packet' % self.name)
    #log.debug('(%s): - Processing: %s' % (self.name, packet))

    # parse the incoming data
    psd = PointSupplementStreamParser(stream_definition=self.stream_def.container,
                                      stream_granule=packet)

    # re-arrange incoming data into an easy to parse dictionary
    vardict = {}
    arrLen = None
    for varname in psd.list_field_names():
        vardict[varname] = psd.get_values(varname)
        arrLen = len(vardict[varname])

    if self.initDataFlag:
        # look at the incoming packet and store
        for varname in psd.list_field_names():
            with self.lock:
                self.graph_data[varname] = []

        self.initDataFlag = False

    # If code reached here, the graph data storage has been initialized.
    # Just add values to the list
    with self.lock:
        for varname in psd.list_field_names():
            self.graph_data[varname].extend(vardict[varname])

def execute(self, granule): log.debug("Matplotlib transform: Received Viz Data Packet") # parse the incoming data psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=granule) # re-arrange incoming data into an easy to parse dictionary vardict = {} arrLen = None for varname in psd.list_field_names(): vardict[varname] = psd.get_values(varname) arrLen = len(vardict[varname]) if self.initDataFlag: # look at the incoming packet and store for varname in psd.list_field_names(): self.graph_data[varname] = [] self.initDataFlag = False # If code reached here, the graph data storage has been initialized. Just add values # to the list for varname in psd.list_field_names(): self.graph_data[varname].extend(vardict[varname]) if (time.time() - self.lastRenderTime) > self.renderTimeThreshold: self.lastRenderTime = time.time() self.render_graphs() return self.out_granule
def _validate_messages(self, results):
    cc = self.container
    assertions = self.assertTrue

    first_salinity_values = None

    for message in results:
        try:
            psd = PointSupplementStreamParser(stream_definition=self.ctd_stream_def,
                                              stream_granule=message)
            temp = psd.get_values('temperature')
            log.info(psd.list_field_names())
        except KeyError:
            temp = None

        if temp is not None:
            assertions(isinstance(temp, numpy.ndarray))
            log.info('temperature=' + str(numpy.nanmin(temp)))
            first_salinity_values = None
        else:
            psd = PointSupplementStreamParser(
                stream_definition=SalinityTransform.outgoing_stream_def,
                stream_granule=message)
            log.info(psd.list_field_names())

            # Test the handy info method for the names of fields in the stream def
            assertions('salinity' in psd.list_field_names())

            # you have to know the name of the coverage in stream def
            salinity = psd.get_values('salinity')
            log.info('salinity=' + str(numpy.nanmin(salinity)))

            assertions(isinstance(salinity, numpy.ndarray))
            assertions(numpy.nanmin(salinity) > 0.0)  # salinity should always be greater than 0

            if first_salinity_values is None:
                first_salinity_values = salinity.tolist()
            else:
                second_salinity_values = salinity.tolist()
                assertions(len(first_salinity_values) == len(second_salinity_values))
                for idx in range(0, len(first_salinity_values)):
                    assertions(first_salinity_values[idx] * 2.0 == second_salinity_values[idx])

def message_received(granule, h):
    stream_id = granule.stream_resource_id

    data_stream_id = granule.data_stream_id
    data_stream = granule.identifiables[data_stream_id]

    tstamp = get_datetime(data_stream.timestamp.value)

    records = granule.identifiables['record_count'].value

    log.info('Received a message from stream %s with time stamp %s and %d records'
             % (stream_id, tstamp, records))

    if stream_id not in stream_defs:
        stream_defs[stream_id] = pubsub_cli.find_stream_definition(stream_id,
                                                                   id_only=False).container
    stream_def = stream_defs.get(stream_id)

    sp = PointSupplementStreamParser(stream_definition=stream_def, stream_granule=granule)

    last_data = {}
    for field in sp.list_field_names():
        last_data[field] = sp.get_values(field)[-1]

    log.info('Last values in the message: %s' % str(last_data))

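# Sketch of how a callback like message_received() above gets attached to a
# stream; this mirrors the StreamSubscriberRegistrar pattern used in the
# integration tests later in this file. The exchange name and subscription_id
# are placeholders, not values defined in the snippet above.
subscriber = subscriber_registrar.create_subscriber(exchange_name='last_value_test',
                                                    callback=message_received)
subscriber.start()
# after the queue has been created it is safe to activate the subscription
pubsub_cli.activate_subscription(subscription_id=subscription_id)
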
def get_last_value(self, granule):
    stream_resource_id = granule.stream_resource_id

    if stream_resource_id not in self.def_cache:
        stream_def = self.ps_cli.find_stream_definition(stream_id=stream_resource_id,
                                                        id_only=False)
        self.def_cache[stream_resource_id] = stream_def.container

    definition = self.def_cache[stream_resource_id]

    psp = PointSupplementStreamParser(stream_definition=definition, stream_granule=granule)

    fields = psp.list_field_names()

    lu = LastUpdate()
    lu.timestamp = granule.identifiables[granule.data_stream_id].timestamp.value

    for field in fields:
        range_id = definition.identifiables[field].range_id
        lu.variables[field] = Variable()
        if field in definition.identifiables:
            lu.variables[field].definition = definition.identifiables[field].definition
        if range_id in definition.identifiables:
            lu.variables[field].units = definition.identifiables[range_id].unit_of_measure.code
        lu.variables[field].value = float(psp.get_values(field_name=field)[-1])

    return lu

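# Hedged usage sketch for get_last_value() above: feeding granules from a
# subscriber callback through it and logging each variable. The `handler`
# object and its wiring are assumptions for illustration only.
def on_granule(granule, headers):
    lu = handler.get_last_value(granule)
    for name, var in lu.variables.iteritems():
        log.info('%s = %s %s', name, var.value, var.units)
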
def test_dm_integration(self):
    '''
    test_salinity_transform
    Test full DM Services Integration
    '''
    cc = self.container
    assertions = self.assertTrue

    #-----------------------------
    # Copy below here to run as a script (don't forget the imports of course!)
    #-----------------------------

    # Create some service clients...
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)
    transform_management_service = TransformManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    # declare some handy variables
    datastore_name = 'test_dm_integration'

    ###
    ### In the beginning there were two stream definitions...
    ###
    # create a stream definition for the data from the ctd simulator
    ctd_stream_def = SBE37_CDM_stream_definition()
    ctd_stream_def_id = pubsub_management_service.create_stream_definition(
        container=ctd_stream_def, name='Simulated CTD data')

    # create a stream definition for the data from the salinity Transform
    sal_stream_def_id = pubsub_management_service.create_stream_definition(
        container=SalinityTransform.outgoing_stream_def, name='Scalar Salinity data stream')

    ###
    ### And two process definitions...
    ###
    # one for the ctd simulator...
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.ctd_stream_publisher',
        'class': 'SimpleCtdPublisher'
    }
    ctd_sim_procdef_id = process_dispatcher.create_process_definition(
        process_definition=producer_definition)

    # one for the salinity transform
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.transforms.ctd.ctd_L2_salinity',
        'class': 'SalinityTransform'
    }
    salinity_transform_procdef_id = process_dispatcher.create_process_definition(
        process_definition=producer_definition)

    #---------------------------
    # Set up ingestion - this is an operator concern - not done by SA in a deployed system
    #---------------------------
    # Configure ingestion using one worker, ingesting to the test_dm_integration
    # datastore with the SCIDATA profile
    log.debug('Calling create_ingestion_configuration')
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        number_of_workers=1
    )
    #
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    #---------------------------
    # Set up the producer (CTD Simulator)
    #---------------------------
    # Create the stream
    ctd_stream_id = pubsub_management_service.create_stream(stream_definition_id=ctd_stream_def_id)

    # Set up the datasets
    ctd_dataset_id = dataset_management_service.create_dataset(
        stream_id=ctd_stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule'
    )

    # Configure ingestion of this dataset
    ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=ctd_dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
    )
    # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

    #---------------------------
    # Set up the salinity transform
    #---------------------------
    # Create the stream
    sal_stream_id = pubsub_management_service.create_stream(stream_definition_id=sal_stream_def_id)

    # Set up the datasets
    sal_dataset_id = dataset_management_service.create_dataset(
        stream_id=sal_stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule'
    )

    # Configure ingestion of the salinity as a dataset
    sal_dataset_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=sal_dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
    )
    # Hold onto sal_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

    # Create a subscription as input to the transform
    sal_transform_input_subscription_id = pubsub_management_service.create_subscription(
        query=StreamQuery(stream_ids=[ctd_stream_id, ]),
        exchange_name='salinity_transform_input')  # how do we make these names??? i.e. Should they be anonymous?

    # create the salinity transform
    sal_transform_id = transform_management_service.create_transform(
        name='example salinity transform',
        in_subscription_id=sal_transform_input_subscription_id,
        out_streams={'output': sal_stream_id, },
        process_definition_id=salinity_transform_procdef_id,
        # no configuration needed at this time...
    )
    # start the transform - for a test case it makes sense to do it before
    # starting the producer, but it is not required
    transform_management_service.activate_transform(transform_id=sal_transform_id)

    # Start the ctd simulator to produce some data
    configuration = {
        'process': {
            'stream_id': ctd_stream_id,
        }
    }
    ctd_sim_pid = process_dispatcher.schedule_process(
        process_definition_id=ctd_sim_procdef_id, configuration=configuration)

    ###
    ### Make a subscriber in the test to listen for salinity data
    ###
    salinity_subscription_id = pubsub_management_service.create_subscription(
        query=StreamQuery([sal_stream_id, ]),
        exchange_name='salinity_test',
        name="test salinity subscription",
    )

    pid = cc.spawn_process(name='dummy_process_for_test',
                           module='pyon.ion.process',
                           cls='SimpleProcess',
                           config={})
    dummy_process = cc.proc_manager.procs[pid]

    subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node)

    result = gevent.event.AsyncResult()
    results = []

    def message_received(message, headers):
        log.warn('Salinity data received!')
        results.append(message)
        if len(results) > 3:
            result.set(True)

    subscriber = subscriber_registrar.create_subscriber(exchange_name='salinity_test',
                                                        callback=message_received)
    subscriber.start()

    # after the queue has been created it is safe to activate the subscription
    pubsub_management_service.activate_subscription(subscription_id=salinity_subscription_id)

    # Assert that we have received data
    assertions(result.get(timeout=10))

    # stop the flow and parse the messages...
    process_dispatcher.cancel_process(ctd_sim_pid)  # kill the ctd simulator process - that is enough data

    for message in results:
        psd = PointSupplementStreamParser(
            stream_definition=SalinityTransform.outgoing_stream_def,
            stream_granule=message)

        # Test the handy info method for the names of fields in the stream def
        assertions('salinity' in psd.list_field_names())

        # you have to know the name of the coverage in stream def
        salinity = psd.get_values('salinity')

        import numpy
        assertions(isinstance(salinity, numpy.ndarray))
        assertions(numpy.nanmin(salinity) > 0.0)  # salinity should always be greater than 0

def process(self, packet):
    log.debug('(%s): Received Viz Data Packet' % self.name)

    element_count_id = 0
    expected_range = []

    psd = PointSupplementStreamParser(stream_definition=self.stream_def,
                                      stream_granule=packet)
    vardict = {}
    arrLen = None
    for varname in psd.list_field_names():
        vardict[varname] = psd.get_values(varname)
        arrLen = len(vardict[varname])

    # if it's the first time, init the dataTable
    if self.initDataTableFlag:
        # create data description from the variables in the message
        self.dataDescription = [('time', 'datetime', 'time')]

        # split the data string to extract variable names
        for varname in psd.list_field_names():
            if varname == 'time':
                continue
            self.dataDescription.append((varname, 'number', varname))

        self.initDataTableFlag = False

    # Add the records to the datatable
    for i in xrange(arrLen):
        varTuple = []
        for varname, _, _ in self.dataDescription:
            val = float(vardict[varname][i])
            if varname == 'time':
                varTuple.append(datetime.fromtimestamp(val))
            else:
                varTuple.append(val)

        # Append the tuples to the data table
        self.dataTableContent.append(varTuple)

    if self.realtime_flag:
        # Maintain a sliding window for realtime transform processes
        realtime_window_size = 100
        if len(self.dataTableContent) > realtime_window_size:
            # always pop the first element till window size is what we want
            while len(self.dataTableContent) > realtime_window_size:
                self.dataTableContent.pop(0)

    if not self.realtime_flag:
        # This is the historical view part. Make a note of how many records were received
        data_stream_id = self.stream_def.data_stream_id
        element_count_id = self.stream_def.identifiables[data_stream_id].element_count_id

        # From each granule you can check the constraint on the number of records
        expected_range = packet.identifiables[element_count_id].constraint.intervals[0]

        # The number of records in a given packet is:
        self.total_num_of_records_recvd += packet.identifiables[element_count_id].value

    # submit the Json version of the datatable to the viz service
    if self.realtime_flag:
        # create the google viz data table
        data_table = gviz_api.DataTable(self.dataDescription)
        data_table.LoadData(self.dataTableContent)

        # submit resulting table back using the out stream publisher
        msg = {"viz_product_type": "google_realtime_dt",
               "data_product_id": self.data_product_id,
               "data_table": data_table.ToJSonResponse()}
        self.out_stream_pub.publish(msg)
    else:
        # Submit table back to the service if we received all the replay data
        if self.total_num_of_records_recvd == (expected_range[1] + 1):
            # If the datatable received was too big, decimate on the fly to a fixed size.
            # Note: divide by a float so math.ceil rounds up instead of truncating.
            max_google_dt_len = 1024
            if len(self.dataTableContent) > max_google_dt_len:
                decimation_factor = int(math.ceil(len(self.dataTableContent) / float(max_google_dt_len)))

                tempDataTableContent = []
                for i in xrange(0, len(self.dataTableContent), decimation_factor):
                    # check limits
                    if i >= len(self.dataTableContent):
                        break
                    tempDataTableContent.append(self.dataTableContent[i])

                self.dataTableContent = tempDataTableContent

            data_table = gviz_api.DataTable(self.dataDescription)
            data_table.LoadData(self.dataTableContent)

            # submit resulting table back using the out stream publisher
            msg = {"viz_product_type": "google_dt",
                   "data_product_id_token": self.data_product_id_token,
                   "data_table": data_table.ToJSonResponse()}
            self.out_stream_pub.publish(msg)

    return

def execute(self, granule):
    log.debug('(Google DT transform): Received Viz Data Packet')

    self.dataDescription = []
    self.dataTableContent = []
    element_count_id = 0
    expected_range = []

    # NOTE: Detect somehow that this is a replay stream with a set number of expected
    # granules. Based on this, calculate the number of expected records and set
    # self.realtime_window_size bigger or equal to this number.

    psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def,
                                      stream_granule=granule)
    vardict = {}
    arrLen = None
    for varname in psd.list_field_names():
        vardict[varname] = psd.get_values(varname)
        arrLen = len(vardict[varname])

    # init the dataTable
    # create data description from the variables in the message
    self.dataDescription = [('time', 'datetime', 'time')]

    # split the data string to extract variable names
    for varname in psd.list_field_names():
        if varname == 'time':
            continue
        self.dataDescription.append((varname, 'number', varname))

    # Add the records to the datatable
    for i in xrange(arrLen):
        varTuple = []
        for varname, _, _ in self.dataDescription:
            val = float(vardict[varname][i])
            if varname == 'time':
                #varTuple.append(datetime.fromtimestamp(val))
                varTuple.append(val)
            else:
                varTuple.append(val)

        # Append the tuples to the data table
        self.dataTableContent.append(varTuple)

    # Maintain a sliding window for realtime transform processes
    if len(self.dataTableContent) > self.realtime_window_size:
        # always pop the first element till window size is what we want
        while len(self.dataTableContent) > self.realtime_window_size:
            self.dataTableContent.pop(0)

    """
    To Do: Do we need to figure out how many granules have been received for a replay stream?

    if not self.realtime_flag:
        # This is the historical view part. Make a note of how many records were received
        in_data_stream_id = self.incoming_stream_def.data_stream_id
        element_count_id = self.incoming_stream_def.identifiables[in_data_stream_id].element_count_id
        # From each granule you can check the constraint on the number of records
        expected_range = granule.identifiables[element_count_id].constraint.intervals[0]
        # The number of records in a given packet is:
        self.total_num_of_records_recvd += packet.identifiables[element_count_id].value
    """

    # define an output container of data
    # submit the partial datatable to the viz service
    rdt = RecordDictionaryTool(taxonomy=tx)

    # submit resulting table back using the out stream publisher. The data_product_id
    # is the input dp_id responsible for the incoming data
    msg = {"viz_product_type": "google_realtime_dt",
           "data_product_id": "FAKE_DATAPRODUCT_ID_0000",
           "data_table_description": self.dataDescription,
           "data_table_content": self.dataTableContent}

    rdt['google_dt_components'] = numpy.array([msg])

    log.debug('Google DT transform: Sending a granule')
    out_granule = build_granule(data_producer_id='google_dt_transform',
                                taxonomy=tx,
                                record_dictionary=rdt)

    #self.publish(out_granule)

    return out_granule

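# Sketch of the consuming side of the transform above: rebuilding a google
# visualization DataTable from the published message dict. It mirrors the
# gviz_api calls already used by the process() methods in this file; `msg`
# here stands for the dict extracted from the received granule.
data_table = gviz_api.DataTable(msg['data_table_description'])
data_table.LoadData(msg['data_table_content'])
json_response = data_table.ToJSonResponse()  # ready to hand to a google chart client
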
def test_workflow_components(self):
    cc = self.container
    assertions = self.assertTrue

    #-------------------------------
    # Create CTD Parsed as the initial data product
    #-------------------------------
    # create a stream definition for the data from the ctd simulator
    ctd_stream_def = SBE37_CDM_stream_definition()
    ctd_stream_def_id = self.pubsubclient.create_stream_definition(container=ctd_stream_def,
                                                                   name='Simulated CTD data')

    print 'Creating new CDM data product with a stream definition'
    dp_obj = IonObject(RT.DataProduct, name='ctd_parsed', description='ctd stream test')
    try:
        ctd_parsed_data_product = self.dataproductclient.create_data_product(dp_obj, ctd_stream_def_id)
    except Exception as ex:
        self.fail("failed to create new data product: %s" % ex)
    print 'new ctd_parsed_data_product_id = ', ctd_parsed_data_product

    instDevice_obj = IonObject(RT.InstrumentDevice,
                               name='SBE37IMDevice',
                               description="SBE37IMDevice",
                               serial_number="12345")
    instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj)
    self.damsclient.assign_data_product(input_resource_id=instDevice_id,
                                        data_product_id=ctd_parsed_data_product)

    self.dataproductclient.activate_data_product_persistence(
        data_product_id=ctd_parsed_data_product, persist_data=True, persist_metadata=True)

    # Retrieve the id of the OUTPUT stream from the out Data Product
    stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product, PRED.hasStream, None, True)
    assertions(len(stream_ids) > 0)
    ctd_stream_id = stream_ids[0]

    ###
    ### Setup the first transformation
    ###
    # Salinity: Data Process Definition
    log.debug("Create data process definition SalinityTransform")
    dpd_obj = IonObject(RT.DataProcessDefinition,
                        name='ctd_salinity',
                        description='create a salinity data product',
                        module='ion.processes.data.transforms.ctd.ctd_L2_salinity',
                        class_name='SalinityTransform',
                        process_source='SalinityTransform source code here...')
    try:
        ctd_L2_salinity_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)
    except Exception as ex:
        self.fail("failed to create new SalinityTransform data process definition: %s" % ex)

    # create a stream definition for the data from the salinity Transform
    sal_stream_def_id = self.pubsubclient.create_stream_definition(
        container=SalinityTransform.outgoing_stream_def, name='L2_salinity')
    self.dataprocessclient.assign_stream_definition_to_data_process_definition(
        sal_stream_def_id, ctd_L2_salinity_dprocdef_id)

    # Create the output data product of the transform
    log.debug("create output data product L2 Salinity")
    ctd_l2_salinity_output_dp_obj = IonObject(RT.DataProduct,
                                              name='L2_Salinity',
                                              description='transform output L2 salinity')
    ctd_l2_salinity_output_dp_id = self.dataproductclient.create_data_product(
        ctd_l2_salinity_output_dp_obj, sal_stream_def_id)
    self.dataproductclient.activate_data_product_persistence(
        data_product_id=ctd_l2_salinity_output_dp_id, persist_data=True, persist_metadata=True)

    # Create the Salinity transform data process
    log.debug("create L2_salinity data_process and start it")
    try:
        l2_salinity_all_data_process_id = self.dataprocessclient.create_data_process(
            ctd_L2_salinity_dprocdef_id, ctd_parsed_data_product,
            {'output': ctd_l2_salinity_output_dp_id})
        self.dataprocessclient.activate_data_process(l2_salinity_all_data_process_id)
    except BadRequest as ex:
        self.fail("failed to create new data process: %s" % ex)

    log.debug("test_createTransformsThenActivateInstrument: create L2_salinity data_process return")

    ###
    ### Setup the second transformation
    ###
    # Salinity Doubler: Data Process Definition
    log.debug("Create data process definition SalinityDoublerTransform")
    dpd_obj = IonObject(RT.DataProcessDefinition,
                        name='salinity_doubler',
                        description='create a salinity doubler data product',
                        module='ion.processes.data.transforms.example_double_salinity',
                        class_name='SalinityDoubler',
                        process_source='SalinityDoubler source code here...')
    try:
        salinity_doubler_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)
    except Exception as ex:
        self.fail("failed to create new SalinityDoubler data process definition: %s" % ex)

    # create a stream definition for the data from the salinity doubler transform
    salinity_double_stream_def_id = self.pubsubclient.create_stream_definition(
        container=SalinityDoubler.outgoing_stream_def, name='SalinityDoubler')
    self.dataprocessclient.assign_stream_definition_to_data_process_definition(
        salinity_double_stream_def_id, salinity_doubler_dprocdef_id)

    # Create the output data product of the transform
    log.debug("create output data product SalinityDoubler")
    salinity_doubler_output_dp_obj = IonObject(RT.DataProduct,
                                               name='SalinityDoubler',
                                               description='transform output salinity doubler')
    salinity_doubler_output_dp_id = self.dataproductclient.create_data_product(
        salinity_doubler_output_dp_obj, salinity_double_stream_def_id)
    self.dataproductclient.activate_data_product_persistence(
        data_product_id=salinity_doubler_output_dp_id, persist_data=True, persist_metadata=True)

    # Create the SalinityDoubler transform data process
    log.debug("create SalinityDoubler data_process and start it")
    try:
        salinity_double_data_process_id = self.dataprocessclient.create_data_process(
            salinity_doubler_dprocdef_id, ctd_l2_salinity_output_dp_id,
            {'output': salinity_doubler_output_dp_id})
        self.dataprocessclient.activate_data_process(salinity_double_data_process_id)
    except BadRequest as ex:
        self.fail("failed to create new data process: %s" % ex)

    log.debug("test_createTransformsThenActivateInstrument: create SalinityDoubler data_process return")

    ###
    ### Start the process for producing the CTD data
    ###
    # process definition for the ctd simulator...
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.ctd_stream_publisher',
        'class': 'SimpleCtdPublisher'
    }
    ctd_sim_procdef_id = self.process_dispatcher.create_process_definition(
        process_definition=producer_definition)

    # Start the ctd simulator to produce some data
    configuration = {
        'process': {
            'stream_id': ctd_stream_id,
        }
    }
    ctd_sim_pid = self.process_dispatcher.schedule_process(
        process_definition_id=ctd_sim_procdef_id, configuration=configuration)

    ## get the stream id for the transform outputs
    stream_ids, _ = self.rrclient.find_objects(ctd_l2_salinity_output_dp_id, PRED.hasStream, None, True)
    assertions(len(stream_ids) > 0)
    sal_stream_id = stream_ids[0]

    stream_ids, _ = self.rrclient.find_objects(salinity_doubler_output_dp_id, PRED.hasStream, None, True)
    assertions(len(stream_ids) > 0)
    sal_dbl_stream_id = stream_ids[0]

    ###
    ### Make a subscriber in the test to listen for transformed data
    ###
    salinity_subscription_id = self.pubsubclient.create_subscription(
        query=StreamQuery([ctd_stream_id, sal_stream_id, sal_dbl_stream_id]),
        exchange_name='salinity_test',
        name="test salinity subscription",
    )

    pid = cc.spawn_process(name='dummy_process_for_test',
                           module='pyon.ion.process',
                           cls='SimpleProcess',
                           config={})
    dummy_process = cc.proc_manager.procs[pid]

    subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node)

    result = gevent.event.AsyncResult()
    results = []

    def message_received(message, headers):
        log.warn(' data received!')
        results.append(message)
        if len(results) > 15:
            result.set(True)

    subscriber = subscriber_registrar.create_subscriber(exchange_name='salinity_test',
                                                        callback=message_received)
    subscriber.start()

    # after the queue has been created it is safe to activate the subscription
    self.pubsubclient.activate_subscription(subscription_id=salinity_subscription_id)

    # Assert that we have received data
    assertions(result.get(timeout=20))

    # Stop the transform processes
    # stop the flow and parse the messages...
    self.process_dispatcher.cancel_process(ctd_sim_pid)  # kill the ctd simulator process - that is enough data

    first_salinity_values = None

    for message in results:
        try:
            psd = PointSupplementStreamParser(stream_definition=ctd_stream_def,
                                              stream_granule=message)
            temp = psd.get_values('temperature')
            print psd.list_field_names()
        except KeyError:
            temp = None

        if temp is not None:
            assertions(isinstance(temp, numpy.ndarray))
            print 'temperature=' + str(numpy.nanmin(temp))
            first_salinity_values = None
        else:
            psd = PointSupplementStreamParser(
                stream_definition=SalinityTransform.outgoing_stream_def,
                stream_granule=message)
            print psd.list_field_names()

            # Test the handy info method for the names of fields in the stream def
            assertions('salinity' in psd.list_field_names())

            # you have to know the name of the coverage in stream def
            salinity = psd.get_values('salinity')
            print 'salinity=' + str(numpy.nanmin(salinity))

            assertions(isinstance(salinity, numpy.ndarray))
            assertions(numpy.nanmin(salinity) > 0.0)  # salinity should always be greater than 0

            if first_salinity_values is None:
                first_salinity_values = salinity.tolist()
            else:
                second_salinity_values = salinity.tolist()
                assertions(len(first_salinity_values) == len(second_salinity_values))
                for idx in range(0, len(first_salinity_values)):
                    assertions(first_salinity_values[idx] * 2.0 == second_salinity_values[idx])

def process(self, packet):
    log.debug('(%s): Received Viz Data Packet' % self.name)

    element_count_id = 0
    expected_range = []

    psd = PointSupplementStreamParser(stream_definition=self.stream_def,
                                      stream_granule=packet)
    vardict = {}
    arrLen = None
    for varname in psd.list_field_names():
        vardict[varname] = psd.get_values(varname)
        arrLen = len(vardict[varname])

    # if it's the first time, init the dataTable
    if self.initDataTableFlag:
        # create data description from the variables in the message
        self.dataDescription = [('time', 'datetime', 'time')]

        # split the data string to extract variable names
        for varname in psd.list_field_names():
            if varname == 'time':
                continue
            self.dataDescription.append((varname, 'number', varname))

        self.initDataTableFlag = False

    # Add the records to the datatable
    for i in xrange(arrLen):
        varTuple = []
        for varname, _, _ in self.dataDescription:
            val = float(vardict[varname][i])
            if varname == 'time':
                varTuple.append(datetime.fromtimestamp(val))
            else:
                varTuple.append(val)

        # Append the tuples to the data table
        self.dataTableContent.append(varTuple)

    if self.realtime_flag:
        # Maintain a sliding window for realtime transform processes
        realtime_window_size = 100
        if len(self.dataTableContent) > realtime_window_size:
            # always pop the first element till window size is what we want
            while len(self.dataTableContent) > realtime_window_size:
                self.dataTableContent.pop(0)

    if not self.realtime_flag:
        # This is the historical view part. Make a note of how many records were received
        data_stream_id = self.stream_def.data_stream_id
        element_count_id = self.stream_def.identifiables[data_stream_id].element_count_id

        # From each granule you can check the constraint on the number of records
        expected_range = packet.identifiables[element_count_id].constraint.intervals[0]

        # The number of records in a given packet is:
        self.total_num_of_records_recvd += packet.identifiables[element_count_id].value

    # submit the Json version of the datatable to the viz service
    if self.realtime_flag:
        # create the google viz data table
        data_table = gviz_api.DataTable(self.dataDescription)
        data_table.LoadData(self.dataTableContent)

        # submit resulting table back using the out stream publisher
        msg = {"viz_product_type": "google_realtime_dt",
               "data_product_id": self.data_product_id,
               "data_table": data_table.ToJSonResponse()}
        self.out_stream_pub.publish(msg)
    else:
        # Submit table back to the service if we received all the replay data
        if self.total_num_of_records_recvd == (expected_range[1] + 1):
            # If the datatable received was too big, decimate on the fly to a fixed size.
            # Note: divide by a float so math.ceil rounds up instead of truncating.
            max_google_dt_len = 1024
            if len(self.dataTableContent) > max_google_dt_len:
                decimation_factor = int(math.ceil(len(self.dataTableContent) / float(max_google_dt_len)))

                # drop every row whose index is not a multiple of the decimation
                # factor, walking backwards so pops do not shift unvisited indices
                for i in xrange(len(self.dataTableContent) - 1, 0, -1):
                    if i % decimation_factor == 0:
                        continue
                    self.dataTableContent.pop(i)

            data_table = gviz_api.DataTable(self.dataDescription)
            data_table.LoadData(self.dataTableContent)

            # submit resulting table back using the out stream publisher
            msg = {"viz_product_type": "google_dt",
                   "data_product_id_token": self.data_product_id_token,
                   "data_table": data_table.ToJSonResponse()}
            self.out_stream_pub.publish(msg)

    return

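# The two decimation loops above (copy-and-skip and reverse pop) both keep
# every decimation_factor-th row. An equivalent slice does the same in one
# statement and avoids quadratic pops; shown here as a standalone sketch over
# a plain list, not a drop-in patch for the methods above.
import math

def decimate(dataTableContent, max_google_dt_len=1024):
    if len(dataTableContent) > max_google_dt_len:
        decimation_factor = int(math.ceil(len(dataTableContent) / float(max_google_dt_len)))
        dataTableContent = dataTableContent[::decimation_factor]
    return dataTableContent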