def _start_data_subscribers(self, count, raw_count):
    """
    """
    # Create a pubsub client to create streams.
    pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    # Create streams and subscriptions for each stream named in driver.
    self._data_subscribers = []
    self._samples_received = []
    self._raw_samples_received = []
    self._async_sample_result = AsyncResult()
    self._async_raw_sample_result = AsyncResult()

    # A callback for processing subscribed-to data.
    def recv_data(message, stream_route, stream_id):
        log.info('Received parsed data on %s (%s,%s)', stream_id,
                 stream_route.exchange_point, stream_route.routing_key)
        self._samples_received.append(message)
        if len(self._samples_received) == count:
            self._async_sample_result.set()

    def recv_raw_data(message, stream_route, stream_id):
        log.info('Received raw data on %s (%s,%s)', stream_id,
                 stream_route.exchange_point, stream_route.routing_key)
        self._raw_samples_received.append(message)
        if len(self._raw_samples_received) == raw_count:
            self._async_raw_sample_result.set()

    from pyon.util.containers import create_unique_identifier

    stream_name = 'parsed'
    parsed_config = self._stream_config[stream_name]
    stream_id = parsed_config['stream_id']
    exchange_name = create_unique_identifier("%s_queue" % stream_name)
    self._purge_queue(exchange_name)
    sub = StandaloneStreamSubscriber(exchange_name, recv_data)
    sub.start()
    self._data_subscribers.append(sub)
    sub_id = pubsub_client.create_subscription(name=exchange_name,
                                               stream_ids=[stream_id])
    pubsub_client.activate_subscription(sub_id)
    # Bind the subscription to the standalone subscriber (easier cleanup,
    # not good in real practice).
    sub.subscription_id = sub_id

    stream_name = 'raw'
    raw_config = self._stream_config[stream_name]
    stream_id = raw_config['stream_id']
    exchange_name = create_unique_identifier("%s_queue" % stream_name)
    self._purge_queue(exchange_name)
    sub = StandaloneStreamSubscriber(exchange_name, recv_raw_data)
    sub.start()
    self._data_subscribers.append(sub)
    sub_id = pubsub_client.create_subscription(name=exchange_name,
                                               stream_ids=[stream_id])
    pubsub_client.activate_subscription(sub_id)
    # Bind the subscription to the standalone subscriber (easier cleanup,
    # not good in real practice).
    sub.subscription_id = sub_id

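# Usage sketch (illustrative, not from the original source): a test that calls
# _start_data_subscribers() above typically triggers sampling and then blocks
# on the two AsyncResult objects. gevent's AsyncResult.get() raises
# gevent.Timeout if the expected count never arrives; the counts and the
# 120-second timeout here are assumptions for the example.
def _example_wait_for_samples(self):
    self._start_data_subscribers(count=3, raw_count=3)
    # ... command the instrument to acquire samples here ...
    self._async_sample_result.get(timeout=120)      # 3 parsed samples arrived
    self._async_raw_sample_result.get(timeout=120)  # 3 raw samples arrived
    self.assertEqual(len(self._samples_received), 3)
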
def _start_data_subscribers(self):
    """
    """
    # Create a pubsub client to create streams.
    pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    # A callback for processing subscribed-to data.
    def consume_data(message, headers):
        log.info("Subscriber received data message: %s.", str(message))
        self._samples_received.append(message)
        if self._no_samples and self._no_samples == len(self._samples_received):
            self._async_data_result.set()

    # Create a stream subscriber registrar to create subscribers.
    subscriber_registrar = StreamSubscriberRegistrar(process=self.container,
                                                     node=self.container.node)

    # Create streams and subscriptions for each stream named in driver.
    self._stream_config = {}
    self._data_subscribers = []
    for (stream_name, val) in PACKET_CONFIG.iteritems():
        stream_def = ctd_stream_definition(stream_id=None)
        stream_def_id = pubsub_client.create_stream_definition(container=stream_def)
        stream_id = pubsub_client.create_stream(
            name=stream_name,
            stream_definition_id=stream_def_id,
            original=True,
            encoding="ION R2")
        self._stream_config[stream_name] = stream_id

        # Create subscriptions for each stream.
        exchange_name = "%s_queue" % stream_name
        sub = subscriber_registrar.create_subscriber(exchange_name=exchange_name,
                                                     callback=consume_data)
        self._listen(sub)
        self._data_subscribers.append(sub)
        query = StreamQuery(stream_ids=[stream_id])
        sub_id = pubsub_client.create_subscription(query=query,
                                                   exchange_name=exchange_name)
        pubsub_client.activate_subscription(sub_id)

def _start_data_subscribers(self, count):
    """
    """
    # Create a pubsub client to create streams.
    pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    # Create streams and subscriptions for each stream named in driver.
    self._data_subscribers = []
    self._samples_received = []
    #self._async_data_result = AsyncResult()

    # Open an xterm that tails the named pipe so science data is visible live.
    strXterm = "xterm -T InstrumentScienceData -sb -rightbar "
    pOpenString = strXterm + " -e tail -f " + PIPE_PATH
    subprocess.Popen(['xterm', '-T', 'InstrumentScienceData', '-e',
                      'tail', '-f', PIPE_PATH])
    #subprocess.Popen(pOpenString)
    #self.pipeData = open(PIPE_PATH, "w", 1)

    # A callback for processing subscribed-to data.
    def recv_data(message, stream_route, stream_id):
        print 'Received message on ' + str(stream_id) + ' (' + \
            str(stream_route.exchange_point) + ',' + \
            str(stream_route.routing_key) + ')'
        log.info('Received message on %s (%s,%s)', stream_id,
                 stream_route.exchange_point, stream_route.routing_key)
        # Line-buffered write of the raw message to the pipe for the xterm.
        self.pipeData = open(PIPE_PATH, "w", 1)
        self.pipeData.write(str(message))
        self.pipeData.flush()
        self.pipeData.close()
        self._samples_received.append(message)
        #if len(self._samples_received) == count:
        #    self._async_data_result.set()

    for (stream_name, stream_config) in self._stream_config.iteritems():
        stream_id = stream_config['stream_id']

        # Create subscriptions for each stream.
        exchange_name = '%s_queue' % stream_name
        self._purge_queue(exchange_name)
        sub = StandaloneStreamSubscriber(exchange_name, recv_data)
        sub.start()
        self._data_subscribers.append(sub)
        print 'stream_id: %s' % stream_id
        sub_id = pubsub_client.create_subscription(name=exchange_name,
                                                   stream_ids=[stream_id])
        pubsub_client.activate_subscription(sub_id)
        # Bind the subscription to the standalone subscriber (easier cleanup,
        # not good in real practice).
        sub.subscription_id = sub_id

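# Setup sketch (assumption, not in the original): the xterm/tail reader and
# the pipeData writer above both need PIPE_PATH to exist as a named pipe.
# A one-time setUp() step along these lines would create it.
import os
import errno

def _ensure_pipe(path):
    try:
        os.mkfifo(path)  # create the FIFO; harmless if it already exists
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
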
def _start_data_subscribers(self):
    """
    """
    # Create a pubsub client to create streams.
    pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    # A callback for processing subscribed-to data.
    def consume_data(message, headers):
        log.info('Subscriber received data message: %s.', str(message))
        self._samples_received.append(message)
        if self._no_samples and self._no_samples == len(self._samples_received):
            self._async_data_result.set()

    # Create a stream subscriber registrar to create subscribers.
    subscriber_registrar = StreamSubscriberRegistrar(process=self.container,
                                                     container=self.container)

    # Create streams and subscriptions for each stream named in driver.
    self._stream_config = {}
    self._data_subscribers = []

    # TODO the following is a minimal adjustment to at least let the test
    # continue:
    # for (stream_name, val) in PACKET_CONFIG.iteritems():
    for stream_name in PACKET_CONFIG:
        stream_def = ctd_stream_definition(stream_id=None)
        stream_def_id = pubsub_client.create_stream_definition(container=stream_def)
        stream_id = pubsub_client.create_stream(
            name=stream_name,
            stream_definition_id=stream_def_id,
            original=True,
            encoding='ION R2')

        taxy = get_taxonomy(stream_name)
        stream_config = dict(id=stream_id, taxonomy=taxy.dump())
        self._stream_config[stream_name] = stream_config
        # self._stream_config[stream_name] = stream_id

        # Create subscriptions for each stream.
        exchange_name = '%s_queue' % stream_name
        sub = subscriber_registrar.create_subscriber(exchange_name=exchange_name,
                                                     callback=consume_data)
        self._listen(sub)
        self._data_subscribers.append(sub)
        query = StreamQuery(stream_ids=[stream_id])
        sub_id = pubsub_client.create_subscription(
            query=query,
            exchange_name=exchange_name,
            exchange_point='science_data')
        pubsub_client.activate_subscription(sub_id)

def run_external_transform(self):
    '''
    This example script illustrates how a transform can interact with an
    outside process (very basic). It launches an external_transform example
    which uses the operating system command 'bc' to add 1 to the input.

    Producer -> A -> 'FS.TEMP/transform_output'

    A is an external transform that spawns an OS process to increment the
    input by 1.
    '''
    pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
    tms_cli = TransformManagementServiceClient(node=self.container.node)
    procd_cli = ProcessDispatcherServiceClient(node=self.container.node)

    #-------------------------------
    # Process Definition
    #-------------------------------
    process_definition = ProcessDefinition(name='external_transform_definition')
    process_definition.executable['module'] = 'ion.processes.data.transforms.transform_example'
    process_definition.executable['class'] = 'ExternalTransform'
    process_definition_id = procd_cli.create_process_definition(process_definition=process_definition)

    #-------------------------------
    # Streams
    #-------------------------------
    input_stream_id = pubsub_cli.create_stream(name='input_stream', original=True)

    #-------------------------------
    # Subscription
    #-------------------------------
    query = StreamQuery(stream_ids=[input_stream_id])
    input_subscription_id = pubsub_cli.create_subscription(query=query,
                                                           exchange_name='input_queue')

    #-------------------------------
    # Launch Transform
    #-------------------------------
    transform_id = tms_cli.create_transform(name='external_transform',
                                            in_subscription_id=input_subscription_id,
                                            process_definition_id=process_definition_id,
                                            configuration={})
    tms_cli.activate_transform(transform_id)

    #-------------------------------
    # Launch Producer
    #-------------------------------
    id_p = self.container.spawn_process(
        'myproducer',
        'ion.processes.data.transforms.transform_example',
        'TransformExampleProducer',
        {'process': {'type': 'stream_process',
                     'publish_streams': {'out_stream': input_stream_id}},
         'stream_producer': {'interval': 4000}})
    self.container.proc_manager.procs[id_p].start()

def _start_data_subscriber(self, config, callback):
    """
    Setup and start a data subscriber
    """
    exchange_point = config['exchange_point']
    stream_id = config['stream_id']

    sub = StandaloneStreamSubscriber(exchange_point, callback)
    sub.start()
    self._data_subscribers.append(sub)

    pubsub_client = PubsubManagementServiceClient(node=self.container.node)
    sub_id = pubsub_client.create_subscription(name=exchange_point,
                                               stream_ids=[stream_id])
    pubsub_client.activate_subscription(sub_id)
    # Bind the subscription to the standalone subscriber (easier cleanup,
    # not good in real practice).
    sub.subscription_id = sub_id

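# Usage sketch (illustrative): the config dict passed to _start_data_subscriber()
# is assumed to mirror the stream_config entries used elsewhere in this file,
# i.e. it carries an exchange point name and a stream id.
def _example_subscribe(self):
    def on_data(message, stream_route, stream_id):
        log.info('Received message on %s', stream_id)

    self._data_subscribers = []
    config = {'exchange_point': 'science_data',  # assumed exchange point
              'stream_id': self._stream_config['parsed']['stream_id']}
    self._start_data_subscriber(config, on_data)
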
def __init__(self, packet_config={}, stream_definition=None, original=True,
             encoding="ION R2"):
    log.info("Start data subscribers")

    self.no_samples = None
    self.async_data_result = AsyncResult()
    self.data_greenlets = []
    self.stream_config = {}
    self.samples_received = []
    self.data_subscribers = []
    self.container = Container.instance
    if not self.container:
        raise NoContainer()

    # Create a pubsub client to create streams.
    pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    # A callback for processing subscribed-to data.
    def consume_data(message, headers):
        log.info('Subscriber received data message: %s.', str(message))
        self.samples_received.append(message)
        if self.no_samples and self.no_samples == len(self.samples_received):
            self.async_data_result.set()

    # Create a stream subscriber registrar to create subscribers.
    subscriber_registrar = StreamSubscriberRegistrar(process=self.container,
                                                     node=self.container.node)

    # Create streams and subscriptions for each stream named in driver.
    self.stream_config = {}
    self.data_subscribers = []
    for (stream_name, val) in packet_config.iteritems():
        stream_def_id = pubsub_client.create_stream_definition(container=stream_definition)
        stream_id = pubsub_client.create_stream(
            name=stream_name,
            stream_definition_id=stream_def_id,
            original=original,
            encoding=encoding)
        self.stream_config[stream_name] = stream_id

        # Create subscriptions for each stream.
        exchange_name = '%s_queue' % stream_name
        sub = subscriber_registrar.create_subscriber(exchange_name=exchange_name,
                                                     callback=consume_data)
        self._listen(sub)
        self.data_subscribers.append(sub)
        query = StreamQuery(stream_ids=[stream_id])
        sub_id = pubsub_client.create_subscription(query=query,
                                                   exchange_name=exchange_name)
        pubsub_client.activate_subscription(sub_id)

def on_start(self):
    super(CacheLauncher, self).on_start()
    tms_cli = TransformManagementServiceClient()
    pubsub_cli = PubsubManagementServiceClient()
    pd_cli = ProcessDispatcherServiceClient()
    dname = CACHE_DATASTORE_NAME
    number_of_workers = self.CFG.get_safe('process.number_of_workers', 2)

    proc_def = ProcessDefinition(
        name='last_update_worker_process',
        description='Worker process for caching the last update from a stream')
    proc_def.executable['module'] = 'ion.processes.data.last_update_cache'
    proc_def.executable['class'] = 'LastUpdateCache'
    proc_def_id = pd_cli.create_process_definition(process_definition=proc_def)

    xs_dot_xp = CFG.core_xps.science_data
    try:
        self.XS, xp_base = xs_dot_xp.split('.')
        self.XP = '.'.join([get_sys_name(), xp_base])
    except ValueError:
        raise StandardError('Invalid CFG for core_xps.science_data: "%s"; '
                            'must have "xs.xp" structure' % xs_dot_xp)

    subscription_id = pubsub_cli.create_subscription(query=ExchangeQuery(),
                                                     exchange_name='last_update_cache')

    config = {'couch_storage': {'datastore_name': dname,
                                'datastore_profile': 'SCIDATA'}}

    for i in xrange(number_of_workers):
        transform_id = tms_cli.create_transform(
            name='last_update_cache%d' % i,
            description='last_update that compiles an aggregate of metadata',
            in_subscription_id=subscription_id,
            process_definition_id=proc_def_id,
            configuration=config)
        tms_cli.activate_transform(transform_id=transform_id)

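# Worked example (values illustrative) of the xs.xp parsing above: with
# CFG.core_xps.science_data set to 'ion.science_data' and a system name of
# 'mysys', the worker ends up bound to the system-scoped exchange point.
xs_dot_xp = 'ion.science_data'
XS, xp_base = xs_dot_xp.split('.')  # XS -> 'ion'
XP = '.'.join(['mysys', xp_base])   # XP -> 'mysys.science_data'
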
def start_data_subscribers(self):
    """
    """
    # Create a pubsub client to create streams.
    pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    # Create streams and subscriptions for each stream named in driver.
    self.data_subscribers = {}
    self.samples_received = {}

    # A callback for processing subscribed-to data.
    def recv_data(message, stream_route, stream_id):
        log.info('Received message on %s (%s,%s)', stream_id,
                 stream_route.exchange_point, stream_route.routing_key)
        # Attribute the message to its stream by matching routing keys.
        name = None
        for (stream_name, stream_config) in self.stream_config.iteritems():
            if stream_route.routing_key == stream_config['routing_key']:
                log.debug("NAME: %s %s == %s" % (stream_name,
                          stream_route.routing_key,
                          stream_config['routing_key']))
                name = stream_name
                if not self.samples_received.get(stream_name):
                    self.samples_received[stream_name] = []
                self.samples_received[stream_name].append(message)
                log.debug("Add message to stream '%s' value: %s" % (stream_name, message))

    for (stream_name, stream_config) in self.stream_config.iteritems():
        stream_id = stream_config['stream_id']

        # Create subscriptions for each stream.
        exchange_name = '%s_queue' % stream_name
        self._purge_queue(exchange_name)
        sub = StandaloneStreamSubscriber(exchange_name, recv_data)
        sub.start()
        self.data_subscribers[stream_name] = sub
        sub_id = pubsub_client.create_subscription(name=exchange_name,
                                                   stream_ids=[stream_id])
        pubsub_client.activate_subscription(sub_id)
        # Bind the subscription to the standalone subscriber (easier cleanup,
        # not good in real practice).
        sub.subscription_id = sub_id

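# Usage sketch (assumption): with the per-stream bookkeeping above, a test can
# poll samples_received by stream name instead of blocking on an AsyncResult.
# The helper name and timeout are illustrative.
import time
import gevent

def _example_await_stream(self, stream_name, count, timeout=60):
    deadline = time.time() + timeout
    while time.time() < deadline:
        if len(self.samples_received.get(stream_name, [])) >= count:
            return self.samples_received[stream_name]
        gevent.sleep(1)  # yield so the subscriber greenlet can run
    raise Exception('timed out waiting for %d samples on %s' % (count, stream_name))
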
def _start_data_subscribers(self, count):
    """
    """
    # Create a pubsub client to create streams.
    pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    # Create streams and subscriptions for each stream named in driver.
    self._data_subscribers = []
    self._samples_received = []
    self._async_sample_result = AsyncResult()

    # A callback for processing subscribed-to data.
    def recv_data(message, stream_route, stream_id):
        log.info('Received message on %s (%s,%s)', stream_id,
                 stream_route.exchange_point, stream_route.routing_key)
        #print '######################## stream message:'
        #print str(message)
        self._samples_received.append(message)
        if len(self._samples_received) == count:
            self._async_sample_result.set()

    for (stream_name, stream_config) in self._stream_config.iteritems():
        if stream_name == 'parsed':
            # Create subscription for parsed stream only.
            stream_id = stream_config['stream_id']

            from pyon.util.containers import create_unique_identifier
            # exchange_name = '%s_queue' % stream_name
            exchange_name = create_unique_identifier("%s_queue" % stream_name)
            self._purge_queue(exchange_name)
            sub = StandaloneStreamSubscriber(exchange_name, recv_data)
            sub.start()
            self._data_subscribers.append(sub)
            print 'stream_id: %s' % stream_id
            sub_id = pubsub_client.create_subscription(name=exchange_name,
                                                       stream_ids=[stream_id])
            pubsub_client.activate_subscription(sub_id)
            # Bind the subscription to the standalone subscriber (easier
            # cleanup, not good in real practice).
            sub.subscription_id = sub_id

def _start_data_subscribers(self, count):
    """
    """
    # Create a pubsub client to create streams.
    pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    # Create streams and subscriptions for each stream named in driver.
    self._data_subscribers = []
    self._samples_received = []
    self._async_sample_result = AsyncResult()

    # A callback for processing subscribed-to data.
    def recv_data(message, stream_route, stream_id):
        log.info("Received message on %s (%s,%s)", stream_id,
                 stream_route.exchange_point, stream_route.routing_key)
        self._samples_received.append(message)
        if len(self._samples_received) == count:
            self._async_sample_result.set()

    for (stream_name, stream_config) in self._stream_config.iteritems():
        stream_id = stream_config["stream_id"]

        # Create subscriptions for each stream.
        from pyon.util.containers import create_unique_identifier
        # exchange_name = '%s_queue' % stream_name
        exchange_name = create_unique_identifier("%s_queue" % stream_name)
        self._purge_queue(exchange_name)
        sub = StandaloneStreamSubscriber(exchange_name, recv_data)
        sub.start()
        self._data_subscribers.append(sub)
        print "stream_id: %s" % stream_id
        sub_id = pubsub_client.create_subscription(name=exchange_name,
                                                   stream_ids=[stream_id])
        pubsub_client.activate_subscription(sub_id)
        # Bind the subscription to the standalone subscriber (easier cleanup,
        # not good in real practice).
        sub.subscription_id = sub_id

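# Sketch of the _purge_queue() helper used above (assumed implementation,
# modeled on the container exchange-manager calls used elsewhere in this
# section, e.g. create_xn_queue()/delete() in the queue cleanup code): drain
# any stale messages so sample counts start from zero.
def _purge_queue(self, queue_name):
    xn = self.container.ex_manager.create_xn_queue(queue_name)
    xn.purge()
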
class TestCTDPChain(IonIntegrationTestCase):
    def setUp(self):
        super(TestCTDPChain, self).setUp()
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # This is for the time values inside the packets going into the transform
        self.i = 0
        self.cnt = 0

        # Cleanup of queue created by the subscriber
        self.queue_cleanup = []
        self.data_process_cleanup = []

    def _get_new_ctd_L0_packet(self, stream_definition_id, length):
        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i + length)

        for field in rdt:
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0, 75.0)
                                          for i in xrange(length)])

        g = rdt.to_granule()
        self.i += length
        return g

    def clean_queues(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()

    def cleaning_operations(self):
        for dproc_id in self.data_process_cleanup:
            self.data_process_management.delete_data_process(dproc_id)

    def test_ctdp_chain(self):
        """
        Test that packets are processed by a chain of CTDP transforms: L0, L1 and L2
        """
        #-------------------------------------------------------------------------------------
        # Prepare the stream def to be used for transform chain
        #-------------------------------------------------------------------------------------
        #todo Check whether the right parameter dictionary is being used
        self._prepare_stream_def_for_transform_chain()

        #-------------------------------------------------------------------------------------
        # Prepare the data proc defs and in and out data products for the transforms
        #-------------------------------------------------------------------------------------
        # list_args_L0 = [data_proc_def_id, input_dpod_id, output_dpod_id]
        list_args_L0 = self._prepare_things_you_need_to_launch_transform(name_of_transform='L0')
        list_args_L1 = self._prepare_things_you_need_to_launch_transform(name_of_transform='L1')
        list_args_L2_density = self._prepare_things_you_need_to_launch_transform(name_of_transform='L2_density')
        list_args_L2_salinity = self._prepare_things_you_need_to_launch_transform(name_of_transform='L2_salinity')

        log.debug("Got the following args: L0 = %s, L1 = %s, L2 density = %s, L2 salinity = %s",
                  list_args_L0, list_args_L1, list_args_L2_density, list_args_L2_salinity)

        #-------------------------------------------------------------------------------------
        # Launch the CTDP transforms
        #-------------------------------------------------------------------------------------
        L0_data_proc_id = self._launch_transform('L0', *list_args_L0)
        L1_data_proc_id = self._launch_transform('L1', *list_args_L1)
        L2_density_data_proc_id = self._launch_transform('L2_density', *list_args_L2_density)
        L2_salinity_data_proc_id = self._launch_transform('L2_salinity', *list_args_L2_salinity)

        log.debug("Launched the transforms: L0 = %s, L1 = %s",
                  L0_data_proc_id, L1_data_proc_id)

        #-------------------------------------------------------------------------
        # Start a subscriber listening to the output of each of the transforms
        #-------------------------------------------------------------------------
        ar_L0 = self.start_subscriber_listening_to_L0_transform(out_data_prod_id=list_args_L0[2])
        ar_L1 = self.start_subscriber_listening_to_L1_transform(out_data_prod_id=list_args_L1[2])
        ar_L2_density = self.start_subscriber_listening_to_L2_density_transform(out_data_prod_id=list_args_L2_density[2])
        ar_L2_salinity = self.start_subscriber_listening_to_L2_salinity_transform(out_data_prod_id=list_args_L2_salinity[2])

        #-------------------------------------------------------------------
        # Publish the parsed packets that the L0 transform is listening for
        #-------------------------------------------------------------------
        stream_id, stream_route = self.get_stream_and_route_for_data_prod(data_prod_id=list_args_L0[1])
        self._publish_for_L0_transform(stream_id, stream_route)

        #-------------------------------------------------------------------
        # Check the granules being outputted by the transforms
        #-------------------------------------------------------------------
        self._check_granule_from_L0_transform(ar_L0)
        self._check_granule_from_L1_transform(ar_L1)
        self._check_granule_from_L2_density_transform(ar_L2_density)
        self._check_granule_from_L2_salinity_transform(ar_L2_salinity)

    def _prepare_stream_def_for_transform_chain(self):
        # Get the stream definition for the stream using the parameter dictionary
        # pdict_id = self.dataset_management.read_parameter_dictionary_by_name(parameter_dict_name, id_only=True)
        pdict_id = self._create_input_param_dict_for_test(parameter_dict_name='input_param_for_L0')
        self.in_stream_def_id_for_L0 = self.pubsub.create_stream_definition(
            name='stream_def_for_L0',
            parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, self.in_stream_def_id_for_L0)

        pdict_id = self._create_input_param_dict_for_test(parameter_dict_name='params_for_other_transforms')
        self.stream_def_id = self.pubsub.create_stream_definition(
            name='stream_def_for_CTDBP_transforms',
            parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, self.stream_def_id)

        log.debug("Got the parsed parameter dictionary: id: %s", pdict_id)
        log.debug("Got the stream def for parsed input to L0: %s", self.in_stream_def_id_for_L0)
        log.debug("Got the stream def for other streams: %s", self.stream_def_id)

    def _prepare_things_you_need_to_launch_transform(self, name_of_transform=''):
        module, class_name = self._get_class_module(name_of_transform)

        #-------------------------------------------------------------------------
        # Data Process Definition
        #-------------------------------------------------------------------------
        dpd_obj = IonObject(RT.DataProcessDefinition,
                            name='CTDBP_%s_Transform' % name_of_transform,
                            description='Data Process Definition for the CTDBP %s transform.' % name_of_transform,
                            module=module,
                            class_name=class_name)
        data_proc_def_id = self.data_process_management.create_data_process_definition(dpd_obj)
        self.addCleanup(self.data_process_management.delete_data_process_definition, data_proc_def_id)
        log.debug("created data process definition: id = %s", data_proc_def_id)

        #-------------------------------------------------------------------------
        # Construct temporal and spatial Coordinate Reference System objects for
        # the data product objects
        #-------------------------------------------------------------------------
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        #-------------------------------------------------------------------------
        # Get the names of the input and output data products
        #-------------------------------------------------------------------------
        input_dpod_id = ''
        output_dpod_id = ''

        if name_of_transform == 'L0':
            input_dpod_id = self._create_input_data_product('parsed', tdom, sdom)
            output_dpod_id = self._create_output_data_product('L0', tdom, sdom)
            self.in_prod_for_L1 = output_dpod_id
        elif name_of_transform == 'L1':
            input_dpod_id = self.in_prod_for_L1
            output_dpod_id = self._create_output_data_product('L1', tdom, sdom)
            self.in_prod_for_L2 = output_dpod_id
        elif name_of_transform == 'L2_density':
            input_dpod_id = self.in_prod_for_L2
            output_dpod_id = self._create_output_data_product('L2_density', tdom, sdom)
        elif name_of_transform == 'L2_salinity':
            input_dpod_id = self.in_prod_for_L2
            output_dpod_id = self._create_output_data_product('L2_salinity', tdom, sdom)
        else:
            self.fail("something bad happened")

        return [data_proc_def_id, input_dpod_id, output_dpod_id]

    def _get_class_module(self, name_of_transform):
        options = {'L0': self._class_module_L0,
                   'L1': self._class_module_L1,
                   'L2_density': self._class_module_L2_density,
                   'L2_salinity': self._class_module_L2_salinity}
        return options[name_of_transform]()

    def _class_module_L0(self):
        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L0'
        class_name = 'CTDBP_L0_all'
        return module, class_name

    def _class_module_L1(self):
        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L1'
        class_name = 'CTDBP_L1_Transform'
        return module, class_name

    def _class_module_L2_density(self):
        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L2_density'
        class_name = 'CTDBP_DensityTransform'
        return module, class_name

    def _class_module_L2_salinity(self):
        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L2_salinity'
        class_name = 'CTDBP_SalinityTransform'
        return module, class_name

    def _create_input_data_product(self, name_of_transform='', tdom=None, sdom=None):
        dpod_obj = IonObject(RT.DataProduct,
                             name='dprod_%s' % name_of_transform,
                             description='for_%s' % name_of_transform,
                             temporal_domain=tdom,
                             spatial_domain=sdom)
        log.debug("the stream def id: %s", self.stream_def_id)

        if name_of_transform == 'L0':
            stream_def_id = self.in_stream_def_id_for_L0
        else:
            stream_def_id = self.stream_def_id

        dpod_id = self.dataproduct_management.create_data_product(data_product=dpod_obj,
                                                                  stream_definition_id=stream_def_id)
        self.addCleanup(self.dataproduct_management.delete_data_product, dpod_id)
        log.debug("got the data product out. id: %s", dpod_id)
        return dpod_id

    def _create_output_data_product(self, name_of_transform='', tdom=None, sdom=None):
        dpod_obj = IonObject(RT.DataProduct,
                             name='dprod_%s' % name_of_transform,
                             description='for_%s' % name_of_transform,
                             temporal_domain=tdom,
                             spatial_domain=sdom)

        if name_of_transform == 'L0':
            stream_def_id = self.in_stream_def_id_for_L0
        else:
            stream_def_id = self.stream_def_id

        dpod_id = self.dataproduct_management.create_data_product(data_product=dpod_obj,
                                                                  stream_definition_id=stream_def_id)
        self.addCleanup(self.dataproduct_management.delete_data_product, dpod_id)
        return dpod_id

    def _launch_transform(self, name_of_transform='', data_proc_def_id=None,
                          input_dpod_id=None, output_dpod_id=None):
        # We need the key name here to be "L2_stream", since when the data
        # process is launched, this name goes into the config as in
        # config.process.publish_streams.L2_stream when the config is used to
        # launch the data process.
        if name_of_transform in ['L0', 'L1']:
            binding = '%s_stream' % name_of_transform
        elif name_of_transform == 'L2_salinity':
            binding = 'salinity'
        elif name_of_transform == 'L2_density':
            binding = 'density'

        config = None
        if name_of_transform == 'L1':
            config = self._create_calibration_coefficients_dict()
        elif name_of_transform == 'L2_density':
            config = DotDict()
            config.process = {'lat': 32.7153, 'lon': 117.1564}

        log.debug("launching transform for name: %s", name_of_transform)
        log.debug("launching transform for data_proc_def_id: %s\ninput_dpod_id: %s\noutput_dpod_id: %s",
                  data_proc_def_id, input_dpod_id, output_dpod_id)

        data_proc_id = self.data_process_management.create_data_process(
            data_process_definition_id=data_proc_def_id,
            in_data_product_ids=[input_dpod_id],
            out_data_product_ids=[output_dpod_id],
            configuration=config)
        self.addCleanup(self.data_process_management.delete_data_process, data_proc_id)

        self.data_process_management.activate_data_process(data_proc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process, data_proc_id)

        log.debug("Created a data process for ctdbp %s transform: id = %s",
                  name_of_transform, data_proc_id)
        return data_proc_id

    def get_stream_and_route_for_data_prod(self, data_prod_id=''):
        stream_ids, _ = self.resource_registry.find_objects(data_prod_id,
                                                            PRED.hasStream,
                                                            RT.Stream, True)
        stream_id = stream_ids[0]
        input_stream = self.resource_registry.read(stream_id)
        stream_route = input_stream.stream_route
        return stream_id, stream_route

    def start_subscriber_listening_to_L0_transform(self, out_data_prod_id=''):
        #----------- Create subscribers to listen to the two transforms --------------------------------
        stream_ids, _ = self.resource_registry.find_objects(out_data_prod_id,
                                                            PRED.hasStream,
                                                            RT.Stream, True)
        output_stream_id_of_transform = stream_ids[0]
        ar_L0 = self._start_subscriber_to_transform(name_of_transform='L0',
                                                    stream_id=output_stream_id_of_transform)
        return ar_L0

    def start_subscriber_listening_to_L1_transform(self, out_data_prod_id=''):
        #----------- Create subscribers to listen to the two transforms --------------------------------
        stream_ids, _ = self.resource_registry.find_objects(out_data_prod_id,
                                                            PRED.hasStream,
                                                            RT.Stream, True)
        output_stream_id_of_transform = stream_ids[0]
        ar_L1 = self._start_subscriber_to_transform(name_of_transform='L1',
                                                    stream_id=output_stream_id_of_transform)
        return ar_L1

    def start_subscriber_listening_to_L2_density_transform(self, out_data_prod_id=''):
        #----------- Create subscribers to listen to the two transforms --------------------------------
        stream_ids, _ = self.resource_registry.find_objects(out_data_prod_id,
                                                            PRED.hasStream,
                                                            RT.Stream, True)
        output_stream_id_of_transform = stream_ids[0]
        ar_L2_density = self._start_subscriber_to_transform(name_of_transform='L2_density',
                                                            stream_id=output_stream_id_of_transform)
        return ar_L2_density

    def start_subscriber_listening_to_L2_salinity_transform(self, out_data_prod_id=''):
        #----------- Create subscribers to listen to the two transforms --------------------------------
        stream_ids, _ = self.resource_registry.find_objects(out_data_prod_id,
                                                            PRED.hasStream,
                                                            RT.Stream, True)
        output_stream_id_of_transform = stream_ids[0]
        ar_L2_salinity = self._start_subscriber_to_transform(name_of_transform='L2_salinity',
                                                             stream_id=output_stream_id_of_transform)
        return ar_L2_salinity

    def _start_subscriber_to_transform(self, name_of_transform='', stream_id=''):
        ar = gevent.event.AsyncResult()

        def subscriber(m, r, s):
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub_%s' % name_of_transform,
                                         callback=subscriber)

        # Note that running the line below creates an exchange, since none of
        # that name exists beforehand.
        sub_id = self.pubsub.create_subscription('subscriber_to_transform_%s' % name_of_transform,
                                                 stream_ids=[stream_id],
                                                 exchange_name='sub_%s' % name_of_transform)
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)

        sub.start()
        self.addCleanup(sub.stop)
        return ar

    def _check_granule_from_L0_transform(self, ar=None):
        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L0 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L0_algorithm(granule_from_transform)

    def _check_granule_from_L1_transform(self, ar=None):
        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L1 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L1_algorithm(granule_from_transform)

    def _check_granule_from_L2_density_transform(self, ar=None):
        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L2 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L2_density_algorithm(granule_from_transform)

    def _check_granule_from_L2_salinity_transform(self, ar=None):
        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L2 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L2_salinity_algorithm(granule_from_transform)

    def _check_application_of_L0_algorithm(self, granule=None):
        """ Check the algorithm applied by the L0 transform """
        rdt = RecordDictionaryTool.load_from_granule(granule)
        list_of_expected_keys = ['time', 'pressure', 'conductivity', 'temperature']
        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def _check_application_of_L1_algorithm(self, granule=None):
        """ Check the algorithm applied by the L1 transform """
        rdt = RecordDictionaryTool.load_from_granule(granule)
        list_of_expected_keys = ['time', 'pressure', 'conductivity', 'temp']
        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def _check_application_of_L2_density_algorithm(self, granule=None):
        """ Check the algorithm applied by the L2 transform """
        rdt = RecordDictionaryTool.load_from_granule(granule)
        list_of_expected_keys = ['time', 'density']
        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def
 _check_application_of_L2_salinity_algorithm(self, granule=None):
        """ Check the algorithm applied by the L2 transform """
        rdt = RecordDictionaryTool.load_from_granule(granule)
        list_of_expected_keys = ['time', 'salinity']
        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def _publish_for_L0_transform(self, input_stream_id=None, stream_route=None):
        #----------- Publish on that stream so that the transform can receive it --------------------------------
        self._publish_to_transform(input_stream_id, stream_route)

    def _publish_to_transform(self, stream_id='', stream_route=None):
        pub = StandaloneStreamPublisher(stream_id, stream_route)
        publish_granule = self._get_new_ctd_L0_packet(stream_definition_id=self.in_stream_def_id_for_L0,
                                                      length=5)
        pub.publish(publish_granule)
        log.debug("Published the following granule: %s", publish_granule)

    def _create_input_param_dict_for_test(self, parameter_dict_name=''):
        pdict = ParameterDictionary()

        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(t_ctxt)

        cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        cond_ctxt.uom = 'Siemens_per_meter'
        pdict.add_context(cond_ctxt)

        pres_ctxt = ParameterContext('pressure', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        pres_ctxt.uom = 'Pascal'
        pdict.add_context(pres_ctxt)

        if parameter_dict_name == 'input_param_for_L0':
            temp_ctxt = ParameterContext('temperature', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        else:
            temp_ctxt = ParameterContext('temp', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        temp_ctxt.uom = 'degree_kelvin'
        pdict.add_context(temp_ctxt)

        dens_ctxt = ParameterContext('density', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        dens_ctxt.uom = 'g/m'
        pdict.add_context(dens_ctxt)

        sal_ctxt = ParameterContext('salinity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        sal_ctxt.uom = 'PSU'
        pdict.add_context(sal_ctxt)

        # Create temp streamdef so the data product can create the stream.
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(pc_k, pc[1].dump())
            pc_list.append(ctxt_id)
            if parameter_dict_name == 'input_param_for_L0':
                self.addCleanup(self.dataset_management.delete_parameter_context, ctxt_id)
            elif pc[1].name == 'temp':
                self.addCleanup(self.dataset_management.delete_parameter_context, ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)
        return pdict_id

    def _create_calibration_coefficients_dict(self):
        config = DotDict()
        config.process.calibration_coeffs = {
            'temp_calibration_coeffs': {
                'TA0': 1.561342e-03,
                'TA1': 2.561486e-04,
                'TA2': 1.896537e-07,
                'TA3': 1.301189e-07,
                'TOFFSET': 0.000000e+00
            },
            'cond_calibration_coeffs': {
                'G': -9.896568e-01,
                'H': 1.316599e-01,
                'I': -2.213854e-04,
                'J': 3.292199e-05,
                'CPCOR': -9.570000e-08,
                'CTCOR': 3.250000e-06,
                'CSLOPE': 1.000000e+00
            },
            'pres_calibration_coeffs': {
                'PA0': 4.960417e-02,
                'PA1': 4.883682e-04,
                'PA2': -5.687309e-12,
                'PTCA0': 5.249802e+05,
                'PTCA1': 7.595719e+00,
                'PTCA2': -1.322776e-01,
                'PTCB0': 2.503125e+01,
                'PTCB1': 5.000000e-05,
                'PTCB2': 0.000000e+00,
                'PTEMPA0': -6.431504e+01,
                'PTEMPA1': 5.168177e+01,
                'PTEMPA2': -2.847757e-01,
                'POFFSET': 0.000000e+00
            }
        }
        return config

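# Minimal roundtrip sketch (illustrative) of the pattern the chain test above
# uses for each transform: subscribe to the transform's output stream, publish
# one input packet, then block until the output granule arrives. The two ids
# are placeholders for values obtained from the resource registry.
def _example_roundtrip(self):
    ar = self._start_subscriber_to_transform(name_of_transform='L0',
                                             stream_id='<L0 output stream id>')
    stream_id, stream_route = self.get_stream_and_route_for_data_prod(
        data_prod_id='<L0 input data product id>')
    self._publish_to_transform(stream_id, stream_route)
    granule = ar.get(timeout=20)  # AsyncResult set by the subscriber callback
    rdt = RecordDictionaryTool.load_from_granule(granule)
    self.assertIn('time', rdt)
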
class TestInstrumentAgent(IonIntegrationTestCase):
    """
    Test cases for instrument agent class. Functions in this class provide
    instrument agent integration tests and provide a tutorial on use of
    the agent setup and interface.
    """

    def customCleanUp(self):
        log.info('CUSTOM CLEAN UP ******************************************************************************')

    def setUp(self):
        """
        Set up the test environment to exercise use of instrument agent, including:
        * define driver_config parameters.
        * create container with required services and container client.
        * create publication stream ids for each driver data stream.
        * create stream_config parameters.
        * create and activate subscriptions for agent data streams.
        * spawn instrument agent process and create agent client.
        * add cleanup functions to cause subscribers to get stopped.
        """

        # params = { ('CTD', 'TA2'): -1.9434316e-05,
        #            ('CTD', 'PTCA1'): 1.3206866,
        #            ('CTD', 'TCALDATE'): [8, 11, 2006] }
        # for tup in params:
        #     print tup

        self.addCleanup(self.customCleanUp)

        # Names of agent data streams to be configured.
        parsed_stream_name = 'ctd_parsed'
        raw_stream_name = 'ctd_raw'

        # Driver configuration.
        # Simulator
        self.driver_config = {
            'svr_addr': 'localhost',
            'cmd_port': 5556,
            'evt_port': 5557,
            'dvr_mod': 'ion.services.mi.drivers.sbe37.sbe37_driver',
            'dvr_cls': 'SBE37Driver',
            'comms_config': {
                SBE37Channel.CTD: {
                    'method': 'ethernet',
                    'device_addr': CFG.device.sbe37.host,
                    'device_port': CFG.device.sbe37.port,
                    'server_addr': 'localhost',
                    'server_port': 8888
                }
            }
        }

        # Hardware
        '''
        self.driver_config = {
            'svr_addr': 'localhost',
            'cmd_port': 5556,
            'evt_port': 5557,
            'dvr_mod': 'ion.services.mi.drivers.sbe37.sbe37_driver',
            'dvr_cls': 'SBE37Driver',
            'comms_config': {
                SBE37Channel.CTD: {
                    'method': 'ethernet',
                    'device_addr': '137.110.112.119',
                    'device_port': 4001,
                    'server_addr': 'localhost',
                    'server_port': 8888
                }
            }
        }
        '''

        # Start container.
        self._start_container()

        # Establish endpoint with container (used in tests below).
        self._container_client = ContainerAgentClient(node=self.container.node,
                                                      name=self.container.name)

        # Bring up services in a deploy file (no need to message).
        self.container.start_rel_from_url('res/deploy/r2dm.yml')

        # Create a pubsub client to create streams.
        self._pubsub_client = PubsubManagementServiceClient(node=self.container.node)

        # A callback for processing subscribed-to data.
        def consume(message, headers):
            log.info('Subscriber received message: %s', str(message))

        # Create a stream subscriber registrar to create subscribers.
        subscriber_registrar = StreamSubscriberRegistrar(process=self.container,
                                                         container=self.container)

        self.subs = []

        # Create streams for each stream named in driver.
        self.stream_config = {}
        for (stream_name, val) in PACKET_CONFIG.iteritems():
            stream_def = ctd_stream_definition(stream_id=None)
            stream_def_id = self._pubsub_client.create_stream_definition(container=stream_def)
            stream_id = self._pubsub_client.create_stream(
                name=stream_name,
                stream_definition_id=stream_def_id,
                original=True,
                encoding='ION R2')
            self.stream_config[stream_name] = stream_id

            # Create subscriptions for each stream.
            exchange_name = '%s_queue' % stream_name
            sub = subscriber_registrar.create_subscriber(exchange_name=exchange_name,
                                                         callback=consume)
            sub.start()
            query = StreamQuery(stream_ids=[stream_id])
            sub_id = self._pubsub_client.create_subscription(query=query,
                                                             exchange_name=exchange_name)
            self._pubsub_client.activate_subscription(sub_id)
            self.subs.append(sub)

        # Add cleanup function to stop subscribers.
        def stop_subscriber(sub_list):
            for sub in sub_list:
                sub.stop()
        self.addCleanup(stop_subscriber, self.subs)

        # Create agent config.
        self.agent_resource_id = '123xyz'
        self.agent_config = {
            'driver_config': self.driver_config,
            'stream_config': self.stream_config,
            'agent': {'resource_id': self.agent_resource_id}
        }

        # Launch an instrument agent process.
        self._ia_name = 'agent007'
        self._ia_mod = 'ion.services.mi.instrument_agent'
        self._ia_class = 'InstrumentAgent'
        self._ia_pid = self._container_client.spawn_process(name=self._ia_name,
                                                            module=self._ia_mod,
                                                            cls=self._ia_class,
                                                            config=self.agent_config)
        log.info('got pid=%s', str(self._ia_pid))

        self._ia_client = None
        # Start a resource agent client to talk with the instrument agent.
        self._ia_client = ResourceAgentClient(self.agent_resource_id,
                                              process=FakeProcess())
        log.info('got ia client %s', str(self._ia_client))

    def test_initialize(self):
        """
        Test agent initialize command. This causes creation of driver process
        and transition to inactive.
        """
        cmd = AgentCommand(command='initialize')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        log.info(reply)
        time.sleep(2)

        caps = gw_agent_get_capabilities(self.agent_resource_id)
        log.info('Capabilities: %s', str(caps))
        time.sleep(2)

        cmd = AgentCommand(command='reset')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        log.info(reply)

    def test_direct_access(self):
        """
        Test agent direct_access command. This causes creation of driver
        process and transition to direct access.
        """
        print("test initing")
        cmd = AgentCommand(command='initialize')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        print("test go_active")
        cmd = AgentCommand(command='go_active')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        print("test run")
        cmd = AgentCommand(command='run')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        print("test go_da")
        cmd = AgentCommand(command='go_direct_access')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        print("reply=" + str(reply))
        time.sleep(2)

        print("test go_ob")
        cmd = AgentCommand(command='go_observatory')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        print("test go_inactive")
        cmd = AgentCommand(command='go_inactive')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        print("test reset")
        cmd = AgentCommand(command='reset')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

    def test_go_active(self):
        """
        Test agent go_active command. This causes a driver process to launch a
        connection broker, connect to device hardware, determine entry state
        of driver and initialize driver parameters.
        """
        cmd = AgentCommand(command='initialize')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        cmd = AgentCommand(command='go_active')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        cmd = AgentCommand(command='go_inactive')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        cmd = AgentCommand(command='reset')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

def run_basic_transform(self):
    '''
    Runs a basic example of a transform. It chains two transforms together,
    each of which adds 1 to its input.

    Producer -> A -> B

    Producer generates a number every four seconds and publishes it on the
    'ctd_output_stream'; the producer is acting as a CTD or instrument in
    this example. A is a basic transform that increments its input and
    publishes it on the 'transform_output' stream. B is a basic transform
    that receives input. All transforms write logging data to
    'FS.TEMP/transform_output' so you can visually see the activity of the
    transforms.
    '''
    pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
    tms_cli = TransformManagementServiceClient(node=self.container.node)
    procd_cli = ProcessDispatcherServiceClient(node=self.container.node)

    #-------------------------------
    # Process Definition
    #-------------------------------
    process_definition = IonObject(RT.ProcessDefinition,
                                   name='transform_process_definition')
    process_definition.executable = {
        'module': 'ion.processes.data.transforms.transform_example',
        'class': 'TransformExample'
    }
    process_definition_id = procd_cli.create_process_definition(process_definition)

    #-------------------------------
    # First Transform
    #-------------------------------
    # Create a dummy output stream from a 'ctd' instrument
    ctd_output_stream_id = pubsub_cli.create_stream(name='ctd_output_stream',
                                                    original=True)

    # Create the subscription to the ctd_output_stream
    query = StreamQuery(stream_ids=[ctd_output_stream_id])
    ctd_subscription_id = pubsub_cli.create_subscription(query=query,
                                                         exchange_name='ctd_output')

    # Create an output stream for the transform
    transform_output_stream_id = pubsub_cli.create_stream(name='transform_output',
                                                          original=True)

    configuration = {}

    # Launch the first transform process
    transform_id = tms_cli.create_transform(
        name='basic_transform',
        in_subscription_id=ctd_subscription_id,
        out_streams={'output': transform_output_stream_id},
        process_definition_id=process_definition_id,
        configuration=configuration)
    tms_cli.activate_transform(transform_id)

    #-------------------------------
    # Second Transform
    #-------------------------------
    # Create a SUBSCRIPTION to this output stream for the second transform
    query = StreamQuery(stream_ids=[transform_output_stream_id])
    second_subscription_id = pubsub_cli.create_subscription(query=query,
                                                            exchange_name='final_output')

    # Create a final output stream
    final_output_id = pubsub_cli.create_stream(name='final_output', original=True)

    configuration = {}
    second_transform_id = tms_cli.create_transform(
        name='second_transform',
        in_subscription_id=second_subscription_id,
        out_streams={'output': final_output_id},
        process_definition_id=process_definition_id,
        configuration=configuration)
    tms_cli.activate_transform(second_transform_id)

    #-------------------------------
    # Producer (Sample Input)
    #-------------------------------
    # Create a producing example process
    id_p = self.container.spawn_process(
        'myproducer',
        'ion.processes.data.transforms.transform_example',
        'TransformExampleProducer',
        {'process': {'type': 'stream_process',
                     'publish_streams': {'out_stream': ctd_output_stream_id}},
         'stream_producer': {'interval': 4000}})
    self.container.proc_manager.procs[id_p].start()

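# Verification sketch (assumption): the docstring above says the example
# transforms append their activity to 'FS.TEMP/transform_output', so the chain
# can be watched by polling that file while the producer runs. The resolved
# path is illustrative; pass whatever FS.TEMP maps to on your system.
import time

def watch_transform_output(path, seconds=20):
    with open(path) as f:
        deadline = time.time() + seconds
        while time.time() < deadline:
            line = f.readline()
            if line:
                print line.rstrip()  # Python 2 print, matching this codebase
            else:
                time.sleep(0.5)
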
class LastUpdateCacheTest(IonIntegrationTestCase): def setUp(self): self._start_container() self.datastore_name = CACHE_DATASTORE_NAME self.container.start_rel_from_url('res/deploy/r2dm.yml') self.db = self.container.datastore_manager.get_datastore(self.datastore_name,DataStore.DS_PROFILE.SCIDATA) self.tms_cli = TransformManagementServiceClient() self.pubsub_cli = PubsubManagementServiceClient() self.pd_cli = ProcessDispatcherServiceClient() self.rr_cli = ResourceRegistryServiceClient() xs_dot_xp = CFG.core_xps.science_data try: self.XS, xp_base = xs_dot_xp.split('.') self.XP = '.'.join([bootstrap.get_sys_name(), xp_base]) except ValueError: raise StandardError('Invalid CFG for core_xps.science_data: "%s"; must have "xs.xp" structure' % xs_dot_xp) def make_points(self,definition,stream_id='I am very special', N=100): from prototype.sci_data.constructor_apis import PointSupplementConstructor import numpy as np import random definition.stream_resource_id = stream_id total = N n = 10 # at most n records per granule i = 0 while i < total: r = random.randint(1,n) psc = PointSupplementConstructor(point_definition=definition, stream_id=stream_id) for x in xrange(r): i+=1 point_id = psc.add_point(time=i, location=(0,0,0)) psc.add_scalar_point_coverage(point_id=point_id, coverage_id='temperature', value=np.random.normal(loc=48.0,scale=4.0, size=1)[0]) psc.add_scalar_point_coverage(point_id=point_id, coverage_id='pressure', value=np.float32(1.0)) psc.add_scalar_point_coverage(point_id=point_id, coverage_id='conductivity', value=np.float32(2.0)) granule = psc.close_stream_granule() hdf_string = granule.identifiables[definition.data_stream_id].values granule.identifiables[definition.data_stream_id].values = hdf_string yield granule return def start_worker(self): proc_def = ProcessDefinition() proc_def.executable['module'] = 'ion.processes.data.last_update_cache' proc_def.executable['class'] = 'LastUpdateCache' proc_def_id = self.pd_cli.create_process_definition(process_definition=proc_def) subscription_id = self.pubsub_cli.create_subscription(query=ExchangeQuery(), exchange_name='ingestion_aggregate') config = { 'couch_storage' : { 'datastore_name' :self.datastore_name, 'datastore_profile' : 'SCIDATA' } } transform_id = self.tms_cli.create_transform( name='last_update_cache', description='LastUpdate that compiles an aggregate of metadata', in_subscription_id=subscription_id, process_definition_id=proc_def_id, configuration=config ) self.tms_cli.activate_transform(transform_id=transform_id) transform = self.rr_cli.read(transform_id) pid = transform.process_id handle = self.container.proc_manager.procs[pid] return handle @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_last_update_cache(self): handle = self.start_worker() queue = Queue() o_process = handle.process def new_process(msg): o_process(msg) queue.put(True) handle.process = new_process definition = SBE37_CDM_stream_definition() publisher = Publisher() stream_def_id = self.pubsub_cli.create_stream_definition(container=definition) stream_id = self.pubsub_cli.create_stream(stream_definition_id=stream_def_id) time = float(0.0) for granule in self.make_points(definition=definition, stream_id=stream_id, N=10): publisher.publish(granule, to_name=(self.XP, stream_id+'.data')) # Determinism sucks try: queue.get(timeout=5) except Empty: self.assertTrue(False, 'Process never received the message.') doc = self.db.read(stream_id) ntime = doc.variables['time'].value self.assertTrue(ntime >= 
time, 'The document was not updated in sequence.') time = ntime
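# The test above wraps handle.process so that every message the cache worker
# consumes also drops a token on a gevent Queue; queue.get(timeout=5) then turns
# the asynchronous ingestion pipeline into something the test can block on. The
# same trick as a reusable sketch (hypothetical helper; assumes only the Queue
# already used above):
def notify_on_process(handle, queue):
    """Wrap a process callback so each invocation signals the given queue."""
    original = handle.process
    def wrapped(msg):
        original(msg)
        queue.put(True)
    handle.process = wrapped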
class TestDataProcessManagementPrime(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.dataset_management = DatasetManagementServiceClient() self.resource_registry = self.container.resource_registry self.pubsub_management = PubsubManagementServiceClient() self.data_process_management = DataProcessManagementServiceClient() self.data_product_management = DataProductManagementServiceClient() self.validators = 0 def lc_preload(self): config = DotDict() config.op = 'load' config.scenario = 'BASE,LC_TEST' config.categories = 'ParameterFunctions,ParameterDefs,ParameterDictionary' config.path = 'res/preload/r2_ioc' self.container.spawn_process('preload','ion.processes.bootstrap.ion_loader','IONLoader', config) def ctd_plain_input_data_product(self): available_fields = [ 'internal_timestamp', 'temp', 'preferred_timestamp', 'time', 'port_timestamp', 'quality_flag', 'lat', 'conductivity', 'driver_timestamp', 'lon', 'pressure'] return self.make_data_product('ctd_parsed_param_dict', 'ctd plain test', available_fields) def ctd_plain_salinity(self): available_fields = [ 'internal_timestamp', 'preferred_timestamp', 'time', 'port_timestamp', 'quality_flag', 'lat', 'driver_timestamp', 'lon', 'salinity'] return self.make_data_product('ctd_parsed_param_dict', 'salinity', available_fields) def ctd_plain_density(self): available_fields = [ 'internal_timestamp', 'preferred_timestamp', 'time', 'port_timestamp', 'quality_flag', 'lat', 'driver_timestamp', 'lon', 'density'] return self.make_data_product('ctd_parsed_param_dict', 'density', available_fields) def ctd_instrument_data_product(self): available_fields = [ 'internal_timestamp', 'temp', 'preferred_timestamp', 'time', 'port_timestamp', 'quality_flag', 'lat', 'conductivity', 'driver_timestamp', 'lon', 'pressure'] return self.make_data_product('ctd_LC_TEST', 'ctd instrument', available_fields) def make_data_product(self, pdict_name, dp_name, available_fields=[]): pdict_id = self.dataset_management.read_parameter_dictionary_by_name(pdict_name, id_only=True) stream_def_id = self.pubsub_management.create_stream_definition('%s stream_def' % dp_name, parameter_dictionary_id=pdict_id, available_fields=available_fields or None) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) tdom, sdom = time_series_domain() tdom = tdom.dump() sdom = sdom.dump() dp_obj = DataProduct(name=dp_name) dp_obj.temporal_domain = tdom dp_obj.spatial_domain = sdom data_product_id = self.data_product_management.create_data_product(dp_obj, stream_definition_id=stream_def_id) self.addCleanup(self.data_product_management.delete_data_product, data_product_id) return data_product_id def google_dt_data_product(self): return self.make_data_product('google_dt', 'visual') def ctd_derived_data_product(self): return self.make_data_product('ctd_LC_TEST', 'ctd derived products') def publish_to_plain_data_product(self, data_product_id): stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True) self.assertTrue(len(stream_ids)) stream_id = stream_ids.pop() route = self.pubsub_management.read_stream_route(stream_id) stream_definition = self.pubsub_management.read_stream_definition(stream_id=stream_id) stream_def_id = stream_definition._id publisher = StandaloneStreamPublisher(stream_id, route) rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) now = time.time() ntp_now = now + 2208988800 # Do not use in production, this is a loose 
translation rdt['internal_timestamp'] = [ntp_now] rdt['temp'] = [20.0] rdt['preferred_timestamp'] = ['driver_timestamp'] rdt['time'] = [ntp_now] rdt['port_timestamp'] = [ntp_now] rdt['quality_flag'] = [None] rdt['lat'] = [45] rdt['conductivity'] = [4.2914] rdt['driver_timestamp'] = [ntp_now] rdt['lon'] = [-71] rdt['pressure'] = [3.068] granule = rdt.to_granule() publisher.publish(granule) def publish_to_data_product(self, data_product_id): stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True) self.assertTrue(len(stream_ids)) stream_id = stream_ids.pop() route = self.pubsub_management.read_stream_route(stream_id) stream_definition = self.pubsub_management.read_stream_definition(stream_id=stream_id) stream_def_id = stream_definition._id publisher = StandaloneStreamPublisher(stream_id, route) rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) now = time.time() ntp_now = now + 2208988800 # Do not use in production, this is a loose translation rdt['internal_timestamp'] = [ntp_now] rdt['temp'] = [300000] rdt['preferred_timestamp'] = ['driver_timestamp'] rdt['time'] = [ntp_now] rdt['port_timestamp'] = [ntp_now] rdt['quality_flag'] = [None] rdt['lat'] = [45] rdt['conductivity'] = [4341400] rdt['driver_timestamp'] = [ntp_now] rdt['lon'] = [-71] rdt['pressure'] = [256.8] granule = rdt.to_granule() publisher.publish(granule) def setup_subscriber(self, data_product_id, callback): stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True) self.assertTrue(len(stream_ids)) stream_id = stream_ids.pop() sub_id = self.pubsub_management.create_subscription('validator_%s'%self.validators, stream_ids=[stream_id]) self.addCleanup(self.pubsub_management.delete_subscription, sub_id) self.pubsub_management.activate_subscription(sub_id) self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id) subscriber = StandaloneStreamSubscriber('validator_%s' % self.validators, callback=callback) subscriber.start() self.addCleanup(subscriber.stop) self.validators+=1 return subscriber def create_density_transform_function(self): tf = TransformFunction(name='ctdbp_l2_density', module='ion.processes.data.transforms.ctdbp.ctdbp_L2_density', cls='CTDBP_DensityTransformAlgorithm') tf_id = self.data_process_management.create_transform_function(tf) self.addCleanup(self.data_process_management.delete_transform_function, tf_id) return tf_id def create_salinity_transform_function(self): tf = TransformFunction(name='ctdbp_l2_salinity', module='ion.processes.data.transforms.ctdbp.ctdbp_L2_salinity', cls='CTDBP_SalinityTransformAlgorithm') tf_id = self.data_process_management.create_transform_function(tf) self.addCleanup(self.data_process_management.delete_transform_function, tf_id) return tf_id @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_data_process_prime(self): self.lc_preload() instrument_data_product_id = self.ctd_instrument_data_product() derived_data_product_id = self.ctd_derived_data_product() data_process_id = self.data_process_management.create_data_process2(in_data_product_ids=[instrument_data_product_id], out_data_product_ids=[derived_data_product_id]) self.addCleanup(self.data_process_management.delete_data_process2, data_process_id) self.data_process_management.activate_data_process2(data_process_id) self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id) validated = Event() def 
validation(msg, route, stream_id): rdt = RecordDictionaryTool.load_from_granule(msg) np.testing.assert_array_almost_equal(rdt['conductivity_L1'], np.array([42.914])) np.testing.assert_array_almost_equal(rdt['temp_L1'], np.array([20.])) np.testing.assert_array_almost_equal(rdt['pressure_L1'], np.array([3.068])) np.testing.assert_array_almost_equal(rdt['density'], np.array([1021.7144739593881])) np.testing.assert_array_almost_equal(rdt['salinity'], np.array([30.935132729668283])) validated.set() self.setup_subscriber(derived_data_product_id, callback=validation) self.publish_to_data_product(instrument_data_product_id) self.assertTrue(validated.wait(10)) def test_older_transform(self): input_data_product_id = self.ctd_plain_input_data_product() conductivity_data_product_id = self.make_data_product('ctd_parsed_param_dict', 'conductivity_product', ['time', 'conductivity']) conductivity_stream_def_id = self.get_named_stream_def('conductivity_product stream_def') temperature_data_product_id = self.make_data_product('ctd_parsed_param_dict', 'temperature_product', ['time', 'temp']) temperature_stream_def_id = self.get_named_stream_def('temperature_product stream_def') pressure_data_product_id = self.make_data_product('ctd_parsed_param_dict', 'pressure_product', ['time', 'pressure']) pressure_stream_def_id = self.get_named_stream_def('pressure_product stream_def') dpd = DataProcessDefinition(name='ctdL0') dpd.data_process_type = DataProcessTypeEnum.TRANSFORM dpd.module = 'ion.processes.data.transforms.ctd.ctd_L0_all' dpd.class_name = 'ctd_L0_all' data_process_definition_id = self.data_process_management.create_data_process_definition(dpd) self.addCleanup(self.data_process_management.delete_data_process_definition, data_process_definition_id) self.data_process_management.assign_stream_definition_to_data_process_definition(conductivity_stream_def_id, data_process_definition_id, binding='conductivity') self.data_process_management.assign_stream_definition_to_data_process_definition(temperature_stream_def_id, data_process_definition_id, binding='temperature') self.data_process_management.assign_stream_definition_to_data_process_definition(pressure_stream_def_id, data_process_definition_id, binding='pressure') data_process_id = self.data_process_management.create_data_process2(data_process_definition_id=data_process_definition_id, in_data_product_ids=[input_data_product_id], out_data_product_ids=[conductivity_data_product_id, temperature_data_product_id, pressure_data_product_id]) self.addCleanup(self.data_process_management.delete_data_process2, data_process_id) self.data_process_management.activate_data_process2(data_process_id) self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id) conductivity_validated = Event() def validate_conductivity(msg, route, stream_id): rdt = RecordDictionaryTool.load_from_granule(msg) np.testing.assert_array_almost_equal(rdt['conductivity'], np.array([4.2914])) conductivity_validated.set() self.setup_subscriber(conductivity_data_product_id, callback=validate_conductivity) temperature_validated = Event() def validate_temperature(msg, route, stream_id): rdt = RecordDictionaryTool.load_from_granule(msg) np.testing.assert_array_almost_equal(rdt['temp'], np.array([20.0])) temperature_validated.set() self.setup_subscriber(temperature_data_product_id, callback=validate_temperature) pressure_validated = Event() def validate_pressure(msg, route, stream_id): rdt = RecordDictionaryTool.load_from_granule(msg) 
np.testing.assert_array_almost_equal(rdt['pressure'], np.array([3.068])) pressure_validated.set() self.setup_subscriber(pressure_data_product_id, callback=validate_pressure) self.publish_to_plain_data_product(input_data_product_id) self.assertTrue(conductivity_validated.wait(10)) self.assertTrue(temperature_validated.wait(10)) self.assertTrue(pressure_validated.wait(10)) def get_named_stream_def(self, name): stream_def_ids, _ = self.resource_registry.find_resources(name=name, restype=RT.StreamDefinition, id_only=True) return stream_def_ids[0] def test_actors(self): input_data_product_id = self.ctd_plain_input_data_product() output_data_product_id = self.ctd_plain_density() actor = self.create_density_transform_function() route = {input_data_product_id: {output_data_product_id: actor}} config = DotDict() config.process.routes = route config.process.params.lat = 45. config.process.params.lon = -71. data_process_id = self.data_process_management.create_data_process2(in_data_product_ids=[input_data_product_id], out_data_product_ids=[output_data_product_id], configuration=config) self.addCleanup(self.data_process_management.delete_data_process2, data_process_id) self.data_process_management.activate_data_process2(data_process_id) self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id) validated = Event() def validation(msg, route, stream_id): rdt = RecordDictionaryTool.load_from_granule(msg) # The value I use is a double, the value coming back is only a float32 so there's some data loss but it should be precise to the 4th digit np.testing.assert_array_almost_equal(rdt['density'], np.array([1021.6839775385847]), decimal=4) validated.set() self.setup_subscriber(output_data_product_id, callback=validation) self.publish_to_plain_data_product(input_data_product_id) self.assertTrue(validated.wait(10)) def test_multi_in_out(self): input1 = self.ctd_plain_input_data_product() input2 = self.make_data_product('ctd_parsed_param_dict', 'input2') density_dp_id = self.ctd_plain_density() salinity_dp_id = self.ctd_plain_salinity() density_actor = self.create_density_transform_function() salinity_actor = self.create_salinity_transform_function() routes = { input1 : { density_dp_id : density_actor, salinity_dp_id : salinity_actor }, input2 : { density_dp_id : density_actor } } config = DotDict() config.process.routes = routes config.process.params.lat = 45. config.process.params.lon = -71. 
data_process_id = self.data_process_management.create_data_process2(in_data_product_ids=[input1, input2], out_data_product_ids=[density_dp_id, salinity_dp_id], configuration=config) self.addCleanup(self.data_process_management.delete_data_process2, data_process_id) self.data_process_management.activate_data_process2(data_process_id) self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id) density_validated = Event() salinity_validated = Event() def density_validation(msg, route, stream_id): rdt = RecordDictionaryTool.load_from_granule(msg) np.testing.assert_array_almost_equal(rdt['density'], np.array([1021.6839775385847]), decimal=4) density_validated.set() def salinity_validation(msg, route, stream_id): rdt = RecordDictionaryTool.load_from_granule(msg) np.testing.assert_array_almost_equal(rdt['salinity'], np.array([30.93513240786831]), decimal=4) salinity_validated.set() self.setup_subscriber(density_dp_id, callback=density_validation) self.setup_subscriber(salinity_dp_id, callback=salinity_validation) self.publish_to_plain_data_product(input1) self.assertTrue(density_validated.wait(10)) self.assertTrue(salinity_validated.wait(10)) density_validated.clear() salinity_validated.clear() self.publish_to_plain_data_product(input2) self.assertTrue(density_validated.wait(10)) self.assertFalse(salinity_validated.wait(0.75)) density_validated.clear() salinity_validated.clear() def test_visual_transform(self): input_data_product_id = self.ctd_plain_input_data_product() output_data_product_id = self.google_dt_data_product() dpd = DataProcessDefinition(name='visual transform') dpd.data_process_type = DataProcessTypeEnum.TRANSFORM dpd.module = 'ion.processes.data.transforms.viz.google_dt' dpd.class_name = 'VizTransformGoogleDT' #-------------------------------------------------------------------------------- # Walk before we base jump #-------------------------------------------------------------------------------- data_process_definition_id = self.data_process_management.create_data_process_definition(dpd) self.addCleanup(self.data_process_management.delete_data_process_definition, data_process_definition_id) data_process_id = self.data_process_management.create_data_process2(data_process_definition_id=data_process_definition_id, in_data_product_ids=[input_data_product_id], out_data_product_ids=[output_data_product_id]) self.addCleanup(self.data_process_management.delete_data_process2,data_process_id) self.data_process_management.activate_data_process2(data_process_id) self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id) validated = Event() def validation(msg, route, stream_id): rdt = RecordDictionaryTool.load_from_granule(msg) self.assertTrue(rdt['google_dt_components'] is not None) validated.set() self.setup_subscriber(output_data_product_id, callback=validation) self.publish_to_plain_data_product(input_data_product_id) self.assertTrue(validated.wait(10))
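# Both publish helpers above build timestamps as time.time() + 2208988800, the
# offset in seconds between the NTP epoch (1900-01-01) and the Unix epoch
# (1970-01-01). As the inline comments warn, this is a loose translation that
# ignores leap seconds -- fine for test granules, not for production. The same
# arithmetic as a named sketch:
NTP_UNIX_EPOCH_OFFSET = 2208988800  # seconds from 1900-01-01 to 1970-01-01

def unix_to_ntp(unix_seconds):
    """Loose Unix-to-NTP conversion; ignores leap seconds (test data only)."""
    return unix_seconds + NTP_UNIX_EPOCH_OFFSET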
class CtdTransformsIntTest(IonIntegrationTestCase): def setUp(self): super(CtdTransformsIntTest, self).setUp() self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.pubsub = PubsubManagementServiceClient() self.process_dispatcher = ProcessDispatcherServiceClient() self.dataset_management = DatasetManagementServiceClient() self.data_process_management = DataProcessManagementServiceClient() self.dataproduct_management = DataProductManagementServiceClient() self.resource_registry = ResourceRegistryServiceClient() # This is for the time values inside the packets going into the transform self.i = 0 # Cleanup of queue created by the subscriber self.queue_cleanup = [] self.data_process_cleanup = [] def _create_input_param_dict_for_test(self, parameter_dict_name = ''): pdict = ParameterDictionary() t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=numpy.dtype('float64'))) t_ctxt.axis = AxisTypeEnum.TIME t_ctxt.uom = 'seconds since 01-01-1900' pdict.add_context(t_ctxt) cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) cond_ctxt.uom = '' pdict.add_context(cond_ctxt) pres_ctxt = ParameterContext('pressure', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) pres_ctxt.uom = '' pdict.add_context(pres_ctxt) if parameter_dict_name == 'input_param_dict': temp_ctxt = ParameterContext('temperature', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) else: temp_ctxt = ParameterContext('temp', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) temp_ctxt.uom = '' pdict.add_context(temp_ctxt) dens_ctxt = ParameterContext('density', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) dens_ctxt.uom = '' pdict.add_context(dens_ctxt) sal_ctxt = ParameterContext('salinity', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) sal_ctxt.uom = '' pdict.add_context(sal_ctxt) #create temp streamdef so the data product can create the stream pc_list = [] for pc_k, pc in pdict.iteritems(): ctxt_id = self.dataset_management.create_parameter_context(pc_k, pc[1].dump()) pc_list.append(ctxt_id) if parameter_dict_name == 'input_param_dict': self.addCleanup(self.dataset_management.delete_parameter_context,ctxt_id) elif parameter_dict_name == 'output_param_dict' and pc[1].name == 'temp': self.addCleanup(self.dataset_management.delete_parameter_context,ctxt_id) pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, pc_list) self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id) return pdict_id def _get_new_ctd_L0_packet(self, stream_definition_id, length): rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id) rdt['time'] = numpy.arange(self.i, self.i+length) for field in rdt: if isinstance(rdt._pdict.get_context(field).param_type, QuantityType): rdt[field] = numpy.array([random.uniform(0.0,75.0) for i in xrange(length)]) g = rdt.to_granule() self.i+=length return g def _create_calibration_coefficients_dict(self): config = DotDict() config.process.calibration_coeffs = { 'temp_calibration_coeffs': { 'TA0' : 1.561342e-03, 'TA1' : 2.561486e-04, 'TA2' : 1.896537e-07, 'TA3' : 1.301189e-07, 'TOFFSET' : 0.000000e+00 }, 'cond_calibration_coeffs': { 'G' : -9.896568e-01, 'H' : 1.316599e-01, 'I' : -2.213854e-04, 'J' : 3.292199e-05, 'CPCOR' : -9.570000e-08, 'CTCOR' : 3.250000e-06, 'CSLOPE' : 1.000000e+00 }, 'pres_calibration_coeffs' : { 'PA0' : 4.960417e-02, 'PA1' : 4.883682e-04, 'PA2' : 
-5.687309e-12, 'PTCA0' : 5.249802e+05, 'PTCA1' : 7.595719e+00, 'PTCA2' : -1.322776e-01, 'PTCB0' : 2.503125e+01, 'PTCB1' : 5.000000e-05, 'PTCB2' : 0.000000e+00, 'PTEMPA0' : -6.431504e+01, 'PTEMPA1' : 5.168177e+01, 'PTEMPA2' : -2.847757e-01, 'POFFSET' : 0.000000e+00 } } return config def clean_queues(self): for queue in self.queue_cleanup: xn = self.container.ex_manager.create_xn_queue(queue) xn.delete() def cleaning_operations(self): for dproc_id in self.data_process_cleanup: self.data_process_management.delete_data_process(dproc_id) def test_ctd_L1_all(self): """ Test that packets are processed by the ctd_L1_all transform """ #----------- Data Process Definition -------------------------------- dpd_obj = IonObject(RT.DataProcessDefinition, name='CTDBP_L1_Transform', description='Take granules on the L0 stream which have the C, T and P data and separately apply algorithms and output on the L1 stream.', module='ion.processes.data.transforms.ctdbp.ctdbp_L1', class_name='CTDBP_L1_Transform') dprocdef_id = self.data_process_management.create_data_process_definition(dpd_obj) self.addCleanup(self.data_process_management.delete_data_process_definition, dprocdef_id) log.debug("created data process definition: id = %s", dprocdef_id) #----------- Data Products -------------------------------- # Construct temporal and spatial Coordinate Reference System objects tdom, sdom = time_series_domain() sdom = sdom.dump() tdom = tdom.dump() # Get the stream definition for the stream using the parameter dictionary L0_pdict_id = self._create_input_param_dict_for_test(parameter_dict_name = 'input_param_dict') L0_stream_def_id = self.pubsub.create_stream_definition(name='parsed', parameter_dictionary_id=L0_pdict_id) self.addCleanup(self.pubsub.delete_stream_definition, L0_stream_def_id) L1_pdict_id = self._create_input_param_dict_for_test(parameter_dict_name = 'output_param_dict') L1_stream_def_id = self.pubsub.create_stream_definition(name='L1_out', parameter_dictionary_id=L1_pdict_id) self.addCleanup(self.pubsub.delete_stream_definition, L1_stream_def_id) log.debug("Got the parsed parameter dictionary: id: %s", L0_pdict_id) log.debug("Got the stream def for parsed input: %s", L0_stream_def_id) log.debug("got the stream def for the output: %s", L1_stream_def_id) # Input data product L0_stream_dp_obj = IonObject(RT.DataProduct, name='L0_stream', description='L0 stream input to CTBP L1 transform', temporal_domain = tdom, spatial_domain = sdom) input_dp_id = self.dataproduct_management.create_data_product(data_product=L0_stream_dp_obj, stream_definition_id=L0_stream_def_id ) self.addCleanup(self.dataproduct_management.delete_data_product, input_dp_id) # output data product L1_stream_dp_obj = IonObject(RT.DataProduct, name='L1_stream', description='L1_stream output of CTBP L1 transform', temporal_domain = tdom, spatial_domain = sdom) L1_stream_dp_id = self.dataproduct_management.create_data_product(data_product=L1_stream_dp_obj, stream_definition_id=L1_stream_def_id ) self.addCleanup(self.dataproduct_management.delete_data_product, L1_stream_dp_id) # We need the key name here to be "L1_stream", since when the data process is launched, this name goes into # the config as in config.process.publish_streams.L1_stream when the config is used to launch the data process self.output_products = {'L1_stream' : L1_stream_dp_id} out_stream_ids, _ = self.resource_registry.find_objects(L1_stream_dp_id, PRED.hasStream, RT.Stream, True) self.assertTrue(len(out_stream_ids)) output_stream_id = out_stream_ids[0] config = 
self._create_calibration_coefficients_dict() dproc_id = self.data_process_management.create_data_process( dprocdef_id, [input_dp_id], self.output_products, config) self.addCleanup(self.data_process_management.delete_data_process, dproc_id) log.debug("Created a data process for ctdbp_L1. id: %s", dproc_id) # Activate the data process self.data_process_management.activate_data_process(dproc_id) self.addCleanup(self.data_process_management.deactivate_data_process, dproc_id) #----------- Find the stream that is associated with the input data product when it was created by create_data_product() -------------------------------- stream_ids, _ = self.resource_registry.find_objects(input_dp_id, PRED.hasStream, RT.Stream, True) input_stream_id = stream_ids[0] input_stream = self.resource_registry.read(input_stream_id) stream_route = input_stream.stream_route log.debug("The input stream for the L1 transform: %s", input_stream_id) #----------- Create a subscriber that will listen to the transform's output -------------------------------- ar = gevent.event.AsyncResult() def subscriber(m,r,s): ar.set(m) sub = StandaloneStreamSubscriber(exchange_name='sub', callback=subscriber) sub_id = self.pubsub.create_subscription('subscriber_to_transform', stream_ids=[output_stream_id], exchange_name='sub') self.addCleanup(self.pubsub.delete_subscription, sub_id) self.pubsub.activate_subscription(sub_id) self.addCleanup(self.pubsub.deactivate_subscription, sub_id) sub.start() self.addCleanup(sub.stop) #----------- Publish on that stream so that the transform can receive it -------------------------------- pub = StandaloneStreamPublisher(input_stream_id, stream_route) publish_granule = self._get_new_ctd_L0_packet(stream_definition_id=L0_stream_def_id, length = 5) pub.publish(publish_granule) log.debug("Published the following granule: %s", publish_granule) granule_from_transform = ar.get(timeout=20) log.debug("Got the following granule from the transform: %s", granule_from_transform) # Check that the granule published by the L1 transform has the right properties self._check_granule_from_transform(granule_from_transform) def _check_granule_from_transform(self, granule): """ An internal method to check if a granule has the right properties """ rdt = RecordDictionaryTool.load_from_granule(granule) self.assertIn('pressure', rdt) self.assertIn('temp', rdt) self.assertIn('conductivity', rdt) self.assertIn('time', rdt)
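# The subscriber wiring above is the standard gevent AsyncResult handshake: the
# callback stores the first granule it sees, and ar.get(timeout=20) either
# returns that granule or raises a timeout. In isolation (a sketch, assuming
# only gevent and StandaloneStreamSubscriber as used above; it also assumes the
# pubsub subscription binding exchange_name to the stream has already been
# created and activated, as in the test):
def await_one_granule(exchange_name, publish, timeout=20):
    """Start a subscriber, run publish(), and block for the first granule."""
    ar = gevent.event.AsyncResult()
    sub = StandaloneStreamSubscriber(exchange_name=exchange_name,
                                     callback=lambda m, r, s: ar.set(m))
    sub.start()
    try:
        publish()
        return ar.get(timeout=timeout)  # raises gevent.Timeout if nothing arrives
    finally:
        sub.stop()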
def test_dm_integration(self): ''' test_salinity_transform Test full DM Services Integration ''' cc = self.container assertions = self.assertTrue #----------------------------- # Copy below here to run as a script (don't forget the imports of course!) #----------------------------- # Create some service clients... pubsub_management_service = PubsubManagementServiceClient(node=cc.node) ingestion_management_service = IngestionManagementServiceClient( node=cc.node) dataset_management_service = DatasetManagementServiceClient( node=cc.node) data_retriever_service = DataRetrieverServiceClient(node=cc.node) transform_management_service = TransformManagementServiceClient( node=cc.node) process_dispatcher = ProcessDispatcherServiceClient(node=cc.node) # declare some handy variables datastore_name = 'test_dm_integration' ### ### In the beginning there were two stream definitions... ### # create a stream definition for the data from the ctd simulator ctd_stream_def = SBE37_CDM_stream_definition() ctd_stream_def_id = pubsub_management_service.create_stream_definition( container=ctd_stream_def, name='Simulated CTD data') # create a stream definition for the data from the salinity Transform sal_stream_def_id = pubsub_management_service.create_stream_definition( container=SalinityTransform.outgoing_stream_def, name='Scalar Salinity data stream') ### ### And two process definitions... ### # one for the ctd simulator... producer_definition = ProcessDefinition() producer_definition.executable = { 'module': 'ion.processes.data.ctd_stream_publisher', 'class': 'SimpleCtdPublisher' } ctd_sim_procdef_id = process_dispatcher.create_process_definition( process_definition=producer_definition) # one for the salinity transform producer_definition = ProcessDefinition() producer_definition.executable = { 'module': 'ion.processes.data.transforms.ctd.ctd_L2_salinity', 'class': 'SalinityTransform' } salinity_transform_procdef_id = process_dispatcher.create_process_definition( process_definition=producer_definition) #--------------------------- # Set up ingestion - this is an operator concern - not done by SA in a deployed system #--------------------------- # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile log.debug('Calling create_ingestion_configuration') ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration( exchange_point_id='science_data', couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'), number_of_workers=1) # ingestion_management_service.activate_ingestion_configuration( ingestion_configuration_id=ingestion_configuration_id) #--------------------------- # Set up the producer (CTD Simulator) #--------------------------- # Create the stream ctd_stream_id = pubsub_management_service.create_stream( stream_definition_id=ctd_stream_def_id) # Set up the datasets ctd_dataset_id = dataset_management_service.create_dataset( stream_id=ctd_stream_id, datastore_name=datastore_name, view_name='datasets/stream_join_granule') # Configure ingestion of this dataset ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration( dataset_id=ctd_dataset_id, archive_data=True, archive_metadata=True, ingestion_configuration_id= ingestion_configuration_id, # you need to know the ingestion configuration id!
) # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service #--------------------------- # Set up the salinity transform #--------------------------- # Create the stream sal_stream_id = pubsub_management_service.create_stream( stream_definition_id=sal_stream_def_id) # Set up the datasets sal_dataset_id = dataset_management_service.create_dataset( stream_id=sal_stream_id, datastore_name=datastore_name, view_name='datasets/stream_join_granule') # Configure ingestion of the salinity as a dataset sal_dataset_config_id = ingestion_management_service.create_dataset_configuration( dataset_id=sal_dataset_id, archive_data=True, archive_metadata=True, ingestion_configuration_id= ingestion_configuration_id, # you need to know the ingestion configuration id! ) # Hold onto sal_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service # Create a subscription as input to the transform sal_transform_input_subscription_id = pubsub_management_service.create_subscription( query=StreamQuery(stream_ids=[ ctd_stream_id, ]), exchange_name='salinity_transform_input' ) # how do we make these names??? i.e. Should they be anonymous? # create the salinity transform sal_transform_id = transform_management_service.create_transform( name='example salinity transform', in_subscription_id=sal_transform_input_subscription_id, out_streams={ 'output': sal_stream_id, }, process_definition_id=salinity_transform_procdef_id, # no configuration needed at this time... ) # start the transform - for a test case it makes sense to do it before starting the producer but it is not required transform_management_service.activate_transform( transform_id=sal_transform_id) # Start the ctd simulator to produce some data configuration = { 'process': { 'stream_id': ctd_stream_id, } } ctd_sim_pid = process_dispatcher.schedule_process( process_definition_id=ctd_sim_procdef_id, configuration=configuration) ### ### Make a subscriber in the test to listen for salinity data ### salinity_subscription_id = pubsub_management_service.create_subscription( query=StreamQuery([ sal_stream_id, ]), exchange_name='salinity_test', name="test salinity subscription", ) pid = cc.spawn_process(name='dummy_process_for_test', module='pyon.ion.process', cls='SimpleProcess', config={}) dummy_process = cc.proc_manager.procs[pid] subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node) result = gevent.event.AsyncResult() results = [] def message_received(message, headers): # Heads log.warn('Salinity data received!') results.append(message) if len(results) > 3: result.set(True) subscriber = subscriber_registrar.create_subscriber( exchange_name='salinity_test', callback=message_received) subscriber.start() # after the queue has been created it is safe to activate the subscription pubsub_management_service.activate_subscription( subscription_id=salinity_subscription_id) # Assert that we have received data assertions(result.get(timeout=10)) # stop the flow parse the messages... 
import numpy process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data for message in results: psd = PointSupplementStreamParser( stream_definition=SalinityTransform.outgoing_stream_def, stream_granule=message) # Test the handy info method for the names of fields in the stream def assertions('salinity' in psd.list_field_names()) # you have to know the name of the coverage in the stream def salinity = psd.get_values('salinity') assertions(isinstance(salinity, numpy.ndarray)) assertions(numpy.nanmin(salinity) > 0.0) # salinity should always be greater than 0
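# message_received above only sets the AsyncResult after more than three
# granules have been collected, which guards against declaring success on a
# single lucky message. The same idea as a small callable (hypothetical sketch,
# gevent-based like the test; the callback signature matches
# subscriber_registrar.create_subscriber above):
class CountingCollector(object):
    """Collect messages; set an AsyncResult once more than `threshold` arrive."""
    def __init__(self, threshold=3):
        self.threshold = threshold
        self.messages = []
        self.result = gevent.event.AsyncResult()
    def __call__(self, message, headers):
        self.messages.append(message)
        if len(self.messages) > self.threshold:
            self.result.set(True)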
class TestTransformWorkerSubscriptions(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.dataset_management_client = DatasetManagementServiceClient( node=self.container.node) self.pubsub_client = PubsubManagementServiceClient( node=self.container.node) self.dataproductclient = DataProductManagementServiceClient( node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient( node=self.container.node) self.processdispatchclient = ProcessDispatcherServiceClient( node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient( node=self.container.node) self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_multi_subscriptions(self): self.dp_list = [] self.event1_verified = Event() self.event2_verified = Event() self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct input_dp_obj = IonObject(RT.DataProduct, name='input_data_product_one', description='input test stream one') self.input_dp_one_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) input_dp_obj = IonObject(RT.DataProduct, name='input_data_product_two', description='input test stream two') self.input_dp_two_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) #retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_one_id, PRED.hasStream, RT.Stream, True) self.stream_one_id = stream_ids[0] stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_two_id, PRED.hasStream, RT.Stream, True) self.stream_two_id = stream_ids[0] dpd_id = self.create_data_process_definition() dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products( ) first_dp_id = self.create_data_process_one(dpd_id, dp1_func_output_dp_id) second_dp_id = self.create_data_process_two(dpd_id, self.input_dp_two_id, dp2_func_output_dp_id) #retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=first_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) #create subscription to stream ONE, create data process and publish granule on stream ONE #create a queue to catch the published granules of stream ONE self.subscription_one_id = self.pubsub_client.create_subscription( name='parsed_subscription_one', stream_ids=[self.stream_one_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_one_id) self.pubsub_client.activate_subscription(self.subscription_one_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_one_id) stream_route_one = self.pubsub_client.read_stream_route( self.stream_one_id) self.publisher_one = StandaloneStreamPublisher( stream_id=self.stream_one_id, 
stream_route=stream_route_one) self.start_event_listener() #data process 1 adds conductivity + pressure and puts the result in salinity rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher_one.publish(msg=rdt.to_granule(), stream_id=self.stream_one_id) #retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=second_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) #create subscription to stream ONE and TWO, move TW subscription, create data process and publish granule on stream TWO #create a queue to catch the published granules of stream TWO self.subscription_two_id = self.pubsub_client.create_subscription( name='parsed_subscription_one_two', stream_ids=[self.stream_two_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_two_id) self.pubsub_client.activate_subscription(self.subscription_two_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_two_id) stream_route_two = self.pubsub_client.read_stream_route( self.stream_two_id) self.publisher_two = StandaloneStreamPublisher( stream_id=self.stream_two_id, stream_route=stream_route_two) #data process 1 adds conductivity + pressure and puts the result in salinity rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher_one.publish(msg=rdt.to_granule(), stream_id=self.stream_one_id) #data process 2 adds salinity + pressure and puts the result in conductivity rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [22] rdt['pressure'] = [4] rdt['salinity'] = [1] self.publisher_two.publish(msg=rdt.to_granule(), stream_id=self.stream_two_id) self.assertTrue(self.event2_verified.wait(self.wait_time)) self.assertTrue(self.event1_verified.wait(self.wait_time)) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_two_transforms_inline(self): self.dp_list = [] self.event1_verified = Event() self.event2_verified = Event() self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct input_dp_obj = IonObject(RT.DataProduct, name='input_data_product_one', description='input test stream one') self.input_dp_one_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) dpd_id = self.create_data_process_definition() dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products( ) first_dp_id = self.create_data_process_one(dpd_id, dp1_func_output_dp_id) second_dp_id = self.create_data_process_two(dpd_id, dp1_func_output_dp_id, dp2_func_output_dp_id) #retrieve subscription from data process one subscription_objs, _ = self.rrclient.find_objects( 
subject=first_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) #retrieve the Streams for these data products stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_one_id, PRED.hasStream, RT.Stream, True) self.stream_one_id = stream_ids[0] #the input to data process two is the output from data process one stream_ids, assoc_ids = self.rrclient.find_objects( dp1_func_output_dp_id, PRED.hasStream, RT.Stream, True) self.stream_two_id = stream_ids[0] # Run provenance on the output dataproduct of the second data process to see that all the links are as expected output_data_product_provenance = self.dataproductclient.get_data_product_provenance( dp2_func_output_dp_id) # Do a basic check that there are 3 entries in the provenance graph: the input product, the intermediate product and the final output self.assertTrue(len(output_data_product_provenance) == 3) # confirm that the linking from the output dataproduct to the input dataproduct is correct self.assertTrue( dp1_func_output_dp_id in output_data_product_provenance[dp2_func_output_dp_id]['parents']) self.assertTrue( self.input_dp_one_id in output_data_product_provenance[dp1_func_output_dp_id]['parents']) #create a subscription covering streams ONE and TWO #create a queue to catch the granules published on both streams subscription_id = self.pubsub_client.create_subscription( name='parsed_subscription', stream_ids=[self.stream_one_id, self.stream_two_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, subscription_id) self.pubsub_client.activate_subscription(subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id) stream_route_one = self.pubsub_client.read_stream_route( self.stream_one_id) self.publisher_one = StandaloneStreamPublisher( stream_id=self.stream_one_id, stream_route=stream_route_one) #retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=second_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) #data process 1 adds conductivity + pressure and puts the result in salinity #data process 2 adds salinity + pressure and puts the result in conductivity self.start_event_listener() rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher_one.publish(msg=rdt.to_granule(), stream_id=self.stream_one_id) self.assertTrue(self.event2_verified.wait(self.wait_time)) self.assertTrue(self.event1_verified.wait(self.wait_time)) def create_data_process_definition(self): #two data processes using one transform and one DPD # Set up DPD and DP #2 - array add function tf_obj = IonObject( RT.TransformFunction, name='add_array_func', description='adds values in an array', function='add_arrays', module="ion_example.add_arrays", arguments=['arr1', 'arr2'], function_type=TransformFunctionType.TRANSFORM, uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject( RT.DataProcessDefinition, name='add_arrays', description='adds the values of two arrays', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS, ) add_array_dpd_id =
self.dataprocessclient.create_data_process_definition( data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition( self.stream_def_id, add_array_dpd_id, binding='add_array_func') return add_array_dpd_id def create_data_process_one(self, data_process_definition_id, output_dataproduct): # Create the data process #data process 1 adds conductivity + pressure and puts the result in salinity argument_map = {"arr1": "conductivity", "arr2": "pressure"} output_param = "salinity" dp1_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=data_process_definition_id, inputs=[self.input_dp_one_id], outputs=[output_dataproduct], argument_map=argument_map, out_param_name=output_param) self.damsclient.register_process(dp1_data_process_id) self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id) self.dp_list.append(dp1_data_process_id) return dp1_data_process_id def create_data_process_two(self, data_process_definition_id, input_dataproduct, output_dataproduct): # Create the data process #data process 2 adds salinity + pressure and puts the result in conductivity argument_map = {'arr1': 'salinity', 'arr2': 'pressure'} output_param = 'conductivity' dp2_func_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=data_process_definition_id, inputs=[input_dataproduct], outputs=[output_dataproduct], argument_map=argument_map, out_param_name=output_param) self.damsclient.register_process(dp2_func_data_process_id) self.addCleanup(self.dataprocessclient.delete_data_process, dp2_func_data_process_id) self.dp_list.append(dp2_func_data_process_id) return dp2_func_data_process_id def create_output_data_products(self): dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition( name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id) dp1_output_dp_obj = IonObject(RT.DataProduct, name='data_process1_data_product', description='output of add array func') dp1_func_output_dp_id = self.dataproductclient.create_data_product( dp1_output_dp_obj, dp1_outgoing_stream_id) self.addCleanup(self.dataproductclient.delete_data_product, dp1_func_output_dp_id) # Retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id, PRED.hasStream, None, True) self._output_stream_one_id = stream_ids[0] dp2_func_outgoing_stream_id = self.pubsub_client.create_stream_definition( name='dp2_stream', parameter_dictionary_id=self.parameter_dict_id) dp2_func_output_dp_obj = IonObject( RT.DataProduct, name='data_process2_data_product', description='output of add array func') dp2_func_output_dp_id = self.dataproductclient.create_data_product( dp2_func_output_dp_obj, dp2_func_outgoing_stream_id) self.addCleanup(self.dataproductclient.delete_data_product, dp2_func_output_dp_id) # Retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger stream_ids, _ = self.rrclient.find_objects(dp2_func_output_dp_id, PRED.hasStream, None, True) self._output_stream_two_id = stream_ids[0] subscription_id = self.pubsub_client.create_subscription( 'validator', data_product_ids=[dp1_func_output_dp_id, dp2_func_output_dp_id]) self.addCleanup(self.pubsub_client.delete_subscription, subscription_id) def on_granule(msg, route, stream_id): log.debug('recv_packet stream_id: %s route: %s msg: %s', stream_id, route, msg) self.validate_output_granule(msg, route, stream_id) 
validator = StandaloneStreamSubscriber('validator', callback=on_granule) validator.start() self.addCleanup(validator.stop) self.pubsub_client.activate_subscription(subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id) return dp1_func_output_dp_id, dp2_func_output_dp_id def validate_event(self, *args, **kwargs): """ This method is a callback function for receiving DataProcessStatusEvent. """ data_process_event = args[0] log.debug("DataProcessStatusEvent: %s", str(data_process_event.__dict__)) #if data process already created, check origin if not 'data process assigned to transform worker' in data_process_event.description: self.assertIn(data_process_event.origin, self.dp_list) def validate_output_granule(self, msg, route, stream_id): self.assertTrue( stream_id in [self._output_stream_one_id, self._output_stream_two_id]) rdt = RecordDictionaryTool.load_from_granule(msg) log.debug('validate_output_granule stream_id: %s', stream_id) if stream_id == self._output_stream_one_id: sal_val = rdt['salinity'] log.debug('validate_output_granule sal_val: %s', sal_val) np.testing.assert_array_equal(sal_val, np.array([3])) self.event1_verified.set() else: cond_val = rdt['conductivity'] log.debug('validate_output_granule cond_val: %s', cond_val) np.testing.assert_array_equal(cond_val, np.array([5])) self.event2_verified.set() def start_event_listener(self): es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event) es.start() self.addCleanup(es.stop)
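# The DataProcessDefinition above points at function 'add_arrays' in the
# ion_example.add_arrays module (fetched from the egg URI), taking arguments
# arr1 and arr2. The assertions -- conductivity 1 + pressure 2 yields salinity
# 3, salinity 1 + pressure 4 yields conductivity 5 -- imply plain element-wise
# addition, so the packaged function presumably reduces to something like this
# sketch:
import numpy as np

def add_arrays(arr1, arr2):
    """Element-wise sum of two arrays (the behavior the tests above expect)."""
    return np.asarray(arr1) + np.asarray(arr2)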
class RecordDictionaryIntegrationTest(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.dataset_management = DatasetManagementServiceClient() self.pubsub_management = PubsubManagementServiceClient() self.rdt = None self.data_producer_id = None self.provider_metadata_update = None self.event = Event() def verify_incoming(self, m,r,s): rdt = RecordDictionaryTool.load_from_granule(m) for k,v in rdt.iteritems(): np.testing.assert_array_equal(v, self.rdt[k]) self.assertEquals(m.data_producer_id, self.data_producer_id) self.assertEquals(m.provider_metadata_update, self.provider_metadata_update) self.assertNotEqual(m.creation_timestamp, None) self.event.set() def test_serialize_compatability(self): ph = ParameterHelper(self.dataset_management, self.addCleanup) pdict_id = ph.create_extended_parsed() stream_def_id = self.pubsub_management.create_stream_definition('ctd extended', parameter_dictionary_id=pdict_id) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) stream_id, route = self.pubsub_management.create_stream('ctd1', 'xp1', stream_definition_id=stream_def_id) self.addCleanup(self.pubsub_management.delete_stream, stream_id) sub_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id]) self.addCleanup(self.pubsub_management.delete_subscription, sub_id) self.pubsub_management.activate_subscription(sub_id) self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id) verified = Event() def verifier(msg, route, stream_id): for k,v in msg.record_dictionary.iteritems(): if v is not None: self.assertIsInstance(v, np.ndarray) rdt = RecordDictionaryTool.load_from_granule(msg) for k,v in rdt.iteritems(): self.assertIsInstance(rdt[k], np.ndarray) self.assertIsInstance(v, np.ndarray) verified.set() subscriber = StandaloneStreamSubscriber('sub1', callback=verifier) subscriber.start() self.addCleanup(subscriber.stop) publisher = StandaloneStreamPublisher(stream_id,route) rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) ph.fill_rdt(rdt,10) publisher.publish(rdt.to_granule()) self.assertTrue(verified.wait(60)) def test_granule(self): pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"}) pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict') self.addCleanup(self.pubsub_management.delete_stream_definition,stream_def_id) stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id) self.addCleanup(self.pubsub_management.delete_stream,stream_id) publisher = StandaloneStreamPublisher(stream_id, route) subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming) subscriber.start() self.addCleanup(subscriber.stop) subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id]) self.pubsub_management.activate_subscription(subscription_id) rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) rdt['time'] = np.arange(10) rdt['temp'] = np.random.randn(10) * 10 + 30 rdt['pressure'] = [20] * 10 self.assertEquals(set(pdict.keys()), set(rdt.fields)) self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter) 
self.assertEquals(rdt._stream_config['reference_designator'],"GA03FLMA-RI001-13-CTDMOG999") self.rdt = rdt self.data_producer_id = 'data_producer' self.provider_metadata_update = {1:1} publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1:1})) self.assertTrue(self.event.wait(10)) self.pubsub_management.deactivate_subscription(subscription_id) self.pubsub_management.delete_subscription(subscription_id) rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) rdt['time'] = np.array([None,None,None]) self.assertTrue(rdt['time'] is None) rdt['time'] = np.array([None, 1, 2]) self.assertEquals(rdt['time'][0], rdt.fill_value('time')) stream_def_obj = self.pubsub_management.read_stream_definition(stream_def_id) rdt = RecordDictionaryTool(stream_definition=stream_def_obj) rdt['time'] = np.arange(20) rdt['temp'] = np.arange(20) granule = rdt.to_granule() rdt = RecordDictionaryTool.load_from_granule(granule) np.testing.assert_array_equal(rdt['time'], np.arange(20)) np.testing.assert_array_equal(rdt['temp'], np.arange(20)) def test_filter(self): pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) filtered_stream_def_id = self.pubsub_management.create_stream_definition('filtered', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp']) self.addCleanup(self.pubsub_management.delete_stream_definition, filtered_stream_def_id) rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id) self.assertEquals(rdt._available_fields,['time','temp']) rdt['time'] = np.arange(20) rdt['temp'] = np.arange(20) with self.assertRaises(KeyError): rdt['pressure'] = np.arange(20) granule = rdt.to_granule() rdt2 = RecordDictionaryTool.load_from_granule(granule) self.assertEquals(rdt._available_fields, rdt2._available_fields) self.assertEquals(rdt.fields, rdt2.fields) for k,v in rdt.iteritems(): self.assertTrue(np.array_equal(rdt[k], rdt2[k])) def test_rdt_param_funcs(self): param_funcs = { 'identity' : { 'function_type' : PFT.PYTHON, 'owner' : 'ion_functions.data.interpolation', 'function' : 'identity', 'args':['x'] }, 'ctd_tempwat' : { 'function_type' : PFT.PYTHON, 'owner' : 'ion_functions.data.ctd_functions', 'function' : 'ctd_sbe37im_tempwat', 'args' : ['t0'] }, 'ctd_preswat' : { 'function_type' : PFT.PYTHON, 'owner' : 'ion_functions.data.ctd_functions', 'function' : 'ctd_sbe37im_preswat', 'args' : ["p0", "p_range_psia"] }, 'ctd_condwat' : { 'function_type' : PFT.PYTHON, 'owner' : 'ion_functions.data.ctd_functions', 'function' : 'ctd_sbe37im_condwat', 'args' : ['c0'] }, 'ctd_pracsal' : { 'function_type' : PFT.PYTHON, 'owner' : 'ion_functions.data.ctd_functions', 'function' : 'ctd_pracsal', 'args' : ['c', 't', 'p'] }, 'ctd_density' : { 'function_type' : PFT.PYTHON, 'owner' : 'ion_functions.data.ctd_functions', 'function' : 'ctd_density', 'args' : ['SP','t','p','lat','lon'] } } pfunc_ids = {} for name, param_def in param_funcs.iteritems(): paramfunc = ParameterFunction(name, **param_def) pf_id = self.dataset_management.create_parameter_function(paramfunc) pfunc_ids[name] = pf_id params = { 'time' : { 'parameter_type' : 'quantity', 'value_encoding' : 'float64', 'units' : 'seconds since 1900-01-01' }, 'temperature_counts' : { 'parameter_type' : 'quantity', 'value_encoding' : 'float32', 'units' : '1' }, 'pressure_counts' : { 'parameter_type' : 'quantity', 'value_encoding' : 'float32', 'units' : '1' }, 'conductivity_counts' : { 'parameter_type' : 'quantity', 'value_encoding' : 'float32', 'units' : '1' 
}, 'temperature' : { 'parameter_type' : 'function', 'parameter_function_id' : pfunc_ids['ctd_tempwat'], 'parameter_function_map' : { 't0' : 'temperature_counts'}, 'value_encoding' : 'float32', 'units' : 'deg_C' }, 'pressure' : { 'parameter_type' : 'function', 'parameter_function_id' : pfunc_ids['ctd_preswat'], 'parameter_function_map' : {'p0' : 'pressure_counts', 'p_range_psia' : 679.34040721}, 'value_encoding' : 'float32', 'units' : 'dbar' }, 'conductivity' : { 'parameter_type' : 'function', 'parameter_function_id' : pfunc_ids['ctd_condwat'], 'parameter_function_map' : {'c0' : 'conductivity_counts'}, 'value_encoding' : 'float32', 'units' : 'S m-1' }, 'salinity' : { 'parameter_type' : 'function', 'parameter_function_id' : pfunc_ids['ctd_pracsal'], 'parameter_function_map' : {'c' : 'conductivity', 't' : 'temperature', 'p' : 'pressure'}, 'value_encoding' : 'float32', 'units' : '1' }, 'density' : { 'parameter_type' : 'function', 'parameter_function_id' : pfunc_ids['ctd_density'], 'parameter_function_map' : { 'SP' : 'salinity', 't' : 'temperature', 'p' : 'pressure', 'lat' : 'lat', 'lon' : 'lon' }, 'value_encoding' : 'float32', 'units' : 'kg m-3' }, 'lat' : { 'parameter_type' : 'sparse', 'value_encoding' : 'float32', 'units' : 'degrees_north' }, 'lon' : { 'parameter_type' : 'sparse', 'value_encoding' : 'float32', 'units' : 'degrees_east' } } param_dict = {} for name, param in params.iteritems(): pcontext = ParameterContext(name, **param) param_id = self.dataset_management.create_parameter(pcontext) param_dict[name] = param_id pdict_id = self.dataset_management.create_parameter_dictionary('ctd_test', param_dict.values(), 'time') stream_def_id = self.pubsub_management.create_stream_definition('ctd_test', parameter_dictionary_id=pdict_id) rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) rdt['time'] = [0] rdt['temperature_counts'] = [280000] rdt['conductivity_counts'] = [100000] rdt['pressure_counts'] = [2789] rdt['lat'] = [45] rdt['lon'] = [-71] np.testing.assert_allclose(rdt['density'], np.array([1001.00543606])) def test_rdt_lookup(self): rdt = self.create_lookup_rdt() self.assertTrue('offset_a' in rdt.lookup_values()) self.assertFalse('offset_b' in rdt.lookup_values()) rdt['time'] = [0] rdt['temp'] = [10.0] rdt['offset_a'] = [2.0] self.assertEquals(rdt['offset_b'], None) self.assertEquals(rdt.lookup_values(), ['offset_a']) np.testing.assert_array_almost_equal(rdt['calibrated'], np.array([12.0])) svm = StoredValueManager(self.container) svm.stored_value_cas('coefficient_document', {'offset_b': 2.0}) svm.stored_value_cas("GA03FLMA-RI001-13-CTDMOG999_OFFSETC", {'offset_c': 3.0}) rdt.fetch_lookup_values() np.testing.assert_array_equal(rdt['offset_b'], np.array([2.0])) np.testing.assert_array_equal(rdt['calibrated_b'], np.array([14.0])) np.testing.assert_array_equal(rdt['offset_c'], np.array([3.0])) def create_rdt(self): contexts, pfuncs = self.create_pfuncs() context_ids = list(contexts.itervalues()) pdict_id = self.dataset_management.create_parameter_dictionary(name='functional_pdict', parameter_context_ids=context_ids, temporal_context='test_TIME') self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id) stream_def_id = self.pubsub_management.create_stream_definition('functional', parameter_dictionary_id=pdict_id) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) return rdt def create_lookup_rdt(self): ph = ParameterHelper(self.dataset_management, self.addCleanup) pdict_id
= ph.create_lookups() stream_def_id = self.pubsub_management.create_stream_definition('lookup', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"}) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) return rdt def create_pfuncs(self): contexts = {} funcs = {} t_ctxt = ParameterContext(name='TIME', parameter_type='quantity', value_encoding='float64', units='seconds since 1900-01-01') t_ctxt_id = self.dataset_management.create_parameter(t_ctxt) contexts['TIME'] = t_ctxt_id lat_ctxt = ParameterContext(name='LAT', parameter_type="sparse", value_encoding='float32', units='degrees_north') lat_ctxt_id = self.dataset_management.create_parameter(lat_ctxt) contexts['LAT'] = lat_ctxt_id lon_ctxt = ParameterContext(name='LON', parameter_type='sparse', value_encoding='float32', units='degrees_east') lon_ctxt_id = self.dataset_management.create_parameter(lon_ctxt) contexts['LON'] = lon_ctxt_id # Independent Parameters # Temperature - values expected to be the decimal results of conversion from hex temp_ctxt = ParameterContext(name='TEMPWAT_L0', parameter_type='quantity', value_encoding='float32', units='deg_C') temp_ctxt_id = self.dataset_management.create_parameter(temp_ctxt) contexts['TEMPWAT_L0'] = temp_ctxt_id # Conductivity - values expected to be the decimal results of conversion from hex cond_ctxt = ParameterContext(name='CONDWAT_L0', parameter_type='quantity', value_encoding='float32', units='S m-1') cond_ctxt_id = self.dataset_management.create_parameter(cond_ctxt) contexts['CONDWAT_L0'] = cond_ctxt_id # Pressure - values expected to be the decimal results of conversion from hex press_ctxt = ParameterContext(name='PRESWAT_L0', parameter_type='quantity', value_encoding='float32', units='dbar') press_ctxt_id = self.dataset_management.create_parameter(press_ctxt) contexts['PRESWAT_L0'] = press_ctxt_id # Dependent Parameters # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10 tl1_func = '(T / 10000) - 10' expr = ParameterFunction(name='TEMPWAT_L1', function_type=PFT.NUMEXPR, function=tl1_func, args=['T']) expr_id = self.dataset_management.create_parameter_function(expr) funcs['TEMPWAT_L1'] = expr_id tl1_pmap = {'T': 'TEMPWAT_L0'} tempL1_ctxt = ParameterContext(name='TEMPWAT_L1', parameter_type='function', parameter_function_id=expr_id, parameter_function_map=tl1_pmap, value_encoding='float32', units='deg_C') tempL1_ctxt_id = self.dataset_management.create_parameter(tempL1_ctxt) contexts['TEMPWAT_L1'] = tempL1_ctxt_id # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5 cl1_func = '(C / 100000) - 0.5' expr = ParameterFunction(name='CONDWAT_L1', function_type=PFT.NUMEXPR, function=cl1_func, args=['C']) expr_id = self.dataset_management.create_parameter_function(expr) funcs['CONDWAT_L1'] = expr_id cl1_pmap = {'C': 'CONDWAT_L0'} condL1_ctxt = ParameterContext(name='CONDWAT_L1', parameter_type='function', parameter_function_id=expr_id, parameter_function_map=cl1_pmap, value_encoding='float32', units='S m-1') condL1_ctxt_id = self.dataset_management.create_parameter(condL1_ctxt) contexts['CONDWAT_L1'] = condL1_ctxt_id # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721 # PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range) pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)' expr = ParameterFunction(name='PRESWAT_L1',function=pl1_func,function_type=PFT.NUMEXPR,args=['P','p_range']) expr_id = 
self.dataset_management.create_parameter_function(expr) funcs['PRESWAT_L1'] = expr_id pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721} presL1_ctxt = ParameterContext(name='PRESWAT_L1', parameter_type='function', parameter_function_id=expr_id, parameter_function_map=pl1_pmap, value_encoding='float32', units='dbar') presL1_ctxt_id = self.dataset_management.create_parameter(presL1_ctxt) contexts['PRESWAT_L1'] = presL1_ctxt_id # Density & practical salinity calculated using the Gibbs Seawater library - available via the python-gsw project: # https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1 # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1) owner = 'gsw' sal_func = 'SP_from_C' sal_arglist = ['C', 't', 'p'] expr = ParameterFunction(name='PRACSAL', function_type=PFT.PYTHON, function=sal_func, owner=owner, args=sal_arglist) expr_id = self.dataset_management.create_parameter_function(expr) funcs['PRACSAL'] = expr_id # Wrap CONDWAT_L1 * 10 as its own derived parameter so PRACSAL's 'C' argument can reference it. c10_f = ParameterFunction(name='c10', function_type=PFT.NUMEXPR, function='C*10', args=['C']) expr_id = self.dataset_management.create_parameter_function(c10_f) c10 = ParameterContext(name='c10', parameter_type='function', parameter_function_id=expr_id, parameter_function_map={'C': 'CONDWAT_L1'}, value_encoding='float32', units='1') c10_id = self.dataset_management.create_parameter(c10) contexts['c10'] = c10_id sal_pmap = {'C': 'c10', 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'} sal_ctxt = ParameterContext(name='PRACSAL', parameter_type='function', parameter_function_id=funcs['PRACSAL'], parameter_function_map=sal_pmap, value_encoding='float32', units='1') sal_ctxt_id = self.dataset_management.create_parameter(sal_ctxt) contexts['PRACSAL'] = sal_ctxt_id # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude) # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1) # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1) owner = 'gsw' abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON', 'LAT']) cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1']) dens_expr = PythonFunction('DENSITY', owner, 'rho', [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1']) dens_ctxt = CoverageParameterContext('DENSITY', param_type=ParameterFunctionType(dens_expr), variability=VariabilityEnum.TEMPORAL) dens_ctxt.uom = 'kg m-3' dens_ctxt_id = self.dataset_management.create_parameter_context(name='DENSITY', parameter_context=dens_ctxt.dump()) self.addCleanup(self.dataset_management.delete_parameter_context, dens_ctxt_id) contexts['DENSITY'] = dens_ctxt_id return contexts, funcs
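# ---------------------------------------------------------------------------
# For reference, a small sketch (not part of the test) of the L0 -> L1
# conversions the NUMEXPR parameter functions above encode. The sample counts
# are hypothetical; only the formulas come from the definitions above.
#
#     def tempwat_l1(t0):
#         return (t0 / 10000.0) - 10                                 # deg_C
#
#     def condwat_l1(c0):
#         return (c0 / 100000.0) - 0.5                               # S m-1
#
#     def preswat_l1(p0, p_range=679.34040721):
#         return (p0 * p_range / (0.85 * 65536)) - (0.05 * p_range)  # dbar
#
#     tempwat_l1(280000)  ->  18.0
#     condwat_l1(100000)  ->  0.5
# ---------------------------------------------------------------------------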
def test_raw_stream_integration(self): cc = self.container assertions = self.assertTrue #----------------------------- # Copy below here to run as a script (don't forget the imports of course!) #----------------------------- # Create some service clients... pubsub_management_service = PubsubManagementServiceClient(node=cc.node) ingestion_management_service = IngestionManagementServiceClient( node=cc.node) dataset_management_service = DatasetManagementServiceClient( node=cc.node) process_dispatcher = ProcessDispatcherServiceClient(node=cc.node) # declare some handy variables datastore_name = 'test_dm_integration' ### ### In the beginning there was one stream definition... ### # create a stream definition for the data from the ctd simulator raw_ctd_stream_def = SBE37_RAW_stream_definition() raw_ctd_stream_def_id = pubsub_management_service.create_stream_definition( container=raw_ctd_stream_def, name='Simulated RAW CTD data') ### ### And two process definitions... ### # one for the ctd simulator... producer_definition = ProcessDefinition() producer_definition.executable = { 'module': 'ion.processes.data.raw_stream_publisher', 'class': 'RawStreamPublisher' } raw_ctd_sim_procdef_id = process_dispatcher.create_process_definition( process_definition=producer_definition) #--------------------------- # Set up ingestion - this is an operator concern - not done by SA in a deployed system #--------------------------- # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile log.debug('Calling create_ingestion_configuration') ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration( exchange_point_id='science_data', couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'), number_of_workers=1) ingestion_management_service.activate_ingestion_configuration( ingestion_configuration_id=ingestion_configuration_id) #--------------------------- # Set up the producer (CTD Simulator) #--------------------------- # Create the stream raw_ctd_stream_id = pubsub_management_service.create_stream( stream_definition_id=raw_ctd_stream_def_id) # Set up the datasets raw_ctd_dataset_id = dataset_management_service.create_dataset( stream_id=raw_ctd_stream_id, datastore_name=datastore_name, view_name='datasets/stream_join_granule') # Configure ingestion of this dataset raw_ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration( dataset_id=raw_ctd_dataset_id, archive_data=True, archive_metadata=True, ingestion_configuration_id=ingestion_configuration_id, # you need to know the ingestion configuration id! ) # Hold onto raw_ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service # Start the ctd simulator to produce some data configuration = { 'process': { 'stream_id': raw_ctd_stream_id, } } raw_sim_pid = process_dispatcher.schedule_process( process_definition_id=raw_ctd_sim_procdef_id, configuration=configuration) ### ### Make a subscriber in the test to listen for raw data ### raw_subscription_id = pubsub_management_service.create_subscription( query=StreamQuery([ raw_ctd_stream_id, ]), exchange_name='raw_test', name="test raw subscription", ) # this is okay - even in cei mode!
pid = cc.spawn_process(name='dummy_process_for_test', module='pyon.ion.process', cls='SimpleProcess', config={}) dummy_process = cc.proc_manager.procs[pid] subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node) result = gevent.event.AsyncResult() results = [] def message_received(message, headers): log.warn('Raw data received!') results.append(message) if len(results) > 3: result.set(True) subscriber = subscriber_registrar.create_subscriber( exchange_name='raw_test', callback=message_received) subscriber.start() # after the queue has been created it is safe to activate the subscription pubsub_management_service.activate_subscription( subscription_id=raw_subscription_id) # Assert that we have received data assertions(result.get(timeout=10)) # stop the flow and parse the messages... process_dispatcher.cancel_process(raw_sim_pid) # kill the ctd simulator process - that is enough data gevent.sleep(1) for message in results: sha1 = message.identifiables['stream_encoding'].sha1 data = message.identifiables['data_stream'].values filename = FileSystem.get_hierarchical_url(FS.CACHE, sha1, ".raw") with open(filename, 'r') as f: assertions(data == f.read())
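# ---------------------------------------------------------------------------
# Note on the loop above: the cache filename is derived from the granule's
# declared sha1, so the file comparison only shows the two sources agree. A
# stronger, illustrative check (assuming the digest is the uppercase hex of
# the payload) would recompute it directly:
#
#     import hashlib
#     assert hashlib.sha1(data).hexdigest().upper() == sha1
# ---------------------------------------------------------------------------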
class PubsubManagementIntTest(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.pubsub_management = PubsubManagementServiceClient() self.resource_registry = ResourceRegistryServiceClient() self.dataset_management = DatasetManagementServiceClient() self.pdicts = {} self.queue_cleanup = list() self.exchange_cleanup = list() def tearDown(self): for queue in self.queue_cleanup: xn = self.container.ex_manager.create_xn_queue(queue) xn.delete() for exchange in self.exchange_cleanup: xp = self.container.ex_manager.create_xp(exchange) xp.delete() def test_stream_def_crud(self): # Test Creation pdict = DatasetManagementService.get_parameter_dictionary_by_name( 'ctd_parsed_param_dict') stream_definition_id = self.pubsub_management.create_stream_definition( 'ctd parsed', parameter_dictionary_id=pdict.identifier) # Make sure there is an assoc self.assertTrue( self.resource_registry.find_associations( subject=stream_definition_id, predicate=PRED.hasParameterDictionary, object=pdict.identifier, id_only=True)) # Test Reading stream_definition = self.pubsub_management.read_stream_definition( stream_definition_id) self.assertTrue( PubsubManagementService._compare_pdicts( pdict.dump(), stream_definition.parameter_dictionary)) # Test Deleting self.pubsub_management.delete_stream_definition(stream_definition_id) self.assertFalse( self.resource_registry.find_associations( subject=stream_definition_id, predicate=PRED.hasParameterDictionary, object=pdict.identifier, id_only=True)) # Test comparisons in_stream_definition_id = self.pubsub_management.create_stream_definition( 'L0 products', parameter_dictionary_id=pdict.identifier, available_fields=['time', 'temp', 'conductivity', 'pressure']) self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_definition_id) out_stream_definition_id = in_stream_definition_id self.assertTrue( self.pubsub_management.compare_stream_definition( in_stream_definition_id, out_stream_definition_id)) self.assertTrue( self.pubsub_management.compatible_stream_definitions( in_stream_definition_id, out_stream_definition_id)) out_stream_definition_id = self.pubsub_management.create_stream_definition( 'L2 Products', parameter_dictionary_id=pdict.identifier, available_fields=['time', 'salinity', 'density']) self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_definition_id) self.assertFalse( self.pubsub_management.compare_stream_definition( in_stream_definition_id, out_stream_definition_id)) self.assertTrue( self.pubsub_management.compatible_stream_definitions( in_stream_definition_id, out_stream_definition_id)) def test_validate_stream_defs(self): #test no input incoming_pdict_id = self._get_pdict( ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict( ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = [] available_fields_out = [] incoming_stream_def_id = self.pubsub_management.create_stream_definition( 'in_sd_0', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( 'out_sd_0', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) result = self.pubsub_management.validate_stream_defs( incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test input with no output incoming_pdict_id = self._get_pdict( ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 
'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict( ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = [ 'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0' ] available_fields_out = [] incoming_stream_def_id = self.pubsub_management.create_stream_definition( 'in_sd_1', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( 'out_sd_1', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) result = self.pubsub_management.validate_stream_defs( incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test available field missing parameter context definition -- missing PRESWAT_L0 incoming_pdict_id = self._get_pdict( ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0']) outgoing_pdict_id = self._get_pdict( ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = [ 'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0' ] available_fields_out = ['DENSITY'] incoming_stream_def_id = self.pubsub_management.create_stream_definition( 'in_sd_2', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( 'out_sd_2', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) result = self.pubsub_management.validate_stream_defs( incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test l1 from l0 incoming_pdict_id = self._get_pdict( ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict( ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = [ 'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0' ] available_fields_out = ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'] incoming_stream_def_id = self.pubsub_management.create_stream_definition( 'in_sd_3', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( 'out_sd_3', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) result = self.pubsub_management.validate_stream_defs( incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test l2 from l0 incoming_pdict_id = self._get_pdict( ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict( ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1', 'DENSITY', 'PRACSAL']) available_fields_in = [ 'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0' ] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition( 'in_sd_4', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( 'out_sd_4', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) result = self.pubsub_management.validate_stream_defs( incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test Ln from L0 incoming_pdict_id = self._get_pdict( ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict( ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = [ 'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0' ] available_fields_out = [ 'DENSITY', 
'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1' ] incoming_stream_def_id = self.pubsub_management.create_stream_definition( 'in_sd_5', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( 'out_sd_5', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) result = self.pubsub_management.validate_stream_defs( incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test L2 from L1 incoming_pdict_id = self._get_pdict( ['TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) outgoing_pdict_id = self._get_pdict( ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = [ 'TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1' ] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition( 'in_sd_6', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( 'out_sd_6', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) result = self.pubsub_management.validate_stream_defs( incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test L1 from L0 missing L0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON']) outgoing_pdict_id = self._get_pdict( ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition( 'in_sd_7', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( 'out_sd_7', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) result = self.pubsub_management.validate_stream_defs( incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test L2 from L0 missing L0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON']) outgoing_pdict_id = self._get_pdict( ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition( 'in_sd_8', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( 'out_sd_8', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) result = self.pubsub_management.validate_stream_defs( incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test L2 from L0 missing L1 incoming_pdict_id = self._get_pdict( ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL']) available_fields_in = [ 'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0' ] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition( 'in_sd_9', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) outgoing_stream_def_id = self.pubsub_management.create_stream_definition( 'out_sd_9', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) result = self.pubsub_management.validate_stream_defs( incoming_stream_def_id, 
outgoing_stream_def_id) self.assertFalse(result) def publish_on_stream(self, stream_id, msg): stream = self.pubsub_management.read_stream(stream_id) stream_route = stream.stream_route publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route) publisher.publish(msg) def test_stream_crud(self): stream_def_id = self.pubsub_management.create_stream_definition( 'test_definition', stream_type='stream') topic_id = self.pubsub_management.create_topic( name='test_topic', exchange_point='test_exchange') self.exchange_cleanup.append('test_exchange') topic2_id = self.pubsub_management.create_topic( name='another_topic', exchange_point='outside') stream_id, route = self.pubsub_management.create_stream( name='test_stream', topic_ids=[topic_id, topic2_id], exchange_point='test_exchange', stream_definition_id=stream_def_id) topics, assocs = self.resource_registry.find_objects( subject=stream_id, predicate=PRED.hasTopic, id_only=True) self.assertEquals(topics, [topic_id]) defs, assocs = self.resource_registry.find_objects( subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True) self.assertTrue(len(defs)) stream = self.pubsub_management.read_stream(stream_id) self.assertEquals(stream.name, 'test_stream') self.pubsub_management.delete_stream(stream_id) with self.assertRaises(NotFound): self.pubsub_management.read_stream(stream_id) defs, assocs = self.resource_registry.find_objects( subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True) self.assertFalse(len(defs)) topics, assocs = self.resource_registry.find_objects( subject=stream_id, predicate=PRED.hasTopic, id_only=True) self.assertFalse(len(topics)) self.pubsub_management.delete_topic(topic_id) self.pubsub_management.delete_topic(topic2_id) self.pubsub_management.delete_stream_definition(stream_def_id) def test_subscription_crud(self): stream_def_id = self.pubsub_management.create_stream_definition( 'test_definition', stream_type='stream') stream_id, route = self.pubsub_management.create_stream( name='test_stream', exchange_point='test_exchange', stream_definition_id=stream_def_id) subscription_id = self.pubsub_management.create_subscription( name='test subscription', stream_ids=[stream_id], exchange_name='test_queue') self.exchange_cleanup.append('test_exchange') subs, assocs = self.resource_registry.find_objects( subject=subscription_id, predicate=PRED.hasStream, id_only=True) self.assertEquals(subs, [stream_id]) res, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='test_queue', id_only=True) self.assertEquals(len(res), 1) subs, assocs = self.resource_registry.find_subjects( object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(subs[0], res[0]) subscription = self.pubsub_management.read_subscription( subscription_id) self.assertEquals(subscription.exchange_name, 'test_queue') self.pubsub_management.delete_subscription(subscription_id) subs, assocs = self.resource_registry.find_objects( subject=subscription_id, predicate=PRED.hasStream, id_only=True) self.assertFalse(len(subs)) subs, assocs = self.resource_registry.find_subjects( object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertFalse(len(subs)) self.pubsub_management.delete_stream(stream_id) self.pubsub_management.delete_stream_definition(stream_def_id) def test_move_before_activate(self): stream_id, route = self.pubsub_management.create_stream( name='test_stream', exchange_point='test_xp') 
#-------------------------------------------------------------------------------- # Test moving before activate #-------------------------------------------------------------------------------- subscription_id = self.pubsub_management.create_subscription( 'first_queue', stream_ids=[stream_id]) xn_ids, _ = self.resource_registry.find_resources( restype=RT.ExchangeName, name='first_queue', id_only=True) subjects, _ = self.resource_registry.find_subjects( object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(xn_ids[0], subjects[0]) self.pubsub_management.move_subscription(subscription_id, exchange_name='second_queue') xn_ids, _ = self.resource_registry.find_resources( restype=RT.ExchangeName, name='second_queue', id_only=True) subjects, _ = self.resource_registry.find_subjects( object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(len(subjects), 1) self.assertEquals(subjects[0], xn_ids[0]) self.pubsub_management.delete_subscription(subscription_id) self.pubsub_management.delete_stream(stream_id) def test_move_activated_subscription(self): stream_id, route = self.pubsub_management.create_stream( name='test_stream', exchange_point='test_xp') #-------------------------------------------------------------------------------- # Test moving after activate #-------------------------------------------------------------------------------- subscription_id = self.pubsub_management.create_subscription( 'first_queue', stream_ids=[stream_id]) self.pubsub_management.activate_subscription(subscription_id) xn_ids, _ = self.resource_registry.find_resources( restype=RT.ExchangeName, name='first_queue', id_only=True) subjects, _ = self.resource_registry.find_subjects( object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(xn_ids[0], subjects[0]) self.verified = Event() def verify(m, r, s): self.assertEquals(m, 'verified') self.verified.set() subscriber = StandaloneStreamSubscriber('second_queue', verify) subscriber.start() self.pubsub_management.move_subscription(subscription_id, exchange_name='second_queue') xn_ids, _ = self.resource_registry.find_resources( restype=RT.ExchangeName, name='second_queue', id_only=True) subjects, _ = self.resource_registry.find_subjects( object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(len(subjects), 1) self.assertEquals(subjects[0], xn_ids[0]) publisher = StandaloneStreamPublisher(stream_id, route) publisher.publish('verified') self.assertTrue(self.verified.wait(2)) self.pubsub_management.deactivate_subscription(subscription_id) self.pubsub_management.delete_subscription(subscription_id) self.pubsub_management.delete_stream(stream_id) def test_queue_cleanup(self): stream_id, route = self.pubsub_management.create_stream( 'test_stream', 'xp1') xn_objs, _ = self.resource_registry.find_resources( restype=RT.ExchangeName, name='queue1') for xn_obj in xn_objs: xn = self.container.ex_manager.create_xn_queue(xn_obj.name) xn.delete() subscription_id = self.pubsub_management.create_subscription( 'queue1', stream_ids=[stream_id]) xn_ids, _ = self.resource_registry.find_resources( restype=RT.ExchangeName, name='queue1') self.assertEquals(len(xn_ids), 1) self.pubsub_management.delete_subscription(subscription_id) xn_ids, _ = self.resource_registry.find_resources( restype=RT.ExchangeName, name='queue1') self.assertEquals(len(xn_ids), 0) def test_activation_and_deactivation(self): stream_id, route = self.pubsub_management.create_stream( 'stream1', 'xp1') 
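# Assumed activation semantics exercised below: create_subscription only
# registers the subscription, and the queue is not bound to the stream's
# routing key until activate_subscription runs, so messages published while
# the subscription is inactive are dropped rather than queued.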
subscription_id = self.pubsub_management.create_subscription( 'sub1', stream_ids=[stream_id]) self.check1 = Event() def verifier(m, r, s): self.check1.set() subscriber = StandaloneStreamSubscriber('sub1', verifier) subscriber.start() publisher = StandaloneStreamPublisher(stream_id, route) publisher.publish('should not receive') self.assertFalse(self.check1.wait(0.25)) self.pubsub_management.activate_subscription(subscription_id) publisher.publish('should receive') self.assertTrue(self.check1.wait(2)) self.check1.clear() self.assertFalse(self.check1.is_set()) self.pubsub_management.deactivate_subscription(subscription_id) publisher.publish('should not receive') self.assertFalse(self.check1.wait(0.5)) self.pubsub_management.activate_subscription(subscription_id) publisher.publish('should receive') self.assertTrue(self.check1.wait(2)) subscriber.stop() self.pubsub_management.deactivate_subscription(subscription_id) self.pubsub_management.delete_subscription(subscription_id) self.pubsub_management.delete_stream(stream_id) def test_topic_crud(self): topic_id = self.pubsub_management.create_topic( name='test_topic', exchange_point='test_xp') self.exchange_cleanup.append('test_xp') topic = self.pubsub_management.read_topic(topic_id) self.assertEquals(topic.name, 'test_topic') self.assertEquals(topic.exchange_point, 'test_xp') self.pubsub_management.delete_topic(topic_id) with self.assertRaises(NotFound): self.pubsub_management.read_topic(topic_id) def test_full_pubsub(self): self.sub1_sat = Event() self.sub2_sat = Event() def subscriber1(m, r, s): self.sub1_sat.set() def subscriber2(m, r, s): self.sub2_sat.set() sub1 = StandaloneStreamSubscriber('sub1', subscriber1) self.queue_cleanup.append(sub1.xn.queue) sub1.start() sub2 = StandaloneStreamSubscriber('sub2', subscriber2) self.queue_cleanup.append(sub2.xn.queue) sub2.start() log_topic = self.pubsub_management.create_topic( 'instrument_logs', exchange_point='instruments') science_topic = self.pubsub_management.create_topic( 'science_data', exchange_point='instruments') events_topic = self.pubsub_management.create_topic( 'notifications', exchange_point='events') log_stream, route = self.pubsub_management.create_stream( 'instrument1-logs', topic_ids=[log_topic], exchange_point='instruments') ctd_stream, route = self.pubsub_management.create_stream( 'instrument1-ctd', topic_ids=[science_topic], exchange_point='instruments') event_stream, route = self.pubsub_management.create_stream( 'notifications', topic_ids=[events_topic], exchange_point='events') raw_stream, route = self.pubsub_management.create_stream( 'temp', exchange_point='global.data') self.exchange_cleanup.extend(['instruments', 'events', 'global.data']) subscription1 = self.pubsub_management.create_subscription( 'subscription1', stream_ids=[log_stream, event_stream], exchange_name='sub1') subscription2 = self.pubsub_management.create_subscription( 'subscription2', exchange_points=['global.data'], stream_ids=[ctd_stream], exchange_name='sub2') self.pubsub_management.activate_subscription(subscription1) self.pubsub_management.activate_subscription(subscription2) self.publish_on_stream(log_stream, 1) self.assertTrue(self.sub1_sat.wait(4)) self.assertFalse(self.sub2_sat.is_set()) self.publish_on_stream(raw_stream, 1) self.assertTrue(self.sub2_sat.wait(4)) sub1.stop() sub2.stop() def test_topic_craziness(self): self.msg_queue = Queue() def subscriber1(m, r, s): self.msg_queue.put(m) sub1 = StandaloneStreamSubscriber('sub1', subscriber1) self.queue_cleanup.append(sub1.xn.queue) sub1.start()
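# The two topic trees created below (from the parent_topic_id wiring):
#
#   xp1: topic1 -+- topic2 -+- topic4
#                |          +- topic5
#                +- topic3 -+- topic6
#                           +- topic7
#
#   xp2: topic8 --- topic9 -+- topic10
#                           +- topic11 -+- topic12
#                                       +- topic13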
topic1 = self.pubsub_management.create_topic('topic1', exchange_point='xp1') topic2 = self.pubsub_management.create_topic('topic2', exchange_point='xp1', parent_topic_id=topic1) topic3 = self.pubsub_management.create_topic('topic3', exchange_point='xp1', parent_topic_id=topic1) topic4 = self.pubsub_management.create_topic('topic4', exchange_point='xp1', parent_topic_id=topic2) topic5 = self.pubsub_management.create_topic('topic5', exchange_point='xp1', parent_topic_id=topic2) topic6 = self.pubsub_management.create_topic('topic6', exchange_point='xp1', parent_topic_id=topic3) topic7 = self.pubsub_management.create_topic('topic7', exchange_point='xp1', parent_topic_id=topic3) # Tree 2 topic8 = self.pubsub_management.create_topic('topic8', exchange_point='xp2') topic9 = self.pubsub_management.create_topic('topic9', exchange_point='xp2', parent_topic_id=topic8) topic10 = self.pubsub_management.create_topic('topic10', exchange_point='xp2', parent_topic_id=topic9) topic11 = self.pubsub_management.create_topic('topic11', exchange_point='xp2', parent_topic_id=topic9) topic12 = self.pubsub_management.create_topic('topic12', exchange_point='xp2', parent_topic_id=topic11) topic13 = self.pubsub_management.create_topic('topic13', exchange_point='xp2', parent_topic_id=topic11) self.exchange_cleanup.extend(['xp1', 'xp2']) stream1_id, route = self.pubsub_management.create_stream( 'stream1', topic_ids=[topic7, topic4, topic5], exchange_point='xp1') stream2_id, route = self.pubsub_management.create_stream( 'stream2', topic_ids=[topic8], exchange_point='xp2') stream3_id, route = self.pubsub_management.create_stream( 'stream3', topic_ids=[topic10, topic13], exchange_point='xp2') stream4_id, route = self.pubsub_management.create_stream( 'stream4', topic_ids=[topic9], exchange_point='xp2') stream5_id, route = self.pubsub_management.create_stream( 'stream5', topic_ids=[topic11], exchange_point='xp2') subscription1 = self.pubsub_management.create_subscription( 'sub1', topic_ids=[topic1]) subscription2 = self.pubsub_management.create_subscription( 'sub2', topic_ids=[topic8], exchange_name='sub1') subscription3 = self.pubsub_management.create_subscription( 'sub3', topic_ids=[topic9], exchange_name='sub1') subscription4 = self.pubsub_management.create_subscription( 'sub4', topic_ids=[topic10, topic13, topic11], exchange_name='sub1') #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription1) self.publish_on_stream(stream1_id, 1) self.assertEquals(self.msg_queue.get(timeout=10), 1) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.1) self.pubsub_management.deactivate_subscription(subscription1) self.pubsub_management.delete_subscription(subscription1) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription2) self.publish_on_stream(stream2_id, 2) self.assertEquals(self.msg_queue.get(timeout=10), 2) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.1) self.pubsub_management.deactivate_subscription(subscription2) self.pubsub_management.delete_subscription(subscription2) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription3) self.publish_on_stream(stream2_id, 3) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.publish_on_stream(stream3_id, 4) self.assertEquals(self.msg_queue.get(timeout=10), 4) 
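# Subscription3 is rooted at topic9; the stream2 publish (on topic8, topic9's
# parent) was not delivered because topic subscriptions appear to match a
# topic and its descendants, never its ancestors, while stream3 (topics 10
# and 13, both under topic9) was delivered.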
self.pubsub_management.deactivate_subscription(subscription3) self.pubsub_management.delete_subscription(subscription3) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription4) self.publish_on_stream(stream4_id, 5) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.publish_on_stream(stream5_id, 6) self.assertEquals(self.msg_queue.get(timeout=10), 6) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.pubsub_management.deactivate_subscription(subscription4) self.pubsub_management.delete_subscription(subscription4) #-------------------------------------------------------------------------------- sub1.stop() self.pubsub_management.delete_topic(topic13) self.pubsub_management.delete_topic(topic12) self.pubsub_management.delete_topic(topic11) self.pubsub_management.delete_topic(topic10) self.pubsub_management.delete_topic(topic9) self.pubsub_management.delete_topic(topic8) self.pubsub_management.delete_topic(topic7) self.pubsub_management.delete_topic(topic6) self.pubsub_management.delete_topic(topic5) self.pubsub_management.delete_topic(topic4) self.pubsub_management.delete_topic(topic3) self.pubsub_management.delete_topic(topic2) self.pubsub_management.delete_topic(topic1) self.pubsub_management.delete_stream(stream1_id) self.pubsub_management.delete_stream(stream2_id) self.pubsub_management.delete_stream(stream3_id) self.pubsub_management.delete_stream(stream4_id) self.pubsub_management.delete_stream(stream5_id) def _get_pdict(self, filter_values): t_ctxt = ParameterContext( 'TIME', param_type=QuantityType(value_encoding=np.dtype('int64'))) t_ctxt.uom = 'seconds since 01-01-1900' t_ctxt_id = self.dataset_management.create_parameter_context( name='TIME', parameter_context=t_ctxt.dump(), parameter_type='quantity<int64>', unit_of_measure=t_ctxt.uom) lat_ctxt = ParameterContext( 'LAT', param_type=ConstantType( QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999) lat_ctxt.axis = AxisTypeEnum.LAT lat_ctxt.uom = 'degree_north' lat_ctxt_id = self.dataset_management.create_parameter_context( name='LAT', parameter_context=lat_ctxt.dump(), parameter_type='quantity<float32>', unit_of_measure=lat_ctxt.uom) lon_ctxt = ParameterContext( 'LON', param_type=ConstantType( QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999) lon_ctxt.axis = AxisTypeEnum.LON lon_ctxt.uom = 'degree_east' lon_ctxt_id = self.dataset_management.create_parameter_context( name='LON', parameter_context=lon_ctxt.dump(), parameter_type='quantity<float32>', unit_of_measure=lon_ctxt.uom) # Independent Parameters # Temperature - values expected to be the decimal results of conversion from hex temp_ctxt = ParameterContext( 'TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999) temp_ctxt.uom = 'deg_C' temp_ctxt_id = self.dataset_management.create_parameter_context( name='TEMPWAT_L0', parameter_context=temp_ctxt.dump(), parameter_type='quantity<float32>', unit_of_measure=temp_ctxt.uom) # Conductivity - values expected to be the decimal results of conversion from hex cond_ctxt = ParameterContext( 'CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999) cond_ctxt.uom = 'S m-1' cond_ctxt_id = self.dataset_management.create_parameter_context( name='CONDWAT_L0', parameter_context=cond_ctxt.dump(), parameter_type='quantity<float32>', unit_of_measure=cond_ctxt.uom) # Pressure - values expected to be the decimal results of 
conversion from hex press_ctxt = ParameterContext( 'PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999) press_ctxt.uom = 'dbar' press_ctxt_id = self.dataset_management.create_parameter_context( name='PRESWAT_L0', parameter_context=press_ctxt.dump(), parameter_type='quantity<float32>', unit_of_measure=press_ctxt.uom) # Dependent Parameters # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10 tl1_func = '(T / 10000) - 10' tl1_pmap = {'T': 'TEMPWAT_L0'} expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'], param_map=tl1_pmap) tempL1_ctxt = ParameterContext( 'TEMPWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL) tempL1_ctxt.uom = 'deg_C' tempL1_ctxt_id = self.dataset_management.create_parameter_context( name=tempL1_ctxt.name, parameter_context=tempL1_ctxt.dump(), parameter_type='pfunc', unit_of_measure=tempL1_ctxt.uom) # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5 cl1_func = '(C / 100000) - 0.5' cl1_pmap = {'C': 'CONDWAT_L0'} expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'], param_map=cl1_pmap) condL1_ctxt = ParameterContext( 'CONDWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL) condL1_ctxt.uom = 'S m-1' condL1_ctxt_id = self.dataset_management.create_parameter_context( name=condL1_ctxt.name, parameter_context=condL1_ctxt.dump(), parameter_type='pfunc', unit_of_measure=condL1_ctxt.uom) # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721 # PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range) pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)' pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721} expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'], param_map=pl1_pmap) presL1_ctxt = ParameterContext( 'PRESWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL) presL1_ctxt.uom = 'dbar' presL1_ctxt_id = self.dataset_management.create_parameter_context( name=presL1_ctxt.name, parameter_context=presL1_ctxt.dump(), parameter_type='pfunc', unit_of_measure=presL1_ctxt.uom) # Density & practical salinity calculated using the Gibbs Seawater library - available via the python-gsw project: # https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1 # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1) owner = 'gsw' sal_func = 'SP_from_C' sal_arglist = ['C', 't', 'p'] sal_pmap = { 'C': NumexprFunction('CONDWAT_L1*10', 'C*10', ['C'], param_map={'C': 'CONDWAT_L1'}), 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1' } sal_kwargmap = None expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist, sal_kwargmap, sal_pmap) sal_ctxt = ParameterContext('PRACSAL', param_type=ParameterFunctionType(expr), variability=VariabilityEnum.TEMPORAL) sal_ctxt.uom = '1' sal_ctxt_id = self.dataset_management.create_parameter_context( name=sal_ctxt.name, parameter_context=sal_ctxt.dump(), parameter_type='pfunc', unit_of_measure=sal_ctxt.uom) # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude) # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1) # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1) owner = 'gsw' abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON', 'LAT']) cons_temp_expr = PythonFunction( 'cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1']) dens_expr = PythonFunction( 'DENSITY', owner, 'rho', [abs_sal_expr,
cons_temp_expr, 'PRESWAT_L1']) dens_ctxt = ParameterContext( 'DENSITY', param_type=ParameterFunctionType(dens_expr), variability=VariabilityEnum.TEMPORAL) dens_ctxt.uom = 'kg m-3' dens_ctxt_id = self.dataset_management.create_parameter_context( name=dens_ctxt.name, parameter_context=dens_ctxt.dump(), parameter_type='pfunc', unit_of_measure=dens_ctxt.uom) ids = [ t_ctxt_id, lat_ctxt_id, lon_ctxt_id, temp_ctxt_id, cond_ctxt_id, press_ctxt_id, tempL1_ctxt_id, condL1_ctxt_id, presL1_ctxt_id, sal_ctxt_id, dens_ctxt_id ] contexts = [ t_ctxt, lat_ctxt, lon_ctxt, temp_ctxt, cond_ctxt, press_ctxt, tempL1_ctxt, condL1_ctxt, presL1_ctxt, sal_ctxt, dens_ctxt ] context_ids = [ ids[i] for i, ctxt in enumerate(contexts) if ctxt.name in filter_values ] pdict_name = '_'.join( [ctxt.name for ctxt in contexts if ctxt.name in filter_values]) if pdict_name in self.pdicts: return self.pdicts[pdict_name] pdict_id = self.dataset_management.create_parameter_dictionary( pdict_name, parameter_context_ids=context_ids, temporal_context='time') self.pdicts[pdict_name] = pdict_id return pdict_id
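# ---------------------------------------------------------------------------
# Illustrative module-level sketch (not used by the tests) of the gsw call
# chain that the PRACSAL and DENSITY parameter contexts above encode, assuming
# the python-gsw API named in the comments (SP_from_C, SA_from_SP, CT_from_t,
# rho). Inputs are plain floats or arrays in the L1 units noted above.
def _density_from_l1(cond_l1, temp_l1, pres_l1, lon, lat):
    import gsw
    # PRACSAL = gsw.SP_from_C(CONDWAT_L1 * 10, TEMPWAT_L1, PRESWAT_L1);
    # the factor of 10 converts S m-1 to the mS cm-1 gsw expects
    sp = gsw.SP_from_C(cond_l1 * 10, temp_l1, pres_l1)
    sa = gsw.SA_from_SP(sp, pres_l1, lon, lat)   # absolute salinity
    ct = gsw.CT_from_t(sa, temp_l1, pres_l1)     # conservative temperature
    return gsw.rho(sa, ct, pres_l1)              # in-situ density, kg m-3
# ---------------------------------------------------------------------------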
class TestWorkflowManagementIntegration(IonIntegrationTestCase): def setUp(self): # Start container self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Now create client to DataProductManagementService self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient( node=self.container.node) self.pubsubclient = PubsubManagementServiceClient( node=self.container.node) self.ingestclient = IngestionManagementServiceClient( node=self.container.node) self.imsclient = InstrumentManagementServiceClient( node=self.container.node) self.dataproductclient = DataProductManagementServiceClient( node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient( node=self.container.node) self.datasetclient = DatasetManagementServiceClient( node=self.container.node) self.workflowclient = WorkflowManagementServiceClient( node=self.container.node) self.process_dispatcher = ProcessDispatcherServiceClient( node=self.container.node) self.ctd_stream_def = SBE37_CDM_stream_definition() def _create_ctd_input_stream_and_data_product( self, data_product_name='ctd_parsed'): cc = self.container assertions = self.assertTrue #------------------------------- # Create CTD Parsed as the initial data product #------------------------------- # create a stream definition for the data from the ctd simulator ctd_stream_def_id = self.pubsubclient.create_stream_definition( container=self.ctd_stream_def, name='Simulated CTD data') log.debug('Creating new CDM data product with a stream definition') dp_obj = IonObject(RT.DataProduct, name=data_product_name, description='ctd stream test') try: ctd_parsed_data_product_id = self.dataproductclient.create_data_product( dp_obj, ctd_stream_def_id) except Exception as ex: self.fail("failed to create new data product: %s" % ex) log.debug('new ctd_parsed_data_product_id = %s', ctd_parsed_data_product_id) # Only ever need one device for testing purposes. instDevice_obj, _ = self.rrclient.find_resources( restype=RT.InstrumentDevice, name='SBE37IMDevice') if instDevice_obj: instDevice_id = instDevice_obj[0]._id else: instDevice_obj = IonObject(RT.InstrumentDevice, name='SBE37IMDevice', description="SBE37IMDevice", serial_number="12345") instDevice_id = self.imsclient.create_instrument_device( instrument_device=instDevice_obj) self.damsclient.assign_data_product( input_resource_id=instDevice_id, data_product_id=ctd_parsed_data_product_id) self.dataproductclient.activate_data_product_persistence( data_product_id=ctd_parsed_data_product_id, persist_data=False, persist_metadata=False) # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product_id, PRED.hasStream, None, True) assertions(len(stream_ids) > 0) ctd_stream_id = stream_ids[0] return ctd_stream_id, ctd_parsed_data_product_id def _start_simple_input_stream_process(self, ctd_stream_id): return self._start_input_stream_process(ctd_stream_id) def _start_sinusoidal_input_stream_process(self, ctd_stream_id): return self._start_input_stream_process( ctd_stream_id, 'ion.processes.data.sinusoidal_stream_publisher', 'SinusoidalCtdPublisher') def _start_input_stream_process( self, ctd_stream_id, module='ion.processes.data.ctd_stream_publisher', class_name='SimpleCtdPublisher'): ### ### Start the process for producing the CTD data ### # process definition for the ctd simulator...
producer_definition = ProcessDefinition() producer_definition.executable = { 'module': module, 'class': class_name } ctd_sim_procdef_id = self.process_dispatcher.create_process_definition( process_definition=producer_definition) # Start the ctd simulator to produce some data configuration = { 'process': { 'stream_id': ctd_stream_id, } } ctd_sim_pid = self.process_dispatcher.schedule_process( process_definition_id=ctd_sim_procdef_id, configuration=configuration) return ctd_sim_pid def _start_output_stream_listener(self, data_product_stream_ids, message_count_per_stream=10): cc = self.container assertions = self.assertTrue ### ### Make a subscriber in the test to listen for transformed data ### salinity_subscription_id = self.pubsubclient.create_subscription( query=StreamQuery(data_product_stream_ids), exchange_name='workflow_test', name="test workflow transformations", ) pid = cc.spawn_process(name='dummy_process_for_test', module='pyon.ion.process', cls='SimpleProcess', config={}) dummy_process = cc.proc_manager.procs[pid] subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node) result = gevent.event.AsyncResult() results = [] def message_received(message, headers): log.warn('Data received!') results.append(message) if len(results) >= len( data_product_stream_ids ) * message_count_per_stream: # Only wait for so many messages - per stream result.set(True) subscriber = subscriber_registrar.create_subscriber( exchange_name='workflow_test', callback=message_received) subscriber.start() # after the queue has been created it is safe to activate the subscription self.pubsubclient.activate_subscription( subscription_id=salinity_subscription_id) # Assert that we have received data assertions(result.get(timeout=30)) self.pubsubclient.deactivate_subscription( subscription_id=salinity_subscription_id) subscriber.stop() return results def _validate_messages(self, results): cc = self.container assertions = self.assertTrue first_salinity_values = None for message in results: try: psd = PointSupplementStreamParser( stream_definition=self.ctd_stream_def, stream_granule=message) temp = psd.get_values('temperature') log.info(psd.list_field_names()) except KeyError: temp = None if temp is not None: assertions(isinstance(temp, numpy.ndarray)) log.info('temperature=' + str(numpy.nanmin(temp))) first_salinity_values = None else: psd = PointSupplementStreamParser( stream_definition=SalinityTransform.outgoing_stream_def, stream_granule=message) log.info(psd.list_field_names()) # Test the handy info method for the names of fields in the stream def assertions('salinity' in psd.list_field_names()) # you have to know the name of the coverage in stream def salinity = psd.get_values('salinity') log.info('salinity=' + str(numpy.nanmin(salinity))) assertions(isinstance(salinity, numpy.ndarray)) assertions(numpy.nanmin(salinity) > 0.0) # salinity should always be greater than 0 if first_salinity_values is None: first_salinity_values = salinity.tolist() else: second_salinity_values = salinity.tolist() assertions( len(first_salinity_values) == len( second_salinity_values)) for idx in range(0, len(first_salinity_values)): assertions(first_salinity_values[idx] * 2.0 == second_salinity_values[idx]) def _create_salinity_data_process_definition(self): # Salinity: Data Process Definition # First look to see if it exists and if not, then create it dpd, _ = self.rrclient.find_resources(restype=RT.DataProcessDefinition, name='ctd_salinity') if len(dpd) > 0: return dpd[0] log.debug("Create
data process definition SalinityTransform") dpd_obj = IonObject( RT.DataProcessDefinition, name='ctd_salinity', description='create a salinity data product', module='ion.processes.data.transforms.ctd.ctd_L2_salinity', class_name='SalinityTransform', process_source='SalinityTransform source code here...') try: ctd_L2_salinity_dprocdef_id = self.dataprocessclient.create_data_process_definition( dpd_obj) except Exception as ex: self.fail( "failed to create new SalinityTransform data process definition: %s" % ex) # create a stream definition for the data from the salinity Transform sal_stream_def_id = self.pubsubclient.create_stream_definition( container=SalinityTransform.outgoing_stream_def, name='Salinity') self.dataprocessclient.assign_stream_definition_to_data_process_definition( sal_stream_def_id, ctd_L2_salinity_dprocdef_id) return ctd_L2_salinity_dprocdef_id def _create_salinity_doubler_data_process_definition(self): # First look to see if it exists and if not, then create it dpd, _ = self.rrclient.find_resources(restype=RT.DataProcessDefinition, name='salinity_doubler') if len(dpd) > 0: return dpd[0] # Salinity Doubler: Data Process Definition log.debug("Create data process definition SalinityDoublerTransform") dpd_obj = IonObject( RT.DataProcessDefinition, name='salinity_doubler', description='create a salinity doubler data product', module='ion.processes.data.transforms.example_double_salinity', class_name='SalinityDoubler', process_source='SalinityDoubler source code here...') try: salinity_doubler_dprocdef_id = self.dataprocessclient.create_data_process_definition( dpd_obj) except Exception as ex: self.fail( "failed to create new SalinityDoubler data process definition: %s" % ex) # create a stream definition for the data from the salinity Transform salinity_double_stream_def_id = self.pubsubclient.create_stream_definition( container=SalinityDoubler.outgoing_stream_def, name='SalinityDoubler') self.dataprocessclient.assign_stream_definition_to_data_process_definition( salinity_double_stream_def_id, salinity_doubler_dprocdef_id) return salinity_doubler_dprocdef_id def create_transform_process(self, data_process_definition_id, data_process_input_dp_id): data_process_definition = self.rrclient.read( data_process_definition_id) # Find the link between the output Stream Definition resource and the Data Process Definition resource stream_ids, _ = self.rrclient.find_objects(data_process_definition._id, PRED.hasStreamDefinition, RT.StreamDefinition, id_only=True) if not stream_ids: raise Inconsistent( "The data process definition %s is missing an association to an output stream definition" % data_process_definition._id) process_output_stream_def_id = stream_ids[0] # Use the data process definition name as the name of the output data product data_process_name = data_process_definition.name # Create the output data product of the transform transform_dp_obj = IonObject( RT.DataProduct, name=data_process_name, description=data_process_definition.description) transform_dp_id = self.dataproductclient.create_data_product( transform_dp_obj, process_output_stream_def_id) self.dataproductclient.activate_data_product_persistence( data_product_id=transform_dp_id, persist_data=True, persist_metadata=True) # the transform's data product is the output product of this step output_data_product_id = transform_dp_id # Create the transform data process log.debug("create data_process and start it") data_process_id = self.dataprocessclient.create_data_process( data_process_definition._id,
    def create_transform_process(self, data_process_definition_id,
                                 data_process_input_dp_id):

        data_process_definition = self.rrclient.read(data_process_definition_id)

        # Find the link between the output Stream Definition resource and the Data Process Definition resource
        stream_ids, _ = self.rrclient.find_objects(data_process_definition._id,
                                                   PRED.hasStreamDefinition,
                                                   RT.StreamDefinition,
                                                   id_only=True)
        if not stream_ids:
            raise Inconsistent(
                "The data process definition %s is missing an association to an output stream definition" % data_process_definition._id)
        process_output_stream_def_id = stream_ids[0]

        # Use the data process definition name for the name of the output data product
        data_process_name = data_process_definition.name

        # Create the output data product of the transform
        transform_dp_obj = IonObject(
            RT.DataProduct,
            name=data_process_name,
            description=data_process_definition.description)
        transform_dp_id = self.dataproductclient.create_data_product(
            transform_dp_obj, process_output_stream_def_id)
        self.dataproductclient.activate_data_product_persistence(
            data_product_id=transform_dp_id,
            persist_data=True,
            persist_metadata=True)

        # The transform's single output product id
        output_data_product_id = transform_dp_id

        # Create the transform data process
        log.debug("create data_process and start it")
        data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition._id, [data_process_input_dp_id],
            {'output': transform_dp_id})
        self.dataprocessclient.activate_data_process(data_process_id)

        # Find the id of the output data stream
        stream_ids, _ = self.rrclient.find_objects(transform_dp_id,
                                                   PRED.hasStream, None, True)
        if not stream_ids:
            raise Inconsistent(
                "The data process %s is missing an association to an output stream" % data_process_id)

        return data_process_id, output_data_product_id

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Not integrated for CEI')
    #@unittest.skip("Skipping for debugging ")
    def test_SA_transform_components(self):

        assertions = self.assertTrue

        # The list of data product streams to monitor
        data_product_stream_ids = list()

        # Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self._create_ctd_input_stream_and_data_product()
        data_product_stream_ids.append(ctd_stream_id)

        ###
        ### Setup the first transformation
        ###

        # Salinity: Data Process Definition
        ctd_L2_salinity_dprocdef_id = self._create_salinity_data_process_definition()

        l2_salinity_all_data_process_id, ctd_l2_salinity_output_dp_id = self.create_transform_process(
            ctd_L2_salinity_dprocdef_id, ctd_parsed_data_product_id)

        # Get the stream id for the transform outputs
        stream_ids, _ = self.rrclient.find_objects(
            ctd_l2_salinity_output_dp_id, PRED.hasStream, None, True)
        assertions(len(stream_ids) > 0)
        sal_stream_id = stream_ids[0]
        data_product_stream_ids.append(sal_stream_id)

        ###
        ### Setup the second transformation
        ###

        # Salinity Doubler: Data Process Definition
        salinity_doubler_dprocdef_id = self._create_salinity_doubler_data_process_definition()

        salinity_double_data_process_id, salinity_doubler_output_dp_id = self.create_transform_process(
            salinity_doubler_dprocdef_id, ctd_l2_salinity_output_dp_id)

        stream_ids, _ = self.rrclient.find_objects(
            salinity_doubler_output_dp_id, PRED.hasStream, None, True)
        assertions(len(stream_ids) > 0)
        sal_dbl_stream_id = stream_ids[0]
        data_product_stream_ids.append(sal_dbl_stream_id)

        # Start the input stream process
        ctd_sim_pid = self._start_simple_input_stream_process(ctd_stream_id)

        # Start the output stream listener to monitor and verify messages
        results = self._start_output_stream_listener(data_product_stream_ids)

        # Stop the transform processes
        self.dataprocessclient.deactivate_data_process(
            salinity_double_data_process_id)
        self.dataprocessclient.deactivate_data_process(
            l2_salinity_all_data_process_id)

        # Stop the flow and parse the messages...
        # Kill the ctd simulator process - we have enough data
        self.process_dispatcher.cancel_process(ctd_sim_pid)

        # Validate the data from each of the messages along the way
        self._validate_messages(results)
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Not integrated for CEI')
    #@unittest.skip("Skipping for debugging ")
    def test_transform_workflow(self):

        assertions = self.assertTrue

        # Build the workflow definition
        workflow_def_obj = IonObject(
            RT.WorkflowDefinition,
            name='Salinity_Test_Workflow',
            description='tests a workflow of multiple transform data processes')

        # Set a specific output product name
        workflow_data_product_name = 'TEST-Workflow_Output_Product'

        # Add a transformation process definition
        ctd_L2_salinity_dprocdef_id = self._create_salinity_data_process_definition()
        workflow_step_obj = IonObject(
            'DataProcessWorkflowStep',
            data_process_definition_id=ctd_L2_salinity_dprocdef_id,
            persist_process_output_data=False)  # Don't persist the intermediate data product
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        # Add a transformation process definition
        salinity_doubler_dprocdef_id = self._create_salinity_doubler_data_process_definition()
        workflow_step_obj = IonObject(
            'DataProcessWorkflowStep',
            data_process_definition_id=salinity_doubler_dprocdef_id,
            output_data_product_name=workflow_data_product_name)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        # Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(
            workflow_def_obj)

        aids = self.rrclient.find_associations(workflow_def_id,
                                               PRED.hasDataProcessDefinition)
        assertions(len(aids) == 2)

        # The list of data product streams to monitor
        data_product_stream_ids = list()

        # Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self._create_ctd_input_stream_and_data_product()
        data_product_stream_ids.append(ctd_stream_id)

        # Create and start the workflow
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_data_product_id, timeout=30)

        workflow_output_ids, _ = self.rrclient.find_subjects(
            RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True)
        assertions(len(workflow_output_ids) == 1)

        # Verify the output data product name matches what was specified in the workflow definition
        workflow_product = self.rrclient.read(workflow_product_id)
        assertions(workflow_product.name == workflow_data_product_name)

        # Walk the associations to find the appropriate output data streams to validate the messages
        workflow_dp_ids, _ = self.rrclient.find_objects(
            workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        assertions(len(workflow_dp_ids) == 2)

        for dp_id in workflow_dp_ids:
            stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                       None, True)
            assertions(len(stream_ids) == 1)
            data_product_stream_ids.append(stream_ids[0])

        # Start the input stream process
        ctd_sim_pid = self._start_simple_input_stream_process(ctd_stream_id)

        # Start the output stream listener to monitor and verify messages
        results = self._start_output_stream_listener(data_product_stream_ids)

        # Stop the workflow processes
        self.workflowclient.terminate_data_process_workflow(
            workflow_id, False, timeout=15)  # Should test true at some point

        # Make sure the Workflow object was removed
        objs, _ = self.rrclient.find_resources(restype=RT.Workflow)
        assertions(len(objs) == 0)

        # Stop the flow and parse the messages...
        # Kill the ctd simulator process - we have enough data
        self.process_dispatcher.cancel_process(ctd_sim_pid)

        # Validate the data from each of the messages along the way
        self._validate_messages(results)

        # Cleanup to make sure delete is correct
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        workflow_def_ids, _ = self.rrclient.find_resources(
            restype=RT.WorkflowDefinition)
        assertions(len(workflow_def_ids) == 0)
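The _validate_messages helper used by both tests above pairs each SalinityTransform granule with the SalinityDoubler granule that follows it and expects an exact element-wise doubling. The invariant in isolation, as a runnable numpy check (the sample arrays are made up for illustration; the suite itself compares the floats exactly rather than with allclose):

import numpy

first_salinity_values = numpy.array([33.2, 33.4, 33.6])    # SalinityTransform output
second_salinity_values = numpy.array([66.4, 66.8, 67.2])   # SalinityDoubler output

assert len(first_salinity_values) == len(second_salinity_values)
# allclose tolerates the float round-off an exact == comparison would not
assert numpy.allclose(first_salinity_values * 2.0, second_salinity_values)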
    def _create_google_dt_data_process_definition(self):

        # First look to see if it exists and if not, then create it
        dpd, _ = self.rrclient.find_resources(restype=RT.DataProcessDefinition,
                                              name='google_dt_transform')
        if len(dpd) > 0:
            return dpd[0]

        # Data Process Definition
        log.debug("Create data process definition GoogleDtTransform")
        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='google_dt_transform',
            description='Convert data streams to Google DataTables',
            module='ion.processes.data.transforms.viz.google_dt',
            class_name='VizTransformGoogleDT',
            process_source='VizTransformGoogleDT source code here...')
        try:
            procdef_id = self.dataprocessclient.create_data_process_definition(
                dpd_obj)
        except Exception as ex:
            self.fail(
                "failed to create new VizTransformGoogleDT data process definition: %s" % ex)

        # Create a stream definition for the data from the transform
        stream_def_id = self.pubsubclient.create_stream_definition(
            container=VizTransformGoogleDT.outgoing_stream_def,
            name='VizTransformGoogleDT')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            stream_def_id, procdef_id)

        return procdef_id

    def _validate_google_dt_results(self, results_stream_def, results):

        cc = self.container
        assertions = self.assertTrue

        for g in results:
            if isinstance(g, Granule):
                tx = TaxyTool.load_from_granule(g)
                rdt = RecordDictionaryTool.load_from_granule(g)
                #log.warn(tx.pretty_print())
                #log.warn(rdt.pretty_print())

                gdt_component = rdt['google_dt_components'][0]
                assertions(
                    gdt_component['viz_product_type'] == 'google_realtime_dt')
                gdt_description = gdt_component['data_table_description']
                gdt_content = gdt_component['data_table_content']

                assertions(gdt_description[0][0] == 'time')
                assertions(len(gdt_description) > 1)
                assertions(len(gdt_content) >= 0)

        return

    def _create_mpl_graphs_data_process_definition(self):

        # First look to see if it exists and if not, then create it
        dpd, _ = self.rrclient.find_resources(restype=RT.DataProcessDefinition,
                                              name='mpl_graphs_transform')
        if len(dpd) > 0:
            return dpd[0]

        # Data Process Definition
        log.debug("Create data process definition MatplotlibGraphsTransform")
        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='mpl_graphs_transform',
            description='Convert data streams to Matplotlib graphs',
            module='ion.processes.data.transforms.viz.matplotlib_graphs',
            class_name='VizTransformMatplotlibGraphs',
            process_source='VizTransformMatplotlibGraphs source code here...')
        try:
            procdef_id = self.dataprocessclient.create_data_process_definition(
                dpd_obj)
        except Exception as ex:
            self.fail(
                "failed to create new VizTransformMatplotlibGraphs data process definition: %s" % ex)

        # Create a stream definition for the data
        stream_def_id = self.pubsubclient.create_stream_definition(
            container=VizTransformMatplotlibGraphs.outgoing_stream_def,
            name='VizTransformMatplotlibGraphs')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            stream_def_id, procdef_id)

        return procdef_id

    def _validate_mpl_graphs_results(self, results_stream_def, results):

        cc = self.container
        assertions = self.assertTrue

        for g in results:
            if isinstance(g, Granule):
                tx = TaxyTool.load_from_granule(g)
                rdt = RecordDictionaryTool.load_from_granule(g)
                #log.warn(tx.pretty_print())
                #log.warn(rdt.pretty_print())

                graphs = rdt['matplotlib_graphs']
                for graph in graphs:
                    assertions(
                        graph['viz_product_type'] == 'matplotlib_graphs')
                    # Check to see if the list (numpy array) contains actual images
                    assertions(
                        imghdr.what(graph['image_name'], graph['image_obj']) == 'png')

        return

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Not integrated for CEI')
    #@unittest.skip("Skipping for debugging ")
    def test_google_dt_transform_workflow(self):

        assertions = self.assertTrue

        # Build the workflow definition
        workflow_def_obj = IonObject(
            RT.WorkflowDefinition,
            name='GoogleDT_Test_Workflow',
            description='Tests the workflow of converting stream data to Google DT')

        # Add a transformation process definition
        google_dt_procdef_id = self._create_google_dt_data_process_definition()
        workflow_step_obj = IonObject(
            'DataProcessWorkflowStep',
            data_process_definition_id=google_dt_procdef_id)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        # Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(
            workflow_def_obj)

        # The list of data product streams to monitor
        data_product_stream_ids = list()

        # Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self._create_ctd_input_stream_and_data_product()
        data_product_stream_ids.append(ctd_stream_id)

        # Create and start the workflow
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_data_product_id)

        workflow_output_ids, _ = self.rrclient.find_subjects(
            RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True)
        assertions(len(workflow_output_ids) == 1)

        # Walk the associations to find the appropriate output data streams to validate the messages
        workflow_dp_ids, _ = self.rrclient.find_objects(
            workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        assertions(len(workflow_dp_ids) == 1)

        for dp_id in workflow_dp_ids:
            stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                       None, True)
            assertions(len(stream_ids) == 1)
            data_product_stream_ids.append(stream_ids[0])

        # Start the input stream process
        ctd_sim_pid = self._start_simple_input_stream_process(ctd_stream_id)

        # Start the output stream listener to monitor and verify messages
        results = self._start_output_stream_listener(data_product_stream_ids)

        # Stop the workflow processes
        self.workflowclient.terminate_data_process_workflow(
            workflow_id, False)  # Should test true at some point

        # Stop the flow and parse the messages...
        # Kill the ctd simulator process - we have enough data
        self.process_dispatcher.cancel_process(ctd_sim_pid)

        # Validate the data from each of the messages along the way
        self._validate_google_dt_results(
            VizTransformGoogleDT.outgoing_stream_def, results)

        # Cleanup to make sure delete is correct
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        workflow_def_ids, _ = self.rrclient.find_resources(
            restype=RT.WorkflowDefinition)
        assertions(len(workflow_def_ids) == 0)
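The _validate_mpl_graphs_results helper above leans on the stdlib imghdr module: when imghdr.what(filename, h) is given a byte string as h, it sniffs those bytes and ignores the filename. A self-contained check against the eight-byte PNG signature (the stub payload below is fabricated for illustration):

import imghdr

PNG_SIGNATURE = b'\x89PNG\r\n\x1a\n'
stub_image = PNG_SIGNATURE + b'\x00' * 16   # just enough header for type sniffing

# The filename is ignored because the byte string is supplied
assert imghdr.what('ignored.png', stub_image) == 'png'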
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Not integrated for CEI')
    #@unittest.skip("Skipping for debugging ")
    def test_mpl_graphs_transform_workflow(self):

        assertions = self.assertTrue

        # Build the workflow definition
        workflow_def_obj = IonObject(
            RT.WorkflowDefinition,
            name='Mpl_Graphs_Test_Workflow',
            description='Tests the workflow of converting stream data to Matplotlib graphs')

        # Add a transformation process definition
        mpl_graphs_procdef_id = self._create_mpl_graphs_data_process_definition()
        workflow_step_obj = IonObject(
            'DataProcessWorkflowStep',
            data_process_definition_id=mpl_graphs_procdef_id)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        # Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(
            workflow_def_obj)

        # The list of data product streams to monitor
        data_product_stream_ids = list()

        # Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self._create_ctd_input_stream_and_data_product()
        data_product_stream_ids.append(ctd_stream_id)

        # Create and start the workflow
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_data_product_id)

        workflow_output_ids, _ = self.rrclient.find_subjects(
            RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True)
        assertions(len(workflow_output_ids) == 1)

        # Walk the associations to find the appropriate output data streams to validate the messages
        workflow_dp_ids, _ = self.rrclient.find_objects(
            workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        assertions(len(workflow_dp_ids) == 1)

        for dp_id in workflow_dp_ids:
            stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                       None, True)
            assertions(len(stream_ids) == 1)
            data_product_stream_ids.append(stream_ids[0])

        # Start the input stream process
        ctd_sim_pid = self._start_sinusoidal_input_stream_process(ctd_stream_id)

        # Start the output stream listener to monitor and verify messages
        results = self._start_output_stream_listener(data_product_stream_ids)

        # Stop the workflow processes
        self.workflowclient.terminate_data_process_workflow(
            workflow_id, False)  # Should test true at some point

        # Stop the flow and parse the messages...
        # Kill the ctd simulator process - we have enough data
        self.process_dispatcher.cancel_process(ctd_sim_pid)

        # Validate the data from each of the messages along the way
        self._validate_mpl_graphs_results(
            VizTransformMatplotlibGraphs.outgoing_stream_def, results)

        # Cleanup to make sure delete is correct
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        workflow_def_ids, _ = self.rrclient.find_resources(
            restype=RT.WorkflowDefinition)
        assertions(len(workflow_def_ids) == 0)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Not integrated for CEI')
    #@unittest.skip("Skipping for debugging ")
    def test_multiple_workflow_instances(self):

        assertions = self.assertTrue

        # Build the workflow definition
        workflow_def_obj = IonObject(
            RT.WorkflowDefinition,
            name='Multiple_Test_Workflow',
            description='Tests the workflow of converting stream data')

        # Add a transformation process definition
        google_dt_procdef_id = self._create_google_dt_data_process_definition()
        workflow_step_obj = IonObject(
            'DataProcessWorkflowStep',
            data_process_definition_id=google_dt_procdef_id)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        # Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(
            workflow_def_obj)

        # The list of data product streams to monitor
        data_product_stream_ids = list()

        # Create the first input data product
        ctd_stream_id1, ctd_parsed_data_product_id1 = self._create_ctd_input_stream_and_data_product(
            'ctd_parsed1')
        data_product_stream_ids.append(ctd_stream_id1)

        # Create and start the first workflow
        workflow_id1, workflow_product_id1 = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_data_product_id1)

        # Create the second input data product
        ctd_stream_id2, ctd_parsed_data_product_id2 = self._create_ctd_input_stream_and_data_product(
            'ctd_parsed2')
        data_product_stream_ids.append(ctd_stream_id2)

        # Create and start the second workflow
        workflow_id2, workflow_product_id2 = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_data_product_id2)

        # Verify that both workflow instances were registered
        workflow_ids, _ = self.rrclient.find_resources(restype=RT.Workflow)
        assertions(len(workflow_ids) == 2)

        # Start the first input stream process
        ctd_sim_pid1 = self._start_sinusoidal_input_stream_process(ctd_stream_id1)

        # Start the second input stream process
        ctd_sim_pid2 = self._start_simple_input_stream_process(ctd_stream_id2)

        # Start the output stream listener to monitor a set number of messages being sent through the workflows
        results = self._start_output_stream_listener(
            data_product_stream_ids, message_count_per_stream=5)

        # Stop the flow of messages...
        # Kill the ctd simulator processes - we have enough data
        self.process_dispatcher.cancel_process(ctd_sim_pid1)
        self.process_dispatcher.cancel_process(ctd_sim_pid2)

        # Stop the first workflow processes
        self.workflowclient.terminate_data_process_workflow(
            workflow_id1, False)  # Should test true at some point

        # Stop the second workflow processes
        self.workflowclient.terminate_data_process_workflow(
            workflow_id2, False)  # Should test true at some point

        workflow_ids, _ = self.rrclient.find_resources(restype=RT.Workflow)
        assertions(len(workflow_ids) == 0)

        # Cleanup to make sure delete is correct
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        workflow_def_ids, _ = self.rrclient.find_resources(
            restype=RT.WorkflowDefinition)
        assertions(len(workflow_def_ids) == 0)

        aid_list = self.rrclient.find_associations(
            workflow_def_id, PRED.hasDataProcessDefinition)
        assertions(len(aid_list) == 0)
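Every listener in the workflow tests above has the same shape: a callback appends to a shared list and fires a gevent AsyncResult once the expected message count is reached, while the test body blocks on result.get(timeout=...). A broker-free sketch of that pattern; the producer greenlet stands in for pubsub delivery, and all names here are illustrative rather than part of the suite:

import gevent
from gevent.event import AsyncResult

def demo_count_listener(expected_count=6):
    results = []
    result = AsyncResult()

    def message_received(message):
        results.append(message)
        # Only signal once the expected number of messages has arrived
        if len(results) >= expected_count:
            result.set(True)

    def producer():
        for i in range(expected_count):
            gevent.sleep(0)              # yield, as a real subscriber callback would
            message_received({'seq': i})

    gevent.spawn(producer)
    # get() raises gevent.Timeout if the flow of messages stops early
    assert result.get(timeout=5)
    return results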
class TestTransformPrime(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url( 'res/deploy/r2deploy.yml') # Because hey why not?! self.dataset_management = DatasetManagementServiceClient() self.data_process_management = DataProcessManagementServiceClient() self.pubsub_management = PubsubManagementServiceClient() self.data_product_management = DataProductManagementServiceClient() def setup_streams(self): in_pdict_id = self.dataset_management.read_parameter_dictionary_by_name( 'sbe37_L0_test', id_only=True) out_pdict_id = self.dataset_management.read_parameter_dictionary_by_name( 'sbe37_L1_test', id_only=True) in_stream_def_id = self.pubsub_management.create_stream_definition( 'L0 SBE37', parameter_dictionary_id=in_pdict_id) self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_def_id) out_stream_def_id = self.pubsub_management.create_stream_definition( 'L1 SBE37', parameter_dictionary_id=out_pdict_id) self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_def_id) in_stream_id, in_route = self.pubsub_management.create_stream( 'L0 input', stream_definition_id=in_stream_def_id, exchange_point='test') self.addCleanup(self.pubsub_management.delete_stream, in_stream_id) out_stream_id, out_route = self.pubsub_management.create_stream( 'L0 output', stream_definition_id=out_stream_def_id, exchange_point='test') self.addCleanup(self.pubsub_management.delete_stream, out_stream_id) return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)] def setup_advanced_streams(self): in_pdict_id = out_pdict_id = self.dataset_management.read_parameter_dictionary_by_name( 'sbe37_LC_TEST', id_only=True) in_stream_def_id = self.pubsub_management.create_stream_definition( 'sbe37_instrument', parameter_dictionary_id=in_pdict_id, available_fields=[ 'time', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'lat', 'lon' ]) self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_def_id) out_stream_def_id = self.pubsub_management.create_stream_definition( 'sbe37_l2', parameter_dictionary_id=out_pdict_id, available_fields=['time', 'rho', 'PRACSAL_L2']) self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_def_id) in_stream_id, in_route = self.pubsub_management.create_stream( 'instrument stream', stream_definition_id=in_stream_def_id, exchange_point='test') self.addCleanup(self.pubsub_management.delete_stream, in_stream_id) out_stream_id, out_route = self.pubsub_management.create_stream( 'data product stream', stream_definition_id=out_stream_def_id, exchange_point='test') self.addCleanup(self.pubsub_management.delete_stream, out_stream_id) return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)] def preload(self): config = DotDict() config.op = 'load' config.scenario = 'BASE,LC_TEST' config.categories = 'ParameterFunctions,ParameterDefs,ParameterDictionary' config.path = 'res/preload/r2_ioc' self.container.spawn_process('preload', 'ion.processes.bootstrap.ion_loader', 'IONLoader', config) def setup_advanced_transform(self): self.preload() queue_name = 'transform_prime' stream_info = self.setup_advanced_streams() in_stream_id, in_stream_def_id = stream_info[0] out_stream_id, out_stream_def_id = stream_info[1] routes = {} routes[(in_stream_id, out_stream_id)] = None config = DotDict() config.process.queue_name = queue_name config.process.routes = routes config.process.publish_streams = {out_stream_id: out_stream_id} sub_id = self.pubsub_management.create_subscription( 
            queue_name, stream_ids=[in_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)
        self.container.spawn_process(
            'transform_prime',
            'ion.processes.data.transforms.transform_prime',
            'TransformPrime', config)

        listen_sub_id = self.pubsub_management.create_subscription(
            'listener', stream_ids=[out_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription,
                        listen_sub_id)
        self.pubsub_management.activate_subscription(listen_sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription,
                        listen_sub_id)

        return [(in_stream_id, in_stream_def_id),
                (out_stream_id, out_stream_def_id)]

    def setup_transform(self):
        self.preload()
        queue_name = 'transform_prime'

        stream_info = self.setup_streams()
        in_stream_id, in_stream_def_id = stream_info[0]
        out_stream_id, out_stream_def_id = stream_info[1]

        routes = {}
        routes[(in_stream_id, out_stream_id)] = None

        config = DotDict()
        config.process.queue_name = queue_name
        config.process.routes = routes
        config.process.publish_streams = {out_stream_id: out_stream_id}

        sub_id = self.pubsub_management.create_subscription(
            queue_name, stream_ids=[in_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)
        self.container.spawn_process(
            'transform_prime',
            'ion.processes.data.transforms.transform_prime',
            'TransformPrime', config)

        listen_sub_id = self.pubsub_management.create_subscription(
            'listener', stream_ids=[out_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription,
                        listen_sub_id)
        self.pubsub_management.activate_subscription(listen_sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription,
                        listen_sub_id)

        return [(in_stream_id, in_stream_def_id),
                (out_stream_id, out_stream_def_id)]

    def setup_validator(self, validator):
        listener = StandaloneStreamSubscriber('listener', validator)
        listener.start()
        self.addCleanup(listener.stop)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_execute_advanced_transform(self):
        # Runs a transform across L0-L2 with stream definitions including available fields
        streams = self.setup_advanced_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_def_id = streams[1]

        validation_event = Event()

        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['rho'], np.array([1001.0055034])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(
            stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]
        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))
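test_execute_transform below publishes the raw counts TEMPWAT_L0=280000, CONDWAT_L0=100000 and PRESWAT_L0=2789 and waits for the L1 values its validator pins. Those expectations follow directly from the conversion formulas registered in _get_pdict at the end of this file (with the calibration coefficient p_range fixed at 679.34040721); a quick standalone arithmetic check:

import numpy as np

TEMPWAT_L0, CONDWAT_L0, PRESWAT_L0 = 280000.0, 100000.0, 2789.0
p_range = 679.34040721   # fixed calibration coefficient from _get_pdict

TEMPWAT_L1 = (TEMPWAT_L0 / 10000.0) - 10.0
CONDWAT_L1 = (CONDWAT_L0 / 100000.0) - 0.5
PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)

assert np.allclose(TEMPWAT_L1, 18.0)
assert np.allclose(CONDWAT_L1, 0.5)
assert np.allclose(PRESWAT_L1, 0.04536611)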
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_execute_transform(self):
        streams = self.setup_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_def_id = streams[1]

        validation_event = Event()

        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
                return
            if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
                return
            if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(
            stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]
        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))
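The advanced-transform validator above pins rho to roughly 1001.0055034 kg m-3. That figure is produced by the Gibbs SeaWater chain documented in PubsubManagementIntTest._get_pdict below (PRACSAL via SP_from_C, then SA_from_SP, CT_from_t and rho). A sketch of the same chain applied to the L1 values from the simple transform test, assuming the python-gsw package is installed:

import numpy as np
import gsw

t, C, p = 18.0, 0.5, 0.04536611   # TEMPWAT_L1 (deg_C), CONDWAT_L1 (S m-1), PRESWAT_L1 (dbar)
lon, lat = -71.0, 45.0

PRACSAL = gsw.SP_from_C(C * 10.0, t, p)        # gsw expects conductivity in mS cm-1
abs_sal = gsw.SA_from_SP(PRACSAL, p, lon, lat)
cons_temp = gsw.CT_from_t(abs_sal, t, p)
DENSITY = gsw.rho(abs_sal, cons_temp, p)

assert np.allclose(DENSITY, 1001.0055034)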
class PubsubManagementIntTest(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.pubsub_management = PubsubManagementServiceClient() self.resource_registry = ResourceRegistryServiceClient() self.dataset_management = DatasetManagementServiceClient() self.data_product_management = DataProductManagementServiceClient() self.pdicts = {} self.queue_cleanup = list() self.exchange_cleanup = list() self.context_ids = set() def tearDown(self): for queue in self.queue_cleanup: xn = self.container.ex_manager.create_xn_queue(queue) xn.delete() for exchange in self.exchange_cleanup: xp = self.container.ex_manager.create_xp(exchange) xp.delete() self.cleanup_contexts() def test_stream_def_crud(self): # Test Creation pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict') stream_definition_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict.identifier) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_definition_id) # Make sure there is an assoc self.assertTrue(self.resource_registry.find_associations(subject=stream_definition_id, predicate=PRED.hasParameterDictionary, object=pdict.identifier, id_only=True)) # Test Reading stream_definition = self.pubsub_management.read_stream_definition(stream_definition_id) self.assertTrue(PubsubManagementService._compare_pdicts(pdict.dump(), stream_definition.parameter_dictionary)) # Test comparisons in_stream_definition_id = self.pubsub_management.create_stream_definition('L0 products', parameter_dictionary_id=pdict.identifier, available_fields=['time','temp','conductivity','pressure']) self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_definition_id) out_stream_definition_id = in_stream_definition_id self.assertTrue(self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id)) self.assertTrue(self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id)) out_stream_definition_id = self.pubsub_management.create_stream_definition('L2 Products', parameter_dictionary_id=pdict.identifier, available_fields=['time','salinity','density']) self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_definition_id) self.assertFalse(self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id)) self.assertTrue(self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id)) @unittest.skip('Needs to be refactored for cleanup') def test_validate_stream_defs(self): self.addCleanup(self.cleanup_contexts) #test no input incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = [] available_fields_out = [] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_0', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_0', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = 
self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test input with no output incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = [] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_1', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_1', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test available field missing parameter context definition -- missing PRESWAT_L0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = ['DENSITY'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_2', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_2', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test l1 from l0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_3', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_3', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test l2 from l0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1', 'DENSITY', 'PRACSAL']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_4', 
parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_4', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test Ln from L0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY','PRACSAL','TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_5', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_5', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test L2 from L1 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) outgoing_pdict_id = self._get_pdict(['DENSITY','PRACSAL','TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_6', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_6', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertTrue(result) #test L1 from L0 missing L0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON']) outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_7', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_7', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test L2 from L0 missing L0 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON']) 
outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']) available_fields_in = ['TIME', 'LAT', 'LON'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_8', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_8', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) #test L2 from L0 missing L1 incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']) outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL']) available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'] available_fields_out = ['DENSITY', 'PRACSAL'] incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_9', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in) self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id) outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_9', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out) self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id) result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id) self.assertFalse(result) def publish_on_stream(self, stream_id, msg): stream = self.pubsub_management.read_stream(stream_id) stream_route = stream.stream_route publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route) publisher.publish(msg) def test_stream_crud(self): stream_def_id = self.pubsub_management.create_stream_definition('test_definition', stream_type='stream') self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) topic_id = self.pubsub_management.create_topic(name='test_topic', exchange_point='test_exchange') self.addCleanup(self.pubsub_management.delete_topic, topic_id) self.exchange_cleanup.append('test_exchange') topic2_id = self.pubsub_management.create_topic(name='another_topic', exchange_point='outside') self.addCleanup(self.pubsub_management.delete_topic, topic2_id) stream_id, route = self.pubsub_management.create_stream(name='test_stream', topic_ids=[topic_id, topic2_id], exchange_point='test_exchange', stream_definition_id=stream_def_id) topics, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasTopic, id_only=True) self.assertEquals(topics,[topic_id]) defs, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True) self.assertTrue(len(defs)) stream = self.pubsub_management.read_stream(stream_id) self.assertEquals(stream.name,'test_stream') self.pubsub_management.delete_stream(stream_id) with self.assertRaises(NotFound): self.pubsub_management.read_stream(stream_id) defs, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True) self.assertFalse(len(defs)) topics, assocs = self.resource_registry.find_objects(subject=stream_id, 
predicate=PRED.hasTopic, id_only=True) self.assertFalse(len(topics)) def test_data_product_subscription(self): pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) tdom, sdom = time_series_domain() dp = DataProduct(name='ctd parsed') dp.spatial_domain = sdom.dump() dp.temporal_domain = tdom.dump() data_product_id = self.data_product_management.create_data_product(data_product=dp, stream_definition_id=stream_def_id) self.addCleanup(self.data_product_management.delete_data_product, data_product_id) subscription_id = self.pubsub_management.create_subscription('validator', data_product_ids=[data_product_id]) self.addCleanup(self.pubsub_management.delete_subscription, subscription_id) validated = Event() def validation(msg, route, stream_id): validated.set() stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True) dp_stream_id = stream_ids.pop() validator = StandaloneStreamSubscriber('validator', callback=validation) validator.start() self.addCleanup(validator.stop) self.pubsub_management.activate_subscription(subscription_id) self.addCleanup(self.pubsub_management.deactivate_subscription, subscription_id) route = self.pubsub_management.read_stream_route(dp_stream_id) publisher = StandaloneStreamPublisher(dp_stream_id, route) publisher.publish('hi') self.assertTrue(validated.wait(10)) def test_subscription_crud(self): stream_def_id = self.pubsub_management.create_stream_definition('test_definition', stream_type='stream') stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_exchange', stream_definition_id=stream_def_id) subscription_id = self.pubsub_management.create_subscription(name='test subscription', stream_ids=[stream_id], exchange_name='test_queue') self.exchange_cleanup.append('test_exchange') subs, assocs = self.resource_registry.find_objects(subject=subscription_id,predicate=PRED.hasStream,id_only=True) self.assertEquals(subs,[stream_id]) res, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='test_queue', id_only=True) self.assertEquals(len(res),1) subs, assocs = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(subs[0], res[0]) subscription = self.pubsub_management.read_subscription(subscription_id) self.assertEquals(subscription.exchange_name, 'test_queue') self.pubsub_management.delete_subscription(subscription_id) subs, assocs = self.resource_registry.find_objects(subject=subscription_id,predicate=PRED.hasStream,id_only=True) self.assertFalse(len(subs)) subs, assocs = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertFalse(len(subs)) self.pubsub_management.delete_stream(stream_id) self.pubsub_management.delete_stream_definition(stream_def_id) def test_move_before_activate(self): stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_xp') #-------------------------------------------------------------------------------- # Test moving before activate #-------------------------------------------------------------------------------- subscription_id = self.pubsub_management.create_subscription('first_queue', stream_ids=[stream_id]) xn_ids, 
_ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='first_queue', id_only=True) subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(xn_ids[0], subjects[0]) self.pubsub_management.move_subscription(subscription_id, exchange_name='second_queue') xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='second_queue', id_only=True) subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(len(subjects),1) self.assertEquals(subjects[0], xn_ids[0]) self.pubsub_management.delete_subscription(subscription_id) self.pubsub_management.delete_stream(stream_id) def test_move_activated_subscription(self): stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_xp') #-------------------------------------------------------------------------------- # Test moving after activate #-------------------------------------------------------------------------------- subscription_id = self.pubsub_management.create_subscription('first_queue', stream_ids=[stream_id]) self.pubsub_management.activate_subscription(subscription_id) xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='first_queue', id_only=True) subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(xn_ids[0], subjects[0]) self.verified = Event() def verify(m,r,s): self.assertEquals(m,'verified') self.verified.set() subscriber = StandaloneStreamSubscriber('second_queue', verify) subscriber.start() self.pubsub_management.move_subscription(subscription_id, exchange_name='second_queue') xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='second_queue', id_only=True) subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True) self.assertEquals(len(subjects),1) self.assertEquals(subjects[0], xn_ids[0]) publisher = StandaloneStreamPublisher(stream_id, route) publisher.publish('verified') self.assertTrue(self.verified.wait(2)) self.pubsub_management.deactivate_subscription(subscription_id) self.pubsub_management.delete_subscription(subscription_id) self.pubsub_management.delete_stream(stream_id) def test_queue_cleanup(self): stream_id, route = self.pubsub_management.create_stream('test_stream','xp1') xn_objs, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1') for xn_obj in xn_objs: xn = self.container.ex_manager.create_xn_queue(xn_obj.name) xn.delete() subscription_id = self.pubsub_management.create_subscription('queue1',stream_ids=[stream_id]) xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1') self.assertEquals(len(xn_ids),1) self.pubsub_management.delete_subscription(subscription_id) xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1') self.assertEquals(len(xn_ids),0) def test_activation_and_deactivation(self): stream_id, route = self.pubsub_management.create_stream('stream1','xp1') subscription_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id]) self.check1 = Event() def verifier(m,r,s): self.check1.set() subscriber = StandaloneStreamSubscriber('sub1',verifier) subscriber.start() publisher = StandaloneStreamPublisher(stream_id, route) publisher.publish('should not receive') 
self.assertFalse(self.check1.wait(0.25)) self.pubsub_management.activate_subscription(subscription_id) publisher.publish('should receive') self.assertTrue(self.check1.wait(2)) self.check1.clear() self.assertFalse(self.check1.is_set()) self.pubsub_management.deactivate_subscription(subscription_id) publisher.publish('should not receive') self.assertFalse(self.check1.wait(0.5)) self.pubsub_management.activate_subscription(subscription_id) publisher.publish('should receive') self.assertTrue(self.check1.wait(2)) subscriber.stop() self.pubsub_management.deactivate_subscription(subscription_id) self.pubsub_management.delete_subscription(subscription_id) self.pubsub_management.delete_stream(stream_id) def test_topic_crud(self): topic_id = self.pubsub_management.create_topic(name='test_topic', exchange_point='test_xp') self.exchange_cleanup.append('test_xp') topic = self.pubsub_management.read_topic(topic_id) self.assertEquals(topic.name,'test_topic') self.assertEquals(topic.exchange_point, 'test_xp') self.pubsub_management.delete_topic(topic_id) with self.assertRaises(NotFound): self.pubsub_management.read_topic(topic_id) def test_full_pubsub(self): self.sub1_sat = Event() self.sub2_sat = Event() def subscriber1(m,r,s): self.sub1_sat.set() def subscriber2(m,r,s): self.sub2_sat.set() sub1 = StandaloneStreamSubscriber('sub1', subscriber1) sub1.start() self.addCleanup(sub1.stop) sub2 = StandaloneStreamSubscriber('sub2', subscriber2) sub2.start() self.addCleanup(sub2.stop) log_topic = self.pubsub_management.create_topic('instrument_logs', exchange_point='instruments') self.addCleanup(self.pubsub_management.delete_topic, log_topic) science_topic = self.pubsub_management.create_topic('science_data', exchange_point='instruments') self.addCleanup(self.pubsub_management.delete_topic, science_topic) events_topic = self.pubsub_management.create_topic('notifications', exchange_point='events') self.addCleanup(self.pubsub_management.delete_topic, events_topic) log_stream, route = self.pubsub_management.create_stream('instrument1-logs', topic_ids=[log_topic], exchange_point='instruments') self.addCleanup(self.pubsub_management.delete_stream, log_stream) ctd_stream, route = self.pubsub_management.create_stream('instrument1-ctd', topic_ids=[science_topic], exchange_point='instruments') self.addCleanup(self.pubsub_management.delete_stream, ctd_stream) event_stream, route = self.pubsub_management.create_stream('notifications', topic_ids=[events_topic], exchange_point='events') self.addCleanup(self.pubsub_management.delete_stream, event_stream) raw_stream, route = self.pubsub_management.create_stream('temp', exchange_point='global.data') self.addCleanup(self.pubsub_management.delete_stream, raw_stream) subscription1 = self.pubsub_management.create_subscription('subscription1', stream_ids=[log_stream,event_stream], exchange_name='sub1') self.addCleanup(self.pubsub_management.delete_subscription, subscription1) subscription2 = self.pubsub_management.create_subscription('subscription2', exchange_points=['global.data'], stream_ids=[ctd_stream], exchange_name='sub2') self.addCleanup(self.pubsub_management.delete_subscription, subscription2) self.pubsub_management.activate_subscription(subscription1) self.addCleanup(self.pubsub_management.deactivate_subscription, subscription1) self.pubsub_management.activate_subscription(subscription2) self.addCleanup(self.pubsub_management.deactivate_subscription, subscription2) self.publish_on_stream(log_stream, 1) self.assertTrue(self.sub1_sat.wait(4)) 
        self.assertFalse(self.sub2_sat.is_set())

        self.publish_on_stream(raw_stream, 1)
        # subscription2 also listens on the 'global.data' exchange point, so
        # the raw stream publish should satisfy the second subscriber
        self.assertTrue(self.sub2_sat.wait(4))

    def test_topic_craziness(self):

        self.msg_queue = Queue()

        def subscriber1(m, r, s):
            self.msg_queue.put(m)

        sub1 = StandaloneStreamSubscriber('sub1', subscriber1)
        sub1.start()
        self.addCleanup(sub1.stop)

        # Tree 1
        topic1 = self.pubsub_management.create_topic('topic1', exchange_point='xp1')
        self.addCleanup(self.pubsub_management.delete_topic, topic1)
        topic2 = self.pubsub_management.create_topic('topic2', exchange_point='xp1', parent_topic_id=topic1)
        self.addCleanup(self.pubsub_management.delete_topic, topic2)
        topic3 = self.pubsub_management.create_topic('topic3', exchange_point='xp1', parent_topic_id=topic1)
        self.addCleanup(self.pubsub_management.delete_topic, topic3)
        topic4 = self.pubsub_management.create_topic('topic4', exchange_point='xp1', parent_topic_id=topic2)
        self.addCleanup(self.pubsub_management.delete_topic, topic4)
        topic5 = self.pubsub_management.create_topic('topic5', exchange_point='xp1', parent_topic_id=topic2)
        self.addCleanup(self.pubsub_management.delete_topic, topic5)
        topic6 = self.pubsub_management.create_topic('topic6', exchange_point='xp1', parent_topic_id=topic3)
        self.addCleanup(self.pubsub_management.delete_topic, topic6)
        topic7 = self.pubsub_management.create_topic('topic7', exchange_point='xp1', parent_topic_id=topic3)
        self.addCleanup(self.pubsub_management.delete_topic, topic7)

        # Tree 2
        topic8 = self.pubsub_management.create_topic('topic8', exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_topic, topic8)
        topic9 = self.pubsub_management.create_topic('topic9', exchange_point='xp2', parent_topic_id=topic8)
        self.addCleanup(self.pubsub_management.delete_topic, topic9)
        topic10 = self.pubsub_management.create_topic('topic10', exchange_point='xp2', parent_topic_id=topic9)
        self.addCleanup(self.pubsub_management.delete_topic, topic10)
        topic11 = self.pubsub_management.create_topic('topic11', exchange_point='xp2', parent_topic_id=topic9)
        self.addCleanup(self.pubsub_management.delete_topic, topic11)
        topic12 = self.pubsub_management.create_topic('topic12', exchange_point='xp2', parent_topic_id=topic11)
        self.addCleanup(self.pubsub_management.delete_topic, topic12)
        topic13 = self.pubsub_management.create_topic('topic13', exchange_point='xp2', parent_topic_id=topic11)
        self.addCleanup(self.pubsub_management.delete_topic, topic13)
        self.exchange_cleanup.extend(['xp1', 'xp2'])

        stream1_id, route = self.pubsub_management.create_stream('stream1', topic_ids=[topic7, topic4, topic5], exchange_point='xp1')
        self.addCleanup(self.pubsub_management.delete_stream, stream1_id)
        stream2_id, route = self.pubsub_management.create_stream('stream2', topic_ids=[topic8], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream2_id)
        stream3_id, route = self.pubsub_management.create_stream('stream3', topic_ids=[topic10, topic13], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream3_id)
        stream4_id, route = self.pubsub_management.create_stream('stream4', topic_ids=[topic9], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream4_id)
        stream5_id, route = self.pubsub_management.create_stream('stream5', topic_ids=[topic11], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream5_id)

        subscription1 = self.pubsub_management.create_subscription('sub1', topic_ids=[topic1])
        self.addCleanup(self.pubsub_management.delete_subscription, subscription1)
        subscription2 = 
self.pubsub_management.create_subscription('sub2', topic_ids=[topic8], exchange_name='sub1') self.addCleanup(self.pubsub_management.delete_subscription, subscription2) subscription3 = self.pubsub_management.create_subscription('sub3', topic_ids=[topic9], exchange_name='sub1') self.addCleanup(self.pubsub_management.delete_subscription, subscription3) subscription4 = self.pubsub_management.create_subscription('sub4', topic_ids=[topic10,topic13, topic11], exchange_name='sub1') self.addCleanup(self.pubsub_management.delete_subscription, subscription4) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription1) self.publish_on_stream(stream1_id,1) self.assertEquals(self.msg_queue.get(timeout=10), 1) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.1) self.pubsub_management.deactivate_subscription(subscription1) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription2) self.publish_on_stream(stream2_id,2) self.assertEquals(self.msg_queue.get(timeout=10), 2) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.1) self.pubsub_management.deactivate_subscription(subscription2) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription3) self.publish_on_stream(stream2_id, 3) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.publish_on_stream(stream3_id, 4) self.assertEquals(self.msg_queue.get(timeout=10),4) self.pubsub_management.deactivate_subscription(subscription3) #-------------------------------------------------------------------------------- self.pubsub_management.activate_subscription(subscription4) self.publish_on_stream(stream4_id, 5) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.publish_on_stream(stream5_id, 6) self.assertEquals(self.msg_queue.get(timeout=10),6) with self.assertRaises(Empty): self.msg_queue.get(timeout=0.3) self.pubsub_management.deactivate_subscription(subscription4) #-------------------------------------------------------------------------------- def cleanup_contexts(self): for context_id in self.context_ids: self.dataset_management.delete_parameter_context(context_id) def add_context_to_cleanup(self, context_id): self.context_ids.add(context_id) def _get_pdict(self, filter_values): t_ctxt = ParameterContext('TIME', param_type=QuantityType(value_encoding=np.dtype('int64'))) t_ctxt.uom = 'seconds since 01-01-1900' t_ctxt_id = self.dataset_management.create_parameter_context(name='TIME', parameter_context=t_ctxt.dump(), parameter_type='quantity<int64>', units=t_ctxt.uom) self.add_context_to_cleanup(t_ctxt_id) lat_ctxt = ParameterContext('LAT', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999) lat_ctxt.axis = AxisTypeEnum.LAT lat_ctxt.uom = 'degree_north' lat_ctxt_id = self.dataset_management.create_parameter_context(name='LAT', parameter_context=lat_ctxt.dump(), parameter_type='quantity<float32>', units=lat_ctxt.uom) self.add_context_to_cleanup(lat_ctxt_id) lon_ctxt = ParameterContext('LON', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999) lon_ctxt.axis = AxisTypeEnum.LON lon_ctxt.uom = 'degree_east' lon_ctxt_id = self.dataset_management.create_parameter_context(name='LON', parameter_context=lon_ctxt.dump(), parameter_type='quantity<float32>', units=lon_ctxt.uom) 
        self.add_context_to_cleanup(lon_ctxt_id)

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext('TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        temp_ctxt.uom = 'deg_C'
        temp_ctxt_id = self.dataset_management.create_parameter_context(name='TEMPWAT_L0', parameter_context=temp_ctxt.dump(), parameter_type='quantity<float32>', units=temp_ctxt.uom)
        self.add_context_to_cleanup(temp_ctxt_id)

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext('CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        cond_ctxt.uom = 'S m-1'
        cond_ctxt_id = self.dataset_management.create_parameter_context(name='CONDWAT_L0', parameter_context=cond_ctxt.dump(), parameter_type='quantity<float32>', units=cond_ctxt.uom)
        self.add_context_to_cleanup(cond_ctxt_id)

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext('PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        press_ctxt.uom = 'dbar'
        press_ctxt_id = self.dataset_management.create_parameter_context(name='PRESWAT_L0', parameter_context=press_ctxt.dump(), parameter_type='quantity<float32>', units=press_ctxt.uom)
        self.add_context_to_cleanup(press_ctxt_id)

        # Dependent Parameters (see the arithmetic sanity-check note after this method)

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        tl1_pmap = {'T': 'TEMPWAT_L0'}
        expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'], param_map=tl1_pmap)
        tempL1_ctxt = ParameterContext('TEMPWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        tempL1_ctxt_id = self.dataset_management.create_parameter_context(name=tempL1_ctxt.name, parameter_context=tempL1_ctxt.dump(), parameter_type='pfunc', units=tempL1_ctxt.uom)
        self.add_context_to_cleanup(tempL1_ctxt_id)

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        cl1_pmap = {'C': 'CONDWAT_L0'}
        expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'], param_map=cl1_pmap)
        condL1_ctxt = ParameterContext('CONDWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        condL1_ctxt_id = self.dataset_management.create_parameter_context(name=condL1_ctxt.name, parameter_context=condL1_ctxt.dump(), parameter_type='pfunc', units=condL1_ctxt.uom)
        self.add_context_to_cleanup(condL1_ctxt_id)

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        # PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'], param_map=pl1_pmap)
        presL1_ctxt = ParameterContext('PRESWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        presL1_ctxt.uom = 'dbar'
        presL1_ctxt_id = self.dataset_management.create_parameter_context(name=presL1_ctxt.name, parameter_context=presL1_ctxt.dump(), parameter_type='pfunc', units=presL1_ctxt.uom)
        self.add_context_to_cleanup(presL1_ctxt_id)

        # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
        # https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
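        # Note: gsw.SP_from_C expects conductivity in mS/cm, while CONDWAT_L1 is in
        # S m-1; the nested NumexprFunction below applies the *10 scaling
        # (1 S m-1 == 10 mS/cm) before the values reach the gsw call.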
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        sal_pmap = {'C': NumexprFunction('CONDWAT_L1*10', 'C*10', ['C'], param_map={'C': 'CONDWAT_L1'}), 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'}
        sal_kwargmap = None
        expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist, sal_kwargmap, sal_pmap)
        sal_ctxt = ParameterContext('PRACSAL', param_type=ParameterFunctionType(expr), variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        sal_ctxt_id = self.dataset_management.create_parameter_context(name=sal_ctxt.name, parameter_context=sal_ctxt.dump(), parameter_type='pfunc', units=sal_ctxt.uom)
        self.add_context_to_cleanup(sal_ctxt_id)

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON', 'LAT'])
        cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction('DENSITY', owner, 'rho', [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = ParameterContext('DENSITY', param_type=ParameterFunctionType(dens_expr), variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        dens_ctxt_id = self.dataset_management.create_parameter_context(name=dens_ctxt.name, parameter_context=dens_ctxt.dump(), parameter_type='pfunc', units=dens_ctxt.uom)
        self.add_context_to_cleanup(dens_ctxt_id)

        ids = [t_ctxt_id, lat_ctxt_id, lon_ctxt_id, temp_ctxt_id, cond_ctxt_id, press_ctxt_id, tempL1_ctxt_id, condL1_ctxt_id, presL1_ctxt_id, sal_ctxt_id, dens_ctxt_id]
        contexts = [t_ctxt, lat_ctxt, lon_ctxt, temp_ctxt, cond_ctxt, press_ctxt, tempL1_ctxt, condL1_ctxt, presL1_ctxt, sal_ctxt, dens_ctxt]
        context_ids = [ids[i] for i, ctxt in enumerate(contexts) if ctxt.name in filter_values]
        pdict_name = '_'.join([ctxt.name for ctxt in contexts if ctxt.name in filter_values])

        try:
            return self.pdicts[pdict_name]
        except KeyError:
            pdict_id = self.dataset_management.create_parameter_dictionary(pdict_name, parameter_context_ids=context_ids, temporal_context='time')
            self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)
            self.pdicts[pdict_name] = pdict_id
            return pdict_id
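
# A quick sanity check of the L0 -> L1 conversions defined in _get_pdict above,
# done with plain numpy outside the ParameterFunction machinery (the raw counts
# are made-up illustration values, not instrument output):
#
#   >>> import numpy as np
#   >>> (np.array([280000.0]) / 10000) - 10      # TEMPWAT_L1 -> 18.0 deg_C
#   >>> (np.array([150000.0]) / 100000) - 0.5    # CONDWAT_L1 -> 1.0 S m-1
#   >>> p_range = 679.34040721
#   >>> (np.array([55706.0]) * p_range / (0.85 * 65536)) - (0.05 * p_range)
#   ...                                          # PRESWAT_L1 -> ~645.4 dbar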
class TestVisualizationServiceIntegration(VisualizationIntegrationTestHelper):

    def setUp(self):
        # Start container
        logging.disable(logging.ERROR)
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        logging.disable(logging.NOTSET)

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.pubsubclient = PubsubManagementServiceClient(node=self.container.node)
        self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.datasetclient = DatasetManagementServiceClient(node=self.container.node)
        self.workflowclient = WorkflowManagementServiceClient(node=self.container.node)
        self.process_dispatcher = ProcessDispatcherServiceClient(node=self.container.node)
        self.vis_client = VisualizationServiceClient(node=self.container.node)

        self.ctd_stream_def = SBE37_CDM_stream_definition()

    def validate_messages(self, msgs):
        cc = self.container
        assertions = self.assertTrue

        rdt = RecordDictionaryTool.load_from_granule(msgs.body)

        vardict = {}
        vardict['temp'] = get_safe(rdt, 'temp')
        vardict['time'] = get_safe(rdt, 'time')
        print vardict['time']
        print vardict['temp']

    @attr('LOCOINT')
    #@patch.dict('pyon.ion.exchange.CFG', {'container':{'exchange':{'auto_register': False}}})
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Not integrated for CEI')
    @unittest.skip("in progress")
    def test_visualization_queue(self):
        assertions = self.assertTrue

        # The list of data product streams to monitor
        data_product_stream_ids = list()

        # Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()
        data_product_stream_ids.append(ctd_stream_id)

        user_queue_name = 'user_queue'
        xq = self.container.ex_manager.create_xn_queue(user_queue_name)

        salinity_subscription_id = self.pubsubclient.create_subscription(
            query=StreamQuery(data_product_stream_ids),
            exchange_name=user_queue_name,
            exchange_point='science_data',
            name="user visualization queue"
        )

        subscriber_registrar = StreamSubscriberRegistrar(container=self.container)
        #subscriber = subscriber_registrar.create_subscriber(exchange_name=user_queue_name)
        #subscriber.start()

        # Using endpoint Subscriber directly; but should be a Stream-based subscriber that does not require a process
        #subscriber = Subscriber(from_name=(subscriber_registrar.XP, user_queue_name), callback=cb)
        subscriber = Subscriber(from_name=xq)
        subscriber.initialize()

        # after the queue has been created it is safe to activate the subscription
        self.pubsubclient.activate_subscription(subscription_id=salinity_subscription_id)

        # Start the output stream listener to monitor and collect messages
        #results = self.start_output_stream_and_listen(None, data_product_stream_ids)

        # Not sure why this is needed - but it is
        #subscriber._chan.stop_consume()

        ctd_sim_pid = self.start_simple_input_stream_process(ctd_stream_id)
        gevent.sleep(10.0)  # Send some messages - don't care how many

        msg_count, _ = xq.get_stats()
        log.info('Messages in user queue 1: %s ' % msg_count)

        # Validate the data from each of the messages along the way
        #self.validate_messages(results)

        # for x in range(msg_count):
        #     mo = subscriber.get_one_msg(timeout=1)
        #     print mo.body
        #     mo.ack()

        msgs = subscriber.get_all_msgs(timeout=2)
        for x in range(len(msgs)):
            msgs[x].ack()
            self.validate_messages(msgs[x])
            # print msgs[x].body

        # Should be zero after pulling all of the messages.
        msg_count, _ = xq.get_stats()
        log.info('Messages in user queue 2: %s ' % msg_count)

        # Trying to continue to receive messages in the queue
        gevent.sleep(5.0)  # Send some messages - don't care how many

        # Turning off after everything - since it is more representative of an always-on stream of data!
        self.process_dispatcher.cancel_process(ctd_sim_pid)  # kill the ctd simulator process - that is enough data

        # Should see more messages in the queue
        msg_count, _ = xq.get_stats()
        log.info('Messages in user queue 3: %s ' % msg_count)

        msgs = subscriber.get_all_msgs(timeout=2)
        for x in range(len(msgs)):
            msgs[x].ack()
            self.validate_messages(msgs[x])

        # Should be zero after pulling all of the messages.
        msg_count, _ = xq.get_stats()
        log.info('Messages in user queue 4: %s ' % msg_count)

        subscriber.close()

    #@unittest.skip("in progress")
    def test_realtime_visualization(self):
        assertions = self.assertTrue

        # Build the workflow definition
        workflow_def_obj = IonObject(RT.WorkflowDefinition,
            name='GoogleDT_Test_Workflow',
            description='Tests the workflow of converting stream data to Google DT')

        # Add a transformation process definition
        google_dt_procdef_id = self.create_google_dt_data_process_definition()
        workflow_step_obj = IonObject('DataProcessWorkflowStep',
            data_process_definition_id=google_dt_procdef_id,
            persist_process_output_data=True)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        # Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(workflow_def_obj)

        # Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()

        # Create and start the workflow
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(workflow_def_id, ctd_parsed_data_product_id, timeout=20)

        ctd_sim_pid = self.start_sinusoidal_input_stream_process(ctd_stream_id)

        # TODO - Need to add workflow creation for google data table
        vis_params = {}
        vis_params['in_product_type'] = 'google_dt'
        vis_token = self.vis_client.initiate_realtime_visualization(data_product_id=workflow_product_id, visualization_parameters=vis_params)

        # Trying to continue to receive messages in the queue
        gevent.sleep(10.0)  # Send some messages - don't care how many

        # TODO - find out what the actual return data type should be
        vis_data = self.vis_client.get_realtime_visualization_data(vis_token)
        print vis_data

        # Trying to continue to receive messages in the queue
        gevent.sleep(5.0)  # Send some messages - don't care how many

        # Turning off after everything - since it is more representative of an always-on stream of data!
        # todo remove the try except
        try:
            self.process_dispatcher.cancel_process(ctd_sim_pid)  # kill the ctd simulator process - that is enough data
        except:
            log.warning("cancelling process did not work")

        vis_data = self.vis_client.get_realtime_visualization_data(vis_token)
        print vis_data

        self.vis_client.terminate_realtime_visualization_data(vis_token)

        # Stop the workflow processes
        self.workflowclient.terminate_data_process_workflow(workflow_id, False)  # Should test true at some point

        # Cleanup to make sure delete is correct.
        self.workflowclient.delete_workflow_definition(workflow_def_id)

    def test_google_dt_overview_visualization(self):
        # Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()
        ctd_sim_pid = self.start_sinusoidal_input_stream_process(ctd_stream_id)

        # Generate some data for a few seconds
        gevent.sleep(5.0)

        # Turning off after everything - since it is more representative of an always-on stream of data!
        self.process_dispatcher.cancel_process(ctd_sim_pid)  # kill the ctd simulator process - that is enough data

        # Use the data product to test the data retrieval and google dt generation capability of the vis service
        vis_data = self.vis_client.get_visualization_data(ctd_parsed_data_product_id)

        # validate the returned data
        self.validate_vis_service_google_dt_results(vis_data)

    def test_mpl_graphs_overview_visualization(self):
        # Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()
        ctd_sim_pid = self.start_sinusoidal_input_stream_process(ctd_stream_id)

        # Generate some data for a few seconds
        gevent.sleep(5.0)

        # Turning off after everything - since it is more representative of an always-on stream of data!
        self.process_dispatcher.cancel_process(ctd_sim_pid)  # kill the ctd simulator process - that is enough data

        # Use the data product to test the data retrieval and matplotlib graph generation capability of the vis service
        vis_data = self.vis_client.get_visualization_image(ctd_parsed_data_product_id)

        # validate the returned data
        self.validate_vis_service_mpl_graphs_results(vis_data)

        return
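
# For reference, a minimal sketch of the realtime-visualization polling loop that
# test_realtime_visualization above exercises in two single shots (names come
# from the test itself; the 1-second cadence and 30-iteration cap are
# illustrative assumptions, not part of the service contract):
#
#   vis_token = self.vis_client.initiate_realtime_visualization(
#       data_product_id=workflow_product_id, visualization_parameters=vis_params)
#   for _ in xrange(30):
#       chunk = self.vis_client.get_realtime_visualization_data(vis_token)
#       if chunk:
#           log.info("got google_dt chunk: %s", chunk)
#       gevent.sleep(1.0)
#   self.vis_client.terminate_realtime_visualization_data(vis_token)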
def test_usgs_integration(self):
    '''
    test_usgs_integration
    Test full DM Services Integration using usgs
    '''
    cc = self.container
    assertions = self.assertTrue

    #-----------------------------
    # Copy below here
    #-----------------------------
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)
    transform_management_service = TransformManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    process_list = []
    datasets = []

    datastore_name = 'test_usgs_integration'

    #---------------------------
    # Set up ingestion
    #---------------------------
    # Configure ingestion using eight workers, ingesting to the test_usgs_integration datastore with the SCIDATA profile
    log.debug('Calling create_ingestion_configuration')
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        number_of_workers=8
    )
    #
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    usgs_stream_def = USGS_stream_definition()

    stream_def_id = pubsub_management_service.create_stream_definition(container=usgs_stream_def, name='Junk definition')

    #---------------------------
    # Set up the producers (USGS stream publishers)
    #---------------------------
    # Launch two simulated USGS producers
    for iteration in xrange(2):
        # Make a stream to output on
        stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)

        #---------------------------
        # Set up the datasets
        #---------------------------
        dataset_id = dataset_management_service.create_dataset(
            stream_id=stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule'
        )
        # Keep track of the datasets
        datasets.append(dataset_id)

        stream_policy_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id
        )

        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.agents.eoi.handler.usgs_stream_publisher',
            'class': 'UsgsPublisher'
        }
        configuration = {
            'process': {
                'stream_id': stream_id,
            }
        }
        procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)
        log.debug('LUKE_DEBUG: procdef_id: %s', procdef_id)
        pid = process_dispatcher.schedule_process(process_definition_id=procdef_id, configuration=configuration)

        # Keep track, we'll kill 'em later.
        process_list.append(pid)

    # Get about 4 seconds of data
    time.sleep(4)

    #---------------------------
    # Stop producing data
    #---------------------------
    for process in process_list:
        process_dispatcher.cancel_process(process)

    #----------------------------------------------
    # The replay and the transform, a love story.
    #----------------------------------------------
    # Happy Valentines to the clever coder who catches the above!
    transform_definition = ProcessDefinition()
    transform_definition.executable = {
        'module': 'ion.processes.data.transforms.transform_example',
        'class': 'TransformCapture'
    }
    transform_definition_id = process_dispatcher.create_process_definition(process_definition=transform_definition)

    dataset_id = datasets.pop()  # Just need one for now
    replay_id, stream_id = data_retriever_service.define_replay(dataset_id=dataset_id)

    #--------------------------------------------
    # I'm Selling magazine subscriptions here!
    #--------------------------------------------
    subscription = pubsub_management_service.create_subscription(query=StreamQuery(stream_ids=[stream_id]),
        exchange_name='transform_capture_point')

    #--------------------------------------------
    # Start the transform (capture)
    #--------------------------------------------
    transform_id = transform_management_service.create_transform(
        name='capture_transform',
        in_subscription_id=subscription,
        process_definition_id=transform_definition_id
    )

    transform_management_service.activate_transform(transform_id=transform_id)

    #--------------------------------------------
    # BEGIN REPLAY!
    #--------------------------------------------
    data_retriever_service.start_replay(replay_id=replay_id)

    #--------------------------------------------
    # Lets get some boundaries
    #--------------------------------------------
    bounds = dataset_management_service.get_dataset_bounds(dataset_id=dataset_id)
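
# Data path exercised by test_usgs_integration above, for reference:
#   UsgsPublisher -> stream -> ingestion workers -> dataset (CouchDB, SCIDATA profile)
#   dataset -> define_replay -> replay stream -> 'transform_capture_point'
#   subscription -> TransformCapture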
class TestOmsLaunch(IonIntegrationTestCase):

    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.omsclient = ObservatoryManagementServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.dpclient = DataProductManagementServiceClient(node=self.container.node)
        self.pubsubcli = PubsubManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.dataset_management = DatasetManagementServiceClient()

        self.platformModel_id = None

        # rsn_oms: to retrieve network structure and information from RSN-OMS:
        # Note that OmsClientFactory will create an "embedded" RSN OMS
        # simulator object by default.
        self.rsn_oms = OmsClientFactory.create_instance()

        self.all_platforms = {}
        self.topology = {}
        self.agent_device_map = {}
        self.agent_streamconfig_map = {}

        self._async_data_result = AsyncResult()
        self._data_subscribers = []
        self._samples_received = []
        self.addCleanup(self._stop_data_subscribers)

        self._async_event_result = AsyncResult()
        self._event_subscribers = []
        self._events_received = []
        self.addCleanup(self._stop_event_subscribers)
        self._start_event_subscriber()

        self._set_up_DataProduct_obj()
        self._set_up_PlatformModel_obj()

    def _set_up_DataProduct_obj(self):
        # Create data product object to be used for each of the platform log streams
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('platform_eng_parsed', id_only=True)
        self.platform_eng_stream_def_id = self.pubsubcli.create_stream_definition(
            name='platform_eng', parameter_dictionary_id=pdict_id)

        self.dp_obj = IonObject(RT.DataProduct,
            name='platform_eng data',
            description='platform_eng test',
            temporal_domain=tdom,
            spatial_domain=sdom)

    def _set_up_PlatformModel_obj(self):
        # Create PlatformModel
        platformModel_obj = IonObject(RT.PlatformModel,
            name='RSNPlatformModel',
            description="RSNPlatformModel")
        try:
            self.platformModel_id = self.imsclient.create_platform_model(platformModel_obj)
        except BadRequest as ex:
            self.fail("failed to create new PlatformModel: %s" % ex)
        log.debug('new PlatformModel id = %s', self.platformModel_id)

    def _traverse(self, platform_id, parent_platform_objs=None):
        """
        Recursive routine that repeatedly calls _prepare_platform to build
        the object dictionary for each platform.

        @param platform_id ID of the platform to be visited
        @param parent_platform_objs dict of objects associated to parent platform, if any.

        @retval the dict returned by _prepare_platform at this level.
        """
        log.info("Starting _traverse for %r", platform_id)

        plat_objs = self._prepare_platform(platform_id, parent_platform_objs)
        self.all_platforms[platform_id] = plat_objs

        # now, traverse the children:
        retval = self.rsn_oms.getSubplatformIDs(platform_id)
        subplatform_ids = retval[platform_id]
        for subplatform_id in subplatform_ids:
            self._traverse(subplatform_id, plat_objs)

        # note, topology indexed by platform_id
        self.topology[platform_id] = plat_objs['children']

        return plat_objs

    def _prepare_platform(self, platform_id, parent_platform_objs):
        """
        This routine generalizes the manual construction currently done in test_oms_launch.py.
        It is called by the recursive _traverse method so all platforms starting
        from a given base platform are prepared.

        Note: For simplicity in this test, sites are organized in the same
        hierarchical way as the platforms themselves.

        @param platform_id ID of the platform to be visited
        @param parent_platform_objs dict of objects associated to parent platform, if any.

        @retval a dict of associated objects similar to those in test_oms_launch
        """
        site__obj = IonObject(RT.PlatformSite,
            name='%s_PlatformSite' % platform_id,
            description='%s_PlatformSite platform site' % platform_id)

        site_id = self.omsclient.create_platform_site(site__obj)

        if parent_platform_objs:
            # establish hasSite association with the parent
            self.rrclient.create_association(
                subject=parent_platform_objs['site_id'],
                predicate=PRED.hasSite,
                object=site_id)

        # prepare platform attributes and ports:
        monitor_attributes = self._prepare_platform_attributes(platform_id)
        ports = self._prepare_platform_ports(platform_id)

        device__obj = IonObject(RT.PlatformDevice,
            name='%s_PlatformDevice' % platform_id,
            description='%s_PlatformDevice platform device' % platform_id,
            ports=ports,
            platform_monitor_attributes=monitor_attributes)

        device_id = self.imsclient.create_platform_device(device__obj)

        self.imsclient.assign_platform_model_to_platform_device(self.platformModel_id, device_id)
        self.rrclient.create_association(subject=site_id, predicate=PRED.hasDevice, object=device_id)
        self.damsclient.register_instrument(instrument_id=device_id)

        if parent_platform_objs:
            # establish hasDevice association with the parent
            self.rrclient.create_association(
                subject=parent_platform_objs['device_id'],
                predicate=PRED.hasDevice,
                object=device_id)

        agent__obj = IonObject(RT.PlatformAgent,
            name='%s_PlatformAgent' % platform_id,
            description='%s_PlatformAgent platform agent' % platform_id)

        agent_id = self.imsclient.create_platform_agent(agent__obj)

        if parent_platform_objs:
            # add this platform_id to parent's children:
            parent_platform_objs['children'].append(platform_id)

        self.imsclient.assign_platform_model_to_platform_agent(self.platformModel_id, agent_id)

        # agent_instance_obj = IonObject(RT.PlatformAgentInstance,
        #     name='%s_PlatformAgentInstance' % platform_id,
        #     description="%s_PlatformAgentInstance" % platform_id)
        #
        # agent_instance_id = self.imsclient.create_platform_agent_instance(
        #     agent_instance_obj, agent_id, device_id)

        plat_objs = {
            'platform_id': platform_id,
            'site__obj': site__obj,
            'site_id': site_id,
            'device__obj': device__obj,
            'device_id': device_id,
            'agent__obj': agent__obj,
            'agent_id': agent_id,
            # 'agent_instance_obj': agent_instance_obj,
            # 'agent_instance_id': agent_instance_id,
            'children': []
        }

        log.info("plat_objs for platform_id %r = %s", platform_id, str(plat_objs))

        self.agent_device_map[platform_id] = device__obj

        stream_config = self._create_stream_config(plat_objs)
        self.agent_streamconfig_map[platform_id] = stream_config
        # self._start_data_subscriber(agent_instance_id, stream_config)

        return plat_objs

    def _prepare_platform_attributes(self, platform_id):
        """
        Returns the list of PlatformMonitorAttributes objects corresponding to
        the attributes associated to the given platform.
""" result = self.rsn_oms.getPlatformAttributes(platform_id) self.assertTrue(platform_id in result) ret_infos = result[platform_id] monitor_attributes = [] for attrName, attrDfn in ret_infos.iteritems(): log.debug("platform_id=%r: preparing attribute=%r", platform_id, attrName) monitor_rate = attrDfn['monitorCycleSeconds'] units = attrDfn['units'] plat_attr_obj = IonObject(OT.PlatformMonitorAttributes, id=attrName, monitor_rate=monitor_rate, units=units) monitor_attributes.append(plat_attr_obj) return monitor_attributes def _prepare_platform_ports(self, platform_id): """ Returns the list of PlatformPort objects corresponding to the ports associated to the given platform. """ result = self.rsn_oms.getPlatformPorts(platform_id) self.assertTrue(platform_id in result) port_dict = result[platform_id] ports = [] for port_id, port in port_dict.iteritems(): log.debug("platform_id=%r: preparing port=%r", platform_id, port_id) ip_address = port['comms']['ip'] plat_port_obj = IonObject(OT.PlatformPort, port_id=port_id, ip_address=ip_address) ports.append(plat_port_obj) return ports def _create_stream_config(self, plat_objs): platform_id = plat_objs['platform_id'] device_id = plat_objs['device_id'] #create the log data product self.dp_obj.name = '%s platform_eng data' % platform_id data_product_id = self.dpclient.create_data_product(data_product=self.dp_obj, stream_definition_id=self.platform_eng_stream_def_id) self.damsclient.assign_data_product(input_resource_id=device_id, data_product_id=data_product_id) # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(data_product_id, PRED.hasStream, None, True) stream_config = self._build_stream_config(stream_ids[0]) return stream_config def _build_stream_config(self, stream_id=''): platform_eng_dictionary = DatasetManagementService.get_parameter_dictionary_by_name('platform_eng_parsed') #get the streamroute object from pubsub by passing the stream_id stream_def_ids, _ = self.rrclient.find_objects(stream_id, PRED.hasStreamDefinition, RT.StreamDefinition, True) stream_route = self.pubsubcli.read_stream_route(stream_id=stream_id) stream_config = {'routing_key' : stream_route.routing_key, 'stream_id' : stream_id, 'stream_definition_ref' : stream_def_ids[0], 'exchange_point' : stream_route.exchange_point, 'parameter_dictionary':platform_eng_dictionary.dump()} return stream_config def _set_platform_agent_instances(self): """ Once most of the objs/defs associated with all platforms are in place, this method creates and associates the PlatformAgentInstance elements. 
""" self.platform_configs = {} for platform_id, plat_objs in self.all_platforms.iteritems(): PLATFORM_CONFIG = self.platform_configs[platform_id] = { 'platform_id': platform_id, 'platform_topology': self.topology, # 'agent_device_map': self.agent_device_map, 'agent_streamconfig_map': self.agent_streamconfig_map, 'driver_config': DVR_CONFIG, } agent_config = { 'platform_config': PLATFORM_CONFIG, } agent_instance_obj = IonObject(RT.PlatformAgentInstance, name='%s_PlatformAgentInstance' % platform_id, description="%s_PlatformAgentInstance" % platform_id, agent_config=agent_config) agent_id = plat_objs['agent_id'] device_id = plat_objs['device_id'] agent_instance_id = self.imsclient.create_platform_agent_instance( agent_instance_obj, agent_id, device_id) plat_objs['agent_instance_obj'] = agent_instance_obj plat_objs['agent_instance_id'] = agent_instance_id stream_config = self.agent_streamconfig_map[platform_id] self._start_data_subscriber(agent_instance_id, stream_config) def _start_data_subscriber(self, stream_name, stream_config): """ Starts data subscriber for the given stream_name and stream_config """ def consume_data(message, stream_route, stream_id): # A callback for processing subscribed-to data. log.info('Subscriber received data message: %s.', str(message)) self._samples_received.append(message) self._async_data_result.set() log.info('_start_data_subscriber stream_name=%r', stream_name) stream_id = stream_config['stream_id'] # Create subscription for the stream exchange_name = '%s_queue' % stream_name self.container.ex_manager.create_xn_queue(exchange_name).purge() sub = StandaloneStreamSubscriber(exchange_name, consume_data) sub.start() self._data_subscribers.append(sub) sub_id = self.pubsubcli.create_subscription(name=exchange_name, stream_ids=[stream_id]) self.pubsubcli.activate_subscription(sub_id) sub.subscription_id = sub_id def _stop_data_subscribers(self): """ Stop the data subscribers on cleanup. """ try: for sub in self._data_subscribers: if hasattr(sub, 'subscription_id'): try: self.pubsubcli.deactivate_subscription(sub.subscription_id) except: pass self.pubsubcli.delete_subscription(sub.subscription_id) sub.stop() finally: self._data_subscribers = [] def _start_event_subscriber(self, event_type="PlatformAlarmEvent", sub_type="power"): """ Starts event subscriber for events of given event_type ("PlatformAlarmEvent" by default) and given sub_type ("power" by default). """ # TODO note: ion-definitions still using 'PlatformAlarmEvent' but we # should probably define 'PlatformExternalEvent' or something like that. def consume_event(evt, *args, **kwargs): # A callback for consuming events. log.info('Event subscriber received evt: %s.', str(evt)) self._events_received.append(evt) self._async_event_result.set(evt) sub = EventSubscriber(event_type=event_type, sub_type=sub_type, callback=consume_event) sub.start() log.info("registered event subscriber for event_type=%r, sub_type=%r", event_type, sub_type) self._event_subscribers.append(sub) sub._ready_event.wait(timeout=EVENT_TIMEOUT) def _stop_event_subscribers(self): """ Stops the event subscribers on cleanup. 
""" try: for sub in self._event_subscribers: if hasattr(sub, 'subscription_id'): try: self.pubsubcli.deactivate_subscription(sub.subscription_id) except: pass self.pubsubcli.delete_subscription(sub.subscription_id) sub.stop() finally: self._event_subscribers = [] def test_oms_create_and_launch(self): # pick a base platform: base_platform_id = BASE_PLATFORM_ID # and trigger the traversal of the branch rooted at that base platform # to create corresponding ION objects and configuration dictionaries: base_platform_objs = self._traverse(base_platform_id) # now that most of the topology information is there, add the # PlatformAgentInstance elements self._set_platform_agent_instances() base_platform_config = self.platform_configs[base_platform_id] log.info("base_platform_id = %r", base_platform_id) log.info("topology = %s", str(self.topology)) #------------------------------- # Launch Base Platform AgentInstance, connect to the resource agent client #------------------------------- agent_instance_id = base_platform_objs['agent_instance_id'] pid = self.imsclient.start_platform_agent_instance(platform_agent_instance_id=agent_instance_id) log.debug("start_platform_agent_instance returned pid=%s", pid) #wait for start instance_obj = self.imsclient.read_platform_agent_instance(agent_instance_id) gate = ProcessStateGate(self.processdispatchclient.read_process, instance_obj.agent_process_id, ProcessStateEnum.RUNNING) self.assertTrue(gate.await(90), "The platform agent instance did not spawn in 90 seconds") agent_instance_obj= self.imsclient.read_instrument_agent_instance(agent_instance_id) log.debug('test_oms_create_and_launch: Platform agent instance obj: %s', str(agent_instance_obj)) # Start a resource agent client to talk with the instrument agent. self._pa_client = ResourceAgentClient('paclient', name=agent_instance_obj.agent_process_id, process=FakeProcess()) log.debug(" test_oms_create_and_launch:: got pa client %s", str(self._pa_client)) log.debug("base_platform_config =\n%s", base_platform_config) # ping_agent can be issued before INITIALIZE retval = self._pa_client.ping_agent(timeout=TIMEOUT) log.debug( 'Base Platform ping_agent = %s', str(retval) ) # issue INITIALIZE command to the base platform, which will launch the # creation of the whole platform hierarchy rooted at base_platform_config['platform_id'] # cmd = AgentCommand(command=PlatformAgentEvent.INITIALIZE, kwargs=dict(plat_config=base_platform_config)) cmd = AgentCommand(command=PlatformAgentEvent.INITIALIZE) retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT) log.debug( 'Base Platform INITIALIZE = %s', str(retval) ) # GO_ACTIVE cmd = AgentCommand(command=PlatformAgentEvent.GO_ACTIVE) retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT) log.debug( 'Base Platform GO_ACTIVE = %s', str(retval) ) # RUN: this command includes the launch of the resource monitoring greenlets cmd = AgentCommand(command=PlatformAgentEvent.RUN) retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT) log.debug( 'Base Platform RUN = %s', str(retval) ) # START_EVENT_DISPATCH kwargs = dict(params="TODO set params") cmd = AgentCommand(command=PlatformAgentEvent.START_EVENT_DISPATCH, kwargs=kwargs) retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT) self.assertTrue(retval.result is not None) # wait for data sample # just wait for at least one -- see consume_data above log.info("waiting for reception of a data sample...") self._async_data_result.get(timeout=DATA_TIMEOUT) self.assertTrue(len(self._samples_received) >= 1) log.info("waiting 
a bit more for reception of more data samples...") sleep(10) log.info("Got data samples: %d", len(self._samples_received)) # wait for event # just wait for at least one event -- see consume_event above log.info("waiting for reception of an event...") self._async_event_result.get(timeout=EVENT_TIMEOUT) log.info("Received events: %s", len(self._events_received)) # STOP_EVENT_DISPATCH cmd = AgentCommand(command=PlatformAgentEvent.STOP_EVENT_DISPATCH) retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT) self.assertTrue(retval.result is not None) # GO_INACTIVE cmd = AgentCommand(command=PlatformAgentEvent.GO_INACTIVE) retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT) log.debug( 'Base Platform GO_INACTIVE = %s', str(retval) ) # RESET: Resets the base platform agent, which includes termination of # its sub-platforms processes: cmd = AgentCommand(command=PlatformAgentEvent.RESET) retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT) log.debug( 'Base Platform RESET = %s', str(retval) ) #------------------------------- # Stop Base Platform AgentInstance #------------------------------- self.imsclient.stop_platform_agent_instance(platform_agent_instance_id=agent_instance_id)
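
# Platform agent lifecycle driven by test_oms_create_and_launch, for reference:
#   ping_agent -> INITIALIZE -> GO_ACTIVE -> RUN -> START_EVENT_DISPATCH
#   ... data samples and events observed via consume_data / consume_event ...
#   STOP_EVENT_DISPATCH -> GO_INACTIVE -> RESET -> stop_platform_agent_instance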
class EventTriggeredTransformIntTest(IonIntegrationTestCase):

    def setUp(self):
        super(EventTriggeredTransformIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url("res/deploy/r2deploy.yml")

        self.queue_cleanup = []
        self.exchange_cleanup = []

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()

        self.exchange_name = "test_queue"
        self.exchange_point = "test_exchange"

        self.dataset_management = DatasetManagementServiceClient()

    def tearDown(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()
        for exchange in self.exchange_cleanup:
            xp = self.container.ex_manager.create_xp(exchange)
            xp.delete()

    @attr("LOCOINT")
    @unittest.skipIf(os.getenv("CEI_LAUNCH_TEST", False), "Skip test while in CEI LAUNCH mode")
    def test_event_triggered_transform_A(self):
        """
        Test that packets are processed by the event triggered transform
        """

        # ---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        # ---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name="EventTriggeredTransform_A",
            description="For testing EventTriggeredTransform_A")
        process_definition.executable["module"] = "ion.processes.data.transforms.event_triggered_transform"
        process_definition.executable["class"] = "EventTriggeredTransform_A"
        event_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name("ctd_parsed_param_dict", id_only=True)

        stream_def_id = self.pubsub.create_stream_definition("cond_stream_def", parameter_dictionary_id=pdict_id)

        cond_stream_id, _ = self.pubsub.create_stream("test_conductivity",
            exchange_point="science_data",
            stream_definition_id=stream_def_id)

        config.process.publish_streams.conductivity = cond_stream_id
        config.process.event_type = "ResourceLifecycleEvent"

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=event_transform_proc_def_id, configuration=config)

        # ---------------------------------------------------------------------------------------------
        # Publish an event to wake up the event triggered transform
        # ---------------------------------------------------------------------------------------------
        event_publisher = EventPublisher("ResourceLifecycleEvent")
        event_publisher.publish_event(origin="fake_origin")

        # ---------------------------------------------------------------------------------------------
        # Create subscribers that will receive the conductivity, temperature and pressure granules from
        # the ctd transform
        # ---------------------------------------------------------------------------------------------
        ar_cond = gevent.event.AsyncResult()

        def subscriber1(m, r, s):
            ar_cond.set(m)

        sub_event_transform = StandaloneStreamSubscriber("sub_event_transform", subscriber1)
        self.addCleanup(sub_event_transform.stop)

        sub_event_transform_id = self.pubsub.create_subscription("subscription_cond",
            stream_ids=[cond_stream_id],
            exchange_name="sub_event_transform")

        self.pubsub.activate_subscription(sub_event_transform_id)

        self.queue_cleanup.append(sub_event_transform.xn.queue)

        sub_event_transform.start()

        # ------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        # ------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = "stream_id.stream"
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind("stream_id.stream", xp)

        pub = StandaloneStreamPublisher("stream_id", stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        # ------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        # ------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result_cond = ar_cond.get(timeout=10)
        self.assertTrue(isinstance(result_cond, Granule))

        rdt = RecordDictionaryTool.load_from_granule(result_cond)
        self.assertTrue(rdt.__contains__("conductivity"))

        self.check_cond_algorithm_execution(publish_granule, result_cond)

    def check_cond_algorithm_execution(self, publish_granule, granule_from_transform):
        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(granule_from_transform)

        output_data = output_rdt_transform["conductivity"]
        input_data = input_rdt_to_transform["conductivity"]

        # Compare elementwise; the previous .all() == .all() comparison was vacuous
        self.assertTrue(numpy.allclose((input_data / 100000.0) - 0.5, output_data))

    def _get_new_ctd_packet(self, stream_definition_id, length):
        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)

        for field in rdt:
            rdt[field] = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)])

        g = rdt.to_granule()
        return g

    def test_event_triggered_transform_B(self):
        """
        Test that packets are processed by the event triggered transform
        """

        # ---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        # ---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name="EventTriggeredTransform_B",
            description="For testing EventTriggeredTransform_B")
        process_definition.executable["module"] = "ion.processes.data.transforms.event_triggered_transform"
        process_definition.executable["class"] = "EventTriggeredTransform_B"
        event_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name("ctd_parsed_param_dict", id_only=True)

        stream_def_id = self.pubsub.create_stream_definition("stream_def", parameter_dictionary_id=pdict_id)

        stream_id, _ = self.pubsub.create_stream("test_stream",
            exchange_point="science_data",
            stream_definition_id=stream_def_id)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point
        config.process.publish_streams.output = stream_id
        config.process.event_type = "ResourceLifecycleEvent"
        config.process.stream_id = stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=event_transform_proc_def_id, configuration=config)

        # ---------------------------------------------------------------------------------------------
        # Publish an event to wake up the event triggered transform
        # ---------------------------------------------------------------------------------------------
        event_publisher = EventPublisher("ResourceLifecycleEvent")
        event_publisher.publish_event(origin="fake_origin")
class TestPreloadThenLoadDataset(IonIntegrationTestCase):
    """
    Uses the preload system to define the ExternalDataset and related resources,
    then invokes services to perform the load
    """

    def setUp(self):
        # Start container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        config = dict(op="load", scenario="NOSE", attachments="res/preload/r2_ioc/attachments")
        self.container.spawn_process("Loader", "ion.processes.bootstrap.ion_loader", "IONLoader", config=config)
        self.pubsub = PubsubManagementServiceClient()
        self.dams = DataAcquisitionManagementServiceClient()

    @unittest.skip("deprecated test, now in mi repo")
    def test_use_case(self):
        # setUp() has already started the container and performed the preload
        # self.assert_dataset_loaded('Test External CTD Dataset')  # make sure we have the ExternalDataset resources
        self.assert_dataset_loaded('Unit Test SMB37')  # association changed -- now use device name
        self.do_listen_for_incoming()  # listen for any data being received from the dataset
        self.do_read_dataset()  # call services to load dataset
        self.assert_data_received()  # check that data was received as expected
        self.do_shutdown()

    def assert_dataset_loaded(self, name):
        rr = self.container.resource_registry
        # self.external_dataset = self.find_object_by_name(name, RT.ExternalDataset)
        devs, _ = rr.find_resources(RT.InstrumentDevice, name=name, id_only=False)
        self.assertEquals(len(devs), 1)
        self.device = devs[0]
        obj, _ = rr.find_objects(subject=self.device._id, predicate=PRED.hasAgentInstance, object_type=RT.ExternalDatasetAgentInstance)
        self.agent_instance = obj[0]
        obj, _ = rr.find_objects(object_type=RT.ExternalDatasetAgent, predicate=PRED.hasAgentDefinition, subject=self.agent_instance._id)
        self.agent = obj[0]

        driver_cfg = self.agent_instance.driver_config
        #stream_definition_id = driver_cfg['dh_cfg']['stream_def'] if 'dh_cfg' in driver_cfg else driver_cfg['stream_def']
        #self.stream_definition = rr.read(stream_definition_id)

        self.data_product = rr.read_object(subject=self.device._id, predicate=PRED.hasOutputProduct, object_type=RT.DataProduct)
        self.dataset_id = rr.read_object(subject=self.data_product._id, predicate=PRED.hasDataset, object_type=RT.Dataset, id_only=True)

        ids, _ = rr.find_objects(subject=self.data_product._id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
        self.stream_id = ids[0]
        self.route = self.pubsub.read_stream_route(self.stream_id)

    def do_listen_for_incoming(self):
        subscription_id = self.pubsub.create_subscription('validator', data_product_ids=[self.data_product._id])
        self.addCleanup(self.pubsub.delete_subscription, subscription_id)

        self.granule_capture = []
        self.granule_count = 0

        def on_granule(msg, route, stream_id):
            self.granule_count += 1
            if self.granule_count < 5:
                self.granule_capture.append(msg)

        validator = StandaloneStreamSubscriber('validator', callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub.deactivate_subscription, subscription_id)

        self.dataset_modified = Event()

        def cb2(*args, **kwargs):
            self.dataset_modified.set()

        # TODO: event isn't using the ExternalDataset, but a different ID for a Dataset
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb2, origin=self.dataset_id)
        es.start()
        self.addCleanup(es.stop)

    def do_read_dataset(self):
        self.dams.start_external_dataset_agent_instance(self.agent_instance._id)
        #
        # should i wait for process (above) to start
        # before launching client (below)?
        #
        self.client = None
        end = time.time() + MAX_AGENT_START_TIME
        while not self.client and time.time() < end:
            try:
                self.client = ResourceAgentClient(self.device._id, process=FakeProcess())
            except NotFound:
                time.sleep(2)
        if not self.client:
            self.fail(msg='external dataset agent process did not start in %d seconds' % MAX_AGENT_START_TIME)
        self.client.execute_agent(AgentCommand(command=ResourceAgentEvent.INITIALIZE))
        self.client.execute_agent(AgentCommand(command=ResourceAgentEvent.GO_ACTIVE))
        self.client.execute_agent(AgentCommand(command=ResourceAgentEvent.RUN))
        self.client.execute_resource(command=AgentCommand(command=DriverEvent.START_AUTOSAMPLE))

    def assert_data_received(self):
        # let it run, then stop the agent and reset it;
        # wait up to 30 seconds for the DatasetModified event
        if not self.dataset_modified.is_set():
            self.dataset_modified.wait(30)
        self.assertTrue(self.granule_count > 2, msg='granule count = %d' % self.granule_count)

        rdt = RecordDictionaryTool.load_from_granule(self.granule_capture[0])
        self.assertAlmostEqual(0, rdt['oxygen'][0], delta=0.01)
        self.assertAlmostEqual(309.77, rdt['pressure'][0], delta=0.01)
        self.assertAlmostEqual(37.9848, rdt['conductivity'][0], delta=0.01)
        self.assertAlmostEqual(9.5163, rdt['temp'][0], delta=0.01)
        self.assertAlmostEqual(3527207897.0, rdt['time'][0], delta=1)

    def do_shutdown(self):
        self.dams.stop_external_dataset_agent_instance(self.agent_instance._id)
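
# The retry loop in do_read_dataset above polls ResourceAgentClient until the
# agent process appears. A sketch of an alternative using the ProcessStateGate
# pattern from TestOmsLaunch (assumes a process dispatcher client and the agent
# process id are available, which this preload-driven test may not guarantee):
#
#   gate = ProcessStateGate(self.processdispatchclient.read_process,
#                           agent_process_id, ProcessStateEnum.RUNNING)
#   self.assertTrue(gate.await(MAX_AGENT_START_TIME))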
def test_integrated_transform(self):
    '''
    This example script runs a chained three way transform:

            B
           /
        A <
           \
            C

    Where A is the even_odd transform (generates a stream of even and odd
    numbers from input) and B and C are the basic transforms that receive
    even and odd input
    '''
    cc = self.container
    assertions = self.assertTrue

    pubsub_cli = PubsubManagementServiceClient(node=cc.node)
    rr_cli = ResourceRegistryServiceClient(node=cc.node)
    tms_cli = TransformManagementServiceClient(node=cc.node)

    #-------------------------------
    # Process Definition
    #-------------------------------
    # Create the process definition for the basic transform
    process_definition = IonObject(RT.ProcessDefinition, name='basic_transform_definition')
    process_definition.executable = {
        'module': 'ion.processes.data.transforms.transform_example',
        'class': 'TransformExample'
    }
    basic_transform_definition_id, _ = rr_cli.create(process_definition)

    # Create the process definition for the TransformEvenOdd
    process_definition = IonObject(RT.ProcessDefinition, name='evenodd_transform_definition')
    process_definition.executable = {
        'module': 'ion.processes.data.transforms.transform_example',
        'class': 'TransformEvenOdd'
    }
    evenodd_transform_definition_id, _ = rr_cli.create(process_definition)

    #-------------------------------
    # Streams
    #-------------------------------
    streams = [pubsub_cli.create_stream() for i in xrange(5)]

    #-------------------------------
    # Subscriptions
    #-------------------------------
    query = StreamQuery(stream_ids=[streams[0]])
    input_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='input_queue')

    query = StreamQuery(stream_ids=[streams[1]])  # even output
    even_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='even_queue')

    query = StreamQuery(stream_ids=[streams[2]])  # odd output
    odd_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='odd_queue')

    #-------------------------------
    # Launch the EvenOdd Transform
    #-------------------------------
    evenodd_id = tms_cli.create_transform(name='even_odd',
        in_subscription_id=input_subscription_id,
        out_streams={'even': streams[1], 'odd': streams[2]},
        process_definition_id=evenodd_transform_definition_id,
        configuration={})
    tms_cli.activate_transform(evenodd_id)

    #-------------------------------
    # Launch the Even Processing Transform
    #-------------------------------
    even_transform_id = tms_cli.create_transform(name='even_transform',
        in_subscription_id=even_subscription_id,
        out_streams={'even_plus1': streams[3]},
        process_definition_id=basic_transform_definition_id,
        configuration={})
    tms_cli.activate_transform(even_transform_id)

    #-------------------------------
    # Launch the Odd Processing Transform
    #-------------------------------
    odd_transform_id = tms_cli.create_transform(name='odd_transform',
        in_subscription_id=odd_subscription_id,
        out_streams={'odd_plus1': streams[4]},
        process_definition_id=basic_transform_definition_id,
        configuration={})
    tms_cli.activate_transform(odd_transform_id)

    #-------------------------------
    # Set up final subscribers
    #-------------------------------
    evenplus1_subscription_id = pubsub_cli.create_subscription(
        query=StreamQuery([streams[3]]),
        exchange_name='evenplus1_queue',
        name='EvenPlus1Subscription',
        description='EvenPlus1 SubscriptionDescription'
    )
    oddplus1_subscription_id = pubsub_cli.create_subscription(
        query=StreamQuery([streams[4]]),
        exchange_name='oddplus1_queue',
        name='OddPlus1Subscription',
        description='OddPlus1 SubscriptionDescription'
    )

    total_msg_count = 2

    msgs = gevent.queue.Queue()

    def even1_message_received(message, headers):
        input = int(message.get('num'))
        assertions((input % 2))  # Assert it is odd (transform adds 1)
        msgs.put(True)

    def odd1_message_received(message, headers):
        input = int(message.get('num'))
        assertions(not (input % 2))  # Assert it is even
        msgs.put(True)

    subscriber_registrar = StreamSubscriberRegistrar(process=cc, node=cc.node)
    even_subscriber = subscriber_registrar.create_subscriber(exchange_name='evenplus1_queue', callback=even1_message_received)
    odd_subscriber = subscriber_registrar.create_subscriber(exchange_name='oddplus1_queue', callback=odd1_message_received)

    # Start subscribers
    even_subscriber.start()
    odd_subscriber.start()

    # Activate subscriptions
    pubsub_cli.activate_subscription(evenplus1_subscription_id)
    pubsub_cli.activate_subscription(oddplus1_subscription_id)

    #-------------------------------
    # Set up fake stream producer
    #-------------------------------
    pid = cc.spawn_process(name='dummy_process_for_test',
        module='pyon.ion.process',
        cls='SimpleProcess',
        config={})
    dummy_process = cc.proc_manager.procs[pid]

    # Normally the user does not see or create the publisher, this is part of the container's business.
    # For the test we need to set it up explicitly
    publisher_registrar = StreamPublisherRegistrar(process=dummy_process, node=cc.node)
    stream_publisher = publisher_registrar.create_publisher(stream_id=streams[0])

    #-------------------------------
    # Start test
    #-------------------------------
    # Publish a stream
    for i in xrange(total_msg_count):
        stream_publisher.publish({'num': str(i)})
        time.sleep(0.5)

    for i in xrange(total_msg_count * 2):
        try:
            msgs.get()
        except Empty:
            assertions(False, "Failed to process all messages correctly.")
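
# Worked example of the chain above: publishing {'num': '4'} reaches the even
# branch (4 % 2 == 0), the basic transform adds 1 (per the assertion comments),
# and even1_message_received sees 5, which the (input % 2) check accepts as odd.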
class TransformPrototypeIntTest(IonIntegrationTestCase):

    def setUp(self):
        super(TransformPrototypeIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.rrc = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.ssclient = SchedulerServiceClient()
        self.event_publisher = EventPublisher()

        self.exchange_names = []
        self.exchange_points = []

    def tearDown(self):
        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def now_utc(self):
        return time.mktime(datetime.datetime.utcnow().timetuple())

    def _create_interval_timer_with_end_time(self, timer_interval=None, end_time=None):
        '''
        A convenience method to set up an interval timer with an end time
        '''
        self.timer_received_time = 0
        self.timer_interval = timer_interval

        start_time = self.now_utc()
        if not end_time:
            end_time = start_time + 2 * timer_interval + 1

        log.debug("got the end time here!! %s" % end_time)

        # Set up the interval timer. The scheduler will publish event with origin set as "Interval Timer"
        sid = self.ssclient.create_interval_timer(start_time="now",
            interval=self.timer_interval,
            end_time=end_time,
            event_origin="Interval Timer",
            event_subtype="")

        def cleanup_timer(scheduler, schedule_id):
            """
            Do a friendly cancel of the scheduled event.
            If it fails, it's ok.
            """
            try:
                scheduler.cancel_timer(schedule_id)
            except:
                log.warn("Couldn't cancel")

        self.addCleanup(cleanup_timer, self.ssclient, sid)

        return sid

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_event_processing(self):
        '''
        Test that events are processed by the transforms according to a provided algorithm
        '''

        #-------------------------------------------------------------------------------------
        # Set up the scheduler for an interval timer with an end time
        #-------------------------------------------------------------------------------------
        id = self._create_interval_timer_with_end_time(timer_interval=2)
        self.assertIsNotNone(id)

        #-------------------------------------------------------------------------------------
        # Create an event alert transform....
        # The configuration for the Event Alert Transform... set up the event types to listen to
        #-------------------------------------------------------------------------------------
        configuration = {
            'process': {
                'event_type': 'ResourceEvent',
                'timer_origin': 'Interval Timer',
                'instrument_origin': 'My_favorite_instrument'
            }
        }

        #-------------------------------------------------------------------------------------
        # Create the process
        #-------------------------------------------------------------------------------------
        pid = TransformPrototypeIntTest.create_process(
            name='event_alert_transform',
            module='ion.processes.data.transforms.event_alert_transform',
            class_name='EventAlertTransform',
            configuration=configuration)

        self.assertIsNotNone(pid)

        #-------------------------------------------------------------------------------------
        # Publish events and make assertions about alerts
        #-------------------------------------------------------------------------------------
        queue = gevent.queue.Queue()

        def event_received(message, headers):
            queue.put(message)

        event_subscriber = EventSubscriber(origin="EventAlertTransform",
            event_type="DeviceEvent",
            callback=event_received)

        event_subscriber.start()
        self.addCleanup(event_subscriber.stop)

        # publish a batch of instrument events; none of these should trigger an alert
        for i in xrange(5):
            self.event_publisher.publish_event(
                event_type='ExampleDetectableEvent',
                origin="My_favorite_instrument",
                voltage=5,
                telemetry=10,
                temperature=20)
            gevent.sleep(0.1)
            self.assertTrue(queue.empty())

        # now go silent for longer than the 2-second timer interval, so the
        # transform stops hearing from the instrument
        gevent.sleep(5)

        #-------------------------------------------------------------------------------------
        # Make assertions about the alert event published by the EventAlertTransform
        #-------------------------------------------------------------------------------------
        event = queue.get(timeout=10)

        log.debug("Alarm event received from the EventAlertTransform %s" % event)

        self.assertEquals(event.type_, "DeviceEvent")
        self.assertEquals(event.origin, "EventAlertTransform")

        #------------------------------------------------------------------------------------------------
        # Now clear the event queue being populated by alarm events and publish normally once again
        #------------------------------------------------------------------------------------------------
        queue.queue.clear()

        for i in xrange(5):
            self.event_publisher.publish_event(
                event_type='ExampleDetectableEvent',
                origin="My_favorite_instrument",
                voltage=5,
                telemetry=10,
                temperature=20)
            gevent.sleep(0.1)
            self.assertTrue(queue.empty())

        log.debug("This completes the requirement that the EventAlertTransform publishes "
                  "an alarm event when it does not hear from the instrument for some time.")

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_stream_processing(self):
        #--------------------------------------------------------------------------------
        # Test that streams are processed by the transforms according to a provided algorithm
        #--------------------------------------------------------------------------------

        # todo: In this simple implementation, we are checking if the stream has the word PUBLISH,
        # todo (contd): and if the word VALUE=<number> exists and that number is less than something
        # todo: later on we are going to use complex algorithms to make this prototype powerful

        #-------------------------------------------------------------------------------------
        # Start a subscriber to listen for an alert event from the Stream Alert Transform
#------------------------------------------------------------------------------------- queue = gevent.queue.Queue() def event_received(message, headers): queue.put(message) event_subscriber = EventSubscriber( origin="StreamAlertTransform", event_type="DeviceEvent", callback=event_received) event_subscriber.start() self.addCleanup(event_subscriber.stop) #------------------------------------------------------------------------------------- # The configuration for the Stream Alert Transform: the queue to read from, the threshold value and the event type to publish #------------------------------------------------------------------------------------- config = { 'process':{ 'queue_name': 'a_queue', 'value': 10, 'event_type':'DeviceEvent' } } #------------------------------------------------------------------------------------- # Create the process #------------------------------------------------------------------------------------- pid = TransformPrototypeIntTest.create_process( name='transform_data_process', module='ion.processes.data.transforms.event_alert_transform', class_name='StreamAlertTransform', configuration=config) self.assertIsNotNone(pid) #------------------------------------------------------------------------------------- # Publish streams and make assertions about alerts #------------------------------------------------------------------------------------- exchange_name = 'a_queue' exchange_point = 'test_exchange' routing_key = 'stream_id.stream' stream_route = StreamRoute(exchange_point, routing_key) xn = self.container.ex_manager.create_xn_queue(exchange_name) xp = self.container.ex_manager.create_xp(exchange_point) xn.bind('stream_id.stream', xp) pub = StandaloneStreamPublisher('stream_id', stream_route) message = "A dummy example message containing the word PUBLISH, and with VALUE = 5. This message" +\ " will trigger an alert event from the StreamAlertTransform because the value provided is" \ " less than the threshold of 10 passed in through the config." pub.publish(message) event = queue.get(timeout=10) self.assertEquals(event.type_, "DeviceEvent") self.assertEquals(event.origin, "StreamAlertTransform") # self.purge_queues(exchange_name) # def purge_queues(self, exchange_name): # xn = self.container.ex_manager.create_xn_queue(exchange_name) # xn.purge() @staticmethod def create_process(name='', module='', class_name='', configuration=None): ''' A helper method to create a process ''' producer_definition = ProcessDefinition(name=name) producer_definition.executable = { 'module': module, 'class': class_name } process_dispatcher = ProcessDispatcherServiceClient() procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition) pid = process_dispatcher.schedule_process(process_definition_id=procdef_id, configuration=configuration) return pid def test_demo_stream_granules_processing(self): """ Test that the Demo Stream Alert Transform is functioning. The transform coordinates with the scheduler. It is configured to listen to a source that publishes granules. It publishes a DeviceStatusEvent if it receives a granule with bad data or a DeviceCommsEvent if no granule has arrived between two timer events. The transform is configured at launch using a config dictionary.
""" #------------------------------------------------------------------------------------- # Start a subscriber to listen for an alert event from the Stream Alert Transform #------------------------------------------------------------------------------------- queue_bad_data = gevent.queue.Queue() queue_no_data = gevent.queue.Queue() def bad_data(message, headers): if message.type_ == "DeviceStatusEvent": queue_bad_data.put(message) def no_data(message, headers): queue_no_data.put(message) event_subscriber_bad_data = EventSubscriber( origin="DemoStreamAlertTransform", event_type="DeviceStatusEvent", callback=bad_data) event_subscriber_no_data = EventSubscriber( origin="DemoStreamAlertTransform", event_type="DeviceCommsEvent", callback=no_data) event_subscriber_bad_data.start() event_subscriber_no_data.start() self.addCleanup(event_subscriber_bad_data.stop) self.addCleanup(event_subscriber_no_data.stop) #------------------------------------------------------------------------------------- # The configuration for the Stream Alert Transform... set up the event types to listen to #------------------------------------------------------------------------------------- self.valid_values = [-100, 100] self.timer_interval = 5 self.queue_name = 'a_queue' config = { 'process':{ 'timer_interval': self.timer_interval, 'queue_name': self.queue_name, 'variable_name': 'input_voltage', 'time_field_name': 'preferred_timestamp', 'valid_values': self.valid_values, 'timer_origin': 'Interval Timer' } } #------------------------------------------------------------------------------------- # Create the process #------------------------------------------------------------------------------------- pid = TransformPrototypeIntTest.create_process( name= 'DemoStreamAlertTransform', module='ion.processes.data.transforms.event_alert_transform', class_name='DemoStreamAlertTransform', configuration= config) self.assertIsNotNone(pid) #------------------------------------------------------------------------------------- # Publish streams and make assertions about alerts #------------------------------------------------------------------------------------- pdict_id = self.dataset_management.read_parameter_dictionary_by_name(name= 'platform_eng_parsed', id_only=True) stream_def_id = self.pubsub_management.create_stream_definition('demo_stream', parameter_dictionary_id=pdict_id) stream_id, stream_route = self.pubsub_management.create_stream( name='test_demo_alert', exchange_point='exch_point_1', stream_definition_id=stream_def_id) sub_1 = self.pubsub_management.create_subscription(name='sub_1', stream_ids=[stream_id], exchange_points=['exch_point_1'], exchange_name=self.queue_name) self.pubsub_management.activate_subscription(sub_1) self.exchange_names.append('sub_1') self.exchange_points.append('exch_point_1') #------------------------------------------------------------------------------------- # publish a *GOOD* granule #------------------------------------------------------------------------------------- self.length = 2 val = numpy.array([random.uniform(0,50) for l in xrange(self.length)]) self._publish_granules(stream_id= stream_id, stream_route= stream_route, number=1, values=val, length=self.length) self.assertTrue(queue_bad_data.empty()) #------------------------------------------------------------------------------------- # publish a few *BAD* granules #------------------------------------------------------------------------------------- self.length = 2 self.number = 2 val = numpy.array([random.uniform(110,200) for l in 
xrange(self.length)]) self._publish_granules(stream_id=stream_id, stream_route=stream_route, number=self.number, values=val, length=self.length) for i in xrange(self.length * self.number): event = queue_bad_data.get(timeout=10) self.assertEquals(event.type_, "DeviceStatusEvent") self.assertEquals(event.origin, "DemoStreamAlertTransform") self.assertEquals(event.state, DeviceStatusType.OUT_OF_RANGE) self.assertEquals(event.valid_values, self.valid_values) self.assertEquals(event.sub_type, 'input_voltage') self.assertIsNotNone(event.value) self.assertIsNotNone(event.time_stamp) # Ensure that only the bad values generated alert events; the queue should be empty now self.assertEquals(queue_bad_data.qsize(), 0) #------------------------------------------------------------------------------------- # Do not publish any granules for some time. This should generate a DeviceCommsEvent for the communication status #------------------------------------------------------------------------------------- event = queue_no_data.get(timeout=15) self.assertEquals(event.type_, "DeviceCommsEvent") self.assertEquals(event.origin, "DemoStreamAlertTransform") self.assertEquals(event.state, DeviceCommsType.DATA_DELIVERY_INTERRUPTION) self.assertEquals(event.sub_type, 'input_voltage') #------------------------------------------------------------------------------------- # Empty the queues and repeat the tests #------------------------------------------------------------------------------------- queue_bad_data.queue.clear() queue_no_data.queue.clear() #------------------------------------------------------------------------------------- # publish a *GOOD* granule again #------------------------------------------------------------------------------------- self.length = 2 val = numpy.array([random.uniform(0,50) for l in xrange(self.length)]) self._publish_granules(stream_id=stream_id, stream_route=stream_route, number=1, values=val, length=self.length) self.assertTrue(queue_bad_data.empty()) #------------------------------------------------------------------------------------- # Again do not publish any granules for some time. This should generate a DeviceCommsEvent for the communication status #------------------------------------------------------------------------------------- event = queue_no_data.get(timeout=20) self.assertEquals(event.type_, "DeviceCommsEvent") self.assertEquals(event.origin, "DemoStreamAlertTransform") self.assertEquals(event.state, DeviceCommsType.DATA_DELIVERY_INTERRUPTION) self.assertEquals(event.sub_type, 'input_voltage') def _publish_granules(self, stream_id=None, stream_route=None, values=None, number=None, length=None): pub = StandaloneStreamPublisher(stream_id, stream_route) stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id) stream_def_id = stream_def._id rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) for i in xrange(number): rdt['input_voltage'] = values rdt['preferred_timestamp'] = numpy.array([random.uniform(0,1000) for l in xrange(length)]) g = rdt.to_granule() pub.publish(g)
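# A minimal sketch (not the project's StreamAlertTransform implementation) of the message
# check described in the todos of test_stream_processing above: look for the word PUBLISH
# and a VALUE=<number> token whose number is below a configured threshold. The helper name
# and the regular expression here are illustrative assumptions.
import re

def _message_triggers_alert(message, max_value=10):
    """Return True when the message contains PUBLISH and VALUE = <n> with n < max_value."""
    if 'PUBLISH' not in message:
        return False
    match = re.search(r'VALUE\s*=\s*(-?\d+(?:\.\d+)?)', message)
    return match is not None and float(match.group(1)) < max_value

# For example, the dummy message published in test_stream_processing would trigger an alert:
assert _message_triggers_alert("containing the word PUBLISH, and with VALUE = 5.")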
class TransformManagementServiceIntTest(IonIntegrationTestCase): def setUp(self): # set up the container self._start_container() self.container.start_rel_from_url('res/deploy/r2dm.yml') self.pubsub_cli = PubsubManagementServiceClient(node=self.container.node) self.tms_cli = TransformManagementServiceClient(node=self.container.node) self.rr_cli = ResourceRegistryServiceClient(node=self.container.node) self.procd_cli = ProcessDispatcherServiceClient(node=self.container.node) self.input_stream_id = self.pubsub_cli.create_stream(name='input_stream',original=True) self.input_subscription_id = self.pubsub_cli.create_subscription(query=StreamQuery(stream_ids=[self.input_stream_id]),exchange_name='transform_input',name='input_subscription') self.output_stream_id = self.pubsub_cli.create_stream(name='output_stream',original=True) self.process_definition = ProcessDefinition(name='basic_transform_definition') self.process_definition.executable = {'module': 'ion.processes.data.transforms.transform_example', 'class':'TransformExample'} self.process_definition_id = self.procd_cli.create_process_definition(process_definition=self.process_definition) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),'Not integrated for CEI') def test_create_transform(self): configuration = {'program_args':{'arg1':'value'}} transform_id = self.tms_cli.create_transform( name='test_transform', in_subscription_id=self.input_subscription_id, out_streams={'output':self.output_stream_id}, process_definition_id=self.process_definition_id) # test transform creation in rr transform = self.rr_cli.read(transform_id) self.assertEquals(transform.name,'test_transform') # test associations predicates = [PRED.hasSubscription, PRED.hasOutStream, PRED.hasProcessDefinition] assocs = [] for p in predicates: assocs += self.rr_cli.find_associations(transform_id,p,id_only=True) self.assertEquals(len(assocs),3) # test process creation transform = self.tms_cli.read_transform(transform_id) pid = transform.process_id proc = self.container.proc_manager.procs.get(pid) self.assertIsInstance(proc,TransformExample) # clean up self.tms_cli.delete_transform(transform_id) def test_create_transform_no_procdef(self): with self.assertRaises(NotFound): self.tms_cli.create_transform(name='test',in_subscription_id=self.input_subscription_id) def test_create_transform_bad_procdef(self): with self.assertRaises(NotFound): self.tms_cli.create_transform(name='test', in_subscription_id=self.input_subscription_id, process_definition_id='bad') def test_create_transform_no_config(self): transform_id = self.tms_cli.create_transform( name='test_transform', in_subscription_id=self.input_subscription_id, out_streams={'output':self.output_stream_id}, process_definition_id=self.process_definition_id, ) self.tms_cli.delete_transform(transform_id) def test_create_transform_name_failure(self): transform_id = self.tms_cli.create_transform( name='test_transform', in_subscription_id=self.input_subscription_id, out_streams={'output':self.output_stream_id}, process_definition_id=self.process_definition_id, ) with self.assertRaises(BadRequest): transform_id = self.tms_cli.create_transform( name='test_transform', in_subscription_id=self.input_subscription_id, out_streams={'output':self.output_stream_id}, process_definition_id=self.process_definition_id, ) self.tms_cli.delete_transform(transform_id) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),'Not integrated for CEI') def test_create_no_output(self): transform_id = 
self.tms_cli.create_transform( name='test_transform', in_subscription_id=self.input_subscription_id, process_definition_id=self.process_definition_id, ) predicates = [PRED.hasSubscription, PRED.hasOutStream, PRED.hasProcessDefinition] assocs = [] for p in predicates: assocs += self.rr_cli.find_associations(transform_id,p,id_only=True) self.assertEquals(len(assocs),2) # test process creation transform = self.tms_cli.read_transform(transform_id) pid = transform.process_id proc = self.container.proc_manager.procs.get(pid) self.assertIsInstance(proc,TransformExample) self.tms_cli.delete_transform(transform_id) def test_read_transform_exists(self): trans_obj = IonObject(RT.Transform,name='trans_obj') trans_id, _ = self.rr_cli.create(trans_obj) res = self.tms_cli.read_transform(trans_id) actual = self.rr_cli.read(trans_id) self.assertEquals(res._id,actual._id) def test_read_transform_nonexist(self): with self.assertRaises(NotFound): self.tms_cli.read_transform('123') def test_activate_transform(self): transform_id = self.tms_cli.create_transform( name='test_transform', in_subscription_id=self.input_subscription_id, out_streams={'output':self.output_stream_id}, process_definition_id=self.process_definition_id ) self.tms_cli.activate_transform(transform_id) # pubsub check if activated? self.tms_cli.delete_transform(transform_id) def test_activate_transform_nonexist(self): with self.assertRaises(NotFound): self.tms_cli.activate_transform('1234') def test_delete_transform(self): transform_id = self.tms_cli.create_transform( name='test_transform', in_subscription_id=self.input_subscription_id, process_definition_id=self.process_definition_id ) self.tms_cli.delete_transform(transform_id) # assertions with self.assertRaises(NotFound): self.rr_cli.read(transform_id) def test_delete_transform_nonexist(self): with self.assertRaises(NotFound): self.tms_cli.delete_transform('123') def test_execute_transform(self): # set up process_definition = ProcessDefinition(name='procdef_execute') process_definition.executable['module'] = 'ion.processes.data.transforms.transform_example' process_definition.executable['class'] = 'ReverseTransform' data = [1,2,3] process_definition_id, _ = self.rr_cli.create(process_definition) retval = self.tms_cli.execute_transform(process_definition_id,data) self.assertEquals(retval,[3,2,1]) def test_integrated_transform(self): ''' This example script runs a chained three-way transform in which A feeds both B and C: A is the even_odd transform (it splits an input stream of numbers into an even stream and an odd stream) and B and C are the basic transforms that receive the even and odd input respectively ''' cc = self.container assertions = self.assertTrue pubsub_cli = PubsubManagementServiceClient(node=cc.node) rr_cli = ResourceRegistryServiceClient(node=cc.node) tms_cli = TransformManagementServiceClient(node=cc.node) #------------------------------- # Process Definition #------------------------------- # Create the process definition for the basic transform process_definition = IonObject(RT.ProcessDefinition, name='basic_transform_definition') process_definition.executable = { 'module': 'ion.processes.data.transforms.transform_example', 'class':'TransformExample' } basic_transform_definition_id, _ = rr_cli.create(process_definition) # Create the process definition for the TransformEvenOdd process_definition = IonObject(RT.ProcessDefinition, name='evenodd_transform_definition') process_definition.executable = { 'module': 'ion.processes.data.transforms.transform_example', 'class':'TransformEvenOdd' } evenodd_transform_definition_id,
_ = rr_cli.create(process_definition) #------------------------------- # Streams #------------------------------- streams = [pubsub_cli.create_stream() for i in xrange(5)] #------------------------------- # Subscriptions #------------------------------- query = StreamQuery(stream_ids=[streams[0]]) input_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='input_queue') query = StreamQuery(stream_ids=[streams[1]]) # even output even_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='even_queue') query = StreamQuery(stream_ids=[streams[2]]) # odd output odd_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='odd_queue') #------------------------------- # Launch the EvenOdd Transform #------------------------------- evenodd_id = tms_cli.create_transform(name='even_odd', in_subscription_id=input_subscription_id, out_streams={'even':streams[1], 'odd':streams[2]}, process_definition_id=evenodd_transform_definition_id, configuration={}) tms_cli.activate_transform(evenodd_id) #------------------------------- # Launch the Even Processing Transform #------------------------------- even_transform_id = tms_cli.create_transform(name='even_transform', in_subscription_id=even_subscription_id, out_streams={'even_plus1':streams[3]}, process_definition_id=basic_transform_definition_id, configuration={}) tms_cli.activate_transform(even_transform_id) #------------------------------- # Launch the Odd Processing Transform #------------------------------- odd_transform_id = tms_cli.create_transform(name='odd_transform', in_subscription_id=odd_subscription_id, out_streams={'odd_plus1':streams[4]}, process_definition_id=basic_transform_definition_id, configuration={}) tms_cli.activate_transform(odd_transform_id) #------------------------------- # Set up final subscribers #------------------------------- evenplus1_subscription_id = pubsub_cli.create_subscription( query=StreamQuery([streams[3]]), exchange_name='evenplus1_queue', name='EvenPlus1Subscription', description='EvenPlus1 SubscriptionDescription' ) oddplus1_subscription_id = pubsub_cli.create_subscription( query=StreamQuery([streams[4]]), exchange_name='oddplus1_queue', name='OddPlus1Subscription', description='OddPlus1 SubscriptionDescription' ) total_msg_count = 2 msgs = gevent.queue.Queue() def even1_message_received(message, headers): num = int(message.get('num')) assertions(num % 2) # Assert it is odd (the transform adds 1) msgs.put(True) def odd1_message_received(message, headers): num = int(message.get('num')) assertions(not (num % 2)) # Assert it is even msgs.put(True) subscriber_registrar = StreamSubscriberRegistrar(process=cc, node=cc.node) even_subscriber = subscriber_registrar.create_subscriber(exchange_name='evenplus1_queue', callback=even1_message_received) odd_subscriber = subscriber_registrar.create_subscriber(exchange_name='oddplus1_queue', callback=odd1_message_received) # Start subscribers even_subscriber.start() odd_subscriber.start() # Activate subscriptions pubsub_cli.activate_subscription(evenplus1_subscription_id) pubsub_cli.activate_subscription(oddplus1_subscription_id) #------------------------------- # Set up fake stream producer #------------------------------- pid = cc.spawn_process(name='dummy_process_for_test', module='pyon.ion.process', cls='SimpleProcess', config={}) dummy_process = cc.proc_manager.procs[pid] # Normally the user does not see or create the publisher; this is part of the container's business. # For the test we need to set it up explicitly publisher_registrar = StreamPublisherRegistrar(process=dummy_process, node=cc.node) stream_publisher = publisher_registrar.create_publisher(stream_id=streams[0]) #------------------------------- # Start test #------------------------------- # Publish a stream for i in xrange(total_msg_count): stream_publisher.publish({'num':str(i)}) time.sleep(0.5) for i in xrange(total_msg_count * 2): try: msgs.get(timeout=10) # use a timeout so the Empty handler can actually fire except Empty: assertions(False, "Failed to process all messages correctly.")
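# A plain-Python sketch of the routing rule the even_odd transform (A) applies in the
# chained A -> (B, C) topology exercised above. This function is illustrative only; the
# real TransformEvenOdd is a container process that publishes to its 'even' and 'odd'
# output streams rather than invoking callbacks.
def route_even_odd(num, publish_even, publish_odd):
    """Send even numbers to the even callback and odd numbers to the odd callback."""
    if num % 2 == 0:
        publish_even({'num': str(num)})
    else:
        publish_odd({'num': str(num)})

# Example: of the numbers 0, 1, 2, two land on the even queue and one on the odd queue.
evens, odds = [], []
for n in xrange(3):
    route_even_odd(n, evens.append, odds.append)
assert len(evens) == 2 and len(odds) == 1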
class TestTransformWorker(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Instantiate a process to represent the test process = TransformWorkerTestProcess() self.dataset_management_client = DatasetManagementServiceClient( node=self.container.node) self.pubsub_client = PubsubManagementServiceClient( node=self.container.node) self.dataproductclient = DataProductManagementServiceClient( node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient( node=self.container.node) self.processdispatchclient = ProcessDispatcherServiceClient( node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient( node=self.container.node) self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.imsclient = InstrumentManagementServiceProcessClient( node=self.container.node, process=process) self.time_dom, self.spatial_dom = time_series_domain() self.ph = ParameterHelper(self.dataset_management_client, self.addCleanup) self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10) def push_granule(self, data_product_id): ''' Publishes and monitors that the granule arrived ''' datasets, _ = self.rrclient.find_objects(data_product_id, PRED.hasDataset, id_only=True) dataset_monitor = DatasetMonitor(datasets[0]) rdt = self.ph.rdt_for_data_product(data_product_id) self.ph.fill_parsed_rdt(rdt) self.ph.publish_rdt_to_data_product(data_product_id, rdt) assert dataset_monitor.wait() dataset_monitor.stop() @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. 
# verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.dp_list = [] self.data_process_objs = [] self._output_stream_ids = [] self.granule_verified = Event() self.worker_assigned_event_verified = Event() self.dp_created_event_verified = Event() self.heartbeat_event_verified = Event() self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] self.start_event_listener() # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process() self.dp_list.append(dataprocess_id) # validate that the repository for data product algorithms persists the new resources (NEW SA-1); # the create_data_process call created one of each dpd_ids, _ = self.rrclient.find_resources( restype=OT.DataProcessDefinition, id_only=False) # there will be more than one because of the DPDs that represent the PFs in the data product above self.assertTrue(dpd_ids is not None) dp_ids, _ = self.rrclient.find_resources(restype=OT.DataProcess, id_only=False) # only one DataProcess because the PFs in the data product above are not activated yet. self.assertEquals(len(dp_ids), 1) # validate the name and version label (NEW SA-2) dataprocessdef_obj = self.dataprocessclient.read_data_process_definition( dataprocessdef_id) self.assertEqual(dataprocessdef_obj.version_label, '1.0a') self.assertEqual(dataprocessdef_obj.name, 'add_arrays') # validate that the DPD has an attachment (NEW SA-21) attachment_ids, assoc_ids = self.rrclient.find_objects( dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True) self.assertEqual(len(attachment_ids), 1) attachment_obj = self.rrclient.read_attachment(attachment_ids[0]) log.debug('attachment: %s', attachment_obj) # validate that the data process resource has input and output data products associated # L4-CI-SA-RQ-364 and NEW SA-3 outproduct_ids, assoc_ids = self.rrclient.find_objects( dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True) self.assertEqual(len(outproduct_ids), 1) inproduct_ids, assoc_ids = self.rrclient.find_objects( dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True) self.assertEqual(len(inproduct_ids), 1) # Test for provenance. Get Data product produced by the data processes output_data_product_id, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance( output_data_product_id[0]) # Do a basic check that the provenance graph has two entries, the child (output) and parent (input) data products, # with the DataProcessDefinition that created the child recorded on the parent link.
self.assertTrue(len(output_data_product_provenance) == 2) self.assertTrue(self.input_dp_id in output_data_product_provenance[ output_data_product_id[0]]['parents']) self.assertTrue(output_data_product_provenance[ output_data_product_id[0]]['parents'][self.input_dp_id] ['data_process_definition_id'] == dataprocessdef_id) # NEW SA - 4 | Data processing shall include the appropriate data product algorithm name and version number in # the metadata of each output data product created by the data product algorithm. output_data_product_obj, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=False) self.assertIsNotNone(output_data_product_obj[0].name) self.assertIsNotNone(output_data_product_obj[0]._rev) # retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=dataprocess_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) # create a queue to catch the published granules self.subscription_id = self.pubsub_client.create_subscription( name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id) self.pubsub_client.activate_subscription(self.subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id) stream_route = self.pubsub_client.read_stream_route(self.stream_id) self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route) for n in range(1, 101): rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher.publish(rdt.to_granule()) # validate that the output granule is received and the updated value is correct self.assertTrue(self.granule_verified.wait(self.wait_time)) # validate that the data process loaded into worker event is received (L4-CI-SA-RQ-182) self.assertTrue( self.worker_assigned_event_verified.wait(self.wait_time)) # validate that the data process create (with data product ids) event is received (NEW SA-42) self.assertTrue(self.dp_created_event_verified.wait(self.wait_time)) # validate that the data process heartbeat event is received (for every hundred granules processed) (L4-CI-SA-RQ-182) # this takes a while, so set the wait limit to a large value self.assertTrue(self.heartbeat_event_verified.wait(200)) # validate that the code from the transform function can be retrieved via inspect_data_process_definition src = self.dataprocessclient.inspect_data_process_definition( dataprocessdef_id) self.assertIn('def add_arrays(a, b)', src) # now delete the DPD and DP, then verify that the resources are retired so that information required for provenance is still available self.dataprocessclient.delete_data_process(dataprocess_id) self.dataprocessclient.delete_data_process_definition( dataprocessdef_id) in_dp_objs, _ = self.rrclient.find_objects( subject=dataprocess_id, predicate=PRED.hasInputProduct, object_type=RT.DataProduct, id_only=True) self.assertTrue(in_dp_objs is not None) dpd_objs, _ = self.rrclient.find_subjects( subject_type=RT.DataProcessDefinition, predicate=PRED.hasDataProcess, object=dataprocess_id, id_only=True) self.assertTrue(dpd_objs is not None) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip
test while in CEI LAUNCH mode') def test_transform_worker_with_instrumentdevice(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. # verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # Create CTD Parsed as the initial data product # create a stream definition for the data from the ctd simulator self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] log.debug('new ctd_parsed_data_product_id = %s', self.input_dp_id) # Only one device is ever needed for testing purposes. instDevice_obj, _ = self.rrclient.find_resources( restype=RT.InstrumentDevice, name='test_ctd_device') if instDevice_obj: instDevice_id = instDevice_obj[0]._id else: instDevice_obj = IonObject(RT.InstrumentDevice, name='test_ctd_device', description="test_ctd_device", serial_number="12345") instDevice_id = self.imsclient.create_instrument_device( instrument_device=instDevice_obj) self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=self.input_dp_id) # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process() self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id) self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id) # Test for provenance. Get Data product produced by the data processes output_data_product_id, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance( output_data_product_id[0]) # Do a basic check that the provenance graph has three entries: the output (child) product, the input (parent) # product, and the instrument device feeding the input. self.assertTrue(len(output_data_product_provenance) == 3) self.assertTrue(self.input_dp_id in output_data_product_provenance[ output_data_product_id[0]]['parents']) self.assertTrue(instDevice_id in output_data_product_provenance[ self.input_dp_id]['parents']) self.assertTrue(output_data_product_provenance[instDevice_id]['type'] == 'InstrumentDevice') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker_with_platformdevice(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
# verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # Create CTD Parsed as the initial data product # create a stream definition for the data from the ctd simulator self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] log.debug('new ctd_parsed_data_product_id = %s', self.input_dp_id) # Only one device is ever needed for testing purposes. platform_device_obj, _ = self.rrclient.find_resources( restype=RT.PlatformDevice, name='TestPlatform') if platform_device_obj: platform_device_id = platform_device_obj[0]._id else: platform_device_obj = IonObject(RT.PlatformDevice, name='TestPlatform', description="TestPlatform", serial_number="12345") platform_device_id = self.imsclient.create_platform_device( platform_device=platform_device_obj) self.damsclient.assign_data_product( input_resource_id=platform_device_id, data_product_id=self.input_dp_id) # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process() self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id) self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id) # Test for provenance. Get Data product produced by the data processes output_data_product_id, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance( output_data_product_id[0]) # Do a basic check that the provenance graph has three entries: the output (child) product, the input (parent) # product, and the platform device feeding the input. self.assertTrue(len(output_data_product_provenance) == 3) self.assertTrue(self.input_dp_id in output_data_product_provenance[ output_data_product_id[0]]['parents']) self.assertTrue(platform_device_id in output_data_product_provenance[ self.input_dp_id]['parents']) self.assertTrue(output_data_product_provenance[platform_device_id] ['type'] == 'PlatformDevice') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_event_transform_worker(self): self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # test that a data process (type: data-product-in / event-out) can be defined and launched.
# verify that event fields are correctly populated self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] # create the DPD and two DPs self.event_data_process_id = self.create_event_data_processes() # retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=self.event_data_process_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_event_transform_worker subscription_obj: %s', subscription_objs[0]) # create a queue to catch the published granules self.subscription_id = self.pubsub_client.create_subscription( name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id) self.pubsub_client.activate_subscription(self.subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id) stream_route = self.pubsub_client.read_stream_route(self.stream_id) self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route) self.start_event_transform_listener() self.data_modified = Event() rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher.publish(rdt.to_granule()) self.assertTrue(self.event_verified.wait(self.wait_time)) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_bad_argument_map(self): self._output_stream_ids = [] # test that a data process (type: data-product-in / data-product-out) parameter mapping is validated during # data process creation and that the correct exception is raised for both input and output.
self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # two data processes using one transform and one DPD dp1_func_output_dp_id = self.create_output_data_product() # Set up DPD and DP #2 - array add function tf_obj = IonObject( RT.TransformFunction, name='add_array_func', description='adds values in an array', function='add_arrays', module="ion_example.add_arrays", arguments=['arr1', 'arr2'], function_type=TransformFunctionType.TRANSFORM, uri= 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject( RT.DataProcessDefinition, name='add_arrays', description='adds the values of two arrays', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS) add_array_dpd_id = self.dataprocessclient.create_data_process_definition( data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition( self.stream_def_id, add_array_dpd_id, binding='add_array_func') # create the data process with invalid argument map argument_map = {"arr1": "foo", "arr2": "bar"} output_param = "salinity" with self.assertRaises(BadRequest) as cm: dp1_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) ex = cm.exception log.debug(' exception raised: %s', cm) self.assertEqual( ex.message, "Input data product does not contain the parameters defined in argument map" ) # create the data process with invalid output parameter name argument_map = {"arr1": "conductivity", "arr2": "pressure"} output_param = "foo" with self.assertRaises(BadRequest) as cm: dp1_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) ex = cm.exception log.debug(' exception raised: %s', cm) self.assertEqual( ex.message, "Output data product does not contain the output parameter name provided" ) def create_event_data_processes(self): # two data processes using one transform and one DPD argument_map = {"a": "salinity"} # set up DPD and DP #2 - array add function tf_obj = IonObject( RT.TransformFunction, name='validate_salinity_array', description='validate_salinity_array', function='validate_salinity_array', module="ion.processes.data.transforms.test.test_transform_worker", arguments=['a'], function_type=TransformFunctionType.TRANSFORM) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject( RT.DataProcessDefinition, name='validate_salinity_array', description='validate_salinity_array', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS, ) add_array_dpd_id = self.dataprocessclient.create_data_process_definition( 
data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition( self.stream_def_id, add_array_dpd_id, binding='validate_salinity_array') # create the data process dp1_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=None, argument_map=argument_map) self.damsclient.register_process(dp1_data_process_id) self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id) return dp1_data_process_id def create_data_process(self): # two data processes using one transform and one DPD dp1_func_output_dp_id = self.create_output_data_product() argument_map = {"arr1": "conductivity", "arr2": "pressure"} output_param = "salinity" # set up DPD and DP #2 - array add function tf_obj = IonObject( RT.TransformFunction, name='add_array_func', description='adds values in an array', function='add_arrays', module="ion_example.add_arrays", arguments=['arr1', 'arr2'], function_type=TransformFunctionType.TRANSFORM, uri= 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject( RT.DataProcessDefinition, name='add_arrays', description='adds the values of two arrays', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS, version_label='1.0a') add_array_dpd_id = self.dataprocessclient.create_data_process_definition( data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition( self.stream_def_id, add_array_dpd_id, binding='add_array_func') # create the data process dp1_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) self.damsclient.register_process(dp1_data_process_id) #self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id) # add an attachment object to this DPD to test new SA-21 import msgpack attachment_content = 'foo bar' attachment_obj = IonObject(RT.Attachment, name='test_attachment', attachment_type=AttachmentType.ASCII, content_type='text/plain', content=msgpack.packb(attachment_content)) att_id = self.rrclient.create_attachment(add_array_dpd_id, attachment_obj) self.addCleanup(self.rrclient.delete_attachment, att_id) return add_array_dpd_id, dp1_data_process_id, dp1_func_output_dp_id def create_output_data_product(self): dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition( name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id) dp1_output_dp_obj = IonObject(RT.DataProduct, name='data_process1_data_product', description='output of add array func') dp1_func_output_dp_id = self.dataproductclient.create_data_product( dp1_output_dp_obj, dp1_outgoing_stream_id) self.addCleanup(self.dataproductclient.delete_data_product, dp1_func_output_dp_id) # retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id, PRED.hasStream, None, True) self._output_stream_ids.append(stream_ids[0]) subscription_id = self.pubsub_client.create_subscription( 'validator', data_product_ids=[dp1_func_output_dp_id]) self.addCleanup(self.pubsub_client.delete_subscription, subscription_id) def on_granule(msg, route, stream_id): log.debug('recv_packet stream_id: %s route: %s 
msg: %s', stream_id, route, msg) self.validate_output_granule(msg, route, stream_id) self.granule_verified.set() validator = StandaloneStreamSubscriber('validator', callback=on_granule) validator.start() self.addCleanup(validator.stop) self.pubsub_client.activate_subscription(subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id) return dp1_func_output_dp_id def validate_event(self, *args, **kwargs): """ This method is a callback function for receiving DataProcessStatusEvent. """ data_process_event = args[0] log.debug("DataProcessStatusEvent: %s", str(data_process_event.__dict__)) # if data process already created, check origin if self.dp_list: self.assertIn(data_process_event.origin, self.dp_list) # if this is a heartbeat event then 100 granules have been processed if 'data process status update.' in data_process_event.description: self.heartbeat_event_verified.set() else: # else check that this is the assign event if 'Data process assigned to transform worker' in data_process_event.description: self.worker_assigned_event_verified.set() elif 'Data process created for data product' in data_process_event.description: self.dp_created_event_verified.set() def validate_output_granule(self, msg, route, stream_id): self.assertIn(stream_id, self._output_stream_ids) rdt = RecordDictionaryTool.load_from_granule(msg) log.debug('validate_output_granule rdt: %s', rdt) sal_val = rdt['salinity'] np.testing.assert_array_equal(sal_val, np.array([3])) def start_event_listener(self): es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event) es.start() self.addCleanup(es.stop) def validate_transform_event(self, *args, **kwargs): """ This method is a callback function for receiving DataProcessStatusEvent. """ status_alert_event = args[0] np.testing.assert_array_equal(status_alert_event.origin, self.stream_id) np.testing.assert_array_equal(status_alert_event.values, np.array([self.event_data_process_id])) log.debug("DeviceStatusAlertEvent: %s", str(status_alert_event.__dict__)) self.event_verified.set() def start_event_transform_listener(self): es = EventSubscriber(event_type=OT.DeviceStatusAlertEvent, callback=self.validate_transform_event) es.start() self.addCleanup(es.stop) def test_download(self): egg_url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' egg_path = TransformWorker.download_egg(egg_url) import pkg_resources pkg_resources.working_set.add_entry(egg_path) from ion_example.add_arrays import add_arrays a = add_arrays(1, 2) self.assertEquals(a, 3)
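# A minimal sketch of the transform function exercised by this class. The tests above
# assert that inspect_data_process_definition returns source containing
# 'def add_arrays(a, b)' and that conductivity [1] plus pressure [2] yields salinity [3],
# so element-wise addition is consistent with both; the packaged ion_example egg may
# differ in detail, so treat this as an assumption rather than the shipped code.
import numpy as np

def add_arrays(a, b):
    """Add two numpy arrays element-wise."""
    return a + b

# Example matching the granules published above: [1] + [2] == [3].
np.testing.assert_array_equal(add_arrays(np.array([1]), np.array([2])), np.array([3]))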
class CtdbpTransformsIntTest(IonIntegrationTestCase): def setUp(self): super(CtdbpTransformsIntTest, self).setUp() self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.pubsub = PubsubManagementServiceClient() self.process_dispatcher = ProcessDispatcherServiceClient() self.dataset_management = DatasetManagementServiceClient() self.data_process_management = DataProcessManagementServiceClient() self.dataproduct_management = DataProductManagementServiceClient() self.resource_registry = ResourceRegistryServiceClient() # This is for the time values inside the packets going into the transform self.i = 0 # Cleanup of queue created by the subscriber def _get_new_ctd_packet(self, stream_definition_id, length): rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id) rdt['time'] = numpy.arange(self.i, self.i+length) for field in rdt: if isinstance(rdt._pdict.get_context(field).param_type, QuantityType): rdt[field] = numpy.array([random.uniform(0.0,75.0) for i in xrange(length)]) g = rdt.to_granule() self.i+=length return g def _create_input_param_dict_for_test(self, parameter_dict_name = ''): pdict = ParameterDictionary() t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=numpy.dtype('float64'))) t_ctxt.axis = AxisTypeEnum.TIME t_ctxt.uom = 'seconds since 01-01-1900' pdict.add_context(t_ctxt) cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) cond_ctxt.uom = '' pdict.add_context(cond_ctxt) pres_ctxt = ParameterContext('pressure', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) pres_ctxt.uom = '' pdict.add_context(pres_ctxt) temp_ctxt = ParameterContext('temperature', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) temp_ctxt.uom = '' pdict.add_context(temp_ctxt) dens_ctxt = ParameterContext('density', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) dens_ctxt.uom = '' pdict.add_context(dens_ctxt) sal_ctxt = ParameterContext('salinity', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) sal_ctxt.uom = '' pdict.add_context(sal_ctxt) #create temp streamdef so the data product can create the stream pc_list = [] for pc_k, pc in pdict.iteritems(): ctxt_id = self.dataset_management.create_parameter_context(pc_k, pc[1].dump()) pc_list.append(ctxt_id) self.addCleanup(self.dataset_management.delete_parameter_context,ctxt_id) pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, pc_list) self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id) return pdict_id def test_ctdbp_L0_all(self): """ Test packets processed by the ctdbp_L0_all transform """ #----------- Data Process Definition -------------------------------- dpd_obj = IonObject(RT.DataProcessDefinition, name='CTDBP_L0_all', description='Take parsed stream and put the C, T and P into three separate L0 streams.', module='ion.processes.data.transforms.ctdbp.ctdbp_L0', class_name='CTDBP_L0_all') dprocdef_id = self.data_process_management.create_data_process_definition(dpd_obj) self.addCleanup(self.data_process_management.delete_data_process_definition, dprocdef_id) log.debug("created data process definition: id = %s", dprocdef_id) #----------- Data Products -------------------------------- # Construct temporal and spatial Coordinate Reference System objects tdom, sdom = time_series_domain() sdom = sdom.dump() tdom = tdom.dump() input_param_dict = self._create_input_param_dict_for_test(parameter_dict_name = 
'fictitious_ctdp_param_dict') # Get the stream definition for the stream using the parameter dictionary # input_param_dict = self.dataset_management.read_parameter_dictionary_by_name('ctdbp_cdef_sample', id_only=True) input_stream_def_dict = self.pubsub.create_stream_definition(name='parsed', parameter_dictionary_id=input_param_dict) self.addCleanup(self.pubsub.delete_stream_definition, input_stream_def_dict) log.debug("Got the parsed parameter dictionary: id: %s", input_param_dict) log.debug("Got the stream def for parsed input: %s", input_stream_def_dict) # Input data product parsed_stream_dp_obj = IonObject(RT.DataProduct, name='parsed_stream', description='Parsed stream input to CTDBP L0 transform', temporal_domain=tdom, spatial_domain=sdom) input_dp_id = self.dataproduct_management.create_data_product(data_product=parsed_stream_dp_obj, stream_definition_id=input_stream_def_dict) self.addCleanup(self.dataproduct_management.delete_data_product, input_dp_id) # output data product L0_stream_dp_obj = IonObject(RT.DataProduct, name='L0_stream', description='L0_stream output of CTDBP L0 transform', temporal_domain=tdom, spatial_domain=sdom) L0_stream_dp_id = self.dataproduct_management.create_data_product(data_product=L0_stream_dp_obj, stream_definition_id=input_stream_def_dict) self.addCleanup(self.dataproduct_management.delete_data_product, L0_stream_dp_id) # The key name here must be "L0_stream": when the data process is launched, this name goes into # its config as config.process.publish_streams.L0_stream self.output_products = {'L0_stream': L0_stream_dp_id} out_stream_ids, _ = self.resource_registry.find_objects(L0_stream_dp_id, PRED.hasStream, RT.Stream, True) self.assertTrue(len(out_stream_ids)) output_stream_id = out_stream_ids[0] dproc_id = self.data_process_management.create_data_process( dprocdef_id, [input_dp_id], self.output_products) self.addCleanup(self.data_process_management.delete_data_process, dproc_id) log.debug("Created a data process for ctdbp_L0.
id: %s", dproc_id) # Activate the data process self.data_process_management.activate_data_process(dproc_id) self.addCleanup(self.data_process_management.deactivate_data_process, dproc_id) #----------- Find the stream that is associated with the input data product when it was created by create_data_product() -------------------------------- stream_ids, _ = self.resource_registry.find_objects(input_dp_id, PRED.hasStream, RT.Stream, True) self.assertTrue(len(stream_ids)) input_stream_id = stream_ids[0] stream_route = self.pubsub.read_stream_route(input_stream_id) log.debug("The input stream for the L0 transform: %s", input_stream_id) #----------- Create a subscriber that will listen to the transform's output -------------------------------- ar = gevent.event.AsyncResult() def subscriber(m,r,s): ar.set(m) sub = StandaloneStreamSubscriber(exchange_name='sub', callback=subscriber) sub_id = self.pubsub.create_subscription('subscriber_to_transform', stream_ids=[output_stream_id], exchange_name='sub') self.addCleanup(self.pubsub.delete_subscription, sub_id) self.pubsub.activate_subscription(sub_id) self.addCleanup(self.pubsub.deactivate_subscription, sub_id) sub.start() self.addCleanup(sub.stop) #----------- Publish on that stream so that the transform can receive it -------------------------------- pub = StandaloneStreamPublisher(input_stream_id, stream_route) publish_granule = self._get_new_ctd_packet(stream_definition_id=input_stream_def_dict, length = 5) pub.publish(publish_granule) log.debug("Published the following granule: %s", publish_granule) granule_from_transform = ar.get(timeout=20) log.debug("Got the following granule from the transform: %s", granule_from_transform) # Check that the granule published by the L0 transform has the right properties self._check_granule_from_transform(granule_from_transform) def _check_granule_from_transform(self, granule): """ An internal method to check if a granule has the right properties """ pass
def run_even_odd_transform(self): ''' This example script runs a chained three-way transform in which A feeds both B and C: A is the even_odd transform (it splits an input stream of numbers into an even stream and an odd stream) and B and C are the basic transforms that receive the even and odd input respectively ''' pubsub_cli = PubsubManagementServiceClient(node=self.container.node) tms_cli = TransformManagementServiceClient(node=self.container.node) procd_cli = ProcessDispatcherServiceClient(node=self.container.node) #------------------------------- # Process Definition #------------------------------- # Create the process definition for the basic transform process_definition = IonObject(RT.ProcessDefinition, name='basic_transform_definition') process_definition.executable = { 'module': 'ion.processes.data.transforms.transform_example', 'class': 'TransformExample' } basic_transform_definition_id = procd_cli.create_process_definition( process_definition=process_definition) # Create the process definition for the TransformEvenOdd process_definition = IonObject(RT.ProcessDefinition, name='evenodd_transform_definition') process_definition.executable = { 'module': 'ion.processes.data.transforms.transform_example', 'class': 'TransformEvenOdd' } evenodd_transform_definition_id = procd_cli.create_process_definition( process_definition=process_definition) #------------------------------- # Streams #------------------------------- input_stream_id = pubsub_cli.create_stream(name='input_stream', original=True) even_stream_id = pubsub_cli.create_stream(name='even_stream', original=True) odd_stream_id = pubsub_cli.create_stream(name='odd_stream', original=True) #------------------------------- # Subscriptions #------------------------------- query = StreamQuery(stream_ids=[input_stream_id]) input_subscription_id = pubsub_cli.create_subscription( query=query, exchange_name='input_queue') query = StreamQuery(stream_ids=[even_stream_id]) even_subscription_id = pubsub_cli.create_subscription( query=query, exchange_name='even_queue') query = StreamQuery(stream_ids=[odd_stream_id]) odd_subscription_id = pubsub_cli.create_subscription( query=query, exchange_name='odd_queue') #------------------------------- # Launch the EvenOdd Transform #------------------------------- evenodd_id = tms_cli.create_transform( name='even_odd', in_subscription_id=input_subscription_id, out_streams={ 'even': even_stream_id, 'odd': odd_stream_id }, process_definition_id=evenodd_transform_definition_id, configuration={}) tms_cli.activate_transform(evenodd_id) #------------------------------- # Launch the Even Processing Transform #------------------------------- even_transform_id = tms_cli.create_transform( name='even_transform', in_subscription_id=even_subscription_id, process_definition_id=basic_transform_definition_id, configuration={}) tms_cli.activate_transform(even_transform_id) #------------------------------- # Launch the Odd Processing Transform #------------------------------- odd_transform_id = tms_cli.create_transform( name='odd_transform', in_subscription_id=odd_subscription_id, process_definition_id=basic_transform_definition_id, configuration={}) tms_cli.activate_transform(odd_transform_id) #------------------------------- # Spawn the Streaming Producer #------------------------------- id_p = self.container.spawn_process( 'myproducer', 'ion.processes.data.transforms.transform_example', 'TransformExampleProducer', { 'process': { 'type': 'stream_process', 'publish_streams': { 'out_stream': input_stream_id } }, 'stream_producer': { 'interval': 4000 } })
self.container.proc_manager.procs[id_p].start()
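# For reference, a minimal sketch of the parity routing that the TransformEvenOdd process above presumably performs, stripped of all container plumbing. This is a hedged illustration, not the actual class from ion.processes.data.transforms.transform_example.
def route_even_odd(value, publish_even, publish_odd):
    """Send an integer to the even or odd callback depending on its parity."""
    if value % 2 == 0:
        publish_even(value)
    else:
        publish_odd(value)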
class VisualizationIntegrationTestHelper(IonIntegrationTestCase): def create_ctd_input_stream_and_data_product(self, data_product_name='ctd_parsed'): cc = self.container assertions = self.assertTrue # Now create client to DataProductManagementService self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node) self.pubsubclient = PubsubManagementServiceClient(node=self.container.node) self.ingestclient = IngestionManagementServiceClient(node=self.container.node) self.imsclient = InstrumentManagementServiceClient(node=self.container.node) self.dataproductclient = DataProductManagementServiceClient(node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node) self.datasetclient = DatasetManagementServiceClient(node=self.container.node) self.workflowclient = WorkflowManagementServiceClient(node=self.container.node) self.process_dispatcher = ProcessDispatcherServiceClient(node=self.container.node) self.vis_client = VisualizationServiceClient(node=self.container.node) #------------------------------- # Create CTD Parsed as the initial data product #------------------------------- # create a stream definition for the data from the ctd simulator ctd_stream_def = SBE37_CDM_stream_definition() ctd_stream_def_id = self.pubsubclient.create_stream_definition(container=ctd_stream_def, name='Simulated CTD data') log.debug('Creating new CDM data product with a stream definition') craft = CoverageCraft sdom, tdom = craft.create_domains() sdom = sdom.dump() tdom = tdom.dump() parameter_dictionary = craft.create_parameters() parameter_dictionary = parameter_dictionary.dump() dp_obj = IonObject(RT.DataProduct, name=data_product_name, description='ctd stream test', temporal_domain = tdom, spatial_domain = sdom) ctd_parsed_data_product_id = self.dataproductclient.create_data_product(dp_obj, ctd_stream_def_id, parameter_dictionary) log.debug('new ctd_parsed_data_product_id = %s' % ctd_parsed_data_product_id) #Only ever need one device for testing purposes. 
instDevice_obj,_ = self.rrclient.find_resources(restype=RT.InstrumentDevice, name='SBE37IMDevice') if instDevice_obj: instDevice_id = instDevice_obj[0]._id else: instDevice_obj = IonObject(RT.InstrumentDevice, name='SBE37IMDevice', description="SBE37IMDevice", serial_number="12345" ) instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj) self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=ctd_parsed_data_product_id) self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_parsed_data_product_id) # Retrieve the id of the OUTPUT stream from the out Data Product stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product_id, PRED.hasStream, None, True) assertions(len(stream_ids) > 0 ) ctd_stream_id = stream_ids[0] return ctd_stream_id, ctd_parsed_data_product_id def create_data_product(self, dp_name = "", dp_description = "", stream_definition_id = None): craft = CoverageCraft sdom, tdom = craft.create_domains() sdom = sdom.dump() tdom = tdom.dump() parameter_dictionary = craft.create_parameters() # this creates a ParameterDictionary object parameter_dictionary = parameter_dictionary.dump() # this returns a python dictionary data_prod_obj = IonObject(RT.DataProduct, name=dp_name, description=dp_description, temporal_domain = tdom, spatial_domain = sdom) data_prod_id = self.dataproductclient.create_data_product(data_prod_obj, stream_definition_id, parameter_dictionary) return data_prod_id, data_prod_obj def start_simple_input_stream_process(self, ctd_stream_id): return self.start_input_stream_process(ctd_stream_id) def start_sinusoidal_input_stream_process(self, ctd_stream_id): return self.start_input_stream_process(ctd_stream_id, 'ion.processes.data.sinusoidal_stream_publisher', 'SinusoidalCtdPublisher') def start_input_stream_process(self, ctd_stream_id, module = 'ion.processes.data.ctd_stream_publisher', class_name= 'SimpleCtdPublisher'): ### ### Start the process for producing the CTD data ### # process definition for the ctd simulator... 
producer_definition = ProcessDefinition() producer_definition.executable = { 'module':module, 'class':class_name } ctd_sim_procdef_id = self.process_dispatcher.create_process_definition(process_definition=producer_definition) # Start the ctd simulator to produce some data configuration = { 'process':{ 'stream_id':ctd_stream_id, } } ctd_sim_pid = self.process_dispatcher.schedule_process(process_definition_id=ctd_sim_procdef_id, configuration=configuration) return ctd_sim_pid def start_output_stream_and_listen(self, ctd_stream_id, data_product_stream_ids, message_count_per_stream=10): cc = self.container assertions = self.assertTrue ### ### Make a subscriber in the test to listen for transformed data ### salinity_subscription_id = self.pubsubclient.create_subscription( query=StreamQuery(data_product_stream_ids), exchange_name = 'workflow_test', exchange_point = 'science_data', name = "test workflow transformations", ) pid = cc.spawn_process(name='dummy_process_for_test', module='pyon.ion.process', cls='SimpleProcess', config={}) dummy_process = cc.proc_manager.procs[pid] subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, container=cc) result = gevent.event.AsyncResult() results = [] message_count = len(data_product_stream_ids) * message_count_per_stream def message_received(message, headers): results.append(message) if len(results) >= message_count: # Only wait for so many messages per stream result.set(True) subscriber = subscriber_registrar.create_subscriber(exchange_name='workflow_test', callback=message_received) subscriber.start() # after the queue has been created it is safe to activate the subscription self.pubsubclient.activate_subscription(subscription_id=salinity_subscription_id) # Start the input stream process if ctd_stream_id is not None: ctd_sim_pid = self.start_simple_input_stream_process(ctd_stream_id) # Assert that we have received data assertions(result.get(timeout=30)) # stop the flow and parse the messages... 
if ctd_stream_id is not None: self.process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data self.pubsubclient.deactivate_subscription(subscription_id=salinity_subscription_id) subscriber.stop() return results def validate_messages(self, results): cc = self.container assertions = self.assertTrue first_salinity_values = None for message in results: rdt = RecordDictionaryTool.load_from_granule(message) try: temp = get_safe(rdt, 'temp') # psd = PointSupplementStreamParser(stream_definition=self.ctd_stream_def, stream_granule=message) # temp = psd.get_values('temperature') # log.info(psd.list_field_names()) except KeyError as ke: temp = None if temp is not None: assertions(isinstance(temp, numpy.ndarray)) log.info( 'temperature=' + str(numpy.nanmin(temp))) first_salinity_values = None else: #psd = PointSupplementStreamParser(stream_definition=SalinityTransform.outgoing_stream_def, stream_granule=message) #log.info( psd.list_field_names()) # Test the handy info method for the names of fields in the stream def #assertions('salinity' in psd.list_field_names()) # you have to know the name of the coverage in stream def salinity = get_safe(rdt, 'salinity') #salinity = psd.get_values('salinity') log.info( 'salinity=' + str(numpy.nanmin(salinity))) # Check to see if salinity has values assertions(salinity is not None) assertions(isinstance(salinity, numpy.ndarray)) assertions(numpy.nanmin(salinity) > 0.0) # salinity should always be greater than 0 if first_salinity_values is None: first_salinity_values = salinity.tolist() else: second_salinity_values = salinity.tolist() assertions(len(first_salinity_values) == len(second_salinity_values)) for idx in range(0, len(first_salinity_values)): assertions(first_salinity_values[idx]*2.0 == second_salinity_values[idx]) def validate_data_ingest_retrieve(self, dataset_id): assertions = self.assertTrue self.data_retriever = DataRetrieverServiceClient(node=self.container.node) # validate that data was ingested replay_granule = self.data_retriever.retrieve_last_granule(dataset_id) rdt = RecordDictionaryTool.load_from_granule(replay_granule) salinity = get_safe(rdt, 'salinity') assertions(salinity is not None) # retrieve all the granules from the database and check the values replay_granule_all = self.data_retriever.retrieve(dataset_id) rdt = RecordDictionaryTool.load_from_granule(replay_granule_all) for k, v in rdt.iteritems(): if k == 'salinity': for val in numpy.nditer(v): assertions(val > 0) def create_salinity_data_process_definition(self): # Salinity: Data Process Definition # First look to see if it exists and if not, then create it dpd,_ = self.rrclient.find_resources(restype=RT.DataProcessDefinition, name='ctd_salinity') if len(dpd) > 0: return dpd[0] log.debug("Create data process definition SalinityTransform") dpd_obj = IonObject(RT.DataProcessDefinition, name='ctd_salinity', description='create a salinity data product', module='ion.processes.data.transforms.ctd.ctd_L2_salinity', class_name='SalinityTransform', process_source='SalinityTransform source code here...') try: ctd_L2_salinity_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) except Exception as ex: self.fail("failed to create new SalinityTransform data process definition: %s" %ex) # create a stream definition for the data from the salinity Transform sal_stream_def_id = self.pubsubclient.create_stream_definition(container=SalinityTransform.outgoing_stream_def, name='Salinity') 
self.dataprocessclient.assign_stream_definition_to_data_process_definition(sal_stream_def_id, ctd_L2_salinity_dprocdef_id ) return ctd_L2_salinity_dprocdef_id def create_salinity_doubler_data_process_definition(self): # First look to see if it exists and if not, then create it dpd,_ = self.rrclient.find_resources(restype=RT.DataProcessDefinition, name='salinity_doubler') if len(dpd) > 0: return dpd[0] # Salinity Doubler: Data Process Definition log.debug("Create data process definition SalinityDoublerTransform") dpd_obj = IonObject(RT.DataProcessDefinition, name='salinity_doubler', description='create a salinity doubler data product', module='ion.processes.data.transforms.example_double_salinity', class_name='SalinityDoubler', process_source='SalinityDoubler source code here...') try: salinity_doubler_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) except Exception as ex: self.fail("failed to create new SalinityDoubler data process definition: %s" %ex) # create a stream definition for the data from the SalinityDoubler transform salinity_double_stream_def_id = self.pubsubclient.create_stream_definition(container=SalinityDoubler.outgoing_stream_def, name='SalinityDoubler') self.dataprocessclient.assign_stream_definition_to_data_process_definition(salinity_double_stream_def_id, salinity_doubler_dprocdef_id ) return salinity_doubler_dprocdef_id def create_transform_process(self, data_process_definition_id, data_process_input_dp_id): data_process_definition = self.rrclient.read(data_process_definition_id) # Find the link between the output Stream Definition resource and the Data Process Definition resource stream_ids,_ = self.rrclient.find_objects(data_process_definition._id, PRED.hasStreamDefinition, RT.StreamDefinition, id_only=True) if not stream_ids: raise Inconsistent("The data process definition %s is missing an association to an output stream definition" % data_process_definition._id ) process_output_stream_def_id = stream_ids[0] # Use the data process definition name as the name of the output data product data_process_name = data_process_definition.name # Create the output data product of the transform transform_dp_obj = IonObject(RT.DataProduct, name=data_process_name, description=data_process_definition.description) transform_dp_id = self.dataproductclient.create_data_product(transform_dp_obj, process_output_stream_def_id) self.dataproductclient.activate_data_product_persistence(data_product_id=transform_dp_id) # the transform's data product is the output product output_data_product_id = transform_dp_id # Create the transform data process log.debug("create data_process and start it") data_process_id = self.dataprocessclient.create_data_process(data_process_definition._id, [data_process_input_dp_id], {'output':transform_dp_id}) self.dataprocessclient.activate_data_process(data_process_id) # Find the id of the output data stream stream_ids, _ = self.rrclient.find_objects(transform_dp_id, PRED.hasStream, None, True) if not stream_ids: raise Inconsistent("The data process %s is missing an association to an output stream" % data_process_id ) return data_process_id, output_data_product_id def create_google_dt_data_process_definition(self): # First look to see if it exists and if not, then create it dpd,_ = self.rrclient.find_resources(restype=RT.DataProcessDefinition, name='google_dt_transform') if len(dpd) > 0: return dpd[0] # Data Process Definition log.debug("Create data process definition GoogleDtTransform") dpd_obj = 
IonObject(RT.DataProcessDefinition, name='google_dt_transform', description='Convert data streams to Google DataTables', module='ion.processes.data.transforms.viz.google_dt', class_name='VizTransformGoogleDT', process_source='VizTransformGoogleDT source code here...') try: procdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) except Exception as ex: self.fail("failed to create new VizTransformGoogleDT data process definition: %s" %ex) # create a stream definition for the data from the transform stream_def_id = self.pubsubclient.create_stream_definition(container=VizTransformGoogleDT.outgoing_stream_def, name='VizTransformGoogleDT') self.dataprocessclient.assign_stream_definition_to_data_process_definition(stream_def_id, procdef_id ) return procdef_id def validate_google_dt_transform_results(self, results): cc = self.container assertions = self.assertTrue # if it's just one granule, wrap it up in a list so we can use the following for loop for a couple of cases if isinstance(results, Granule): results = [results] for g in results: if isinstance(g, Granule): tx = TaxyTool.load_from_granule(g) rdt = RecordDictionaryTool.load_from_granule(g) gdt_data = get_safe(rdt, 'google_dt_components') # if this granule does not contain google dt, skip it if gdt_data is None: continue gdt = gdt_data[0] assertions(gdt['viz_product_type'] == 'google_dt' ) assertions(len(gdt['data_description']) >= 0) # Need to come up with a better check assertions(len(gdt['data_content']) >= 0) def create_mpl_graphs_data_process_definition(self): # First look to see if it exists and if not, then create it dpd,_ = self.rrclient.find_resources(restype=RT.DataProcessDefinition, name='mpl_graphs_transform') if len(dpd) > 0: return dpd[0] # Data Process Definition log.debug("Create data process definition MatplotlibGraphsTransform") dpd_obj = IonObject(RT.DataProcessDefinition, name='mpl_graphs_transform', description='Convert data streams to Matplotlib graphs', module='ion.processes.data.transforms.viz.matplotlib_graphs', class_name='VizTransformMatplotlibGraphs', process_source='VizTransformMatplotlibGraphs source code here...') try: procdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj) except Exception as ex: self.fail("failed to create new VizTransformMatplotlibGraphs data process definition: %s" %ex) # create a stream definition for the data stream_def_id = self.pubsubclient.create_stream_definition(container=VizTransformMatplotlibGraphs.outgoing_stream_def, name='VizTransformMatplotlibGraphs') self.dataprocessclient.assign_stream_definition_to_data_process_definition(stream_def_id, procdef_id ) return procdef_id def validate_mpl_graphs_transform_results(self, results): cc = self.container assertions = self.assertTrue # if it's just one granule, wrap it up in a list so we can use the following for loop for a couple of cases if isinstance(results, Granule): results = [results] for g in results: if isinstance(g, Granule): tx = TaxyTool.load_from_granule(g) rdt = RecordDictionaryTool.load_from_granule(g) graphs = get_safe(rdt, 'matplotlib_graphs') if graphs is None: continue for graph in graphs[0]: # At this point only dictionaries containing image data should be passed # For some reason non-dictionary values are filtering through. 
if not isinstance(graph, dict): continue assertions(graph['viz_product_type'] == 'matplotlib_graphs' ) # check to see if the list (numpy array) contains actual images assertions(imghdr.what(graph['image_name'], h = graph['image_obj']) == 'png') def validate_vis_service_google_dt_results(self, results): assertions = self.assertTrue assertions(results) # slice the JSONP wrapper off as a substring; str.lstrip/rstrip strip character sets, not substrings jsonp_prefix = "google.visualization.Query.setResponse(" if results.startswith(jsonp_prefix): results = results[len(jsonp_prefix):] gdt_str = results.rstrip(")") assertions(len(gdt_str) > 0) return def validate_vis_service_mpl_graphs_results(self, results): assertions = self.assertTrue assertions(results) # check to see if the object passed is a dictionary with a valid image object in it # slice off the 'image/' MIME prefix (str.lstrip would strip a character set instead) image_format = results["content_type"][len("image/"):] assertions(imghdr.what(results['image_name'], h = base64.decodestring(results['image_obj'])) == image_format) return
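# Note on the prefix handling above: str.lstrip()/str.rstrip() strip *character sets*, not substrings, so e.g. "image/gif".lstrip("image/") yields "f". A reusable prefix-stripping helper in the same spirit, shown here as a hedged sketch (not part of the original test suite):
def strip_prefix(text, prefix):
    """Remove prefix from the front of text if present; treats prefix as a substring, unlike str.lstrip."""
    return text[len(prefix):] if text.startswith(prefix) else text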
class TestTransformWorker(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Instantiate a process to represent the test process = TransformWorkerTestProcess() self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node) self.pubsub_client = PubsubManagementServiceClient(node=self.container.node) self.dataproductclient = DataProductManagementServiceClient(node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node) self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node) self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.imsclient = InstrumentManagementServiceProcessClient(node=self.container.node, process = process) self.time_dom, self.spatial_dom = time_series_domain() self.ph = ParameterHelper(self.dataset_management_client, self.addCleanup) self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10) def push_granule(self, data_product_id): ''' Publishes and monitors that the granule arrived ''' datasets, _ = self.rrclient.find_objects(data_product_id, PRED.hasDataset, id_only=True) dataset_monitor = DatasetMonitor(datasets[0]) rdt = self.ph.rdt_for_data_product(data_product_id) self.ph.fill_parsed_rdt(rdt) self.ph.publish_rdt_to_data_product(data_product_id, rdt) assert dataset_monitor.wait() dataset_monitor.stop() @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. # verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.dp_list = [] self.data_process_objs = [] self._output_stream_ids = [] self.granule_verified = Event() self.worker_assigned_event_verified = Event() self.dp_created_event_verified = Event() self.heartbeat_event_verified = Event() self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] self.start_event_listener() # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process() self.dp_list.append(dataprocess_id) # validate that the repository for data product algorithms persists the new resources NEW SA-1 # create_data_process call created one of each dpd_ids, _ = self.rrclient.find_resources(restype=RT.DataProcessDefinition, id_only=False) # there will be more than one because of 
the DPDs that represent the PFs in the data product above self.assertTrue(dpd_ids is not None) dp_ids, _ = self.rrclient.find_resources(restype=RT.DataProcess, id_only=False) # only one DP because the PFs in the data product above are not activated yet. self.assertEquals(len(dp_ids), 1) # validate the name and version label NEW SA - 2 dataprocessdef_obj = self.dataprocessclient.read_data_process_definition(dataprocessdef_id) self.assertEqual(dataprocessdef_obj.version_label, '1.0a') self.assertEqual(dataprocessdef_obj.name, 'add_arrays') # validate that the DPD has an attachment NEW SA - 21 attachment_ids, assoc_ids = self.rrclient.find_objects(dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True) self.assertEqual(len(attachment_ids), 1) attachment_obj = self.rrclient.read_attachment(attachment_ids[0]) log.debug('attachment: %s', attachment_obj) # validate that the data process resource has input and output data products associated # L4-CI-SA-RQ-364 and NEW SA-3 outproduct_ids, assoc_ids = self.rrclient.find_objects(dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True) self.assertEqual(len(outproduct_ids), 1) inproduct_ids, assoc_ids = self.rrclient.find_objects(dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True) self.assertEqual(len(inproduct_ids), 1) # Test for provenance. Get Data product produced by the data processes output_data_product_id,_ = self.rrclient.find_objects(subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance(output_data_product_id[0]) # Do a basic check that there are 2 entries in the provenance graph: the parent and the child data product # (the DataProcessDefinition that created the child appears as metadata on the parent link). self.assertTrue(len(output_data_product_provenance) == 2) self.assertTrue(self.input_dp_id in output_data_product_provenance[output_data_product_id[0]]['parents']) self.assertTrue(output_data_product_provenance[output_data_product_id[0]]['parents'][self.input_dp_id]['data_process_definition_id'] == dataprocessdef_id) # NEW SA - 4 | Data processing shall include the appropriate data product algorithm name and version number in # the metadata of each output data product created by the data product algorithm. 
output_data_product_obj,_ = self.rrclient.find_objects(subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=False) self.assertTrue(output_data_product_obj[0].name is not None) self.assertTrue(output_data_product_obj[0]._rev is not None) # retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects(subject=dataprocess_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) # create a queue to catch the published granules self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id) self.pubsub_client.activate_subscription(self.subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id) stream_route = self.pubsub_client.read_stream_route(self.stream_id) self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route ) for n in range(1, 101): rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher.publish(rdt.to_granule()) # validate that the output granule is received and the updated value is correct self.assertTrue(self.granule_verified.wait(self.wait_time)) # validate that the data process loaded into worker event is received (L4-CI-SA-RQ-182) self.assertTrue(self.worker_assigned_event_verified.wait(self.wait_time)) # validate that the data process create (with data product ids) event is received (NEW SA-42) self.assertTrue(self.dp_created_event_verified.wait(self.wait_time)) # validate that the data process heartbeat event is received (for every hundred granules processed) (L4-CI-SA-RQ-182) # this takes a while so set the wait limit to a large value self.assertTrue(self.heartbeat_event_verified.wait(200)) # validate that the code of the transform function can be retrieved via inspect_data_process_definition src = self.dataprocessclient.inspect_data_process_definition(dataprocessdef_id) self.assertIn( 'def add_arrays(a, b)', src) # now delete the DPD and DP, then verify that the resources are retired so that the information required for provenance is still available self.dataprocessclient.delete_data_process(dataprocess_id) self.dataprocessclient.delete_data_process_definition(dataprocessdef_id) in_dp_objs, _ = self.rrclient.find_objects(subject=dataprocess_id, predicate=PRED.hasInputProduct, object_type=RT.DataProduct, id_only=True) self.assertTrue(in_dp_objs is not None) dpd_objs, _ = self.rrclient.find_subjects(subject_type=RT.DataProcessDefinition, predicate=PRED.hasDataProcess, object=dataprocess_id, id_only=True) self.assertTrue(dpd_objs is not None) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker_with_instrumentdevice(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. 
# verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # Create CTD Parsed as the initial data product # create a stream definition for the data from the ctd simulator self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id) # only ever need one device for testing purposes. instDevice_obj,_ = self.rrclient.find_resources(restype=RT.InstrumentDevice, name='test_ctd_device') if instDevice_obj: instDevice_id = instDevice_obj[0]._id else: instDevice_obj = IonObject(RT.InstrumentDevice, name='test_ctd_device', description="test_ctd_device", serial_number="12345" ) instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj) self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=self.input_dp_id) # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process() self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id) self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id) # Test for provenance. Get Data product produced by the data processes output_data_product_id,_ = self.rrclient.find_objects(subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance(output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. self.assertTrue(len(output_data_product_provenance) == 3) self.assertTrue(self.input_dp_id in output_data_product_provenance[output_data_product_id[0]]['parents']) self.assertTrue(instDevice_id in output_data_product_provenance[self.input_dp_id]['parents']) self.assertTrue(output_data_product_provenance[instDevice_id]['type'] == 'InstrumentDevice') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker_with_platformdevice(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. 
# verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # Create CTD Parsed as the initial data product # create a stream definition for the data from the ctd simulator self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id) # only ever need one device for testing purposes. platform_device_obj,_ = self.rrclient.find_resources(restype=RT.PlatformDevice, name='TestPlatform') if platform_device_obj: platform_device_id = platform_device_obj[0]._id else: platform_device_obj = IonObject(RT.PlatformDevice, name='TestPlatform', description="TestPlatform", serial_number="12345" ) platform_device_id = self.imsclient.create_platform_device(platform_device=platform_device_obj) self.damsclient.assign_data_product(input_resource_id=platform_device_id, data_product_id=self.input_dp_id) # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process() self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id) self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id) # Test for provenance. Get Data product produced by the data processes output_data_product_id,_ = self.rrclient.find_objects(subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance(output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. self.assertTrue(len(output_data_product_provenance) == 3) self.assertTrue(self.input_dp_id in output_data_product_provenance[output_data_product_id[0]]['parents']) self.assertTrue(platform_device_id in output_data_product_provenance[self.input_dp_id]['parents']) self.assertTrue(output_data_product_provenance[platform_device_id]['type'] == 'PlatformDevice') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_event_transform_worker(self): self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # test that a data process (type: data-product-in / event-out) can be defined and launched. 
# verify that event fields are correctly populated self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] # create the DPD and two DPs self.event_data_process_id = self.create_event_data_processes() # retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects(subject=self.event_data_process_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_event_transform_worker subscription_obj: %s', subscription_objs[0]) # create a queue to catch the published granules self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id) self.pubsub_client.activate_subscription(self.subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id) stream_route = self.pubsub_client.read_stream_route(self.stream_id) self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route ) self.start_event_transform_listener() self.data_modified = Event() rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher.publish(rdt.to_granule()) self.assertTrue(self.event_verified.wait(self.wait_time)) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_bad_argument_map(self): self._output_stream_ids = [] # test that a data process (type: data-product-in / data-product-out) parameter mapping it validated during # data process creation and that the correct exception is raised for both input and output. 
self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject( RT.DataProduct, name='input_data_product', description='input test stream', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # two data processes using one transform and one DPD dp1_func_output_dp_id = self.create_output_data_product() # Set up DPD and DP #2 - array add function tf_obj = IonObject(RT.TransformFunction, name='add_array_func', description='adds values in an array', function='add_arrays', module="ion_example.add_arrays", arguments=['arr1', 'arr2'], function_type=TransformFunctionType.TRANSFORM, uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject(RT.DataProcessDefinition, name='add_arrays', description='adds the values of two arrays', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS ) add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='add_array_func' ) # create the data process with invalid argument map argument_map = {"arr1": "foo", "arr2": "bar"} output_param = "salinity" with self.assertRaises(BadRequest) as cm: dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) ex = cm.exception log.debug(' exception raised: %s', cm) self.assertEqual(ex.message, "Input data product does not contain the parameters defined in argument map") # create the data process with invalid output parameter name argument_map = {"arr1": "conductivity", "arr2": "pressure"} output_param = "foo" with self.assertRaises(BadRequest) as cm: dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) ex = cm.exception log.debug(' exception raised: %s', cm) self.assertEqual(ex.message, "Output data product does not contain the output parameter name provided") def create_event_data_processes(self): # two data processes using one transform and one DPD argument_map= {"a": "salinity"} # set up DPD and DP #2 - array add function tf_obj = IonObject(RT.TransformFunction, name='validate_salinity_array', description='validate_salinity_array', function='validate_salinity_array', module="ion.processes.data.transforms.test.test_transform_worker", arguments=['a'], function_type=TransformFunctionType.TRANSFORM ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject(RT.DataProcessDefinition, name='validate_salinity_array', description='validate_salinity_array', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS, ) add_array_dpd_id = 
self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='validate_salinity_array' ) # create the data process dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=None, argument_map=argument_map) self.damsclient.register_process(dp1_data_process_id) self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id) return dp1_data_process_id def create_data_process(self): # two data processes using one transform and one DPD dp1_func_output_dp_id = self.create_output_data_product() argument_map = {"arr1": "conductivity", "arr2": "pressure"} output_param = "salinity" # set up DPD and DP #2 - array add function tf_obj = IonObject(RT.TransformFunction, name='add_array_func', description='adds values in an array', function='add_arrays', module="ion_example.add_arrays", arguments=['arr1', 'arr2'], function_type=TransformFunctionType.TRANSFORM, uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject(RT.DataProcessDefinition, name='add_arrays', description='adds the values of two arrays', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS, version_label='1.0a' ) add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='add_array_func' ) # create the data process dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id], outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param) self.damsclient.register_process(dp1_data_process_id) #self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id) # add an attachment object to this DPD to test new SA-21 import msgpack attachment_content = 'foo bar' attachment_obj = IonObject( RT.Attachment, name='test_attachment', attachment_type=AttachmentType.ASCII, content_type='text/plain', content=msgpack.packb(attachment_content)) att_id = self.rrclient.create_attachment(add_array_dpd_id, attachment_obj) self.addCleanup(self.rrclient.delete_attachment, att_id) return add_array_dpd_id, dp1_data_process_id, dp1_func_output_dp_id def create_output_data_product(self): dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition(name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id) dp1_output_dp_obj = IonObject( RT.DataProduct, name='data_process1_data_product', description='output of add array func', temporal_domain = self.time_dom.dump(), spatial_domain = self.spatial_dom.dump()) dp1_func_output_dp_id = self.dataproductclient.create_data_product(dp1_output_dp_obj, dp1_outgoing_stream_id) self.addCleanup(self.dataproductclient.delete_data_product, dp1_func_output_dp_id) # retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id, PRED.hasStream, None, True) self._output_stream_ids.append(stream_ids[0]) subscription_id = self.pubsub_client.create_subscription('validator', data_product_ids=[dp1_func_output_dp_id]) 
self.addCleanup(self.pubsub_client.delete_subscription, subscription_id) def on_granule(msg, route, stream_id): log.debug('recv_packet stream_id: %s route: %s msg: %s', stream_id, route, msg) self.validate_output_granule(msg, route, stream_id) self.granule_verified.set() validator = StandaloneStreamSubscriber('validator', callback=on_granule) validator.start() self.addCleanup(validator.stop) self.pubsub_client.activate_subscription(subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id) return dp1_func_output_dp_id def validate_event(self, *args, **kwargs): """ This method is a callback function for receiving DataProcessStatusEvent. """ data_process_event = args[0] log.debug("DataProcessStatusEvent: %s" , str(data_process_event.__dict__)) # if the data process is already created, check the origin if self.dp_list: self.assertIn( data_process_event.origin, self.dp_list) # if this is a heartbeat event then 100 granules have been processed if 'data process status update.' in data_process_event.description: self.heartbeat_event_verified.set() else: # else check that this is the assign event if 'Data process assigned to transform worker' in data_process_event.description: self.worker_assigned_event_verified.set() elif 'Data process created for data product' in data_process_event.description: self.dp_created_event_verified.set() def validate_output_granule(self, msg, route, stream_id): self.assertIn( stream_id, self._output_stream_ids) rdt = RecordDictionaryTool.load_from_granule(msg) log.debug('validate_output_granule rdt: %s', rdt) sal_val = rdt['salinity'] np.testing.assert_array_equal(sal_val, np.array([3])) def start_event_listener(self): es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event) es.start() self.addCleanup(es.stop) def validate_transform_event(self, *args, **kwargs): """ This method is a callback function for receiving DeviceStatusAlertEvent. """ status_alert_event = args[0] np.testing.assert_array_equal(status_alert_event.origin, self.stream_id ) np.testing.assert_array_equal(status_alert_event.values, np.array([self.event_data_process_id])) log.debug("DeviceStatusAlertEvent: %s" , str(status_alert_event.__dict__)) self.event_verified.set() def start_event_transform_listener(self): es = EventSubscriber(event_type=OT.DeviceStatusAlertEvent, callback=self.validate_transform_event) es.start() self.addCleanup(es.stop) def test_download(self): egg_url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' egg_path = TransformWorker.download_egg(egg_url) import pkg_resources pkg_resources.working_set.add_entry(egg_path) from ion_example.add_arrays import add_arrays a = add_arrays(1, 2) self.assertEquals(a, 3)
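# A plausible reconstruction of the add_arrays transform function that the tests above load from the ion_example egg. Hedged: only its behavior is pinned down here, by the assertions that the inspected source contains 'def add_arrays(a, b)' and that add_arrays(1, 2) == 3 (and that publishing conductivity=[1] and pressure=[2] yields salinity=[3]); the real egg may differ.
def add_arrays(a, b):
    """Element-wise sum of two array-like inputs."""
    import numpy
    return numpy.asarray(a) + numpy.asarray(b)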
class CtdTransformsIntTest(IonIntegrationTestCase): def setUp(self): super(CtdTransformsIntTest, self).setUp() self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.queue_cleanup = [] self.exchange_cleanup = [] self.pubsub = PubsubManagementServiceClient() self.process_dispatcher = ProcessDispatcherServiceClient() self.dataset_management = DatasetManagementServiceClient() self.exchange_name = 'ctd_L0_all_queue' self.exchange_point = 'test_exchange' self.i = 0 def tearDown(self): for queue in self.queue_cleanup: xn = self.container.ex_manager.create_xn_queue(queue) xn.delete() for exchange in self.exchange_cleanup: xp = self.container.ex_manager.create_xp(exchange) xp.delete() def test_ctd_L0_all(self): ''' Test that packets are processed by the ctd_L0_all transform ''' #--------------------------------------------------------------------------------------------- # Launch a ctd transform #--------------------------------------------------------------------------------------------- # Create the process definition process_definition = ProcessDefinition( name='ctd_L0_all', description='For testing ctd_L0_all') process_definition.executable[ 'module'] = 'ion.processes.data.transforms.ctd.ctd_L0_all' process_definition.executable['class'] = 'ctd_L0_all' ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition( process_definition=process_definition) # Build the config config = DotDict() config.process.queue_name = self.exchange_name config.process.exchange_point = self.exchange_point pdict_id = self.dataset_management.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) stream_def_id = self.pubsub.create_stream_definition( 'ctd_all_stream_def', parameter_dictionary_id=pdict_id) cond_stream_id, _ = self.pubsub.create_stream( 'test_cond', exchange_point='science_data', stream_definition_id=stream_def_id) pres_stream_id, _ = self.pubsub.create_stream( 'test_pres', exchange_point='science_data', stream_definition_id=stream_def_id) temp_stream_id, _ = self.pubsub.create_stream( 'test_temp', exchange_point='science_data', stream_definition_id=stream_def_id) config.process.publish_streams.conductivity = cond_stream_id config.process.publish_streams.pressure = pres_stream_id config.process.publish_streams.temperature = temp_stream_id # Schedule the process pid = self.process_dispatcher.schedule_process( process_definition_id=ctd_transform_proc_def_id, configuration=config) #--------------------------------------------------------------------------------------------- # Create subscribers that will receive the conductivity, temperature and pressure granules from # the ctd transform #--------------------------------------------------------------------------------------------- ar_cond = gevent.event.AsyncResult() def subscriber1(m, r, s): ar_cond.set(m) sub_cond = StandaloneStreamSubscriber('sub_cond', subscriber1) self.addCleanup(sub_cond.stop) ar_temp = gevent.event.AsyncResult() def subscriber2(m, r, s): ar_temp.set(m) sub_temp = StandaloneStreamSubscriber('sub_temp', subscriber2) self.addCleanup(sub_temp.stop) ar_pres = gevent.event.AsyncResult() def subscriber3(m, r, s): ar_pres.set(m) sub_pres = StandaloneStreamSubscriber('sub_pres', subscriber3) self.addCleanup(sub_pres.stop) sub_cond_id = self.pubsub.create_subscription( 'subscription_cond', stream_ids=[cond_stream_id], exchange_name='sub_cond') sub_temp_id = self.pubsub.create_subscription( 'subscription_temp', stream_ids=[temp_stream_id], exchange_name='sub_temp') 
sub_pres_id = self.pubsub.create_subscription( 'subscription_pres', stream_ids=[pres_stream_id], exchange_name='sub_pres') self.pubsub.activate_subscription(sub_cond_id) self.pubsub.activate_subscription(sub_temp_id) self.pubsub.activate_subscription(sub_pres_id) self.queue_cleanup.append(sub_cond.xn.queue) self.queue_cleanup.append(sub_temp.xn.queue) self.queue_cleanup.append(sub_pres.xn.queue) sub_cond.start() sub_temp.start() sub_pres.start() #------------------------------------------------------------------------------------------------------ # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform #------------------------------------------------------------------------------------------------------ # Do all the routing stuff for the publishing routing_key = 'stream_id.stream' stream_route = StreamRoute(self.exchange_point, routing_key) xn = self.container.ex_manager.create_xn_queue(self.exchange_name) xp = self.container.ex_manager.create_xp(self.exchange_point) xn.bind('stream_id.stream', xp) pub = StandaloneStreamPublisher('stream_id', stream_route) # Build a packet that can be published publish_granule = self._get_new_ctd_packet( stream_definition_id=stream_def_id, length=5) # Publish the packet pub.publish(publish_granule) #------------------------------------------------------------------------------------------------------ # Make assertions about whether the ctd transform executed its algorithm and published the correct # granules #------------------------------------------------------------------------------------------------------ # Get the granule that is published by the ctd transform post processing result_cond = ar_cond.get(timeout=10) result_temp = ar_temp.get(timeout=10) result_pres = ar_pres.get(timeout=10) out_dict = {} out_dict['c'] = RecordDictionaryTool.load_from_granule( result_cond)['conductivity'] out_dict['t'] = RecordDictionaryTool.load_from_granule( result_temp)['temp'] out_dict['p'] = RecordDictionaryTool.load_from_granule( result_pres)['pressure'] # Check that the transform algorithm was successfully executed self.check_granule_splitting(publish_granule, out_dict) def test_ctd_L1_conductivity(self): ''' Test that packets are processed by the ctd_L1_conductivity transform ''' #--------------------------------------------------------------------------------------------- # Launch a ctd transform #--------------------------------------------------------------------------------------------- # Create the process definition process_definition = ProcessDefinition( name='CTDL1ConductivityTransform', description='For testing CTDL1ConductivityTransform') process_definition.executable[ 'module'] = 'ion.processes.data.transforms.ctd.ctd_L1_conductivity' process_definition.executable['class'] = 'CTDL1ConductivityTransform' ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition( process_definition=process_definition) # Build the config config = DotDict() config.process.queue_name = self.exchange_name config.process.exchange_point = self.exchange_point pdict_id = self.dataset_management.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) stream_def_id = self.pubsub.create_stream_definition( 'cond_stream_def', parameter_dictionary_id=pdict_id) cond_stream_id, _ = self.pubsub.create_stream( 'test_conductivity', exchange_point='science_data', stream_definition_id=stream_def_id) config.process.publish_streams.conductivity = cond_stream_id # Schedule the process 
self.process_dispatcher.schedule_process( process_definition_id=ctd_transform_proc_def_id, configuration=config) #--------------------------------------------------------------------------------------------- # Create subscribers that will receive the conductivity, temperature and pressure granules from # the ctd transform #--------------------------------------------------------------------------------------------- ar_cond = gevent.event.AsyncResult() def subscriber1(m, r, s): ar_cond.set(m) sub_cond = StandaloneStreamSubscriber('sub_cond', subscriber1) self.addCleanup(sub_cond.stop) sub_cond_id = self.pubsub.create_subscription( 'subscription_cond', stream_ids=[cond_stream_id], exchange_name='sub_cond') self.pubsub.activate_subscription(sub_cond_id) self.queue_cleanup.append(sub_cond.xn.queue) sub_cond.start() #------------------------------------------------------------------------------------------------------ # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform #------------------------------------------------------------------------------------------------------ # Do all the routing stuff for the publishing routing_key = 'stream_id.stream' stream_route = StreamRoute(self.exchange_point, routing_key) xn = self.container.ex_manager.create_xn_queue(self.exchange_name) xp = self.container.ex_manager.create_xp(self.exchange_point) xn.bind('stream_id.stream', xp) pub = StandaloneStreamPublisher('stream_id', stream_route) # Build a packet that can be published publish_granule = self._get_new_ctd_packet( stream_definition_id=stream_def_id, length=5) # Publish the packet pub.publish(publish_granule) #------------------------------------------------------------------------------------------------------ # Make assertions about whether the ctd transform executed its algorithm and published the correct # granules #------------------------------------------------------------------------------------------------------ # Get the granule that is published by the ctd transform post processing result_cond = ar_cond.get(timeout=10) self.assertTrue(isinstance(result_cond, Granule)) rdt = RecordDictionaryTool.load_from_granule(result_cond) self.assertTrue(rdt.__contains__('conductivity')) self.check_cond_algorithm_execution(publish_granule, result_cond) def check_cond_algorithm_execution(self, publish_granule, granule_from_transform): input_rdt_to_transform = RecordDictionaryTool.load_from_granule( publish_granule) output_rdt_transform = RecordDictionaryTool.load_from_granule( granule_from_transform) output_data = output_rdt_transform['conductivity'] input_data = input_rdt_to_transform['conductivity'] self.assertTrue( numpy.array_equal(((input_data / 100000.0) - 0.5), output_data)) def check_pres_algorithm_execution(self, publish_granule, granule_from_transform): input_rdt_to_transform = RecordDictionaryTool.load_from_granule( publish_granule) output_rdt_transform = RecordDictionaryTool.load_from_granule( granule_from_transform) output_data = output_rdt_transform['pressure'] input_data = input_rdt_to_transform['pressure'] self.assertTrue( numpy.array_equal((input_data / 100.0) + 0.5, output_data)) def check_temp_algorithm_execution(self, publish_granule, granule_from_transform): input_rdt_to_transform = RecordDictionaryTool.load_from_granule( publish_granule) output_rdt_transform = RecordDictionaryTool.load_from_granule( granule_from_transform) output_data = output_rdt_transform['temp'] input_data = input_rdt_to_transform['temp'] self.assertTrue( 
numpy.array_equal(((input_data / 10000.0) - 10), output_data)) def check_density_algorithm_execution(self, publish_granule, granule_from_transform): #------------------------------------------------------------------ # Calculate the correct density from the input granule data #------------------------------------------------------------------ input_rdt_to_transform = RecordDictionaryTool.load_from_granule( publish_granule) output_rdt_transform = RecordDictionaryTool.load_from_granule( granule_from_transform) conductivity = input_rdt_to_transform['conductivity'] pressure = input_rdt_to_transform['pressure'] temperature = input_rdt_to_transform['temp'] longitude = input_rdt_to_transform['lon'] latitude = input_rdt_to_transform['lat'] sp = SP_from_cndr(r=conductivity / cte.C3515, t=temperature, p=pressure) sa = SA_from_SP(sp, pressure, longitude, latitude) dens_value = rho(sa, temperature, pressure) out_density = output_rdt_transform['density'] log.debug("density output from the transform: %s", out_density) log.debug("values of density expected from the transform: %s", dens_value) numpy.testing.assert_array_almost_equal(out_density, dens_value, decimal=3) # #----------------------------------------------------------------------------- # # Check that the output data from the transform has the correct density values # #----------------------------------------------------------------------------- # for item in zip(out_density.tolist(), dens_value.tolist()): # if isinstance(item[0], int) or isinstance(item[0], float): # self.assertEquals(item[0], item[1]) def check_salinity_algorithm_execution(self, publish_granule, granule_from_transform): #------------------------------------------------------------------ # Calculate the correct salinity from the input granule data #------------------------------------------------------------------ input_rdt_to_transform = RecordDictionaryTool.load_from_granule( publish_granule) output_rdt_transform = RecordDictionaryTool.load_from_granule( granule_from_transform) conductivity = input_rdt_to_transform['conductivity'] pressure = input_rdt_to_transform['pressure'] temperature = input_rdt_to_transform['temp'] sal_value = SP_from_cndr(r=conductivity / cte.C3515, t=temperature, p=pressure) out_salinity = output_rdt_transform['salinity'] #----------------------------------------------------------------------------- # Check that the output data from the transform has the correct salinity values #----------------------------------------------------------------------------- self.assertTrue(numpy.array_equal(sal_value, out_salinity)) def check_granule_splitting(self, publish_granule, out_dict): ''' This checks that the ctd_L0_all transform is able to split out one of the granules from the whole granule fed into the transform ''' input_rdt_to_transform = RecordDictionaryTool.load_from_granule( publish_granule) in_cond = input_rdt_to_transform['conductivity'] in_pressure = input_rdt_to_transform['pressure'] in_temp = input_rdt_to_transform['temp'] out_cond = out_dict['c'] out_pres = out_dict['p'] out_temp = out_dict['t'] self.assertTrue(numpy.array_equal(in_cond, out_cond)) self.assertTrue(numpy.array_equal(in_pressure, out_pres)) self.assertTrue(numpy.array_equal(in_temp, out_temp)) def test_ctd_L1_pressure(self): ''' Test that packets are processed by the ctd_L1_pressure transform ''' #--------------------------------------------------------------------------------------------- # Launch a ctd transform 
#--------------------------------------------------------------------------------------------- # Create the process definition process_definition = ProcessDefinition( name='CTDL1PressureTransform', description='For testing CTDL1PressureTransform') process_definition.executable[ 'module'] = 'ion.processes.data.transforms.ctd.ctd_L1_pressure' process_definition.executable['class'] = 'CTDL1PressureTransform' ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition( process_definition=process_definition) # Build the config config = DotDict() config.process.queue_name = self.exchange_name config.process.exchange_point = self.exchange_point pdict_id = self.dataset_management.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) stream_def_id = self.pubsub.create_stream_definition( 'pres_stream_def', parameter_dictionary_id=pdict_id) pres_stream_id, _ = self.pubsub.create_stream( 'test_pressure', stream_definition_id=stream_def_id, exchange_point='science_data') config.process.publish_streams.pressure = pres_stream_id # Schedule the process self.process_dispatcher.schedule_process( process_definition_id=ctd_transform_proc_def_id, configuration=config) #--------------------------------------------------------------------------------------------- # Create subscribers that will receive the pressure granules from # the ctd transform #--------------------------------------------------------------------------------------------- ar_pres = gevent.event.AsyncResult() def subscriber3(m, r, s): ar_pres.set(m) sub_pres = StandaloneStreamSubscriber('sub_pres', subscriber3) self.addCleanup(sub_pres.stop) sub_pres_id = self.pubsub.create_subscription( 'subscription_pres', stream_ids=[pres_stream_id], exchange_name='sub_pres') self.pubsub.activate_subscription(sub_pres_id) self.queue_cleanup.append(sub_pres.xn.queue) sub_pres.start() #------------------------------------------------------------------------------------------------------ # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform #------------------------------------------------------------------------------------------------------ # Do all the routing stuff for the publishing routing_key = 'stream_id.stream' stream_route = StreamRoute(self.exchange_point, routing_key) xn = self.container.ex_manager.create_xn_queue(self.exchange_name) xp = self.container.ex_manager.create_xp(self.exchange_point) xn.bind('stream_id.stream', xp) pub = StandaloneStreamPublisher('stream_id', stream_route) # Build a packet that can be published publish_granule = self._get_new_ctd_packet( stream_definition_id=stream_def_id, length=5) # Publish the packet pub.publish(publish_granule) #------------------------------------------------------------------------------------------------------ # Make assertions about whether the ctd transform executed its algorithm and published the correct # granules #------------------------------------------------------------------------------------------------------ # Get the granule that is published by the ctd transform post processing result = ar_pres.get(timeout=10) self.assertTrue(isinstance(result, Granule)) rdt = RecordDictionaryTool.load_from_granule(result) self.assertTrue(rdt.__contains__('pressure')) self.check_pres_algorithm_execution(publish_granule, result) def test_ctd_L1_temperature(self): ''' Test that packets are processed by the ctd_L1_temperature transform ''' 
#--------------------------------------------------------------------------------------------- # Launch a ctd transform #--------------------------------------------------------------------------------------------- # Create the process definition process_definition = ProcessDefinition( name='CTDL1TemperatureTransform', description='For testing CTDL1TemperatureTransform') process_definition.executable[ 'module'] = 'ion.processes.data.transforms.ctd.ctd_L1_temperature' process_definition.executable['class'] = 'CTDL1TemperatureTransform' ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition( process_definition=process_definition) # Build the config config = DotDict() config.process.queue_name = self.exchange_name config.process.exchange_point = self.exchange_point pdict_id = self.dataset_management.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) stream_def_id = self.pubsub.create_stream_definition( 'temp_stream_def', parameter_dictionary_id=pdict_id) temp_stream_id, _ = self.pubsub.create_stream( 'test_temperature', stream_definition_id=stream_def_id, exchange_point='science_data') config.process.publish_streams.temperature = temp_stream_id # Schedule the process self.process_dispatcher.schedule_process( process_definition_id=ctd_transform_proc_def_id, configuration=config) #--------------------------------------------------------------------------------------------- # Create subscriber that will receive the temperature granule from # the ctd transform #--------------------------------------------------------------------------------------------- ar_temp = gevent.event.AsyncResult() def subscriber2(m, r, s): ar_temp.set(m) sub_temp = StandaloneStreamSubscriber('sub_temp', subscriber2) self.addCleanup(sub_temp.stop) sub_temp_id = self.pubsub.create_subscription( 'subscription_temp', stream_ids=[temp_stream_id], exchange_name='sub_temp') self.pubsub.activate_subscription(sub_temp_id) self.queue_cleanup.append(sub_temp.xn.queue) sub_temp.start() #------------------------------------------------------------------------------------------------------ # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform #------------------------------------------------------------------------------------------------------ # Do all the routing stuff for the publishing routing_key = 'stream_id.stream' stream_route = StreamRoute(self.exchange_point, routing_key) xn = self.container.ex_manager.create_xn_queue(self.exchange_name) xp = self.container.ex_manager.create_xp(self.exchange_point) xn.bind('stream_id.stream', xp) pub = StandaloneStreamPublisher('stream_id', stream_route) # Build a packet that can be published publish_granule = self._get_new_ctd_packet( stream_definition_id=stream_def_id, length=5) # Publish the packet pub.publish(publish_granule) #------------------------------------------------------------------------------------------------------ # Make assertions about whether the ctd transform executed its algorithm and published the correct # granules #------------------------------------------------------------------------------------------------------ # Get the granule that is published by the ctd transform post processing result = ar_temp.get(timeout=10) self.assertTrue(isinstance(result, Granule)) rdt = RecordDictionaryTool.load_from_granule(result) self.assertTrue(rdt.__contains__('temp')) self.check_temp_algorithm_execution(publish_granule, result) def test_ctd_L2_density(self): ''' Test that packets are 
        processed by the ctd_L2_density transform
        '''
        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='DensityTransform',
            description='For testing DensityTransform')
        process_definition.executable[
            'module'] = 'ion.processes.data.transforms.ctd.ctd_L2_density'
        process_definition.executable['class'] = 'DensityTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point
        config.process.interval = 1.0

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition(
            'dens_stream_def', parameter_dictionary_id=pdict_id)
        dens_stream_id, _ = self.pubsub.create_stream(
            'test_density',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')
        config.process.publish_streams.density = dens_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(
            process_definition_id=ctd_transform_proc_def_id,
            configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the density granule from the ctd transform
        #---------------------------------------------------------------------------------------------
        ar_dens = gevent.event.AsyncResult()

        def subscriber3(m, r, s):
            ar_dens.set(m)

        sub_dens = StandaloneStreamSubscriber('sub_dens', subscriber3)
        self.addCleanup(sub_dens.stop)

        sub_dens_id = self.pubsub.create_subscription(
            'subscription_dens',
            stream_ids=[dens_stream_id],
            exchange_name='sub_dens')
        self.pubsub.activate_subscription(sub_dens_id)

        self.queue_cleanup.append(sub_dens.xn.queue)

        sub_dens.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------
        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        publish_granule = self._get_new_ctd_packet(
            stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------
        # Get the granule that is published by the ctd transform post processing
        result = ar_dens.get(timeout=10)
        self.assertTrue(isinstance(result, Granule))

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertTrue('density' in rdt)

        self.check_density_algorithm_execution(publish_granule, result)

    def test_ctd_L2_salinity(self):
        '''
        Test that packets are processed by the ctd_L2_salinity transform
        '''
        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='SalinityTransform',
            description='For testing SalinityTransform')
        process_definition.executable[
            'module'] = 'ion.processes.data.transforms.ctd.ctd_L2_salinity'
        process_definition.executable['class'] = 'SalinityTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition(
            'sal_stream_def', parameter_dictionary_id=pdict_id)
        sal_stream_id, _ = self.pubsub.create_stream(
            'test_salinity',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')
        config.process.publish_streams.salinity = sal_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(
            process_definition_id=ctd_transform_proc_def_id,
            configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the salinity granule from the ctd transform
        #---------------------------------------------------------------------------------------------
        ar_sal = gevent.event.AsyncResult()

        def subscriber3(m, r, s):
            ar_sal.set(m)

        sub_sal = StandaloneStreamSubscriber('sub_sal', subscriber3)
        self.addCleanup(sub_sal.stop)

        sub_sal_id = self.pubsub.create_subscription(
            'subscription_sal',
            stream_ids=[sal_stream_id],
            exchange_name='sub_sal')
        self.pubsub.activate_subscription(sub_sal_id)

        self.queue_cleanup.append(sub_sal.xn.queue)

        sub_sal.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------
        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        publish_granule = self._get_new_ctd_packet(
            stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------
        # Get the granule that is published by the ctd transform post processing
        result = ar_sal.get(timeout=10)
        self.assertTrue(isinstance(result, Granule))

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertTrue('salinity' in rdt)

        self.check_salinity_algorithm_execution(publish_granule, result)
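    # The four L1/L2 tests above repeat the same launch/subscribe/publish/assert
    # scaffolding; only the transform module, the output stream key and the
    # checker vary. A minimal sketch of how that pattern could be factored into
    # one helper. The method name `_run_ctd_transform_test` is hypothetical and
    # not part of this test class, but every call it makes mirrors the calls
    # made in the tests above.
    def _run_ctd_transform_test(self, name, module, class_name, stream_key,
                                field, checker):
        # Define and schedule the transform process (same calls as above).
        process_definition = ProcessDefinition(
            name=name, description='For testing %s' % name)
        process_definition.executable['module'] = module
        process_definition.executable['class'] = class_name
        proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition)

        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition(
            '%s_stream_def' % stream_key, parameter_dictionary_id=pdict_id)
        stream_id, _ = self.pubsub.create_stream(
            'test_%s' % stream_key,
            stream_definition_id=stream_def_id,
            exchange_point='science_data')
        # DotDict supports item assignment, equivalent to the attribute
        # assignments (config.process.publish_streams.<key>) used above.
        config.process.publish_streams[stream_key] = stream_id
        self.process_dispatcher.schedule_process(
            process_definition_id=proc_def_id, configuration=config)

        # Subscribe to the transform output, then publish one input packet.
        ar = gevent.event.AsyncResult()
        sub = StandaloneStreamSubscriber('sub_%s' % stream_key,
                                         lambda m, r, s: ar.set(m))
        self.addCleanup(sub.stop)
        sub_id = self.pubsub.create_subscription(
            'subscription_%s' % stream_key,
            stream_ids=[stream_id],
            exchange_name='sub_%s' % stream_key)
        self.pubsub.activate_subscription(sub_id)
        self.queue_cleanup.append(sub.xn.queue)
        sub.start()

        stream_route = StreamRoute(self.exchange_point, 'stream_id.stream')
        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)
        pub = StandaloneStreamPublisher('stream_id', stream_route)
        publish_granule = self._get_new_ctd_packet(
            stream_definition_id=stream_def_id, length=5)
        pub.publish(publish_granule)

        # Assert the transform produced a granule carrying the expected field.
        result = ar.get(timeout=10)
        self.assertTrue(isinstance(result, Granule))
        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertTrue(field in rdt)
        checker(publish_granule, result)

    # Example use, mirroring test_ctd_L2_density above:
    # self._run_ctd_transform_test(
    #     'DensityTransform', 'ion.processes.data.transforms.ctd.ctd_L2_density',
    #     'DensityTransform', 'density', 'density',
    #     self.check_density_algorithm_execution)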
    def _get_new_ctd_packet(self, stream_definition_id, length):
        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i + length)

        for field in rdt:
            if isinstance(rdt._pdict.get_context(field).param_type,
                          QuantityType):
                rdt[field] = numpy.array(
                    [random.uniform(0.0, 75.0) for i in xrange(length)])

        g = rdt.to_granule()
        self.i += length
        return g

    def test_presf_L0_splitter(self):
        '''
        Test that packets are processed by the presf_L0_splitter transform
        '''
        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='Presf L0 Splitter',
            description='For testing Presf L0 Splitter')
        process_definition.executable[
            'module'] = 'ion.processes.data.transforms.ctd.presf_L0_splitter'
        process_definition.executable['class'] = 'PresfL0Splitter'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition(
            'pres_stream_def', parameter_dictionary_id=pdict_id)
        pres_stream_id, _ = self.pubsub.create_stream(
            'test_pressure',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')
        config.process.publish_streams.absolute_pressure = pres_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(
            process_definition_id=ctd_transform_proc_def_id,
            configuration=config)

        # The subscribe/publish/assert verification for this transform is
        # currently disabled:
        #
        # #---------------------------------------------------------------------------------------------
        # # Create subscribers that will receive the pressure granules from
        # # the ctd transform
        # #---------------------------------------------------------------------------------------------
        # ar_pres = gevent.event.AsyncResult()
        # def subscriber3(m, r, s):
        #     ar_pres.set(m)
        # sub_pres = StandaloneStreamSubscriber('sub_pres', subscriber3)
        # self.addCleanup(sub_pres.stop)
        # sub_pres_id = self.pubsub.create_subscription('subscription_pres',
        #     stream_ids=[pres_stream_id],
        #     exchange_name='sub_pres')
        # self.pubsub.activate_subscription(sub_pres_id)
        # self.queue_cleanup.append(sub_pres.xn.queue)
        # sub_pres.start()
        #
        # #------------------------------------------------------------------------------------------------------
        # # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        # #------------------------------------------------------------------------------------------------------
        # # Do all the routing stuff for the publishing
        # routing_key = 'stream_id.stream'
        # stream_route = StreamRoute(self.exchange_point, routing_key)
        # xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        # xp = self.container.ex_manager.create_xp(self.exchange_point)
        # xn.bind('stream_id.stream', xp)
        # pub = StandaloneStreamPublisher('stream_id', stream_route)
        # # Build a packet that can be published
        # publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length=5)
        # # Publish the packet
        # pub.publish(publish_granule)
        #
        # #------------------------------------------------------------------------------------------------------
        # # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # # granules
        # #------------------------------------------------------------------------------------------------------
        # # Get the granule that is published by the ctd transform post processing
        # result = ar_pres.get(timeout=10)
        # self.assertTrue(isinstance(result, Granule))
        # rdt = RecordDictionaryTool.load_from_granule(result)
        # self.assertTrue('pressure' in rdt)
        # self.check_pres_algorithm_execution(publish_granule, result)

    def test_presf_L1(self):
        '''
        Test that packets are processed by the presf_L1 transform
        '''
        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='PresfL1Transform',
            description='For testing PresfL1Transform')
        process_definition.executable[
            'module'] = 'ion.processes.data.transforms.ctd.presf_L1'
        process_definition.executable['class'] = 'PresfL1Transform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition(
            'pres_stream_def', parameter_dictionary_id=pdict_id)
        pres_stream_id, _ = self.pubsub.create_stream(
            'test_pressure',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')
        config.process.publish_streams.seafloor_pressure = pres_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(
            process_definition_id=ctd_transform_proc_def_id,
            configuration=config)
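    # For reference, a minimal sketch of what the DotDict configs built above
    # expand to. DotDict auto-creates nested entries on attribute access (that
    # is how every test here assembles its config), so the three assignments
    # below yield the nested mapping handed to the scheduled process. The
    # method name and the placeholder ids are hypothetical.
    def _demo_presf_config_shape(self):
        config = DotDict()
        config.process.queue_name = 'some_queue'
        config.process.exchange_point = 'science_data'
        config.process.publish_streams.seafloor_pressure = 'some_stream_id'
        # Equivalent nested mapping:
        # {'process': {'queue_name': 'some_queue',
        #              'exchange_point': 'science_data',
        #              'publish_streams': {'seafloor_pressure': 'some_stream_id'}}}
        self.assertEquals(config.process.publish_streams['seafloor_pressure'],
                          'some_stream_id')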
class TestInstrumentAgent(IonIntegrationTestCase):
    """
    Test cases for instrument agent class. Functions in this class provide
    instrument agent integration tests and provide a tutorial on use of the
    agent setup and interface.
    """

    def customCleanUp(self):
        log.info(
            'CUSTOM CLEAN UP ******************************************************************************'
        )

    def setUp(self):
        """
        Set up the test environment to exercise use of the instrument agent,
        including:
        * define driver_config parameters.
        * create container with required services and container client.
        * create publication stream ids for each driver data stream.
        * create stream_config parameters.
        * create and activate subscriptions for agent data streams.
        * spawn instrument agent process and create agent client.
        * add cleanup functions to cause subscribers to get stopped.
        """
        self.addCleanup(self.customCleanUp)

        # Names of agent data streams to be configured.
        parsed_stream_name = 'ctd_parsed'
        raw_stream_name = 'ctd_raw'

        # Driver configuration.
        # Simulator
        self.driver_config = {
            'svr_addr': 'localhost',
            'cmd_port': 5556,
            'evt_port': 5557,
            'dvr_mod': 'ion.agents.instrument.drivers.sbe37.sbe37_driver',
            'dvr_cls': 'SBE37Driver',
            'comms_config': {
                SBE37Channel.CTD: {
                    'method': 'ethernet',
                    'device_addr': CFG.device.sbe37.host,
                    'device_port': CFG.device.sbe37.port,
                    'server_addr': 'localhost',
                    'server_port': 8888
                }
            }
        }

        # Hardware
        '''
        self.driver_config = {
            'svr_addr': 'localhost',
            'cmd_port': 5556,
            'evt_port': 5557,
            'dvr_mod': 'ion.agents.instrument.drivers.sbe37.sbe37_driver',
            'dvr_cls': 'SBE37Driver',
            'comms_config': {
                SBE37Channel.CTD: {
                    'method': 'ethernet',
                    'device_addr': '137.110.112.119',
                    'device_port': 4001,
                    'server_addr': 'localhost',
                    'server_port': 8888
                }
            }
        }
        '''

        # Start container.
        self._start_container()

        # Establish endpoint with container (used in tests below).
        self._container_client = ContainerAgentClient(
            node=self.container.node, name=self.container.name)

        # Bring up services in a deploy file (no need to message).
        self.container.start_rel_from_url('res/deploy/r2dm.yml')

        # Create a pubsub client to create streams.
        self._pubsub_client = PubsubManagementServiceClient(
            node=self.container.node)

        # A callback for processing subscribed-to data.
        def consume(message, headers):
            log.info('Subscriber received message: %s', str(message))

        # Create a stream subscriber registrar to create subscribers.
        subscriber_registrar = StreamSubscriberRegistrar(
            process=self.container, node=self.container.node)

        self.subs = []

        # Create streams for each stream named in driver.
        self.stream_config = {}
        for (stream_name, val) in PACKET_CONFIG.iteritems():
            stream_def = ctd_stream_definition(stream_id=None)
            stream_def_id = self._pubsub_client.create_stream_definition(
                container=stream_def)
            stream_id = self._pubsub_client.create_stream(
                name=stream_name,
                stream_definition_id=stream_def_id,
                original=True,
                encoding='ION R2')
            self.stream_config[stream_name] = stream_id

            # Create subscriptions for each stream.
            exchange_name = '%s_queue' % stream_name
            sub = subscriber_registrar.create_subscriber(
                exchange_name=exchange_name, callback=consume)
            sub.start()
            query = StreamQuery(stream_ids=[stream_id])
            sub_id = self._pubsub_client.create_subscription(
                query=query, exchange_name=exchange_name)
            self._pubsub_client.activate_subscription(sub_id)
            self.subs.append(sub)

        # Add cleanup function to stop subscribers.
def stop_subscriber(sub_list): for sub in sub_list: sub.stop() self.addCleanup(stop_subscriber, self.subs) # Create agent config. self.agent_resource_id = '123xyz' self.agent_config = { 'driver_config': self.driver_config, 'stream_config': self.stream_config, 'agent': { 'resource_id': self.agent_resource_id } } # Launch an instrument agent process. self._ia_name = 'agent007' self._ia_mod = 'ion.agents.instrument.instrument_agent' self._ia_class = 'InstrumentAgent' self._ia_pid = self._container_client.spawn_process( name=self._ia_name, module=self._ia_mod, cls=self._ia_class, config=self.agent_config) log.info('got pid=%s', str(self._ia_pid)) self._ia_client = None # Start a resource agent client to talk with the instrument agent. self._ia_client = ResourceAgentClient(self.agent_resource_id, process=FakeProcess()) log.info('got ia client %s', str(self._ia_client)) def test_initialize(self): """ Test agent initialize command. This causes creation of driver process and transition to inactive. """ cmd = AgentCommand(command='initialize') reply = gw_agent_execute_agent(self.agent_resource_id, cmd) log.info(reply) time.sleep(2) caps = gw_agent_get_capabilities(self.agent_resource_id) log.info('Capabilities: %s', str(caps)) time.sleep(2) cmd = AgentCommand(command='reset') reply = gw_agent_execute_agent(self.agent_resource_id, cmd) log.info(reply) def test_direct_access(self): """ Test agent direct_access command. This causes creation of driver process and transition to direct access. """ print("test initing") cmd = AgentCommand(command='initialize') reply = gw_agent_execute_agent(self.agent_resource_id, cmd) time.sleep(2) print("test go_active") cmd = AgentCommand(command='go_active') reply = gw_agent_execute_agent(self.agent_resource_id, cmd) time.sleep(2) print("test run") cmd = AgentCommand(command='run') reply = gw_agent_execute_agent(self.agent_resource_id, cmd) time.sleep(2) print("test go_da") cmd = AgentCommand(command='go_direct_access') reply = gw_agent_execute_agent(self.agent_resource_id, cmd) print("reply=" + str(reply)) time.sleep(2) print("test go_ob") cmd = AgentCommand(command='go_observatory') reply = gw_agent_execute_agent(self.agent_resource_id, cmd) time.sleep(2) print("test go_inactive") cmd = AgentCommand(command='go_inactive') reply = gw_agent_execute_agent(self.agent_resource_id, cmd) time.sleep(2) print("test reset") cmd = AgentCommand(command='reset') reply = gw_agent_execute_agent(self.agent_resource_id, cmd) time.sleep(2) def test_go_active(self): """ Test agent go_active command. This causes a driver process to launch a connection broker, connect to device hardware, determine entry state of driver and initialize driver parameters. """ cmd = AgentCommand(command='initialize') reply = gw_agent_execute_agent(self.agent_resource_id, cmd) time.sleep(2) cmd = AgentCommand(command='go_active') reply = gw_agent_execute_agent(self.agent_resource_id, cmd) time.sleep(2) cmd = AgentCommand(command='go_inactive') reply = gw_agent_execute_agent(self.agent_resource_id, cmd) time.sleep(2) cmd = AgentCommand(command='reset') reply = gw_agent_execute_agent(self.agent_resource_id, cmd) time.sleep(2)
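    # The tests above pace agent commands with fixed time.sleep(2) calls, which
    # is fragile under load. A hedged alternative sketch: poll the agent until
    # it reports the expected state instead of sleeping a fixed interval. The
    # method name is hypothetical, and calling `get_agent_state` on the
    # resource agent client is an assumption of this sketch rather than
    # something the tests above rely on.
    def _wait_for_agent_state(self, target_state, timeout=10.0, poll=0.5):
        """Poll the agent until it reaches target_state or timeout expires."""
        deadline = time.time() + timeout
        while time.time() < deadline:
            if self._ia_client.get_agent_state() == target_state:
                return True
            time.sleep(poll)
        return False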
class EventManagementIntTest(IonIntegrationTestCase): def setUp(self): super(EventManagementIntTest, self).setUp() self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.event_management = EventManagementServiceClient() self.rrc = ResourceRegistryServiceClient() self.process_dispatcher = ProcessDispatcherServiceClient() self.pubsub = PubsubManagementServiceClient() self.dataset_management = DatasetManagementServiceClient() self.data_product_management = DataProductManagementServiceClient() self.queue_cleanup = [] self.exchange_cleanup = [] def tearDown(self): for queue in self.queue_cleanup: xn = self.container.ex_manager.create_xn_queue(queue) xn.delete() for exchange in self.exchange_cleanup: xp = self.container.ex_manager.create_xp(exchange) xp.delete() def test_create_read_update_delete_event_type(self): """ Test that the CRUD method for event types work correctly """ event_type = EventType(name="an event type") event_type.origin = 'instrument_1' # create event_type_id = self.event_management.create_event_type(event_type) self.assertIsNotNone(event_type_id) # read read_event_type = self.event_management.read_event_type(event_type_id) self.assertEquals(read_event_type.name, event_type.name) self.assertEquals(read_event_type.origin, event_type.origin) #update read_event_type.origin = 'instrument_2' read_event_type.producer = 'producer' self.event_management.update_event_type(read_event_type) updated_event_type = self.event_management.read_event_type(event_type_id) self.assertEquals(updated_event_type.origin, 'instrument_2') self.assertEquals(updated_event_type.producer, 'producer') # delete self.event_management.delete_event_type(event_type_id) with self.assertRaises(NotFound): self.event_management.read_event_type(event_type_id) def test_create_read_update_delete_event_process_definition(self): """ Test that the CRUD methods for the event process definitions work correctly """ # Create module = 'ion.processes.data.transforms.event_alert_transform' class_name = 'EventAlertTransform' procdef_id = self.event_management.create_event_process_definition(version='ver_1', module=module, class_name=class_name, uri='http://hare.com', arguments=['arg1', 'arg2'], event_types=['ExampleDetectableEvent', 'type_2'], sub_types=['sub_type_1']) # Read read_process_def = self.event_management.read_event_process_definition(procdef_id) self.assertEquals(read_process_def.executable['module'], module) self.assertEquals(read_process_def.executable['class'], class_name) # Update self.event_management.update_event_process_definition(event_process_definition_id=procdef_id, class_name='StreamAlertTransform', arguments=['arg3', 'arg4'], event_types=['event_type_new']) updated_event_process_def = self.event_management.read_event_process_definition(procdef_id) self.assertEquals(updated_event_process_def.executable['class'], 'StreamAlertTransform') self.assertEquals(updated_event_process_def.arguments, ['arg3', 'arg4']) definition = updated_event_process_def.definition self.assertEquals(updated_event_process_def.definition.event_types, ['event_type_new']) # Delete self.event_management.delete_event_process_definition(procdef_id) with self.assertRaises(NotFound): self.event_management.read_event_process_definition(procdef_id) def test_event_in_stream_out_transform(self): """ Test the event-in/stream-out transform """ stream_id, _ = self.pubsub.create_stream('test_stream', exchange_point='science_data') self.exchange_cleanup.append('science_data') 
#--------------------------------------------------------------------------------------------- # Launch a ctd transform #--------------------------------------------------------------------------------------------- # Create the process definition process_definition = ProcessDefinition( name='EventToStreamTransform', description='For testing an event-in/stream-out transform') process_definition.executable['module']= 'ion.processes.data.transforms.event_in_stream_out_transform' process_definition.executable['class'] = 'EventToStreamTransform' proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition) # Build the config config = DotDict() config.process.queue_name = 'test_queue' config.process.exchange_point = 'science_data' config.process.publish_streams.output = stream_id config.process.event_type = 'ExampleDetectableEvent' config.process.variables = ['voltage', 'temperature' ] # Schedule the process pid = self.process_dispatcher.schedule_process(process_definition_id=proc_def_id, configuration=config) self.addCleanup(self.process_dispatcher.cancel_process,pid) #--------------------------------------------------------------------------------------------- # Create a subscriber for testing #--------------------------------------------------------------------------------------------- ar_cond = gevent.event.AsyncResult() def subscriber_callback(m, r, s): ar_cond.set(m) sub = StandaloneStreamSubscriber('sub', subscriber_callback) self.addCleanup(sub.stop) sub_id = self.pubsub.create_subscription('subscription_cond', stream_ids=[stream_id], exchange_name='sub') self.pubsub.activate_subscription(sub_id) self.queue_cleanup.append(sub.xn.queue) sub.start() gevent.sleep(4) #--------------------------------------------------------------------------------------------- # Publish an event. 
        # The transform has been configured to receive this event
        #---------------------------------------------------------------------------------------------
        event_publisher = EventPublisher("ExampleDetectableEvent")
        event_publisher.publish_event(origin='fake_origin',
                                      voltage='5',
                                      temperature='273')

        # Assert that the transform processed the event and published data on
        # the output stream
        result_cond = ar_cond.get(timeout=10)
        self.assertTrue(result_cond)

    def test_create_read_delete_event_process(self):
        """
        Test that the CRUD methods for the event processes work correctly
        """
        #---------------------------------------------------------------------------------------------
        # Create a process definition
        #---------------------------------------------------------------------------------------------
        # Create
        module = 'ion.processes.data.transforms.event_alert_transform'
        class_name = 'EventAlertTransform'
        procdef_id = self.event_management.create_event_process_definition(
            version='ver_1',
            module=module,
            class_name=class_name,
            uri='http://hare.com',
            arguments=['arg1', 'arg2'],
            event_types=['ExampleDetectableEvent', 'type_2'],
            sub_types=['sub_type_1'])

        # Read
        read_process_def = self.event_management.read_event_process_definition(
            procdef_id)
        self.assertEquals(read_process_def.arguments, ['arg1', 'arg2'])

        #---------------------------------------------------------------------------------------------
        # Use the process definition to create a process
        #---------------------------------------------------------------------------------------------
        # Create a stream
        param_dict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition(
            'cond_stream_def', parameter_dictionary_id=param_dict_id)

        tdom, sdom = time_series_domain()
        tdom, sdom = tdom.dump(), sdom.dump()

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp',
                           temporal_domain=tdom,
                           spatial_domain=sdom)

        # Create a data product
        data_product_id = self.data_product_management.create_data_product(
            data_product=dp_obj, stream_definition_id=stream_def_id)

        output_products = {}
        output_products['conductivity'] = data_product_id

        # Create an event process
        event_process_id = self.event_management.create_event_process(
            process_definition_id=procdef_id,
            event_types=['ExampleDetectableEvent', 'DetectionEvent'],
            sub_types=['s1', 's2'],
            origins=['or_1', 'or_2'],
            origin_types=['or_t1', 'or_t2'],
            out_data_products=output_products)
        self.addCleanup(self.process_dispatcher.cancel_process,
                        event_process_id)

        #---------------------------------------------------------------------------------------------
        # Read the event process object and make assertions
        #---------------------------------------------------------------------------------------------
        event_process_obj = self.event_management.read_event_process(
            event_process_id=event_process_id)

        # Get the stream associated with the data product for the sake of
        # making assertions
        stream_ids, _ = self.rrc.find_objects(data_product_id, PRED.hasStream,
                                              id_only=True)
        stream_id = stream_ids[0]

        # Assertions!
self.assertEquals(event_process_obj.detail.output_streams['conductivity'], stream_id) self.assertEquals(event_process_obj.detail.event_types, ['ExampleDetectableEvent', 'DetectionEvent']) self.assertEquals(event_process_obj.detail.sub_types, ['s1', 's2']) self.assertEquals(event_process_obj.detail.origins, ['or_1', 'or_2']) self.assertEquals(event_process_obj.detail.origin_types, ['or_t1', 'or_t2'])
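    # test_event_in_stream_out_transform above drives the transform with
    # EventPublisher. For reference, a hedged sketch of the direct event round
    # trip using pyon's EventSubscriber counterpart; the import path and the
    # callback signature are assumptions of this sketch, and the method name
    # is hypothetical.
    def _demo_event_round_trip(self):
        from pyon.event.event import EventSubscriber

        received = gevent.event.AsyncResult()

        def on_event(event, *args, **kwargs):
            received.set(event)

        sub = EventSubscriber(event_type='ExampleDetectableEvent',
                              callback=on_event)
        sub.start()
        try:
            EventPublisher("ExampleDetectableEvent").publish_event(
                origin='fake_origin', voltage='5', temperature='273')
            event = received.get(timeout=10)
            self.assertEquals(event.origin, 'fake_origin')
        finally:
            sub.stop()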
class CtdbpTransformsIntTest(IonIntegrationTestCase): def setUp(self): super(CtdbpTransformsIntTest, self).setUp() self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.pubsub = PubsubManagementServiceClient() self.process_dispatcher = ProcessDispatcherServiceClient() self.dataset_management = DatasetManagementServiceClient() self.data_process_management = DataProcessManagementServiceClient() self.dataproduct_management = DataProductManagementServiceClient() self.resource_registry = ResourceRegistryServiceClient() # This is for the time values inside the packets going into the transform self.i = 0 # Cleanup of queue created by the subscriber def _get_new_ctd_packet(self, stream_definition_id, length): rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id) rdt['time'] = numpy.arange(self.i, self.i + length) for field in rdt: if isinstance( rdt._pdict.get_context(field).param_type, QuantityType): rdt[field] = numpy.array( [random.uniform(0.0, 75.0) for i in xrange(length)]) g = rdt.to_granule() self.i += length return g def _create_input_param_dict_for_test(self, parameter_dict_name=''): pdict = ParameterDictionary() t_ctxt = ParameterContext( 'time', param_type=QuantityType(value_encoding=numpy.dtype('float64'))) t_ctxt.axis = AxisTypeEnum.TIME t_ctxt.uom = 'seconds since 01-01-1900' pdict.add_context(t_ctxt) cond_ctxt = ParameterContext( 'conductivity', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) cond_ctxt.uom = '' pdict.add_context(cond_ctxt) pres_ctxt = ParameterContext( 'pressure', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) pres_ctxt.uom = '' pdict.add_context(pres_ctxt) temp_ctxt = ParameterContext( 'temperature', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) temp_ctxt.uom = '' pdict.add_context(temp_ctxt) dens_ctxt = ParameterContext( 'density', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) dens_ctxt.uom = '' pdict.add_context(dens_ctxt) sal_ctxt = ParameterContext( 'salinity', param_type=QuantityType(value_encoding=numpy.dtype('float32'))) sal_ctxt.uom = '' pdict.add_context(sal_ctxt) #create temp streamdef so the data product can create the stream pc_list = [] for pc_k, pc in pdict.iteritems(): ctxt_id = self.dataset_management.create_parameter_context( pc_k, pc[1].dump()) pc_list.append(ctxt_id) self.addCleanup(self.dataset_management.delete_parameter_context, ctxt_id) pdict_id = self.dataset_management.create_parameter_dictionary( parameter_dict_name, pc_list) self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id) return pdict_id def test_ctdbp_L0_all(self): """ Test packets processed by the ctdbp_L0_all transform """ #----------- Data Process Definition -------------------------------- dpd_obj = IonObject( RT.DataProcessDefinition, name='CTDBP_L0_all', description= 'Take parsed stream and put the C, T and P into three separate L0 streams.', module='ion.processes.data.transforms.ctdbp.ctdbp_L0', class_name='CTDBP_L0_all') dprocdef_id = self.data_process_management.create_data_process_definition( dpd_obj) self.addCleanup( self.data_process_management.delete_data_process_definition, dprocdef_id) log.debug("created data process definition: id = %s", dprocdef_id) #----------- Data Products -------------------------------- # Construct temporal and spatial Coordinate Reference System objects tdom, sdom = time_series_domain() sdom = sdom.dump() tdom = tdom.dump() input_param_dict = self._create_input_param_dict_for_test( 
parameter_dict_name='fictitious_ctdp_param_dict') # Get the stream definition for the stream using the parameter dictionary # input_param_dict = self.dataset_management.read_parameter_dictionary_by_name('ctdbp_cdef_sample', id_only=True) input_stream_def_dict = self.pubsub.create_stream_definition( name='parsed', parameter_dictionary_id=input_param_dict) self.addCleanup(self.pubsub.delete_stream_definition, input_stream_def_dict) log.debug("Got the parsed parameter dictionary: id: %s", input_param_dict) log.debug("Got the stream def for parsed input: %s", input_stream_def_dict) # Input data product parsed_stream_dp_obj = IonObject( RT.DataProduct, name='parsed_stream', description='Parsed stream input to CTBP L0 transform', temporal_domain=tdom, spatial_domain=sdom) input_dp_id = self.dataproduct_management.create_data_product( data_product=parsed_stream_dp_obj, stream_definition_id=input_stream_def_dict) self.addCleanup(self.dataproduct_management.delete_data_product, input_dp_id) # output data product L0_stream_dp_obj = IonObject( RT.DataProduct, name='L0_stream', description='L0_stream output of CTBP L0 transform', temporal_domain=tdom, spatial_domain=sdom) L0_stream_dp_id = self.dataproduct_management.create_data_product( data_product=L0_stream_dp_obj, stream_definition_id=input_stream_def_dict) self.addCleanup(self.dataproduct_management.delete_data_product, L0_stream_dp_id) # We need the key name here to be "L0_stream", since when the data process is launched, this name goes into # the config as in config.process.publish_streams.L0_stream when the config is used to launch the data process out_stream_ids, _ = self.resource_registry.find_objects( L0_stream_dp_id, PRED.hasStream, RT.Stream, True) self.assertTrue(len(out_stream_ids)) output_stream_id = out_stream_ids[0] dproc_id = self.data_process_management.create_data_process( data_process_definition_id=dprocdef_id, in_data_product_ids=[input_dp_id], out_data_product_ids=[L0_stream_dp_id], configuration=None) self.addCleanup(self.data_process_management.delete_data_process, dproc_id) log.debug("Created a data process for ctdbp_L0. 
id: %s", dproc_id) # Activate the data process self.data_process_management.activate_data_process(dproc_id) self.addCleanup(self.data_process_management.deactivate_data_process, dproc_id) #----------- Find the stream that is associated with the input data product when it was created by create_data_product() -------------------------------- stream_ids, _ = self.resource_registry.find_objects( input_dp_id, PRED.hasStream, RT.Stream, True) self.assertTrue(len(stream_ids)) input_stream_id = stream_ids[0] stream_route = self.pubsub.read_stream_route(input_stream_id) log.debug("The input stream for the L0 transform: %s", input_stream_id) #----------- Create a subscriber that will listen to the transform's output -------------------------------- ar = gevent.event.AsyncResult() def subscriber(m, r, s): ar.set(m) sub = StandaloneStreamSubscriber(exchange_name='sub', callback=subscriber) sub_id = self.pubsub.create_subscription('subscriber_to_transform', stream_ids=[output_stream_id], exchange_name='sub') self.addCleanup(self.pubsub.delete_subscription, sub_id) self.pubsub.activate_subscription(sub_id) self.addCleanup(self.pubsub.deactivate_subscription, sub_id) sub.start() self.addCleanup(sub.stop) #----------- Publish on that stream so that the transform can receive it -------------------------------- pub = StandaloneStreamPublisher(input_stream_id, stream_route) publish_granule = self._get_new_ctd_packet( stream_definition_id=input_stream_def_dict, length=5) pub.publish(publish_granule) log.debug("Published the following granule: %s", publish_granule) granule_from_transform = ar.get(timeout=20) log.debug("Got the following granule from the transform: %s", granule_from_transform) # Check that the granule published by the L0 transform has the right properties self._check_granule_from_transform(granule_from_transform) def _check_granule_from_transform(self, granule): """ An internal method to check if a granule has the right properties """ pass
class TestTransformPrime(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Because hey why not?! self.dataset_management = DatasetManagementServiceClient() self.data_process_management = DataProcessManagementServiceClient() self.pubsub_management = PubsubManagementServiceClient() self.data_product_management = DataProductManagementServiceClient() def setup_streams(self): in_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sbe37_L0_test', id_only=True) out_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sbe37_L1_test', id_only=True) in_stream_def_id = self.pubsub_management.create_stream_definition('L0 SBE37', parameter_dictionary_id=in_pdict_id) self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_def_id) out_stream_def_id = self.pubsub_management.create_stream_definition('L1 SBE37', parameter_dictionary_id=out_pdict_id) self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_def_id) in_stream_id, in_route = self.pubsub_management.create_stream('L0 input', stream_definition_id=in_stream_def_id, exchange_point='test') self.addCleanup(self.pubsub_management.delete_stream, in_stream_id) out_stream_id, out_route = self.pubsub_management.create_stream('L0 output', stream_definition_id=out_stream_def_id, exchange_point='test') self.addCleanup(self.pubsub_management.delete_stream, out_stream_id) return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)] def setup_advanced_streams(self): in_pdict_id = out_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sbe37_LC_TEST', id_only=True) in_stream_def_id = self.pubsub_management.create_stream_definition('sbe37_instrument', parameter_dictionary_id=in_pdict_id, available_fields=['time', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'lat', 'lon']) self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_def_id) out_stream_def_id = self.pubsub_management.create_stream_definition('sbe37_l2', parameter_dictionary_id=out_pdict_id, available_fields=['time', 'rho','PRACSAL_L2']) self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_def_id) in_stream_id, in_route = self.pubsub_management.create_stream('instrument stream', stream_definition_id=in_stream_def_id, exchange_point='test') self.addCleanup(self.pubsub_management.delete_stream, in_stream_id) out_stream_id, out_route = self.pubsub_management.create_stream('data product stream', stream_definition_id=out_stream_def_id, exchange_point='test') self.addCleanup(self.pubsub_management.delete_stream, out_stream_id) return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)] def preload(self): config = DotDict() config.op = 'load' config.scenario = 'BASE,LC_TEST' config.categories = 'ParameterFunctions,ParameterDefs,ParameterDictionary' config.path = 'res/preload/r2_ioc' self.container.spawn_process('preload','ion.processes.bootstrap.ion_loader','IONLoader', config) def setup_advanced_transform(self): self.preload() queue_name = 'transform_prime' stream_info = self.setup_advanced_streams() in_stream_id, in_stream_def_id = stream_info[0] out_stream_id, out_stream_def_id = stream_info[1] routes = {} routes[(in_stream_id, out_stream_id)]= None config = DotDict() config.process.queue_name = queue_name config.process.routes = routes config.process.publish_streams = {out_stream_id:out_stream_id} sub_id = self.pubsub_management.create_subscription(queue_name, 
stream_ids=[in_stream_id]) self.addCleanup(self.pubsub_management.delete_subscription, sub_id) self.pubsub_management.activate_subscription(sub_id) self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id) self.container.spawn_process('transform_prime', 'ion.processes.data.transforms.transform_prime','TransformPrime', config) listen_sub_id = self.pubsub_management.create_subscription('listener', stream_ids=[out_stream_id]) self.addCleanup(self.pubsub_management.delete_subscription, listen_sub_id) self.pubsub_management.activate_subscription(listen_sub_id) self.addCleanup(self.pubsub_management.deactivate_subscription, listen_sub_id) return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)] def setup_transform(self): self.preload() queue_name = 'transform_prime' stream_info = self.setup_streams() in_stream_id, in_stream_def_id = stream_info[0] out_stream_id, out_stream_def_id = stream_info[1] routes = {} routes[(in_stream_id, out_stream_id)]= None config = DotDict() config.process.queue_name = queue_name config.process.routes = routes config.process.publish_streams = {out_stream_id:out_stream_id} sub_id = self.pubsub_management.create_subscription(queue_name, stream_ids=[in_stream_id]) self.addCleanup(self.pubsub_management.delete_subscription, sub_id) self.pubsub_management.activate_subscription(sub_id) self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id) self.container.spawn_process('transform_prime', 'ion.processes.data.transforms.transform_prime','TransformPrime', config) listen_sub_id = self.pubsub_management.create_subscription('listener', stream_ids=[out_stream_id]) self.addCleanup(self.pubsub_management.delete_subscription, listen_sub_id) self.pubsub_management.activate_subscription(listen_sub_id) self.addCleanup(self.pubsub_management.deactivate_subscription, listen_sub_id) return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)] def setup_validator(self, validator): listener = StandaloneStreamSubscriber('listener', validator) listener.start() self.addCleanup(listener.stop) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_execute_advanced_transform(self): # Runs a transform across L0-L2 with stream definitions including available fields streams = self.setup_advanced_transform() in_stream_id, in_stream_def_id = streams[0] out_stream_id, out_stream_defs_id = streams[1] validation_event = Event() def validator(msg, route, stream_id): rdt = RecordDictionaryTool.load_from_granule(msg) if not np.allclose(rdt['rho'], np.array([1001.0055034])): return validation_event.set() self.setup_validator(validator) in_route = self.pubsub_management.read_stream_route(in_stream_id) publisher = StandaloneStreamPublisher(in_stream_id, in_route) outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id) outbound_rdt['time'] = [0] outbound_rdt['TEMPWAT_L0'] = [280000] outbound_rdt['CONDWAT_L0'] = [100000] outbound_rdt['PRESWAT_L0'] = [2789] outbound_rdt['lat'] = [45] outbound_rdt['lon'] = [-71] outbound_granule = outbound_rdt.to_granule() publisher.publish(outbound_granule) self.assertTrue(validation_event.wait(2)) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_execute_transform(self): streams = self.setup_transform() in_stream_id, in_stream_def_id = streams[0] out_stream_id, out_stream_def_id = streams[1] validation_event = Event() def validator(msg, route, stream_id): rdt = 
RecordDictionaryTool.load_from_granule(msg) if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])): return if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])): return if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])): return validation_event.set() self.setup_validator(validator) in_route = self.pubsub_management.read_stream_route(in_stream_id) publisher = StandaloneStreamPublisher(in_stream_id, in_route) outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id) outbound_rdt['time'] = [0] outbound_rdt['TEMPWAT_L0'] = [280000] outbound_rdt['CONDWAT_L0'] = [100000] outbound_rdt['PRESWAT_L0'] = [2789] outbound_rdt['lat'] = [45] outbound_rdt['lon'] = [-71] outbound_granule = outbound_rdt.to_granule() publisher.publish(outbound_granule) self.assertTrue(validation_event.wait(2))
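    # The expected values asserted by the validators above follow from the
    # L0-to-L1 conversions applied by the preloaded parameter functions. The
    # temperature and conductivity scalings match the check_* helpers earlier
    # in this document; the pressure formula (with a 679.34 psi range) is
    # inferred here only because it reproduces the asserted 0.04536611 value,
    # so treat it as an assumption. The method name is hypothetical.
    def _expected_l1_values(self):
        TEMPWAT_L0, CONDWAT_L0, PRESWAT_L0 = 280000, 100000, 2789
        tempwat_l1 = (TEMPWAT_L0 / 10000.0) - 10.0            # -> 18.0
        condwat_l1 = (CONDWAT_L0 / 100000.0) - 0.5            # -> 0.5
        p_range = 679.34
        preswat_l1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) \
                     - (0.05 * p_range)                       # -> ~0.04536611
        return tempwat_l1, condwat_l1, preswat_l1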
class TestDMEnd2End(IonIntegrationTestCase): def setUp(self): # Love the non pep-8 convention self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.process_dispatcher = ProcessDispatcherServiceClient() self.pubsub_management = PubsubManagementServiceClient() self.resource_registry = ResourceRegistryServiceClient() self.dataset_management = DatasetManagementServiceClient() self.ingestion_management = IngestionManagementServiceClient() self.data_retriever = DataRetrieverServiceClient() self.event = Event() self.exchange_space_name = 'test_granules' self.exchange_point_name = 'science_data' self.i = 0 self.cci = 0 #-------------------------------------------------------------------------------- # Helper/Utility methods #-------------------------------------------------------------------------------- def create_dataset(self, parameter_dict_id=''): ''' Creates a time-series dataset ''' if not parameter_dict_id: parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) dataset = Dataset('test_dataset_%i'%self.i) dataset_id = self.dataset_management.create_dataset(dataset, parameter_dictionary_id=parameter_dict_id) self.addCleanup(self.dataset_management.delete_dataset, dataset_id) return dataset_id def get_datastore(self, dataset_id): ''' Gets an instance of the datastore This method is primarily used to defeat a bug where integration tests in multiple containers may sometimes delete a CouchDB datastore and the other containers are unaware of the new state of the datastore. ''' dataset = self.dataset_management.read_dataset(dataset_id) datastore_name = dataset.datastore_name datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA) return datastore def get_ingestion_config(self): ''' Grab the ingestion configuration from the resource registry ''' # The ingestion configuration should have been created by the bootstrap service # which is configured through r2deploy.yml ingest_configs, _ = self.resource_registry.find_resources(restype=RT.IngestionConfiguration,id_only=True) return ingest_configs[0] def launch_producer(self, stream_id=''): ''' Launch the producer ''' pid = self.container.spawn_process('better_data_producer', 'ion.processes.data.example_data_producer', 'BetterDataProducer', {'process':{'stream_id':stream_id}}) self.addCleanup(self.container.terminate_process, pid) def make_simple_dataset(self): ''' Makes a stream, a stream definition and a dataset, the essentials for most of these tests ''' pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True) stream_def_id = self.pubsub_management.create_stream_definition('ctd data %i' % self.i, parameter_dictionary_id=pdict_id) self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id) stream_id, route = self.pubsub_management.create_stream('ctd stream %i' % self.i, 'xp1', stream_definition_id=stream_def_id) self.addCleanup(self.pubsub_management.delete_stream, stream_id) dataset_id = self.create_dataset(pdict_id) # self.get_datastore(dataset_id) self.i += 1 return stream_id, route, stream_def_id, dataset_id def publish_hifi(self,stream_id,stream_route,offset=0): ''' Publish deterministic data ''' pub = StandaloneStreamPublisher(stream_id, stream_route) stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id) stream_def_id = stream_def._id rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) rdt['time'] 
= np.arange(10) + (offset * 10) rdt['temp'] = np.arange(10) + (offset * 10) pub.publish(rdt.to_granule()) def publish_fake_data(self,stream_id, route): ''' Make four granules ''' for i in xrange(4): self.publish_hifi(stream_id,route,i) def start_ingestion(self, stream_id, dataset_id): ''' Starts ingestion/persistence for a given dataset ''' ingest_config_id = self.get_ingestion_config() self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id) def stop_ingestion(self, stream_id): ingest_config_id = self.get_ingestion_config() self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id) def validate_granule_subscription(self, msg, route, stream_id): ''' Validation for granule format ''' if msg == {}: return rdt = RecordDictionaryTool.load_from_granule(msg) log.info('%s', rdt.pretty_print()) self.assertIsInstance(msg,Granule,'Message is improperly formatted. (%s)' % type(msg)) self.event.set() def wait_until_we_have_enough_granules(self, dataset_id='',data_size=40): ''' Loops until there is a sufficient amount of data in the dataset ''' done = False with gevent.Timeout(40): while not done: extents = self.dataset_management.dataset_extents(dataset_id, 'time') granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1) rdt = RecordDictionaryTool.load_from_granule(granule) if rdt['time'] and rdt['time'][0] != rdt._pdict.get_context('time').fill_value and extents >= data_size: done = True else: gevent.sleep(0.2) #-------------------------------------------------------------------------------- # Test Methods #-------------------------------------------------------------------------------- def test_dm_end_2_end(self): #-------------------------------------------------------------------------------- # Set up a stream and have a mock instrument (producer) send data #-------------------------------------------------------------------------------- self.event.clear() # Get a precompiled parameter dictionary with basic ctd fields pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True) context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True) # Add a field that supports binary data input. bin_context = ParameterContext('binary', param_type=ArrayType()) context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump())) # Add another field that supports dictionary elements. 
    #--------------------------------------------------------------------------------
    # Test Methods
    #--------------------------------------------------------------------------------

    def test_dm_end_2_end(self):
        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict',
                                                                       parameter_context_ids=context_ids,
                                                                       temporal_context='time')

        stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)

        stream_id, route = self.pubsub_management.create_stream('producer',
                                                                exchange_point=self.exchange_point_name,
                                                                stream_definition_id=stream_definition)

        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - Call persist_data_stream to set up the subscription for the ingestion workers
        #   on the stream you specify, which causes the data to be persisted
        #--------------------------------------------------------------------------------
        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id,
                                                      ingestion_configuration_id=ingest_config_id,
                                                      dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------
        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)

        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt['time'][:10] == np.arange(10)).all(), '%s' % rdt['time'][:])
        self.assertTrue((rdt['binary'][:10] == np.array(['hi'] * 10, dtype='object')).all())

        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out',
                                                                              exchange_point=self.exchange_point_name,
                                                                              stream_definition_id=stream_definition)
        self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        #--------------------------------------------------------------------------------
        sub_id = self.pubsub_management.create_subscription(self.exchange_space_name, stream_ids=[replay_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
        replay_client.start_replay()

        self.assertTrue(self.event.wait(10))

        self.data_retriever.cancel_replay_agent(self.replay_id)

        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------
        granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa': slice(0, 5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt['time'] == np.arange(5)
        self.assertTrue(b.all() if not isinstance(b, bool) else b)

    def test_coverage_transform(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_parsed()
        stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('example',
                                                                exchange_point=self.exchange_point_name,
                                                                stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(stream_id=stream_id,
                                                      ingestion_configuration_id=ingestion_config_id,
                                                      dataset_id=dataset_id)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_parsed_rdt(rdt)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

        np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))

    def test_ingestion_pause(self):
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        ingestion_config_id = self.get_ingestion_config()
        self.start_ingestion(ctd_stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, ctd_stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(monitor.wait())
        granule = self.data_retriever.retrieve(dataset_id)

        self.ingestion_management.pause_data_stream(ctd_stream_id, ingestion_config_id)

        monitor.event.clear()
        rdt['time'] = np.arange(10, 20)
        publisher.publish(rdt.to_granule())
        self.assertFalse(monitor.event.wait(1))

        self.ingestion_management.resume_data_stream(ctd_stream_id, ingestion_config_id)

        self.assertTrue(monitor.wait())

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt2['time'], np.arange(20))
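    # Several tests below rely on a poll(...) utility whose import sits outside
    # this excerpt. A minimal gevent-based sketch of the assumed semantics
    # (retry the callback until it returns something truthy or the timeout
    # expires); illustrative only, not the real implementation:
    def _poll_sketch(self, callback, *args, **kwargs):
        timeout = kwargs.pop('timeout', 10)
        with gevent.Timeout(timeout, False):  # exit the block silently on timeout
            while True:
                result = callback(*args)
                if result:
                    return result
                gevent.sleep(0.5)
        return False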
    def test_last_granule(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)

        self.wait_until_we_have_enough_granules(dataset_id, 20)  # I just need two

        success = False

        def verifier():
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 10)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)
            comp = rdt['time'] == np.arange(10) + 10
            if not isinstance(comp, bool):
                return comp.all()
            return False

        success = poll(verifier)
        self.assertTrue(success)

        success = False

        def verify_points():
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 5)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)
            comp = rdt['time'] == np.arange(15, 20)
            if not isinstance(comp, bool):
                return comp.all()
            return False

        success = poll(verify_points)
        self.assertTrue(success)

    def test_replay_with_parameters(self):
        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict',
                                                                       parameter_context_ids=context_ids,
                                                                       temporal_context='time')

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)

        stream_id, route = self.pubsub_management.create_stream('replay_with_params',
                                                                exchange_point=self.exchange_point_name,
                                                                stream_definition_id=stream_def_id)
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id,
                                                      ingestion_configuration_id=config_id,
                                                      dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        self.publish_fake_data(stream_id, route)

        self.assertTrue(dataset_monitor.wait())

        query = {
            'start_time': 0 - 2208988800,
            'end_time': 19 - 2208988800,
            'stride_time': 2,
            'parameters': ['time', 'temp']
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id, query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        np.testing.assert_array_equal(rdt['time'], np.arange(0, 20, 2))
        self.assertEquals(set(rdt.iterkeys()), set(['time', 'temp']))

        extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time', 'temp'])
        self.assertTrue(extents['time'] >= 20)
        self.assertTrue(extents['temp'] >= 20)

    def test_repersist_data(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, 20)
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id,
                                                      ingestion_configuration_id=config_id,
                                                      dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)
        self.publish_hifi(stream_id, route, 2)
        self.publish_hifi(stream_id, route, 3)
        self.wait_until_we_have_enough_granules(dataset_id, 40)
        success = False
        with gevent.timeout.Timeout(5):
            while not success:
                replay_granule = self.data_retriever.retrieve(dataset_id)
                rdt = RecordDictionaryTool.load_from_granule(replay_granule)
                comp = rdt['time'] == np.arange(0, 40)
                if not isinstance(comp, bool):
                    success = comp.all()
                gevent.sleep(1)
        self.assertTrue(success)

    @unittest.skip('deprecated')
    def test_correct_time(self):
        # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e.
        # the conversion factor between Unix and NTP time
        unix_now = np.floor(time.time())
        ntp_now = unix_now + 2208988800

        unix_ago = unix_now - 20
        ntp_ago = unix_ago + 2208988800

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_simplex_coverage(dataset_id, mode='a')
        coverage.insert_timesteps(20)
        coverage.set_parameter_values('time', np.arange(ntp_ago, ntp_now))

        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)

        self.assertTrue(np.abs(temporal_bounds[0] - unix_ago) < 2)
        self.assertTrue(np.abs(temporal_bounds[1] - unix_now) < 2)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_out_of_band_retrieve(self):
        # Set up the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # Retrieve the data
        granule = DataRetrieverService.retrieve_oob(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertTrue((rdt['time'] == np.arange(40)).all())

    def publish_and_wait(self, dataset_id, granule):
        stream_ids, _ = self.resource_registry.find_objects(dataset_id, PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.wait())
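    # Tests in this class keep converting between Unix and NTP epochs by hand.
    # A small illustrative helper pair (not used by the tests) capturing the
    # single conversion fact they rely on: NTP time = Unix time + 2208988800,
    # the number of seconds between Jan 1 1900 and Jan 1 1970.
    NTP_UNIX_OFFSET = 2208988800

    @staticmethod
    def unix_to_ntp(unix_seconds):
        return unix_seconds + TestDMEnd2End.NTP_UNIX_OFFSET

    @staticmethod
    def ntp_to_unix(ntp_seconds):
        return ntp_seconds - TestDMEnd2End.NTP_UNIX_OFFSET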
    def test_sparse_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_sparse()
        stream_def_id = self.pubsub_management.create_stream_definition('sparse', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        stream_id, route = self.pubsub_management.create_stream('example',
                                                                exchange_point=self.exchange_point_name,
                                                                stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        dataset_id = self.create_dataset(pdict_id)
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        # Publish the initial granule
        # The first one has the sparse values set inside it: lat to 45 and lon to -71
        ntp_now = time.time() + 2208988800
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = ['']
        rdt['lat'] = [45]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [256.8]

        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        # Check the values and make sure they're correct
        np.testing.assert_allclose(rdt_out['time'], rdt['time'])
        np.testing.assert_allclose(rdt_out['temp'], rdt['temp'])
        np.testing.assert_allclose(rdt_out['lat'], np.array([45]))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71]))

        np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))

        # We're going to change the lat/lon
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = time.time() + 2208988800
        rdt['lat'] = [46]
        rdt['lon'] = [-73]

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_allclose(rdt_out['time'], rdt['time'])

        for i in xrange(9):
            ntp_now = time.time() + 2208988800
            rdt['time'] = [ntp_now]
            rdt['internal_timestamp'] = [ntp_now]
            rdt['temp'] = [300000]
            rdt['preferred_timestamp'] = ['driver_timestamp']
            rdt['port_timestamp'] = [ntp_now]
            rdt['quality_flag'] = [None]
            rdt['conductivity'] = [4341400]
            rdt['driver_timestamp'] = [ntp_now]
            rdt['pressure'] = [256.8]

            publisher.publish(rdt.to_granule())
            self.assertTrue(dataset_monitor.wait())
            dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_allclose(rdt_out['pressure'], np.array([256.8] * 10))
        np.testing.assert_allclose(rdt_out['lat'], np.array([45] + [46] * 9))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71] + [-73] * 9))
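# test_sparse_values above relies on sparse parameters (lat/lon) keeping their
# last published value until a new one arrives. A self-contained numpy sketch
# of that fill-forward expectation, assuming last-value-wins semantics (the
# real behavior lives in the coverage model):
def _sparse_fill_forward_sketch():
    import numpy as np
    published = [(0, 45), (1, 46)]  # (record index, lat value) as in the test
    lat = np.empty(10)
    for start, value in published:
        lat[start:] = value         # each new value overwrites the tail
    assert (lat == np.array([45] + [46] * 9)).all()
    return lat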
class TransformPrototypeIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(TransformPrototypeIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.rrc = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.ssclient = SchedulerServiceClient()
        self.event_publisher = EventPublisher()
        self.user_notification = UserNotificationServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()

        self.exchange_names = []
        self.exchange_points = []

    def tearDown(self):
        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def now_utc(self):
        return time.time()

    def _create_interval_timer_with_end_time(self, timer_interval=None, end_time=None):
        '''
        A convenience method to set up an interval timer with an end time
        '''
        self.timer_received_time = 0
        self.timer_interval = timer_interval

        start_time = self.now_utc()
        if not end_time:
            end_time = start_time + 2 * timer_interval + 1

        log.debug("got the end time here!! %s" % end_time)

        # Set up the interval timer. The scheduler will publish events with origin set to "Interval Timer"
        sid = self.ssclient.create_interval_timer(start_time="now",
                                                  interval=self.timer_interval,
                                                  end_time=end_time,
                                                  event_origin="Interval Timer",
                                                  event_subtype="")

        def cleanup_timer(scheduler, schedule_id):
            """
            Do a friendly cancel of the scheduled event.
            If it fails, it's ok.
            """
            try:
                scheduler.cancel_timer(schedule_id)
            except:
                log.warn("Couldn't cancel")

        self.addCleanup(cleanup_timer, self.ssclient, sid)

        return sid
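    # The scheduler above publishes timer events at a fixed interval until
    # end_time. A minimal gevent sketch of the same idea (illustrative only;
    # the tests use the real SchedulerServiceClient):
    def _interval_timer_sketch(self, interval, end_time, publish_tick):
        def run():
            # Tick until the end time passes, mimicking the scheduler's cadence
            while self.now_utc() < end_time:
                publish_tick()
                gevent.sleep(interval)
        return gevent.spawn(run)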
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_event_processing(self):
        '''
        Test that events are processed by the transforms according to a provided algorithm
        '''

        #-------------------------------------------------------------------------------------
        # Set up the scheduler for an interval timer with an end time
        #-------------------------------------------------------------------------------------
        timer_id = self._create_interval_timer_with_end_time(timer_interval=2)
        self.assertIsNotNone(timer_id)

        #-------------------------------------------------------------------------------------
        # Create an event alert transform....
        # The configuration for the Event Alert Transform... set up the event types to listen to
        #-------------------------------------------------------------------------------------
        configuration = {
            'process': {
                'event_type': 'ResourceEvent',
                'timer_origin': 'Interval Timer',
                'instrument_origin': 'My_favorite_instrument'
            }
        }

        #-------------------------------------------------------------------------------------
        # Create the process
        #-------------------------------------------------------------------------------------
        pid = TransformPrototypeIntTest.create_process(name='event_alert_transform',
                                                       module='ion.processes.data.transforms.event_alert_transform',
                                                       class_name='EventAlertTransform',
                                                       configuration=configuration)
        self.addCleanup(self.process_dispatcher.cancel_process, pid)
        self.assertIsNotNone(pid)

        #-------------------------------------------------------------------------------------
        # Publish events and make assertions about alerts
        #-------------------------------------------------------------------------------------
        queue = gevent.queue.Queue()

        def event_received(message, headers):
            queue.put(message)

        event_subscriber = EventSubscriber(origin="EventAlertTransform",
                                           event_type="DeviceEvent",
                                           callback=event_received)

        event_subscriber.start()
        self.addCleanup(event_subscriber.stop)

        # Publish a batch of events from the instrument. While the instrument
        # keeps talking, no alert should be raised.
        for i in xrange(5):
            self.event_publisher.publish_event(event_type='ExampleDetectableEvent',
                                               origin="My_favorite_instrument",
                                               voltage=5,
                                               telemetry=10,
                                               temperature=20)
            gevent.sleep(0.1)
            self.assertTrue(queue.empty())

        # Now stay silent for longer than the timer interval (2 seconds) so the
        # transform notices the missing instrument events.
        gevent.sleep(5)

        #-------------------------------------------------------------------------------------
        # Make assertions about the alert event published by the EventAlertTransform
        #-------------------------------------------------------------------------------------
        event = queue.get(timeout=10)

        log.debug("Alarm event received from the EventAlertTransform %s" % event)

        self.assertEquals(event.type_, "DeviceEvent")
        self.assertEquals(event.origin, "EventAlertTransform")

        #------------------------------------------------------------------------------------------------
        # Now clear the event queue being populated by alarm events and publish normally once again
        #------------------------------------------------------------------------------------------------
        queue.queue.clear()

        for i in xrange(5):
            self.event_publisher.publish_event(event_type='ExampleDetectableEvent',
                                               origin="My_favorite_instrument",
                                               voltage=5,
                                               telemetry=10,
                                               temperature=20)
            gevent.sleep(0.1)

        self.assertTrue(queue.empty())

        log.debug("This completes the requirement that the EventAlertTransform publishes "
                  "an alarm event when it does not hear from the instrument for some time.")
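    # The StreamAlertTransform exercised in the next test is documented (in the
    # todos inside the test body) as checking that a message contains the word
    # PUBLISH and a 'VALUE = <number>' smaller than a configured threshold. A
    # minimal pure-Python sketch of that check, assuming exactly those
    # semantics (the real logic lives in the transform module):
    @staticmethod
    def _stream_alert_check_sketch(message, threshold):
        import re
        match = re.search(r'VALUE\s*=\s*(\d+)', message)
        return ('PUBLISH' in message
                and match is not None
                and int(match.group(1)) < threshold)
    # e.g. the dummy message published below (VALUE = 5, threshold 10) would
    # return True and therefore trigger an alert.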
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_stream_processing(self):
        #--------------------------------------------------------------------------------
        # Test that streams are processed by the transforms according to a provided algorithm
        #--------------------------------------------------------------------------------

        # todo: In this simple implementation, we are checking if the stream has the word PUBLISH,
        # todo (contd): and if the word VALUE=<number> exists and that number is less than something
        # todo: later on we are going to use complex algorithms to make this prototype powerful

        #-------------------------------------------------------------------------------------
        # Start a subscriber to listen for an alert event from the Stream Alert Transform
        #-------------------------------------------------------------------------------------
        queue = gevent.queue.Queue()

        def event_received(message, headers):
            queue.put(message)

        event_subscriber = EventSubscriber(origin="StreamAlertTransform",
                                           event_type="DeviceEvent",
                                           callback=event_received)

        event_subscriber.start()
        self.addCleanup(event_subscriber.stop)

        #-------------------------------------------------------------------------------------
        # The configuration for the Stream Alert Transform... set up the event types to listen to
        #-------------------------------------------------------------------------------------
        config = {
            'process': {
                'queue_name': 'a_queue',
                'value': 10,
                'event_type': 'DeviceEvent'
            }
        }

        #-------------------------------------------------------------------------------------
        # Create the process
        #-------------------------------------------------------------------------------------
        pid = TransformPrototypeIntTest.create_process(name='transform_data_process',
                                                       module='ion.processes.data.transforms.event_alert_transform',
                                                       class_name='StreamAlertTransform',
                                                       configuration=config)
        self.addCleanup(self.process_dispatcher.cancel_process, pid)
        self.assertIsNotNone(pid)

        #-------------------------------------------------------------------------------------
        # Publish streams and make assertions about alerts
        #-------------------------------------------------------------------------------------
        exchange_name = 'a_queue'
        exchange_point = 'test_exchange'
        routing_key = 'stream_id.stream'

        stream_route = StreamRoute(exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(exchange_name)
        xp = self.container.ex_manager.create_xp(exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        message = ("A dummy example message containing the word PUBLISH, and with VALUE = 5. "
                   "This message will trigger an alert event from the StreamAlertTransform "
                   "because the value provided is less than the threshold of 10 passed in through the config.")
        pub.publish(message)

        event = queue.get(timeout=10)
        self.assertEquals(event.type_, "DeviceEvent")
        self.assertEquals(event.origin, "StreamAlertTransform")

        # self.purge_queues(exchange_name)

    # def purge_queues(self, exchange_name):
    #     xn = self.container.ex_manager.create_xn_queue(exchange_name)
    #     xn.purge()

    @staticmethod
    def create_process(name='', module='', class_name='', configuration=None):
        '''
        A helper method to create a process
        '''
        producer_definition = ProcessDefinition(name=name)
        producer_definition.executable = {
            'module': module,
            'class': class_name
        }

        process_dispatcher = ProcessDispatcherServiceClient()

        procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)
        pid = process_dispatcher.schedule_process(process_definition_id=procdef_id, configuration=configuration)

        return pid
""" #------------------------------------------------------------------------------------- # Start a subscriber to listen for an alert event from the Stream Alert Transform #------------------------------------------------------------------------------------- queue_bad_data = gevent.queue.Queue() queue_no_data = gevent.queue.Queue() def bad_data(message, headers): log.debug("Got a BAD data event: %s" % message) if message.type_ == "DeviceStatusEvent": queue_bad_data.put(message) def no_data(message, headers): log.debug("Got a NO data event: %s" % message) queue_no_data.put(message) event_subscriber_bad_data = EventSubscriber( origin="instrument_1", event_type="DeviceStatusEvent", callback=bad_data) event_subscriber_no_data = EventSubscriber( origin="instrument_1", event_type="DeviceCommsEvent", callback=no_data) event_subscriber_bad_data.start() event_subscriber_no_data.start() self.addCleanup(event_subscriber_bad_data.stop) self.addCleanup(event_subscriber_no_data.stop) #------------------------------------------------------------------------------------- # The configuration for the Stream Alert Transform... set up the event types to listen to #------------------------------------------------------------------------------------- self.valid_values = [-100, 100] self.timer_interval = 5 self.queue_name = 'a_queue' config = { 'process':{ 'timer_interval': self.timer_interval, 'queue_name': self.queue_name, 'variable_name': 'input_voltage', 'time_field_name': 'preferred_timestamp', 'valid_values': self.valid_values, 'timer_origin': 'Interval Timer', 'event_origin': 'instrument_1' } } #------------------------------------------------------------------------------------- # Create the process #------------------------------------------------------------------------------------- pid = TransformPrototypeIntTest.create_process( name= 'DemoStreamAlertTransform', module='ion.processes.data.transforms.event_alert_transform', class_name='DemoStreamAlertTransform', configuration= config) self.addCleanup(self.process_dispatcher.cancel_process, pid) self.assertIsNotNone(pid) #------------------------------------------------------------------------------------- # Publish streams and make assertions about alerts #------------------------------------------------------------------------------------- pdict_id = self.dataset_management.read_parameter_dictionary_by_name(name= 'platform_eng_parsed', id_only=True) stream_def_id = self.pubsub_management.create_stream_definition('demo_stream', parameter_dictionary_id=pdict_id) stream_id, stream_route = self.pubsub_management.create_stream( name='test_demo_alert', exchange_point='exch_point_1', stream_definition_id=stream_def_id) sub_1 = self.pubsub_management.create_subscription(name='sub_1', stream_ids=[stream_id], exchange_points=['exch_point_1'], exchange_name=self.queue_name) self.pubsub_management.activate_subscription(sub_1) self.exchange_names.append('sub_1') self.exchange_points.append('exch_point_1') #------------------------------------------------------------------------------------- # publish a *GOOD* granule #------------------------------------------------------------------------------------- self.length = 2 val = numpy.array([random.uniform(0,50) for l in xrange(self.length)]) self._publish_granules(stream_id= stream_id, stream_route= stream_route, number=1, values=val) self.assertTrue(queue_bad_data.empty()) #------------------------------------------------------------------------------------- # publish a few *BAD* granules 
#------------------------------------------------------------------------------------- self.number = 2 val = numpy.array([(110 + l) for l in xrange(self.length)]) self._publish_granules(stream_id= stream_id, stream_route= stream_route, number= self.number, values=val) for number in xrange(self.number): event = queue_bad_data.get(timeout=40) self.assertEquals(event.type_, "DeviceStatusEvent") self.assertEquals(event.origin, "instrument_1") self.assertEquals(event.state, DeviceStatusType.STATUS_WARNING) self.assertEquals(event.valid_values, self.valid_values) self.assertEquals(event.sub_type, 'input_voltage') self.assertTrue(set(event.values) == set(val)) s = set(event.time_stamps) cond = s in [set(numpy.array([1 for l in xrange(self.length)]).tolist()), set(numpy.array([2 for l in xrange(self.length)]).tolist())] self.assertTrue(cond) # To ensure that only the bad values generated the alert events. Queue should be empty now self.assertEquals(queue_bad_data.qsize(), 0) #------------------------------------------------------------------------------------- # Do not publish any granules for some time. This should generate a DeviceCommsEvent for the communication status #------------------------------------------------------------------------------------- event = queue_no_data.get(timeout=15) self.assertEquals(event.type_, "DeviceCommsEvent") self.assertEquals(event.origin, "instrument_1") self.assertEquals(event.origin_type, "PlatformDevice") self.assertEquals(event.state, DeviceCommsType.DATA_DELIVERY_INTERRUPTION) self.assertEquals(event.sub_type, 'input_voltage') #------------------------------------------------------------------------------------- # Empty the queues and repeat tests #------------------------------------------------------------------------------------- queue_bad_data.queue.clear() queue_no_data.queue.clear() #------------------------------------------------------------------------------------- # publish a *GOOD* granule again #------------------------------------------------------------------------------------- val = numpy.array([(l + 20) for l in xrange(self.length)]) self._publish_granules(stream_id= stream_id, stream_route= stream_route, number=1, values=val) self.assertTrue(queue_bad_data.empty()) #------------------------------------------------------------------------------------- # Again do not publish any granules for some time. This should generate a DeviceCommsEvent for the communication status #------------------------------------------------------------------------------------- event = queue_no_data.get(timeout=20) self.assertEquals(event.type_, "DeviceCommsEvent") self.assertEquals(event.origin, "instrument_1") self.assertEquals(event.origin_type, "PlatformDevice") self.assertEquals(event.state, DeviceCommsType.DATA_DELIVERY_INTERRUPTION) self.assertEquals(event.sub_type, 'input_voltage') #------------------------------------------------------------------------------------- # Again do not publish any granules for some time. 
This should generate a DeviceCommsEvent for the communication status #------------------------------------------------------------------------------------- ar = gevent.event.AsyncResult() def poller(ar, method, *args): events_in_db = method(*args) if len(events_in_db) > 0: ar.set(events_in_db) return True else: return False poll(poller, ar, self.user_notification.find_events, 'instrument_1') # events_in_db = self.user_notification.find_events(origin='instrument_1') events_in_db = ar.get(10) log.debug("events::: %s" % events_in_db) bad_data_events = [] no_data_events = [] for event in events_in_db: if event.type_ == 'DeviceStatusEvent': bad_data_events.append(event) self.assertEquals(event.origin, "instrument_1") self.assertEquals(event.status, DeviceStatusType.STATUS_WARNING) self.assertEquals(event.valid_values, self.valid_values) self.assertEquals(event.sub_type, 'input_voltage') elif event.type_ == 'DeviceCommsEvent': no_data_events.append(event) self.assertEquals(event.origin, "instrument_1") self.assertEquals(event.origin_type, "PlatformDevice") self.assertEquals(event.status, DeviceCommsType.DATA_DELIVERY_INTERRUPTION) self.assertEquals(event.sub_type, 'input_voltage') self.assertTrue(len(bad_data_events) > 0) self.assertTrue(len(no_data_events) > 0) log.debug("This satisfies L4-CI-SA-RQ-114 : 'Marine facility shall monitor marine infrastructure usage by instruments.'" " The req is satisfied because the stream alert transform" "is able to send device status and communication events over selected time intervals. This capability will be " "augmented in the future.") def _publish_granules(self, stream_id=None, stream_route=None, values = None,number=None): pub = StandaloneStreamPublisher(stream_id, stream_route) stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id) stream_def_id = stream_def._id rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) times = numpy.array([number for l in xrange(self.length)]) for i in xrange(number): rdt['input_voltage'] = values rdt['preferred_timestamp'] = ['time' for l in xrange(len(times))] rdt['time'] = times g = rdt.to_granule() g.data_producer_id = 'instrument_1' log.debug("granule #%s published by instrument:: %s" % ( number,g)) pub.publish(g) @staticmethod def makeEpochTime(date_time = None): """ provides the seconds since epoch give a python datetime object. @param date_time Python datetime object @retval seconds_since_epoch int """ seconds_since_epoch = calendar.timegm(date_time.timetuple()) return seconds_since_epoch
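# The DemoStreamAlertTransform above flags granule values that fall outside
# valid_values = [-100, 100]. A self-contained numpy sketch of that range
# check, assuming a closed valid interval (the real check lives in the
# transform module):
def _out_of_range_sketch(values, valid_values):
    import numpy as np
    lo, hi = valid_values
    values = np.asarray(values)
    return (values < lo) | (values > hi)
# e.g. _out_of_range_sketch([110, 111], [-100, 100]) -> array([True, True]),
# which is why the *BAD* granules in the test raise DeviceStatusEvents.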
def instrument_test_driver(container):

    org_client = OrgManagementServiceClient(node=container.node)
    id_client = IdentityManagementServiceClient(node=container.node)

    system_actor = id_client.find_actor_identity_by_name(name=CFG.system.system_actor)
    log.info('system actor:' + system_actor._id)

    sa_header_roles = get_role_message_headers(org_client.find_all_roles_by_user(system_actor._id))

    # Names of agent data streams to be configured.
    parsed_stream_name = 'ctd_parsed'
    raw_stream_name = 'ctd_raw'

    # Driver configuration.
    # Simulator
    driver_config = {
        'svr_addr': 'localhost',
        'cmd_port': 5556,
        'evt_port': 5557,
        'dvr_mod': 'ion.agents.instrument.drivers.sbe37.sbe37_driver',
        'dvr_cls': 'SBE37Driver',
        'comms_config': {
            SBE37Channel.CTD: {
                'method': 'ethernet',
                'device_addr': CFG.device.sbe37.host,
                'device_port': CFG.device.sbe37.port,
                'server_addr': 'localhost',
                'server_port': 8888
            }
        }
    }
    # Hardware

    _container_client = ContainerAgentClient(node=container.node, name=container.name)

    # Create a pubsub client to create streams.
    _pubsub_client = PubsubManagementServiceClient(node=container.node)

    # A callback for processing subscribed-to data.
    def consume(message, headers):
        log.info('Subscriber received message: %s', str(message))

    # Create a stream subscriber registrar to create subscribers.
    subscriber_registrar = StreamSubscriberRegistrar(process=container, node=container.node)

    subs = []

    # Create streams for each stream named in driver.
    stream_config = {}
    for (stream_name, val) in PACKET_CONFIG.iteritems():
        stream_def = ctd_stream_definition(stream_id=None)
        stream_def_id = _pubsub_client.create_stream_definition(container=stream_def)
        stream_id = _pubsub_client.create_stream(name=stream_name,
                                                 stream_definition_id=stream_def_id,
                                                 original=True,
                                                 encoding='ION R2',
                                                 headers={'ion-actor-id': system_actor._id,
                                                          'ion-actor-roles': sa_header_roles})
        stream_config[stream_name] = stream_id

        # Create subscriptions for each stream.
        exchange_name = '%s_queue' % stream_name
        sub = subscriber_registrar.create_subscriber(exchange_name=exchange_name, callback=consume)
        sub.start()
        query = StreamQuery(stream_ids=[stream_id])
        sub_id = _pubsub_client.create_subscription(query=query, exchange_name=exchange_name)
        _pubsub_client.activate_subscription(sub_id)
        subs.append(sub)

    # Create agent config.
    agent_resource_id = '123xyz'

    agent_config = {
        'driver_config': driver_config,
        'stream_config': stream_config,
        'agent': {'resource_id': agent_resource_id}
    }

    # Launch an instrument agent process.
    _ia_name = 'agent007'
    _ia_mod = 'ion.agents.instrument.instrument_agent'
    _ia_class = 'InstrumentAgent'
    _ia_pid = _container_client.spawn_process(name=_ia_name,
                                              module=_ia_mod,
                                              cls=_ia_class,
                                              config=agent_config)

    log.info('got pid=%s for resource_id=%s' % (str(_ia_pid), str(agent_resource_id)))
def test_dm_integration(self):
    '''
    Test full DM services integration, using the salinity transform
    '''
    cc = self.container
    assertions = self.assertTrue

    #-----------------------------
    # Copy below here to run as a script (don't forget the imports of course!)
    #-----------------------------

    # Create some service clients...
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)
    transform_management_service = TransformManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    # declare some handy variables
    datastore_name = 'test_dm_integration'

    ###
    ### In the beginning there were two stream definitions...
    ###
    # create a stream definition for the data from the ctd simulator
    ctd_stream_def = SBE37_CDM_stream_definition()
    ctd_stream_def_id = pubsub_management_service.create_stream_definition(container=ctd_stream_def,
                                                                           name='Simulated CTD data')

    # create a stream definition for the data from the salinity transform
    sal_stream_def_id = pubsub_management_service.create_stream_definition(container=SalinityTransform.outgoing_stream_def,
                                                                           name='Scalar Salinity data stream')

    ###
    ### And two process definitions...
    ###
    # one for the ctd simulator...
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.ctd_stream_publisher',
        'class': 'SimpleCtdPublisher'
    }
    ctd_sim_procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)

    # one for the salinity transform
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.transforms.ctd.ctd_L2_salinity',
        'class': 'SalinityTransform'
    }
    salinity_transform_procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)

    #---------------------------
    # Set up ingestion - this is an operator concern - not done by SA in a deployed system
    #---------------------------
    # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
    log.debug('Calling create_ingestion_configuration')
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        number_of_workers=1
    )
    ingestion_management_service.activate_ingestion_configuration(ingestion_configuration_id=ingestion_configuration_id)

    #---------------------------
    # Set up the producer (CTD Simulator)
    #---------------------------
    # Create the stream
    ctd_stream_id = pubsub_management_service.create_stream(stream_definition_id=ctd_stream_def_id)

    # Set up the datasets
    ctd_dataset_id = dataset_management_service.create_dataset(
        stream_id=ctd_stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule'
    )

    # Configure ingestion of this dataset
    ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=ctd_dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
    )
    # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

    #---------------------------
    # Set up the salinity transform
    #---------------------------
    # Create the stream
    sal_stream_id = pubsub_management_service.create_stream(stream_definition_id=sal_stream_def_id)

    # Set up the datasets
    sal_dataset_id = dataset_management_service.create_dataset(
        stream_id=sal_stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule'
    )

    # Configure ingestion of the salinity as a dataset
    sal_dataset_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=sal_dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
    )
    # Hold onto sal_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

    # Create a subscription as input to the transform
    sal_transform_input_subscription_id = pubsub_management_service.create_subscription(
        query=StreamQuery(stream_ids=[ctd_stream_id, ]),
        exchange_name='salinity_transform_input')  # how do we make these names??? i.e. should they be anonymous?

    # create the salinity transform
    sal_transform_id = transform_management_service.create_transform(
        name='example salinity transform',
        in_subscription_id=sal_transform_input_subscription_id,
        out_streams={'output': sal_stream_id, },
        process_definition_id=salinity_transform_procdef_id,
        # no configuration needed at this time...
    )
    # start the transform - for a test case it makes sense to do it before starting the producer, but it is not required
    transform_management_service.activate_transform(transform_id=sal_transform_id)

    # Start the ctd simulator to produce some data
    configuration = {
        'process': {
            'stream_id': ctd_stream_id,
        }
    }
    ctd_sim_pid = process_dispatcher.schedule_process(process_definition_id=ctd_sim_procdef_id,
                                                      configuration=configuration)

    ###
    ### Make a subscriber in the test to listen for salinity data
    ###
    salinity_subscription_id = pubsub_management_service.create_subscription(
        query=StreamQuery([sal_stream_id, ]),
        exchange_name='salinity_test',
        name="test salinity subscription",
    )

    pid = cc.spawn_process(name='dummy_process_for_test',
                           module='pyon.ion.process',
                           cls='SimpleProcess',
                           config={})
    dummy_process = cc.proc_manager.procs[pid]

    subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node)

    result = gevent.event.AsyncResult()
    results = []

    def message_received(message, headers):
        log.warn('Salinity data received!')
        results.append(message)
        if len(results) > 3:
            result.set(True)

    subscriber = subscriber_registrar.create_subscriber(exchange_name='salinity_test', callback=message_received)
    subscriber.start()

    # after the queue has been created it is safe to activate the subscription
    pubsub_management_service.activate_subscription(subscription_id=salinity_subscription_id)

    # Assert that we have received data
    assertions(result.get(timeout=10))

    # Stop the flow and parse the messages...
    process_dispatcher.cancel_process(ctd_sim_pid)  # kill the ctd simulator process - that is enough data

    import numpy
    for message in results:
        psd = PointSupplementStreamParser(stream_definition=SalinityTransform.outgoing_stream_def,
                                          stream_granule=message)

        # Test the handy info method for the names of fields in the stream def
        assertions('salinity' in psd.list_field_names())

        # you have to know the name of the coverage in the stream def
        salinity = psd.get_values('salinity')

        assertions(isinstance(salinity, numpy.ndarray))

        assertions(numpy.nanmin(salinity) > 0.0)  # salinity should always be greater than 0
class CtdTransformsIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(CtdTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.queue_cleanup = []
        self.exchange_cleanup = []

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

        self.exchange_name = 'ctd_L0_all_queue'
        self.exchange_point = 'test_exchange'
        self.i = 0

    def tearDown(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()
        for exchange in self.exchange_cleanup:
            xp = self.container.ex_manager.create_xp(exchange)
            xp.delete()

    def test_ctd_L0_all(self):
        '''
        Test that packets are processed by the ctd_L0_all transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='ctd_L0_all',
            description='For testing ctd_L0_all')
        process_definition.executable['module'] = 'ion.processes.data.transforms.ctd.ctd_L0_all'
        process_definition.executable['class'] = 'ctd_L0_all'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)

        stream_def_id = self.pubsub.create_stream_definition('ctd_all_stream_def', parameter_dictionary_id=pdict_id)

        cond_stream_id, _ = self.pubsub.create_stream('test_cond',
                                                      exchange_point='science_data',
                                                      stream_definition_id=stream_def_id)

        pres_stream_id, _ = self.pubsub.create_stream('test_pres',
                                                      exchange_point='science_data',
                                                      stream_definition_id=stream_def_id)

        temp_stream_id, _ = self.pubsub.create_stream('test_temp',
                                                      exchange_point='science_data',
                                                      stream_definition_id=stream_def_id)

        config.process.publish_streams.conductivity = cond_stream_id
        config.process.publish_streams.pressure = pres_stream_id
        config.process.publish_streams.temperature = temp_stream_id

        # Schedule the process
        pid = self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create subscribers that will receive the conductivity, temperature and pressure granules from
        # the ctd transform
        #---------------------------------------------------------------------------------------------
        ar_cond = gevent.event.AsyncResult()

        def subscriber1(m, r, s):
            ar_cond.set(m)

        sub_cond = StandaloneStreamSubscriber('sub_cond', subscriber1)
        self.addCleanup(sub_cond.stop)

        ar_temp = gevent.event.AsyncResult()

        def subscriber2(m, r, s):
            ar_temp.set(m)

        sub_temp = StandaloneStreamSubscriber('sub_temp', subscriber2)
        self.addCleanup(sub_temp.stop)

        ar_pres = gevent.event.AsyncResult()

        def subscriber3(m, r, s):
            ar_pres.set(m)

        sub_pres = StandaloneStreamSubscriber('sub_pres', subscriber3)
        self.addCleanup(sub_pres.stop)

        sub_cond_id = self.pubsub.create_subscription('subscription_cond',
                                                      stream_ids=[cond_stream_id],
                                                      exchange_name='sub_cond')

        sub_temp_id = self.pubsub.create_subscription('subscription_temp',
                                                      stream_ids=[temp_stream_id],
                                                      exchange_name='sub_temp')

        sub_pres_id = self.pubsub.create_subscription('subscription_pres',
                                                      stream_ids=[pres_stream_id],
                                                      exchange_name='sub_pres')

        self.pubsub.activate_subscription(sub_cond_id)
        self.pubsub.activate_subscription(sub_temp_id)
        self.pubsub.activate_subscription(sub_pres_id)

        self.queue_cleanup.append(sub_cond.xn.queue)
        self.queue_cleanup.append(sub_temp.xn.queue)
        self.queue_cleanup.append(sub_pres.xn.queue)

        sub_cond.start()
        sub_temp.start()
        sub_pres.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can then be picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------
        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------
        # Get the granule that is published by the ctd transform post processing
        result_cond = ar_cond.get(timeout=10)
        result_temp = ar_temp.get(timeout=10)
        result_pres = ar_pres.get(timeout=10)

        out_dict = {}
        out_dict['c'] = RecordDictionaryTool.load_from_granule(result_cond)['conductivity']
        out_dict['t'] = RecordDictionaryTool.load_from_granule(result_temp)['temp']
        out_dict['p'] = RecordDictionaryTool.load_from_granule(result_pres)['pressure']

        # Check that the transform algorithm was successfully executed
        self.check_granule_splitting(publish_granule, out_dict)
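    # Every test in this class repeats the same publish-side routing setup:
    # bind a queue on the test exchange point to the routing key
    # '<stream id>.stream' and publish through a StandaloneStreamPublisher.
    # A small helper sketch of that boilerplate (illustrative; the tests
    # inline it so each step stays visible):
    def _make_publisher_sketch(self, stream_id='stream_id'):
        routing_key = '%s.stream' % stream_id
        stream_route = StreamRoute(self.exchange_point, routing_key)
        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind(routing_key, xp)
        return StandaloneStreamPublisher(stream_id, stream_route)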
    def test_ctd_L1_conductivity(self):
        '''
        Test that packets are processed by the ctd_L1_conductivity transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='CTDL1ConductivityTransform',
            description='For testing CTDL1ConductivityTransform')
        process_definition.executable['module'] = 'ion.processes.data.transforms.ctd.ctd_L1_conductivity'
        process_definition.executable['class'] = 'CTDL1ConductivityTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition('cond_stream_def', parameter_dictionary_id=pdict_id)
        cond_stream_id, _ = self.pubsub.create_stream('test_conductivity',
                                                      exchange_point='science_data',
                                                      stream_definition_id=stream_def_id)
        config.process.publish_streams.conductivity = cond_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the conductivity granule from
        # the ctd transform
        #---------------------------------------------------------------------------------------------
        ar_cond = gevent.event.AsyncResult()

        def subscriber1(m, r, s):
            ar_cond.set(m)

        sub_cond = StandaloneStreamSubscriber('sub_cond', subscriber1)
        self.addCleanup(sub_cond.stop)

        sub_cond_id = self.pubsub.create_subscription('subscription_cond',
                                                      stream_ids=[cond_stream_id],
                                                      exchange_name='sub_cond')

        self.pubsub.activate_subscription(sub_cond_id)

        self.queue_cleanup.append(sub_cond.xn.queue)

        sub_cond.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can then be picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------
        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------
        # Get the granule that is published by the ctd transform post processing
        result_cond = ar_cond.get(timeout=10)
        self.assertTrue(isinstance(result_cond, Granule))

        rdt = RecordDictionaryTool.load_from_granule(result_cond)
        self.assertTrue(rdt.__contains__('conductivity'))

        self.check_cond_algorithm_execution(publish_granule, result_cond)

    def check_cond_algorithm_execution(self, publish_granule, granule_from_transform):
        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(granule_from_transform)

        output_data = output_rdt_transform['conductivity']
        input_data = input_rdt_to_transform['conductivity']

        # Compare element-wise; the original compared the truthiness of the two
        # .all() results, which passes vacuously.
        self.assertTrue((((input_data / 100000.0) - 0.5) == output_data).all())

    def check_pres_algorithm_execution(self, publish_granule, granule_from_transform):
        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(granule_from_transform)

        output_data = output_rdt_transform['pressure']
        input_data = input_rdt_to_transform['pressure']

        self.assertTrue((input_data == output_data).all())

    def check_temp_algorithm_execution(self, publish_granule, granule_from_transform):
        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(granule_from_transform)

        output_data = output_rdt_transform['temp']
        input_data = input_rdt_to_transform['temp']

        self.assertTrue((((input_data / 10000.0) - 10) == output_data).all())

    def check_density_algorithm_execution(self, publish_granule, granule_from_transform):
        #------------------------------------------------------------------
        # Calculate the correct density from the input granule data
        #------------------------------------------------------------------
        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(granule_from_transform)

        conductivity = input_rdt_to_transform['conductivity']
        pressure = input_rdt_to_transform['pressure']
        temperature = input_rdt_to_transform['temp']

        longitude = input_rdt_to_transform['lon']
        latitude = input_rdt_to_transform['lat']

        sp = SP_from_cndr(r=conductivity / cte.C3515, t=temperature, p=pressure)
        sa = SA_from_SP(sp, pressure, longitude, latitude)
        dens_value = rho(sa, temperature, pressure)

        out_density = output_rdt_transform['density']

        #-----------------------------------------------------------------------------
        # Check that the output data from the transform has the correct density values
        #-----------------------------------------------------------------------------
        self.assertTrue((dens_value == out_density).all())

    def check_salinity_algorithm_execution(self, publish_granule, granule_from_transform):
        #------------------------------------------------------------------
        # Calculate the correct salinity from the input granule data
        #------------------------------------------------------------------
        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(granule_from_transform)

        conductivity = input_rdt_to_transform['conductivity']
        pressure = input_rdt_to_transform['pressure']
        temperature = input_rdt_to_transform['temp']

        sal_value = SP_from_cndr(r=conductivity / cte.C3515, t=temperature, p=pressure)

        out_salinity = output_rdt_transform['salinity']

        #-----------------------------------------------------------------------------
        # Check that the output data from the transform has the correct salinity values
        #-----------------------------------------------------------------------------
        self.assertTrue((sal_value == out_salinity).all())

    def check_granule_splitting(self, publish_granule, out_dict):
        '''
        This checks that the ctd_L0_all transform is able to split out one of the
        granules from the whole granule fed into the transform
        '''
        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(publish_granule)

        in_cond = input_rdt_to_transform['conductivity']
        in_pressure = input_rdt_to_transform['pressure']
        in_temp = input_rdt_to_transform['temp']

        out_cond = out_dict['c']
        out_pres = out_dict['p']
        out_temp = out_dict['t']

        self.assertTrue((in_cond == out_cond).all())
        self.assertTrue((in_pressure == out_pres).all())
        self.assertTrue((in_temp == out_temp).all())
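    # The L1 checks above encode the expected transform arithmetic:
    # conductivity_L1 = raw / 100000.0 - 0.5 and temp_L1 = raw / 10000.0 - 10
    # (the pressure check compares input and output directly). A self-contained
    # numpy sketch verifying those formulas on the raw counts used elsewhere in
    # this document (conductivity 4341400 -> 42.914, temp 300000 -> 20.0):
    @staticmethod
    def _l1_scaling_sketch():
        import numpy as np
        raw_cond = np.array([4341400.0])
        raw_temp = np.array([300000.0])
        assert np.allclose(raw_cond / 100000.0 - 0.5, np.array([42.914]))
        assert np.allclose(raw_temp / 10000.0 - 10, np.array([20.0]))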
    def test_ctd_L1_pressure(self):
        '''
        Test that packets are processed by the ctd_L1_pressure transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='CTDL1PressureTransform',
            description='For testing CTDL1PressureTransform')
        process_definition.executable['module'] = 'ion.processes.data.transforms.ctd.ctd_L1_pressure'
        process_definition.executable['class'] = 'CTDL1PressureTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition('pres_stream_def', parameter_dictionary_id=pdict_id)
        pres_stream_id, _ = self.pubsub.create_stream('test_pressure',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')

        config.process.publish_streams.pressure = pres_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the pressure granules from the ctd transform
        #---------------------------------------------------------------------------------------------
        ar_pres = gevent.event.AsyncResult()
        def subscriber3(m, r, s):
            ar_pres.set(m)
        sub_pres = StandaloneStreamSubscriber('sub_pres', subscriber3)
        self.addCleanup(sub_pres.stop)

        sub_pres_id = self.pubsub.create_subscription('subscription_pres',
            stream_ids=[pres_stream_id],
            exchange_name='sub_pres')

        self.pubsub.activate_subscription(sub_pres_id)

        self.queue_cleanup.append(sub_pres.xn.queue)

        sub_pres.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can then be picked up by the ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result = ar_pres.get(timeout=10)
        self.assertIsInstance(result, Granule)

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertIn('pressure', rdt)

        self.check_pres_algorithm_execution(publish_granule, result)
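    # NOTE: hedged refactoring sketch, not in the original file. Every test here repeats the
    # same five-step subscriber setup; a helper like this (hypothetical name) would collapse it
    # to one call while keeping the cleanup hooks the tests already register:
    def _listen_to_stream(self, sub_name, stream_id, callback):
        sub = StandaloneStreamSubscriber(sub_name, callback)
        self.addCleanup(sub.stop)
        sub_id = self.pubsub.create_subscription('subscription_%s' % sub_name,
            stream_ids=[stream_id],
            exchange_name=sub_name)
        self.pubsub.activate_subscription(sub_id)
        self.queue_cleanup.append(sub.xn.queue)
        sub.start()
        return sub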
    def test_ctd_L1_temperature(self):
        '''
        Test that packets are processed by the ctd_L1_temperature transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='CTDL1TemperatureTransform',
            description='For testing CTDL1TemperatureTransform')
        process_definition.executable['module'] = 'ion.processes.data.transforms.ctd.ctd_L1_temperature'
        process_definition.executable['class'] = 'CTDL1TemperatureTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition('temp_stream_def', parameter_dictionary_id=pdict_id)
        temp_stream_id, _ = self.pubsub.create_stream('test_temperature',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')

        config.process.publish_streams.temperature = temp_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the temperature granule from the ctd transform
        #---------------------------------------------------------------------------------------------
        ar_temp = gevent.event.AsyncResult()
        def subscriber2(m, r, s):
            ar_temp.set(m)
        sub_temp = StandaloneStreamSubscriber('sub_temp', subscriber2)
        self.addCleanup(sub_temp.stop)

        sub_temp_id = self.pubsub.create_subscription('subscription_temp',
            stream_ids=[temp_stream_id],
            exchange_name='sub_temp')

        self.pubsub.activate_subscription(sub_temp_id)

        self.queue_cleanup.append(sub_temp.xn.queue)

        sub_temp.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can then be picked up by the ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result = ar_temp.get(timeout=10)
        self.assertIsInstance(result, Granule)

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertIn('temp', rdt)

        self.check_temp_algorithm_execution(publish_granule, result)
    def test_ctd_L2_density(self):
        '''
        Test that packets are processed by the ctd_L2_density transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='DensityTransform',
            description='For testing DensityTransform')
        process_definition.executable['module'] = 'ion.processes.data.transforms.ctd.ctd_L2_density'
        process_definition.executable['class'] = 'DensityTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point
        config.process.interval = 1.0

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition('dens_stream_def', parameter_dictionary_id=pdict_id)
        dens_stream_id, _ = self.pubsub.create_stream('test_density',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')

        config.process.publish_streams.density = dens_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the density granule from the ctd transform
        #---------------------------------------------------------------------------------------------
        ar_dens = gevent.event.AsyncResult()
        def subscriber3(m, r, s):
            ar_dens.set(m)
        sub_dens = StandaloneStreamSubscriber('sub_dens', subscriber3)
        self.addCleanup(sub_dens.stop)

        sub_dens_id = self.pubsub.create_subscription('subscription_dens',
            stream_ids=[dens_stream_id],
            exchange_name='sub_dens')

        self.pubsub.activate_subscription(sub_dens_id)

        self.queue_cleanup.append(sub_dens.xn.queue)

        sub_dens.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can then be picked up by the ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result = ar_dens.get(timeout=10)
        self.assertIsInstance(result, Granule)

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertIn('density', rdt)

        self.check_density_algorithm_execution(publish_granule, result)
    def test_ctd_L2_salinity(self):
        '''
        Test that packets are processed by the ctd_L2_salinity transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='SalinityTransform',
            description='For testing SalinityTransform')
        process_definition.executable['module'] = 'ion.processes.data.transforms.ctd.ctd_L2_salinity'
        process_definition.executable['class'] = 'SalinityTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition('sal_stream_def', parameter_dictionary_id=pdict_id)
        sal_stream_id, _ = self.pubsub.create_stream('test_salinity',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')

        config.process.publish_streams.salinity = sal_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the salinity granule from the ctd transform
        #---------------------------------------------------------------------------------------------
        ar_sal = gevent.event.AsyncResult()
        def subscriber3(m, r, s):
            ar_sal.set(m)
        sub_sal = StandaloneStreamSubscriber('sub_sal', subscriber3)
        self.addCleanup(sub_sal.stop)

        sub_sal_id = self.pubsub.create_subscription('subscription_sal',
            stream_ids=[sal_stream_id],
            exchange_name='sub_sal')

        self.pubsub.activate_subscription(sub_sal_id)

        self.queue_cleanup.append(sub_sal.xn.queue)

        sub_sal.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can then be picked up by the ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result = ar_sal.get(timeout=10)
        self.assertIsInstance(result, Granule)

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertIn('salinity', rdt)

        self.check_salinity_algorithm_execution(publish_granule, result)

    def _get_new_ctd_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i + length)

        # Fill every numeric (QuantityType) parameter with random values in [0.0, 75.0)
        for field in rdt:
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)])

        g = rdt.to_granule()
        self.i += length

        return g
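    # NOTE: hedged aside, not in the original file: a quick round trip showing that the granules
    # built by _get_new_ctd_packet can be unpacked with the same RecordDictionaryTool API the
    # assertions use (hypothetical helper; length of 5 matches the tests above):
    def _example_granule_round_trip(self, stream_definition_id):
        granule = self._get_new_ctd_packet(stream_definition_id=stream_definition_id, length=5)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        # 'time' was filled with numpy.arange(self.i, self.i + 5), so it carries five values
        self.assertEqual(len(rdt['time']), 5)
        return rdt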
    def test_presf_L0_splitter(self):
        '''
        Test that packets are processed by the presf_L0_splitter transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='Presf L0 Splitter',
            description='For testing Presf L0 Splitter')
        process_definition.executable['module'] = 'ion.processes.data.transforms.ctd.presf_L0_splitter'
        process_definition.executable['class'] = 'PresfL0Splitter'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition('pres_stream_def', parameter_dictionary_id=pdict_id)
        pres_stream_id, _ = self.pubsub.create_stream('test_pressure',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')

        config.process.publish_streams.absolute_pressure = pres_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)

        # The subscribe/publish/assert half of this test has not been enabled yet:
#        #---------------------------------------------------------------------------------------------
#        # Create subscribers that will receive the pressure granules from
#        # the ctd transform
#        #---------------------------------------------------------------------------------------------
#
#        ar_pres = gevent.event.AsyncResult()
#        def subscriber3(m, r, s):
#            ar_pres.set(m)
#        sub_pres = StandaloneStreamSubscriber('sub_pres', subscriber3)
#        self.addCleanup(sub_pres.stop)
#
#        sub_pres_id = self.pubsub.create_subscription('subscription_pres',
#            stream_ids=[pres_stream_id],
#            exchange_name='sub_pres')
#
#        self.pubsub.activate_subscription(sub_pres_id)
#
#        self.queue_cleanup.append(sub_pres.xn.queue)
#
#        sub_pres.start()
#
#        #------------------------------------------------------------------------------------------------------
#        # Use a StandaloneStreamPublisher to publish a packet that can then be picked up by the ctd transform
#        #------------------------------------------------------------------------------------------------------
#
#        # Do all the routing stuff for the publishing
#        routing_key = 'stream_id.stream'
#        stream_route = StreamRoute(self.exchange_point, routing_key)
#
#        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
#        xp = self.container.ex_manager.create_xp(self.exchange_point)
#        xn.bind('stream_id.stream', xp)
#
#        pub = StandaloneStreamPublisher('stream_id', stream_route)
#
#        # Build a packet that can be published
#        self.px_ctd = SimpleCtdPublisher()
#        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length=5)
#
#        # Publish the packet
#        pub.publish(publish_granule)
#
#        #------------------------------------------------------------------------------------------------------
#        # Make assertions about whether the ctd transform executed its algorithm and published the correct
#        # granules
#        #------------------------------------------------------------------------------------------------------
#
#        # Get the granule that is published by the ctd transform post processing
#        result = ar_pres.get(timeout=10)
#        self.assertTrue(isinstance(result, Granule))
#
#        rdt = RecordDictionaryTool.load_from_granule(result)
#        self.assertTrue(rdt.__contains__('pressure'))
#
#        self.check_pres_algorithm_execution(publish_granule, result)

    def test_presf_L1(self):
        '''
        Test that packets are processed by the presf_L1 transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='PresfL1Transform',
            description='For testing PresfL1Transform')
        process_definition.executable['module'] = 'ion.processes.data.transforms.ctd.presf_L1'
        process_definition.executable['class'] = 'PresfL1Transform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition('pres_stream_def', parameter_dictionary_id=pdict_id)
        pres_stream_id, _ = self.pubsub.create_stream('test_pressure',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')

        config.process.publish_streams.seafloor_pressure = pres_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)
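    # NOTE: hedged refactoring sketch, not in the original file. Every transform test above
    # launches its process the same way; a helper like this (hypothetical name) would reduce
    # the boilerplate to a single call per test, e.g.
    # self._launch_transform('DensityTransform', 'ion.processes.data.transforms.ctd.ctd_L2_density', 'DensityTransform', config):
    def _launch_transform(self, name, module, cls, config):
        process_definition = ProcessDefinition(name=name, description='For testing %s' % cls)
        process_definition.executable['module'] = module
        process_definition.executable['class'] = cls
        proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)
        self.process_dispatcher.schedule_process(process_definition_id=proc_def_id, configuration=config)
        return proc_def_id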
    def test_blog_ingestion_replay(self):

        #-----------------------------------------------------------------------------------------------------
        # Do this statement just once in your script
        #-----------------------------------------------------------------------------------------------------
        cc = self.container

        #-------------------------------------------------------------------------------------------------------
        # Make a registrar object - this is work usually done for you by the container in a transform
        # or data stream process
        #-------------------------------------------------------------------------------------------------------
        subscriber_registrar = StreamSubscriberRegistrar(process=cc, node=cc.node)

        #-----------------------------------------------------------------------------------------------------
        # Service clients
        #-----------------------------------------------------------------------------------------------------
        ingestion_cli = IngestionManagementServiceClient(node=cc.node)
        dr_cli = DataRetrieverServiceClient(node=cc.node)
        dsm_cli = DatasetManagementServiceClient(node=cc.node)
        pubsub_cli = PubsubManagementServiceClient(node=cc.node)

        #-------------------------------------------------------------------------------------------------------
        # Create and activate ingestion configuration
        #-------------------------------------------------------------------------------------------------------
        ingestion_configuration_id = ingestion_cli.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name='dm_datastore', datastore_profile='EXAMPLES'),
            hdf_storage=HdfStorage(),
            number_of_workers=6,
        )
        # This activates the transforms, so the bindings will be created in this step
        ingestion_cli.activate_ingestion_configuration(ingestion_configuration_id)

        #------------------------------------------------------------------------------------------------------
        # Create a subscriber to listen to the messages published to the ingestion
        #------------------------------------------------------------------------------------------------------
        # Define the query we want
        query = ExchangeQuery()

        # Create the stateful listener to hold the captured data for comparison with replay
        captured_input = BlogListener()

        # Make a subscription to the input stream to ingestion. It is not required, or even
        # generally a good idea, to use the subscription resource name as the queue name, but
        # it keeps things simple here.
        subscription_id = pubsub_cli.create_subscription(query=query,
            exchange_name='input_capture_queue',
            name='input_capture_queue')

        # Normally the container creates and starts subscribers for you when a transform process is spawned
        subscriber = subscriber_registrar.create_subscriber(exchange_name='input_capture_queue',
            callback=captured_input.blog_store)
        subscriber.start()

        captured_input.subscriber = subscriber

        pubsub_cli.activate_subscription(subscription_id)

        #-------------------------------------------------------------------------------------------------------
        # Launch the blog scrapers
        #-------------------------------------------------------------------------------------------------------
        blogs = [
            'saintsandspinners',
            'strobist',
            'voodoofunk'
        ]

        log.debug('before spawning blog scraper')

        for blog in blogs:
            config = {'process': {'type': 'stream_process', 'blog': blog}}
            cc.spawn_process(name=blog,
                module='ion.services.dm.ingestion.example.blog_scraper',
                cls='FeedStreamer',
                config=config)

        # Wait ten seconds for some data to come in...
        log.warn('Sleeping for 10 seconds to wait for some input')
        time.sleep(10)

        #------------------------------------------------------------------------------------------------------
        # For 3 posts captured, make 3 replays and verify we get back what came in
        #------------------------------------------------------------------------------------------------------

        # The cute list-comprehension method does not give enough control:
        #self.assertTrue(len(captured_input.blogs) > 3)
        #post_ids = [id for idx, id in enumerate(captured_input.blogs.iterkeys()) if idx < 3]

        post_ids = []

        # Use items(), not iteritems(): items() returns a copy of fixed length, so the loop is
        # safe even if the subscriber callback adds blogs while we iterate.
        for post_id, blog in captured_input.blogs.items():
            log.info('Captured Input: %s' % post_id)
            if len(blog.get('comments', [])) > 2:
                post_ids.append(post_id)

            if len(post_ids) > 3:
                break

        ###=======================================================
        ### This section is not scriptable
        ###=======================================================

        if len(post_ids) < 3:
            self.fail('Not enough comments returned by the blog scrapers in 10 seconds')

        if len(captured_input.blogs) < 1:
            self.fail('No data returned in ten seconds by the blog scrapers!')

        ###=======================================================
        ### End non-scriptable
        ###=======================================================

        #------------------------------------------------------------------------------------------------------
        # Create subscribers to listen to the replays
        #------------------------------------------------------------------------------------------------------

        captured_replays = {}

        for idx, post_id in enumerate(post_ids):
            # Create the stateful listener to hold the captured data for comparison with replay
            dataset_id = dsm_cli.create_dataset(
                stream_id=post_id,
                datastore_name='dm_datastore',
                view_name='posts/posts_join_comments')

            replay_id, stream_id = dr_cli.define_replay(dataset_id)

            query = StreamQuery(stream_ids=[stream_id])

            captured_replay = BlogListener()

            #------------------------------------------------------------------------------------------------------
            # Create a subscriber to listen to the messages published by the replay
            #------------------------------------------------------------------------------------------------------

            # Again, the subscription resource name doubles as the queue name just to keep things simple
            subscription_name = 'replay_capture_queue_%d' % idx
            subscription_id = pubsub_cli.create_subscription(query=query,
                exchange_name=subscription_name,
                name=subscription_name)

            # Normally the container creates and starts subscribers for you when a transform process is spawned
            subscriber = subscriber_registrar.create_subscriber(exchange_name=subscription_name,
                callback=captured_replay.blog_store)
            subscriber.start()

            captured_replay.subscriber = subscriber

            pubsub_cli.activate_subscription(subscription_id)

            #------------------------------------------------------------------------------------------------------
            # Start the replay and listen to the results!
            #------------------------------------------------------------------------------------------------------
            dr_cli.start_replay(replay_id)

            captured_replays[post_id] = captured_replay

        ###=======================================================
        ### The rest is not scriptable
        ###=======================================================

        # Wait five seconds for some data to come out...
        log.warn('Sleeping for 5 seconds to wait for some output')
        time.sleep(5)

        matched_comments = {}
        for post_id, captured_replay in captured_replays.iteritems():

            # There should be only one blog in here!
            self.assertEqual(len(captured_replay.blogs), 1)

            replayed_blog = captured_replay.blogs[post_id]
            input_blog = captured_input.blogs[post_id]

            self.assertEqual(replayed_blog['post'].content, input_blog['post'].content)

            # We can't deterministically assert that the number of comments is the same,
            # so count how many replayed comments match the captured input instead.
            matched_comments[post_id] = 0

            for updated, comment in replayed_blog.get('comments', {}).iteritems():
                self.assertIn(updated, input_blog['comments'])
                matched_comments[post_id] += 1

        # Assert that we got some comments back!
        self.assertTrue(sum(matched_comments.values()) > 0)

        log.info('Matched comments on the following blogs: %s' % matched_comments)
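    # NOTE: hedged aside, not part of the original test. The fixed time.sleep() waits above are
    # race-prone: they fail on slow machines and waste time on fast ones. A bounded poll like
    # this hypothetical helper is usually more robust, e.g.
    # self.assertTrue(self._wait_until(lambda: len(captured_input.blogs) >= 1)):
    def _wait_until(self, predicate, timeout=10.0, interval=0.5):
        # Re-check the predicate until it holds or the deadline passes
        deadline = time.time() + timeout
        while time.time() < deadline:
            if predicate():
                return True
            time.sleep(interval)
        return False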