def _trigger_func(self, stream_id):

    point_def = ctd_stream_definition(stream_id=stream_id)
    point_constructor = PointSupplementConstructor(point_definition=point_def)

    while True:
        length = 1

        # Simulated CTD values plus position and monotonically increasing time
        c = [random.uniform(0.0, 75.0) for i in xrange(length)]
        t = [random.uniform(-1.7, 21.0) for i in xrange(length)]
        p = [random.lognormvariate(1, 2) for i in xrange(length)]
        lat = [random.uniform(-90.0, 90.0) for i in xrange(length)]
        lon = [random.uniform(0.0, 360.0) for i in xrange(length)]
        tvar = [self.last_time + i for i in xrange(1, length + 1)]
        self.last_time = max(tvar)

        point_id = point_constructor.add_point(time=tvar, location=(lon[0], lat[0]))
        point_constructor.add_point_coverage(point_id=point_id, coverage_id='temperature', values=t)
        point_constructor.add_point_coverage(point_id=point_id, coverage_id='pressure', values=p)
        point_constructor.add_point_coverage(point_id=point_id, coverage_id='conductivity', values=c)

        ctd_packet = point_constructor.get_stream_granule()

        log.warn('SimpleCtdPublisher sending %d values!' % length)
        self.publisher.publish(ctd_packet)

        time.sleep(2.0)
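# --- Illustrative sketch (not from the original source) ---
# _trigger_func above loops forever and can only be killed from outside.
# A minimal sketch of a cooperatively stoppable publish loop using a gevent
# Event; the class and attribute names here are hypothetical.
from gevent.event import Event

class StoppablePublishLoop(object):
    def __init__(self, publish_one, interval=2.0):
        self.publish_one = publish_one   # callable that builds and sends one packet
        self.interval = interval
        self.finished = Event()          # call self.finished.set() to stop

    def run(self):
        while not self.finished.is_set():
            self.publish_one()
            # wait() doubles as the inter-packet sleep, so a stop request
            # interrupts it promptly instead of blocking in time.sleep()
            self.finished.wait(timeout=self.interval)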
def define_replay(self, dataset_id='', query=None, delivery_format=None):
    '''
    Define the stream that will carry the data from the datastore,
    streaming it to an exchange name.
    '''
    if not dataset_id:
        raise BadRequest('(Data Retriever Service %s): No dataset provided.' % self.name)

    # Get the datastore name, view and key from the dataset object.
    dataset = self.clients.dataset_management.read_dataset(dataset_id=dataset_id)
    datastore_name = dataset.datastore_name
    delivery_format = delivery_format or {}
    view_name = dataset.view_name
    key_id = dataset.primary_view_key

    # Make a new definition container
    definition_container = ctd_stream_definition()

    # Tell pubsub about the definition we want to use and set up the
    # association so clients can figure out what belongs on the stream.
    definition_id = self.clients.pubsub_management.create_stream_definition(
        container=definition_container)

    # Make a stream
    replay_stream_id = self.clients.pubsub_management.create_stream(
        stream_definition_id=definition_id)

    replay = Replay()
    replay.delivery_format = delivery_format

    #-----------------------------
    #@todo: Add in CEI integration
    #-----------------------------
    replay.process_id = 0

    replay_id, rev = self.clients.resource_registry.create(replay)
    replay._id = replay_id
    replay._rev = rev

    config = {
        'process': {
            'query': query,
            'datastore_name': datastore_name,
            'view_name': view_name,
            'key_id': key_id,
            'delivery_format': dict({'container': definition_container}, **delivery_format),
            'publish_streams': {'output': replay_stream_id}
        }
    }

    pid = self.clients.process_dispatcher.schedule_process(
        process_definition_id=self.process_definition_id,
        configuration=config
    )

    replay.process_id = pid
    self.clients.resource_registry.update(replay)
    self.clients.resource_registry.create_association(replay_id, PRED.hasStream, replay_stream_id)

    return (replay_id, replay_stream_id)
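# --- Usage sketch (illustrative, not from the original source) ---
# How a client is expected to drive define_replay/start_replay, mirroring the
# integration tests further down. `container_node` and `dataset_id` stand in
# for values a real caller would already have.
def replay_dataset(container_node, dataset_id):
    data_retriever = DataRetrieverServiceClient(node=container_node)
    replay_id, replay_stream_id = data_retriever.define_replay(dataset_id=dataset_id)
    # A subscriber should be attached to replay_stream_id before starting,
    # otherwise the replayed granules have no consumer.
    data_retriever.start_replay(replay_id=replay_id)
    return replay_id, replay_stream_id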
def on_start(self):
    '''
    Creates a publisher for each stream_id passed in via publish_streams,
    and an attribute named after each stream which holds that stream's
    publisher. For example, given publish_streams: {'output': my_output_stream_id},
    the instance gets an attribute `output` which is the publisher for the
    stream my_output_stream_id.
    '''
    # Get the stream(s)
    stream_id = self.CFG.get('process', {}).get('stream_id', '')

    self.greenlet_queue = []

    # Stream creation is done in SA, but to make the example work for
    # demonstration, create one here if it is not provided...
    if not stream_id:
        ctd_def = ctd_stream_definition(stream_id=stream_id)
        pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
        stream_id = pubsub_cli.create_stream(
            name='Example CTD Data',
            stream_definition=ctd_def,
            original=True,
            encoding='ION R2')

    self.stream_publisher_registrar = StreamPublisherRegistrar(process=self, node=self.container.node)

    # Needed to get the originator's stream_id
    self.stream_id = stream_id

    self.publisher = self.stream_publisher_registrar.create_publisher(stream_id=stream_id)

    self.last_time = 0

    g = Greenlet(self._trigger_func, stream_id)
    log.debug('Starting publisher thread for simple ctd data.')
    g.start()
    self.greenlet_queue.append(g)
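# --- Illustrative sketch (assumed, not from the original source) ---
# on_start queues the publisher greenlet in self.greenlet_queue, but this
# excerpt shows no teardown. One plausible companion, using pyon's on_quit
# lifecycle hook and gevent's kill() API:
def on_quit(self):
    for g in self.greenlet_queue:
        g.kill()  # stop the infinite publish loop in _trigger_func
    self.greenlet_queue = []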
def test_dm_integration(self):
    '''
    test_dm_integration
    Test full DM Services Integration
    '''
    cc = self.container
    assertions = self.assertTrue

    #-----------------------------
    # Copy below here
    #-----------------------------
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)
    transform_management_service = TransformManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    process_list = []
    datasets = []

    datastore_name = 'test_dm_integration'

    #---------------------------
    # Set up ingestion
    #---------------------------
    # Configure ingestion using eight workers, ingesting to the
    # test_dm_integration datastore with the SCIDATA profile.
    log.debug('Calling create_ingestion_configuration')
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        number_of_workers=8
    )
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    ctd_stream_def = ctd_stream_definition()

    stream_def_id = pubsub_management_service.create_stream_definition(
        container=ctd_stream_def, name='Junk definition')

    #---------------------------
    # Set up the producers (CTD Simulators)
    #---------------------------
    # Launch five simulated CTD producers
    for iteration in xrange(5):
        # Make a stream to output on
        stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)

        #---------------------------
        # Set up the datasets
        #---------------------------
        dataset_id = dataset_management_service.create_dataset(
            stream_id=stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule'
        )

        # Keep track of the datasets
        datasets.append(dataset_id)

        stream_policy_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id
        )

        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.processes.data.ctd_stream_publisher',
            'class': 'SimpleCtdPublisher'
        }

        configuration = {
            'process': {
                'stream_id': stream_id,
                'datastore_name': datastore_name
            }
        }

        procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)
        log.debug('LUKE_DEBUG: procdef_id: %s', procdef_id)
        pid = process_dispatcher.schedule_process(process_definition_id=procdef_id,
                                                  configuration=configuration)

        # Keep track; we'll kill 'em later.
        process_list.append(pid)

    # Get about 4 seconds of data
    time.sleep(4)

    #---------------------------
    # Stop producing data
    #---------------------------
    for process in process_list:
        process_dispatcher.cancel_process(process)

    #----------------------------------------------
    # The replay and the transform, a love story.
    #----------------------------------------------
    # Happy Valentines to the clever coder who catches the above!
    transform_definition = ProcessDefinition()
    transform_definition.executable = {
        'module': 'ion.processes.data.transforms.transform_example',
        'class': 'TransformCapture'
    }
    transform_definition_id = process_dispatcher.create_process_definition(
        process_definition=transform_definition)

    dataset_id = datasets.pop()  # Just need one for now
    replay_id, stream_id = data_retriever_service.define_replay(dataset_id=dataset_id)

    #--------------------------------------------
    # I'm selling magazine subscriptions here!
    #--------------------------------------------
    subscription = pubsub_management_service.create_subscription(
        query=StreamQuery(stream_ids=[stream_id]),
        exchange_name='transform_capture_point')

    #--------------------------------------------
    # Start the transform (capture)
    #--------------------------------------------
    transform_id = transform_management_service.create_transform(
        name='capture_transform',
        in_subscription_id=subscription,
        process_definition_id=transform_definition_id
    )

    transform_management_service.activate_transform(transform_id=transform_id)

    #--------------------------------------------
    # BEGIN REPLAY!
    #--------------------------------------------
    data_retriever_service.start_replay(replay_id=replay_id)

    #--------------------------------------------
    # Let's get some boundaries
    #--------------------------------------------
    bounds = dataset_management_service.get_dataset_bounds(dataset_id=dataset_id)

    assertions('latitude_bounds' in bounds, 'dataset_id: %s' % dataset_id)
    assertions('longitude_bounds' in bounds)
    assertions('pressure_bounds' in bounds)

    #--------------------------------------------
    # Make sure the transform capture worked
    #--------------------------------------------
    time.sleep(3)  # Give the other processes up to 3 seconds to catch up
    stats = os.stat(FileSystem.get_url(FS.TEMP, 'transform_output'))
    assertions(stats.st_blksize > 0)

    # BEAUTIFUL!
    FileSystem.unlink(FileSystem.get_url(FS.TEMP, 'transform_output'))
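# --- Illustrative sketch (hypothetical helper, not from the original source) ---
# The fixed time.sleep(3) above can race on slow machines. A hedged alternative
# is to poll for the capture file and fail only after a real timeout; the
# helper name, timeout, and poll interval here are arbitrary.
def _wait_for_nonempty_file(path, timeout=10.0, poll=0.5):
    deadline = time.time() + timeout
    while time.time() < deadline:
        if os.path.exists(path) and os.stat(path).st_size > 0:
            return True
        time.sleep(poll)
    return False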
def test_replay_integration(self):
    '''
    Test full DM Services Integration
    '''
    cc = self.container

    ### Everything below here can be run as a script:

    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)
    resource_registry_service = ResourceRegistryServiceClient(node=cc.node)

    #------------------------------------------------------------------------------------------------------
    # Datastore name
    #------------------------------------------------------------------------------------------------------
    datastore_name = 'test_replay_integration'

    #------------------------------------------------------------------------------------------------------
    # Spawn process
    #------------------------------------------------------------------------------------------------------
    pid = cc.spawn_process(name='dummy_process_for_test',
                           module='pyon.ion.process',
                           cls='SimpleProcess',
                           config={})
    dummy_process = cc.proc_manager.procs[pid]

    #------------------------------------------------------------------------------------------------------
    # Set up subscriber
    #------------------------------------------------------------------------------------------------------
    # Normally the user does not see or create the publisher; that is the
    # container's business. For the test we need to set it up explicitly.
    publisher_registrar = StreamPublisherRegistrar(process=dummy_process, node=cc.node)
    subscriber_registrar = StreamSubscriberRegistrar(process=cc, node=cc.node)

    #------------------------------------------------------------------------------------------------------
    # Set up ingestion
    #------------------------------------------------------------------------------------------------------
    # Configure ingestion using one worker, ingesting to the
    # test_replay_integration datastore with the SCIDATA profile.
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        hdf_storage=HdfStorage(),
        number_of_workers=1,
    )

    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    #------------------------------------------------------------------------------------------------------
    # Grab the transforms acting as ingestion workers
    #------------------------------------------------------------------------------------------------------
    transforms = [resource_registry_service.read(assoc.o)
                  for assoc in resource_registry_service.find_associations(ingestion_configuration_id,
                                                                           PRED.hasTransform)]

    proc_1 = cc.proc_manager.procs[transforms[0].process_id]
    log.info("PROCESS 1: %s" % str(proc_1))

    #------------------------------------------------------------------------------------------------------
    # Set up the test hook for the gevent AsyncResult object
    #------------------------------------------------------------------------------------------------------
    # `ar` is an AsyncResult defined outside this excerpt (see the scaffolding
    # sketch after this test).
    def ingestion_worker_received(message, headers):
        ar.set(message)

    proc_1.ingest_process_test_hook = ingestion_worker_received

    #------------------------------------------------------------------------------------------------------
    # Set up the producers (CTD Simulators)
    #------------------------------------------------------------------------------------------------------
    ctd_stream_def = ctd_stream_definition()

    stream_def_id = pubsub_management_service.create_stream_definition(
        container=ctd_stream_def, name='Junk definition')

    stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)

    #------------------------------------------------------------------------------------------------------
    # Set up the dataset config
    #------------------------------------------------------------------------------------------------------
    dataset_id = dataset_management_service.create_dataset(
        stream_id=stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule'
    )

    dataset_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id
    )

    #------------------------------------------------------------------------------------------------------
    # Launch a ctd_publisher
    #------------------------------------------------------------------------------------------------------
    publisher = publisher_registrar.create_publisher(stream_id=stream_id)

    #------------------------------------------------------------------------
    # Create a packet and publish it
    #------------------------------------------------------------------------
    ctd_packet = _create_packet(stream_id)
    published_hdfstring = ctd_packet.identifiables['ctd_data'].values

    publisher.publish(ctd_packet)

    #------------------------------------------------------------------------------------------------------
    # Catch what the ingestion worker gets! Assert it is the same packet that was published!
    #------------------------------------------------------------------------------------------------------
    packet = ar.get(timeout=2)

    #------------------------------------------------------------------------------------------------------
    # Create subscriber to listen to the replays
    #------------------------------------------------------------------------------------------------------
    replay_id, replay_stream_id = data_retriever_service.define_replay(dataset_id)

    query = StreamQuery(stream_ids=[replay_stream_id])

    subscription_id = pubsub_management_service.create_subscription(
        query=query,
        exchange_name='replay_capture_point',
        name='replay_capture_point')

    # It is not required, or even generally a good idea, to use the subscription
    # resource name as the queue name, but it makes things simple here.
    # Normally the container creates and starts subscribers for you when a
    # transform process is spawned.
    subscriber = subscriber_registrar.create_subscriber(exchange_name='replay_capture_point',
                                                        callback=_subscriber_call_back)
    subscriber.start()

    pubsub_management_service.activate_subscription(subscription_id)

    #------------------------------------------------------------------------------------------------------
    # Start the replay
    #------------------------------------------------------------------------------------------------------
    data_retriever_service.start_replay(replay_id)

    #------------------------------------------------------------------------------------------------------
    # Get the hdf string from the captured stream in the replay
    #------------------------------------------------------------------------------------------------------
    retrieved_hdf_string = ar2.get(timeout=2)

    ### Non-scriptable portion of the test
    #------------------------------------------------------------------------------------------------------
    # Assert that it matches the message we sent
    #------------------------------------------------------------------------------------------------------
    self.assertEquals(packet.identifiables['stream_encoding'].sha1,
                      ctd_packet.identifiables['stream_encoding'].sha1)

    self.assertEquals(retrieved_hdf_string, published_hdfstring)
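# --- Scaffolding sketch (assumed, not shown in this excerpt) ---
# test_replay_integration references module-level names that are defined
# elsewhere in the original file: two gevent AsyncResults (`ar`, `ar2`), the
# replay subscriber callback, and _create_packet. The AsyncResult/callback
# portion plausibly looks like this; _create_packet is omitted because its
# packet-building details are not recoverable from this excerpt.
from gevent.event import AsyncResult

ar = AsyncResult()    # set by the ingestion worker test hook
ar2 = AsyncResult()   # set by the replay subscriber

def _subscriber_call_back(message, headers):
    # Capture the replayed granule so the test body can assert on it.
    ar2.set(message)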
def setUp(self):
    """
    Set up the test environment to exercise use of the instrument agent, including:
    * define driver_config parameters.
    * create container with required services and container client.
    * create publication stream ids for each driver data stream.
    * create stream_config parameters.
    * create and activate subscriptions for agent data streams.
    * spawn instrument agent process and create agent client.
    * add cleanup functions to cause subscribers to get stopped.
    """

    # Names of agent data streams to be configured.
    parsed_stream_name = 'ctd_parsed'
    raw_stream_name = 'ctd_raw'

    # Driver configuration.
    self.driver_config = {
        'svr_addr': 'localhost',
        'cmd_port': 5556,
        'evt_port': 5557,
        'dvr_mod': 'ion.services.mi.drivers.sbe37_driver',
        'dvr_cls': 'SBE37Driver',
        'comms_config': {
            SBE37Channel.CTD: {
                'method': 'ethernet',
                'device_addr': '137.110.112.119',
                'device_port': 4001,
                'server_addr': 'localhost',
                'server_port': 8888
            }
        },
        'packet_config': {
            parsed_stream_name: ('prototype.sci_data.ctd_stream', 'ctd_stream_packet'),
            raw_stream_name: None
        }
    }

    # Start container.
    self._start_container()

    # Establish endpoint with container.
    self._container_client = ContainerAgentClient(node=self.container.node,
                                                  name=self.container.name)

    # Bring up services in a deploy file.
    self._container_client.start_rel_from_url('res/deploy/r2dm.yml')

    # Create a pubsub client to create streams.
    self._pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    # Create parsed stream. The stream name must match one
    # used by the driver to label packet data.
    parsed_stream_def = ctd_stream_definition(stream_id=None)
    parsed_stream_def_id = self._pubsub_client.create_stream_definition(
        container=parsed_stream_def)
    parsed_stream_id = self._pubsub_client.create_stream(
        name=parsed_stream_name,
        stream_definition_id=parsed_stream_def_id,
        original=True,
        encoding='ION R2')

    # Create raw stream. The stream name must match one used by the
    # driver to label packet data. This stream does not yet have a
    # packet definition so will not be published.
    raw_stream_def = ctd_stream_definition(stream_id=None)
    raw_stream_def_id = self._pubsub_client.create_stream_definition(
        container=raw_stream_def)
    raw_stream_id = self._pubsub_client.create_stream(
        name=raw_stream_name,
        stream_definition_id=raw_stream_def_id,
        original=True,
        encoding='ION R2')

    # Define stream configuration.
    self.stream_config = {
        parsed_stream_name: parsed_stream_id,
        raw_stream_name: raw_stream_id
    }

    # A callback for processing subscribed-to data.
    def consume(message, headers):
        log.info('Subscriber received message: %s', str(message))

    # Create a stream subscriber registrar to create subscribers.
    subscriber_registrar = StreamSubscriberRegistrar(process=self.container,
                                                     node=self.container.node)

    # Create and activate parsed data subscription.
    parsed_sub = subscriber_registrar.create_subscriber(exchange_name='parsed_queue',
                                                        callback=consume)
    parsed_sub.start()
    parsed_query = StreamQuery(stream_ids=[parsed_stream_id])
    parsed_sub_id = self._pubsub_client.create_subscription(
        query=parsed_query, exchange_name='parsed_queue')
    self._pubsub_client.activate_subscription(parsed_sub_id)

    # Create and activate raw data subscription.
    raw_sub = subscriber_registrar.create_subscriber(exchange_name='raw_queue',
                                                     callback=consume)
    raw_sub.start()
    raw_query = StreamQuery(stream_ids=[raw_stream_id])
    raw_sub_id = self._pubsub_client.create_subscription(
        query=raw_query, exchange_name='raw_queue')
    self._pubsub_client.activate_subscription(raw_sub_id)

    # Create agent config.
    self.agent_config = {
        'driver_config': self.driver_config,
        'stream_config': self.stream_config
    }

    # Launch an instrument agent process.
    self._ia_name = 'agent007'
    self._ia_mod = 'ion.services.mi.instrument_agent'
    self._ia_class = 'InstrumentAgent'
    self._ia_pid = self._container_client.spawn_process(name=self._ia_name,
                                                        module=self._ia_mod,
                                                        cls=self._ia_class,
                                                        config=self.agent_config)
    log.info('got pid=%s', str(self._ia_pid))

    # Start a resource agent client to talk with the instrument agent.
    self._ia_client = ResourceAgentClient('123xyz', name=self._ia_pid,
                                          process=FakeProcess())
    log.info('got ia client %s', str(self._ia_client))

    # Add cleanup function to stop subscribers.
    def stop_subscriber(sub_list):
        for sub in sub_list:
            sub.stop()

    self.addCleanup(stop_subscriber, [parsed_sub, raw_sub])
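# --- Illustrative sketch (assumed, not from the original source) ---
# The cleanup above stops the subscriber endpoints but leaves the subscriptions
# created in setUp active. A hedged companion cleanup using the pubsub client
# calls already present in this file:
def deactivate_subscriptions(pubsub_client, sub_ids):
    for sub_id in sub_ids:
        pubsub_client.deactivate_subscription(sub_id)

# e.g. self.addCleanup(deactivate_subscriptions, self._pubsub_client,
#                      [parsed_sub_id, raw_sub_id])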