Example #1
class DatasetManagementIntTest(IonIntegrationTestCase):
    def setUp(self):
        import couchdb
        super(DatasetManagementIntTest,self).setUp()
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2dm.yml')


        self.db = self.container.datastore_manager.get_datastore('scidata', DataStore.DS_PROFILE.SCIDATA)
        self.db_raw = self.db.server

        self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node)
        self.ingestion_client = IngestionManagementServiceClient(node=self.container.node)

    def _random_data(self, entropy):
        random_pressures = [(random.random()*100) for i in xrange(entropy)]
        random_salinity = [(random.random()*28) for i in xrange(entropy)]
        random_temperature = [(random.random()*10)+32 for i in xrange(entropy)]
        random_times = [random.randrange(1328205227, 1328896395) for i in xrange(entropy)]
        random_lat = [(random.random()*10)+30 for i in xrange(entropy)]
        random_lon = [(random.random()*10)+70 for i in xrange(entropy)]
        return [random_pressures, random_salinity, random_temperature, random_times, random_lat, random_lon]

    def _generate_point(self, entropy=5):
        random_values = self._random_data(entropy)
        point = ctd_stream_packet(stream_id='test_data',
            p=random_values[0], c=random_values[1], t=random_values[2],
            time=random_values[3], lat=random_values[4], lon=random_values[5],
            create_hdf=False)
        return point


    def test_get_dataset_bounds(self):
        for i in xrange(3):
            point = self._generate_point()
            self.db.create(point)

        dataset_id = self.dataset_management_client.create_dataset(stream_id='test_data', datastore_name='scidata')


        bounds = self.dataset_management_client.get_dataset_bounds(dataset_id=dataset_id)

        self.assertTrue(bounds['latitude_bounds'][0] > 30.0)
        self.assertTrue(bounds['latitude_bounds'][1] < 40.0)
        self.assertTrue(bounds['longitude_bounds'][0] > 70.0)
        self.assertTrue(bounds['longitude_bounds'][1] < 80.0)

        self.dataset_management_client.delete_dataset(dataset_id)
        
    @unittest.skip('not ready yet')
    def test_dataset_ingestion(self):
        couch_storage = { 'server':'localhost', 'database':'scidata'}
        ingestion_configuration_id = self.ingestion_client.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=couch_storage,
            hdf_storage={},
            number_of_workers=4,
            default_policy={})
class ScienceGranuleIngestionIntTest(IonIntegrationTestCase):
    def setUp(self):
        self.datastore_name = 'datasets'
        self.exchange_point = 'science_data'
        self.exchange_space = 'science_granule_ingestion'
        self.queue_name     = self.exchange_space
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2dm.yml')

        self.ingestion_management = IngestionManagementServiceClient()
        self.pubsub               = PubsubManagementServiceClient()


    def build_granule(self):
        return ScienceGranuleIngestionWorkerUnitTest.build_granule()

    def launch_worker(self):
        cfg = DotDict()

        cfg.process.datastore_name = self.datastore_name
        cfg.process.queue_name = self.queue_name

        #@todo: replace with CEI friendly calls

        pid = self.container.spawn_process('ingest_worker', 'ion.processes.data.ingestion.science_granule_ingestion_worker','ScienceGranuleIngestionWorker',cfg)
        return pid

    def create_ingestion_config(self):
        ingest_queue = IngestionQueue(name=self.exchange_space, type='science_granule')
        config_id = self.ingestion_management.create_ingestion_configuration(name='standard_ingest', exchange_point_id=self.exchange_point, queues=[ingest_queue])
        return config_id
            
    def create_stream(self):
        stream_id = self.pubsub.create_stream()
        return stream_id

    def poll(self, evaluation_callback, *args, **kwargs):
        # Poll the callback until it reports success; time out after five seconds.
        cutoff = time.time() + 5
        done = False
        while not done:
            if evaluation_callback(*args, **kwargs):
                done = True
            elif time.time() >= cutoff:
                raise Timeout('No results found within the allotted time')
        return True
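
A minimal usage sketch for the `poll` helper above; `poll` forwards its extra arguments to the callback. The `count_ingested_granules` helper is hypothetical:

def enough_granules(minimum):
    # Hypothetical helper: true once at least `minimum` granules have arrived.
    return count_ingested_granules() >= minimum

self.poll(enough_granules, 2)  # blocks until True, or raises Timeout after ~5 seconds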
Example #3
    def on_start(self):
        super(IngestionLauncher, self).on_start()
        exchange_point = self.CFG.get("process", {}).get("exchange_point", "science_data")
        couch_storage = self.CFG.get("process", {}).get("couch_storage", {})
        couch_storage = CouchStorage(**couch_storage)
        hdf_storage = self.CFG.get("process", {}).get("hdf_storage", {})
        number_of_workers = self.CFG.get("process", {}).get("number_of_workers", 2)

        ingestion_management_service = IngestionManagementServiceClient(node=self.container.node)
        ingestion_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id=exchange_point,
            couch_storage=couch_storage,
            hdf_storage=hdf_storage,
            number_of_workers=number_of_workers,
            default_policy={},
        )
        ingestion_management_service.activate_ingestion_configuration(ingestion_id)
Example #4
    def on_start(self):
        super(IngestionLauncher,self).on_start()

        exchange_point = self.CFG.get_safe('ingestion.exchange_point','science_data')
        couch_opts = self.CFG.get_safe('ingestion.couch_storage',{})
        couch_storage = CouchStorage(**couch_opts)
        hdf_opts = self.CFG.get_safe('ingestion.hdf_storage',{})
        hdf_storage = HdfStorage(**hdf_opts)
        number_of_workers = self.CFG.get_safe('ingestion.number_of_workers',2)

        ingestion_management_service = IngestionManagementServiceClient(node=self.container.node)
        ingestion_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id=exchange_point,
            couch_storage=couch_storage,
            hdf_storage=hdf_storage,
            number_of_workers=number_of_workers
        )
        ingestion_management_service.activate_ingestion_configuration(ingestion_id)
Example #5
    def on_start(self):
        super(IngestionLauncher, self).on_start()

        exchange_point = self.CFG.get_safe('ingestion.exchange_point',
                                           'science_data')
        couch_opts = self.CFG.get_safe('ingestion.couch_storage', {})
        couch_storage = CouchStorage(**couch_opts)
        hdf_opts = self.CFG.get_safe('ingestion.hdf_storage', {})
        hdf_storage = HdfStorage(**hdf_opts)
        number_of_workers = self.CFG.get_safe('ingestion.number_of_workers', 2)

        ingestion_management_service = IngestionManagementServiceClient(
            node=self.container.node)
        ingestion_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id=exchange_point,
            couch_storage=couch_storage,
            hdf_storage=hdf_storage,
            number_of_workers=number_of_workers)
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_id)
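
Examples #3 to #5 are three drafts of the same launcher: #3 walks the nested 'process' section with chained `get` calls, while #4 and #5 switch to `CFG.get_safe` with dotted 'ingestion.*' paths and also wrap the HDF options in `HdfStorage`. A sketch of a configuration fragment the `get_safe` variants would consume; the key names mirror the lookups above, and the values are illustrative:

# Illustrative config fragment for the get_safe-based launchers above.
# Any key omitted here falls back to the defaults in on_start:
# 'science_data', {}, {}, and 2 workers respectively.
launcher_config = {
    'ingestion': {
        'exchange_point': 'science_data',
        'couch_storage': {'datastore_name': 'dm_datastore'},
        'hdf_storage': {},
        'number_of_workers': 4,
    }
}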
Example #6
    def test_blog_ingestion_replay(self):

        #-----------------------------------------------------------------------------------------------------
        # Do this statement just once in your script
        #-----------------------------------------------------------------------------------------------------
        cc = self.container

        #-------------------------------------------------------------------------------------------------------
        # Make a registrar object - this is work usually done for you by the container in a transform or data stream process
        #-------------------------------------------------------------------------------------------------------

        subscriber_registrar = StreamSubscriberRegistrar(process=cc, node=cc.node)

        #-----------------------------------------------------------------------------------------------------
        # Service clients
        #-----------------------------------------------------------------------------------------------------

        ingestion_cli = IngestionManagementServiceClient(node=cc.node)
        dr_cli = DataRetrieverServiceClient(node=cc.node)
        dsm_cli = DatasetManagementServiceClient(node=cc.node)
        pubsub_cli = PubsubManagementServiceClient(node=cc.node)

        #-------------------------------------------------------------------------------------------------------
        # Create and activate ingestion configuration
        #-------------------------------------------------------------------------------------------------------

        ingestion_configuration_id = ingestion_cli.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name='dm_datastore',datastore_profile='EXAMPLES'),
            hdf_storage=HdfStorage(),
            number_of_workers=6,
        )
        # activates the transforms... so bindings will be created in this step
        ingestion_cli.activate_ingestion_configuration(ingestion_configuration_id)

        #------------------------------------------------------------------------------------------------------
        # Create subscriber to listen to the messages published to the ingestion
        #------------------------------------------------------------------------------------------------------

        # Define the query we want
        query = ExchangeQuery()

        # Create the stateful listener to hold the captured data for comparison with replay
        captured_input = BlogListener()


        # Make a subscription to the input stream to ingestion
        subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='input_capture_queue', name='input_capture_queue')


        # It is not required or even generally a good idea to use the subscription resource name as the queue name, but it makes things simple here
        # Normally the container creates and starts subscribers for you when a transform process is spawned
        subscriber = subscriber_registrar.create_subscriber(exchange_name='input_capture_queue', callback=captured_input.blog_store)
        subscriber.start()

        captured_input.subscriber = subscriber

        pubsub_cli.activate_subscription(subscription_id)


        #-------------------------------------------------------------------------------------------------------
        # Launching blog scraper
        #-------------------------------------------------------------------------------------------------------

        blogs = [
            'saintsandspinners',
            'strobist',
            'voodoofunk'
        ]


        log.debug('before spawning blog scraper')

        for blog in blogs:
            config = {'process':{'type':'stream_process','blog':blog}}
            cc.spawn_process(name=blog,
                module='ion.services.dm.ingestion.example.blog_scraper',
                cls='FeedStreamer',
                config=config)

        # wait ten seconds for some data to come in...
        log.warn('Sleeping for 10 seconds to wait for some input')
        time.sleep(10)


        #------------------------------------------------------------------------------------------------------
        # For 3 posts captured, make 3 replays and verify we get back what came in
        #------------------------------------------------------------------------------------------------------


        # Cute list comprehension method does not give enough control
        #self.assertTrue(len(captured_input.blogs)>3)
        #post_ids = [id for idx, id in enumerate(captured_input.blogs.iterkeys()) if idx < 3]

        post_ids = []

        for post_id, blog in captured_input.blogs.items(): # use items(), not iteritems(): iterate a fixed-length copy while the subscriber may still be adding entries

            log.info('Captured Input: %s' % post_id)
            if len(blog.get('comments',[])) > 2:
                post_ids.append(post_id)

            if len(post_ids) >= 3:
                break

        ###=======================================================
        ### This section is not scriptable
        ###=======================================================


        if len(post_ids) < 3:
            self.fail('Not enough comments returned by the blog scrapers in ten seconds')

        if len(captured_input.blogs) < 1:
            self.fail('No data returned in ten seconds by the blog scrapers!')

        ###=======================================================
        ### End non-scriptable
        ###=======================================================


        #------------------------------------------------------------------------------------------------------
        # Create subscriber to listen to the replays
        #------------------------------------------------------------------------------------------------------

        captured_replays = {}

        for idx, post_id in enumerate(post_ids):
            # Create the stateful listener to hold the captured data for comparison with replay


            dataset_id = dsm_cli.create_dataset(
                stream_id=post_id,
                datastore_name='dm_datastore',
                view_name='posts/posts_join_comments')

            replay_id, stream_id = dr_cli.define_replay(dataset_id)


            query = StreamQuery(stream_ids=[stream_id])

            captured_replay = BlogListener()

            #------------------------------------------------------------------------------------------------------
            # Create subscriber to listen to the messages published to the ingestion
            #------------------------------------------------------------------------------------------------------

            # Make a subscription to the input stream to ingestion
            subscription_name = 'replay_capture_queue_%d' % idx
            subscription_id = pubsub_cli.create_subscription(query=query, exchange_name=subscription_name, name=subscription_name)


            # It is not required or even generally a good idea to use the subscription resource name as the queue name, but it makes things simple here
            # Normally the container creates and starts subscribers for you when a transform process is spawned
            subscriber = subscriber_registrar.create_subscriber(exchange_name=subscription_name, callback=captured_replay.blog_store)
            subscriber.start()

            captured_replay.subscriber = subscriber

            pubsub_cli.activate_subscription(subscription_id)

            #------------------------------------------------------------------------------------------------------
            # Start the replay and listen to the results!
            #------------------------------------------------------------------------------------------------------

            dr_cli.start_replay(replay_id)

            captured_replays[post_id] = captured_replay


        ###=======================================================
        ### The rest is not scriptable
        ###=======================================================


        # wait five seconds for some data to come in...
        log.warn('Sleeping for 5 seconds to wait for some output')
        time.sleep(5)

        matched_comments={}
        for post_id, captured_replay in captured_replays.iteritems():

            # There should be only one blog in here!
            self.assertEqual(len(captured_replay.blogs),1)

            replayed_blog = captured_replay.blogs[post_id]

            input_blog = captured_input.blogs[post_id]

            self.assertEqual(replayed_blog['post'].content, input_blog['post'].content)

            # can't deterministically assert that the number of comments is the same...
            matched_comments[post_id] = 0

            for updated, comment in replayed_blog.get('comments',{}).iteritems():
                self.assertIn(updated, input_blog['comments'])
                matched_comments[post_id] += 1


        # Assert that we got some comments back!
        self.assertTrue(sum(matched_comments.values()) > 0)

        log.info('Matched comments on the following blogs: %s' % matched_comments)
    def test_dm_integration(self):
        '''
        test_salinity_transform
        Test full DM Services Integration
        '''
        cc = self.container
        assertions = self.assertTrue


        #-----------------------------
        # Copy below here to run as a script (don't forget the imports of course!)
        #-----------------------------


        # Create some service clients...
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)
        transform_management_service = TransformManagementServiceClient(node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        # declare some handy variables

        datastore_name = 'test_dm_integration'



        ###
        ### In the beginning there were two stream definitions...
        ###
        # create a stream definition for the data from the ctd simulator
        ctd_stream_def = SBE37_CDM_stream_definition()
        ctd_stream_def_id = pubsub_management_service.create_stream_definition(container=ctd_stream_def, name='Simulated CTD data')

        # create a stream definition for the data from the salinity Transform
        sal_stream_def_id = pubsub_management_service.create_stream_definition(container=SalinityTransform.outgoing_stream_def, name='Scalar Salinity data stream')



        ###
        ### And two process definitions...
        ###
        # one for the ctd simulator...
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module':'ion.processes.data.ctd_stream_publisher',
            'class':'SimpleCtdPublisher'
        }

        ctd_sim_procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)

        # one for the salinity transform
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module':'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'class':'SalinityTransform'
        }

        salinity_transform_procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)



        #---------------------------
        # Set up ingestion - this is an operator concern - not done by SA in a deployed system
        #---------------------------
        # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,datastore_profile='SCIDATA'),
            number_of_workers=1
        )
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)



        #---------------------------
        # Set up the producer (CTD Simulator)
        #---------------------------

        # Create the stream
        ctd_stream_id = pubsub_management_service.create_stream(stream_definition_id=ctd_stream_def_id)


        # Set up the datasets
        ctd_dataset_id = dataset_management_service.create_dataset(
            stream_id=ctd_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule'
        )

        # Configure ingestion of this dataset
        ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id = ctd_dataset_id,
            archive_data = True,
            archive_metadata = True,
            ingestion_configuration_id = ingestion_configuration_id, # you need to know the ingestion configuration id!
        )
        # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        #---------------------------
        # Set up the salinity transform
        #---------------------------


        # Create the stream
        sal_stream_id = pubsub_management_service.create_stream(stream_definition_id=sal_stream_def_id)


        # Set up the datasets
        sal_dataset_id = dataset_management_service.create_dataset(
            stream_id=sal_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule'
        )

        # Configure ingestion of the salinity as a dataset
        sal_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id = sal_dataset_id,
            archive_data = True,
            archive_metadata = True,
            ingestion_configuration_id = ingestion_configuration_id, # you need to know the ingestion configuration id!
        )
        # Hold onto sal_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service



        # Create a subscription as input to the transform
        sal_transform_input_subscription_id = pubsub_management_service.create_subscription(
            query = StreamQuery(stream_ids=[ctd_stream_id,]),
            exchange_name='salinity_transform_input') # how do we make these names??? i.e. Should they be anonymous?

        # create the salinity transform
        sal_transform_id = transform_management_service.create_transform(
            name='example salinity transform',
            in_subscription_id=sal_transform_input_subscription_id,
            out_streams={'output':sal_stream_id,},
            process_definition_id = salinity_transform_procdef_id,
            # no configuration needed at this time...
            )
        # start the transform - for a test case it makes sense to do it before starting the producer but it is not required
        transform_management_service.activate_transform(transform_id=sal_transform_id)



        # Start the ctd simulator to produce some data
        configuration = {
            'process':{
                'stream_id':ctd_stream_id,
            }
        }
        ctd_sim_pid = process_dispatcher.schedule_process(process_definition_id=ctd_sim_procdef_id, configuration=configuration)


        ###
        ### Make a subscriber in the test to listen for salinity data
        ###
        salinity_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery([sal_stream_id,]),
            exchange_name = 'salinity_test',
            name = "test salinity subscription",
            )

        pid = cc.spawn_process(name='dummy_process_for_test',
            module='pyon.ion.process',
            cls='SimpleProcess',
            config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node)

        result = gevent.event.AsyncResult()
        results = []
        def message_received(message, headers):
            log.warn('Salinity data received!')
            results.append(message)
            if len(results) > 3:
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(exchange_name='salinity_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        pubsub_management_service.activate_subscription(subscription_id=salinity_subscription_id)


        # Assert that we have received data
        assertions(result.get(timeout=10))

        # stop the flow and parse the messages...
        process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data



        import numpy

        for message in results:

            psd = PointSupplementStreamParser(stream_definition=SalinityTransform.outgoing_stream_def, stream_granule=message)

            # Test the handy info method for the names of fields in the stream def
            assertions('salinity' in psd.list_field_names())

            # you have to know the name of the coverage in the stream def
            salinity = psd.get_values('salinity')

            assertions(isinstance(salinity, numpy.ndarray))

            assertions(numpy.nanmin(salinity) > 0.0) # salinity should always be greater than 0
class IngestionManagementIntTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url("res/deploy/r2deploy.yml")

        self.ingestion_management = IngestionManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.ingest_name = "basic"
        self.exchange = "testdata"

    @staticmethod
    def clean_subscriptions():
        ingestion_management = IngestionManagementServiceClient()
        pubsub = PubsubManagementServiceClient()
        rr = ResourceRegistryServiceClient()
        ingestion_config_ids = ingestion_management.list_ingestion_configurations(id_only=True)
        for ic in ingestion_config_ids:

            assocs = rr.find_associations(subject=ic, predicate=PRED.hasSubscription, id_only=False)
            for assoc in assocs:
                rr.delete_association(assoc)
                try:
                    pubsub.deactivate_subscription(assoc.o)
                except:
                    pass
                pubsub.delete_subscription(assoc.o)

    def create_ingest_config(self):
        self.queue = IngestionQueue(name="test", type="testdata")

        # Create the ingestion config
        ingestion_config_id = self.ingestion_management.create_ingestion_configuration(
            name=self.ingest_name, exchange_point_id=self.exchange, queues=[self.queue]
        )
        return ingestion_config_id

    def test_ingestion_config_crud(self):
        ingestion_config_id = self.create_ingest_config()

        ingestion_config = self.ingestion_management.read_ingestion_configuration(ingestion_config_id)
        self.assertTrue(ingestion_config.name == self.ingest_name)
        self.assertTrue(ingestion_config.queues[0].name == "test")
        self.assertTrue(ingestion_config.queues[0].type == "testdata")

        ingestion_config.name = "another"

        self.ingestion_management.update_ingestion_configuration(ingestion_config)

        # Create an association just to make sure that it will delete them
        sub = Subscription()
        sub_id, _ = self.resource_registry.create(sub)
        assoc_id, _ = self.resource_registry.create_association(
            subject=ingestion_config_id, predicate=PRED.hasSubscription, object=sub_id
        )

        self.ingestion_management.delete_ingestion_configuration(ingestion_config_id)

        with self.assertRaises(NotFound):
            self.resource_registry.read(assoc_id)

    def test_list_ingestion(self):

        # Create the ingest_config
        config_id = self.create_ingest_config()

        retval = self.ingestion_management.list_ingestion_configurations(id_only=True)
        # Nice thing about this is that it breaks if r2dm adds an ingest_config
        self.assertTrue(config_id in retval)
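
A plausible teardown for the class above, assuming the standard unittest lifecycle; the snippet defines `clean_subscriptions` but never shows a caller:

    def tearDown(self):
        # Hypothetical wiring: run the subscription cleanup helper after each test.
        IngestionManagementIntTest.clean_subscriptions()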
Example #9
class DMCollaborationIntTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        config = DotDict()
        config.bootstrap.processes.ingestion.module = 'ion.processes.data.ingestion.ingestion_worker_a'
        config.bootstrap.processes.replay.module    = 'ion.processes.data.replay.replay_process_a'
        self.container.start_rel_from_url('res/deploy/r2dm.yml', config)


        self.datastore_name = 'test_datasets'
        self.pubsub_management    = PubsubManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.dataset_management   = DatasetManagementServiceClient()
        self.process_dispatcher   = ProcessDispatcherServiceClient()
        self.data_retriever       = DataRetrieverServiceClient()

    def subscriber_action(self, msg, header):
        if not hasattr(self,'received'):
            self.received = 0
        if not hasattr(self, 'async_done'):
            self.async_done = AsyncResult()
        self.received += 1
        if self.received >= 2:
            self.async_done.set(True)


    def test_ingest_to_replay(self):

        self.async_done = AsyncResult()
        sysname = get_sys_name()


        datastore = self.container.datastore_manager.get_datastore(self.datastore_name,'SCIDATA')


        producer_definition = ProcessDefinition(name='Example Data Producer')
        producer_definition.executable = {
            'module':'ion.processes.data.example_data_producer',
            'class' :'ExampleDataProducer'
        }

        process_definition_id = self.process_dispatcher.create_process_definition(process_definition=producer_definition)
        
        ingestion_configuration_id = self.ingestion_management.create_ingestion_configuration(
            exchange_point_id = 'science_data',
            couch_storage=CouchStorage(datastore_name=self.datastore_name,datastore_profile='SCIDATA'),
            number_of_workers=1
        )

        self.ingestion_management.activate_ingestion_configuration(
                ingestion_configuration_id=ingestion_configuration_id)

        stream_id = self.pubsub_management.create_stream(name='data stream')
        
        dataset_id = self.dataset_management.create_dataset(
            stream_id = stream_id, 
            datastore_name = self.datastore_name,
        )

        self.ingestion_management.create_dataset_configuration(
            dataset_id = dataset_id,
            archive_data = True,
            archive_metadata = True,
            ingestion_configuration_id = ingestion_configuration_id
        )

        configuration = {
            'process': {
                'stream_id' : stream_id
            }
        }

        self.process_dispatcher.schedule_process(process_definition_id, configuration=configuration)

        replay_id, stream_id = self.data_retriever.define_replay(dataset_id = dataset_id)

        subscriber = Subscriber(name=('%s.science_data' % sysname, 'test_queue'), callback=self.subscriber_action, binding='%s.data' % stream_id)
        gevent.spawn(subscriber.listen)

        done = False
        while not done:
            results = datastore.query_view('manifest/by_dataset')
            if len(results) >= 2:
                done = True
            else:
                gevent.sleep(0.2) # yield so the subscriber greenlet can run while we poll

        self.data_retriever.start_replay(replay_id)

        self.async_done.get(timeout=10)
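
The success signal in this test is the `AsyncResult` that `subscriber_action` sets after two messages. The same wait-for-N-messages pattern, stripped of the ION plumbing (all names here are illustrative):

from gevent.event import AsyncResult

def make_counting_callback(expected):
    # Returns (callback, done): callback counts invocations and sets `done`
    # once `expected` messages have been seen.
    done = AsyncResult()
    state = {'count': 0}
    def callback(msg, headers):
        state['count'] += 1
        if state['count'] >= expected:
            done.set(True)
    return callback, done

callback, done = make_counting_callback(2)
# subscriber = Subscriber(name=(xp, 'test_queue'), callback=callback, binding=binding)
# gevent.spawn(subscriber.listen)
done.get(timeout=10)  # raises gevent.Timeout if fewer than 2 messages arrive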
Example #10
    def test_replay_integration(self):
        '''
        Test full DM Services Integration
        '''

        cc = self.container

        ### Everything below here can be run as a script:


        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)
        resource_registry_service = ResourceRegistryServiceClient(node=cc.node)

        #------------------------------------------------------------------------------------------------------
        # Datastore name
        #------------------------------------------------------------------------------------------------------

        datastore_name = 'test_replay_integration'

        #------------------------------------------------------------------------------------------------------
        # Spawn process
        #------------------------------------------------------------------------------------------------------

        pid = cc.spawn_process(name='dummy_process_for_test',
            module='pyon.ion.process',
            cls='SimpleProcess',
            config={})

        dummy_process = cc.proc_manager.procs[pid]

        #------------------------------------------------------------------------------------------------------
        # Set up subscriber
        #------------------------------------------------------------------------------------------------------

        # Normally the user does not see or create the publisher; that is part of the container's business.
        # For the test we need to set it up explicitly
        publisher_registrar = StreamPublisherRegistrar(process=dummy_process, node=cc.node)
        subscriber_registrar = StreamSubscriberRegistrar(process=cc, node=cc.node)


        #------------------------------------------------------------------------------------------------------
        # Set up ingestion
        #------------------------------------------------------------------------------------------------------

        # Configure ingestion using one worker, ingesting to the test_replay_integration datastore with the SCIDATA profile
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
            hdf_storage=HdfStorage(),
            number_of_workers=1,
        )

        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        #------------------------------------------------------------------------------------------------------
        # Grab the transforms acting as ingestion workers
        #------------------------------------------------------------------------------------------------------

        transforms = [resource_registry_service.read(assoc.o)
                      for assoc in resource_registry_service.find_associations(ingestion_configuration_id, PRED.hasTransform)]

        proc_1 = cc.proc_manager.procs[transforms[0].process_id]
        log.info("PROCESS 1: %s" % str(proc_1))

        #------------------------------------------------------------------------------------------------------
        # Set up the test hooks for the gevent event AsyncResult object
        #------------------------------------------------------------------------------------------------------

        # NOTE: `ar` here (and `ar2` used further down) are AsyncResult-style test hooks
        # defined at module level, like the _create_packet and _subscriber_call_back
        # helpers; this excerpt does not show them.
        def ingestion_worker_received(message, headers):
            ar.set(message)

        proc_1.ingest_process_test_hook = ingestion_worker_received

        #------------------------------------------------------------------------------------------------------
        # Set up the producers (CTD Simulators)
        #------------------------------------------------------------------------------------------------------

        ctd_stream_def = ctd_stream_definition()

        stream_def_id = pubsub_management_service.create_stream_definition(container=ctd_stream_def, name='Junk definition')


        stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)

        #------------------------------------------------------------------------------------------------------
        # Set up the dataset config
        #------------------------------------------------------------------------------------------------------


        dataset_id = dataset_management_service.create_dataset(
            stream_id=stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule'
        )

        dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id = dataset_id,
            archive_data = True,
            archive_metadata = True,
            ingestion_configuration_id = ingestion_configuration_id
        )

        #------------------------------------------------------------------------------------------------------
        # Launch a ctd_publisher
        #------------------------------------------------------------------------------------------------------

        publisher = publisher_registrar.create_publisher(stream_id=stream_id)

        #------------------------------------------------------------------------
        # Create a packet and publish it
        #------------------------------------------------------------------------

        ctd_packet = _create_packet(stream_id)
        published_hdfstring = ctd_packet.identifiables['ctd_data'].values

        publisher.publish(ctd_packet)

        #------------------------------------------------------------------------------------------------------
        # Catch what the ingestion worker gets! Assert it is the same packet that was published!
        #------------------------------------------------------------------------------------------------------

        packet = ar.get(timeout=2)

        #------------------------------------------------------------------------------------------------------
        # Create subscriber to listen to the replays
        #------------------------------------------------------------------------------------------------------

        replay_id, replay_stream_id = data_retriever_service.define_replay(dataset_id)

        query = StreamQuery(stream_ids=[replay_stream_id])

        subscription_id = pubsub_management_service.create_subscription(query=query, exchange_name='replay_capture_point', name='replay_capture_point')

        # It is not required or even generally a good idea to use the subscription resource name as the queue name, but it makes things simple here
        # Normally the container creates and starts subscribers for you when a transform process is spawned
        subscriber = subscriber_registrar.create_subscriber(exchange_name='replay_capture_point', callback=_subscriber_call_back)
        subscriber.start()

        pubsub_management_service.activate_subscription(subscription_id)

        #------------------------------------------------------------------------------------------------------
        # Start the replay
        #------------------------------------------------------------------------------------------------------

        data_retriever_service.start_replay(replay_id)

        #------------------------------------------------------------------------------------------------------
        # Get the hdf string from the captured stream in the replay
        #------------------------------------------------------------------------------------------------------

        retrieved_hdf_string = ar2.get(timeout=2)


        ### Non scriptable portion of the test

        #------------------------------------------------------------------------------------------------------
        # Assert that it matches the message we sent
        #------------------------------------------------------------------------------------------------------

        self.assertEquals(packet.identifiables['stream_encoding'].sha1, ctd_packet.identifiables['stream_encoding'].sha1)


        self.assertEquals(retrieved_hdf_string, published_hdfstring)
    def test_replay_integration(self):
        '''
        test_replay_integration
        '''
        import numpy as np
        # Keep this import: it's used in the vector comparison below, even though PyCharm flags it as unused.

        cc = self.container
        XP = self.XP
        assertions = self.assertTrue

        ### Everything below here can be run as a script:
        log.debug('Got it')

        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)

        datastore_name = 'dm_test_replay_integration'

        producer = Publisher(name=(XP,'stream producer'))

        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id=XP,
            couch_storage=CouchStorage(datastore_name=datastore_name,datastore_profile='SCIDATA'),
            hdf_storage=HdfStorage(),
            number_of_workers=1
        )

        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id
        )

        definition = SBE37_CDM_stream_definition()
        data_stream_id = definition.data_stream_id
        encoding_id = definition.identifiables[data_stream_id].encoding_id
        element_count_id = definition.identifiables[data_stream_id].element_count_id

        stream_def_id = pubsub_management_service.create_stream_definition(
            container=definition
        )
        stream_id = pubsub_management_service.create_stream(
            stream_definition_id=stream_def_id
        )

        dataset_id = dataset_management_service.create_dataset(
            stream_id=stream_id,
            datastore_name=datastore_name,
            view_name='datasets/dataset_by_id'
        )
        ingestion_management_service.create_dataset_configuration(
            dataset_id=dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id = ingestion_configuration_id
        )
        definition.stream_resource_id = stream_id

        packet = _create_packet(definition)
        input_file = FileSystem.mktemp()
        input_file.write(packet.identifiables[data_stream_id].values)
        input_file_path = input_file.name
        input_file.close()

        fields=[
            'conductivity',
            'height',
            'latitude',
            'longitude',
            'pressure',
            'temperature',
            'time'
        ]

        input_vectors = acquire_data([input_file_path], fields, 2).next()

        producer.publish(msg=packet, to_name=(XP,'%s.data' % stream_id))

        replay_id, replay_stream_id = data_retriever_service.define_replay(dataset_id)
        ar = gevent.event.AsyncResult()
        def sub_listen(msg, headers):

            assertions(isinstance(msg,StreamGranuleContainer),'replayed message is not a granule.')
            hdf_string = msg.identifiables[data_stream_id].values
            sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
            assertions(sha1 == msg.identifiables[encoding_id].sha1,'Checksum failed.')
            assertions(msg.identifiables[element_count_id].value==1, 'record replay count is incorrect %d.' % msg.identifiables[element_count_id].value)
            output_file = FileSystem.mktemp()
            output_file.write(msg.identifiables[data_stream_id].values)
            output_file_path = output_file.name
            output_file.close()
            output_vectors = acquire_data([output_file_path],fields,2).next()
            for field in fields:
                comparison = (input_vectors[field]['values']==output_vectors[field]['values'])
                assertions(comparison.all(), 'vector mismatch: %s vs %s' %
                                             (input_vectors[field]['values'],output_vectors[field]['values']))
            FileSystem.unlink(output_file_path)
            ar.set(True)

        subscriber = Subscriber(name=(XP,'replay listener'),callback=sub_listen)

        g = gevent.Greenlet(subscriber.listen, binding='%s.data' % replay_stream_id)
        g.start()

        data_retriever_service.start_replay(replay_id)

        ar.get(timeout=10)

        FileSystem.unlink(input_file_path)
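
Both replay tests verify payload integrity the same way: the SHA-1 of the replayed HDF byte string must equal the upper-case digest recorded on the stream encoding. That check in isolation, with `msg`, `data_stream_id`, and `encoding_id` as in `sub_listen` above:

import hashlib

def granule_checksum_ok(msg, data_stream_id, encoding_id):
    # Recompute the digest of the raw HDF string and compare it with the
    # upper-case SHA-1 stored on the stream encoding.
    hdf_string = msg.identifiables[data_stream_id].values
    return hashlib.sha1(hdf_string).hexdigest().upper() == msg.identifiables[encoding_id].sha1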
Example #12
    def test_raw_stream_integration(self):
        cc = self.container
        assertions = self.assertTrue

        #-----------------------------
        # Copy below here to run as a script (don't forget the imports of course!)
        #-----------------------------

        # Create some service clients...
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(
            node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(
            node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        # declare some handy variables

        datastore_name = 'test_dm_integration'

        ###
        ### In the beginning there was one stream definition...
        ###
        # create a stream definition for the data from the ctd simulator
        raw_ctd_stream_def = SBE37_RAW_stream_definition()
        raw_ctd_stream_def_id = pubsub_management_service.create_stream_definition(
            container=raw_ctd_stream_def, name='Simulated RAW CTD data')

        ###
        ### And two process definitions...
        ###
        # one for the ctd simulator...
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.processes.data.raw_stream_publisher',
            'class': 'RawStreamPublisher'
        }

        raw_ctd_sim_procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)

        #---------------------------
        # Set up ingestion - this is an operator concern - not done by SA in a deployed system
        #---------------------------
        # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,
                                       datastore_profile='SCIDATA'),
            number_of_workers=1)
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        #---------------------------
        # Set up the producer (CTD Simulator)
        #---------------------------

        # Create the stream
        raw_ctd_stream_id = pubsub_management_service.create_stream(
            stream_definition_id=raw_ctd_stream_def_id)

        # Set up the datasets
        raw_ctd_dataset_id = dataset_management_service.create_dataset(
            stream_id=raw_ctd_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule')

        # Configure ingestion of this dataset
        raw_ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=raw_ctd_dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
        )
        # Hold onto raw_ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        # Start the ctd simulator to produce some data
        configuration = {
            'process': {
                'stream_id': raw_ctd_stream_id,
            }
        }
        raw_sim_pid = process_dispatcher.schedule_process(
            process_definition_id=raw_ctd_sim_procdef_id,
            configuration=configuration)

        ###
        ### Make a subscriber in the test to listen for salinity data
        ###
        raw_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery([
                raw_ctd_stream_id,
            ]),
            exchange_name='raw_test',
            name="test raw subscription",
        )

        # this is okay - even in cei mode!
        pid = cc.spawn_process(name='dummy_process_for_test',
                               module='pyon.ion.process',
                               cls='SimpleProcess',
                               config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process,
                                                         node=cc.node)

        result = gevent.event.AsyncResult()
        results = []

        def message_received(message, headers):
            log.warn('Raw data received!')
            results.append(message)
            if len(results) > 3:
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(
            exchange_name='raw_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        pubsub_management_service.activate_subscription(
            subscription_id=raw_subscription_id)

        # Assert that we have received data
        assertions(result.get(timeout=10))

        # stop the flow and parse the messages...
        process_dispatcher.cancel_process(
            raw_sim_pid
        )  # kill the ctd simulator process - that is enough data

        gevent.sleep(1)

        for message in results:

            sha1 = message.identifiables['stream_encoding'].sha1

            data = message.identifiables['data_stream'].values

            filename = FileSystem.get_hierarchical_url(FS.CACHE, sha1, ".raw")

            with open(filename, 'rb') as f: # binary mode: the payload is a raw byte string

                assertions(data == f.read())
class TestIMSDeployAsPrimaryDevice(IonIntegrationTestCase):
    def setUp(self):
        # Start container
        self._start_container()

        self.container.start_rel_from_url("res/deploy/r2deploy.yml")

        print "started services"

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.pubsubclient = PubsubManagementServiceClient(node=self.container.node)
        self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.datasetclient = DatasetManagementServiceClient(node=self.container.node)
        self.omsclient = ObservatoryManagementServiceClient(node=self.container.node)

    def cleanupprocs(self):
        stm = os.popen("ps -e | grep ion.agents.port.logger_process")
        procs = stm.read()
        if len(procs) > 0:
            procs = procs.split()
            if procs[0].isdigit():
                pid = int(procs[0])
                os.kill(pid, signal.SIGKILL)
        stm = os.popen("ps -e | grep ion.agents.instrument.zmq_driver_process")
        procs = stm.read()
        if len(procs) > 0:
            procs = procs.split()
            if procs[0].isdigit():
                pid = int(procs[0])
                os.kill(pid, signal.SIGKILL)
        stm = os.popen("rm /tmp/*.pid.txt")

    @unittest.skip("timeout on start inst agent?")
    def test_reassignPrimaryDevice(self):

        # ensure no processes or pids are left around by agents or Sims
        self.cleanupprocs()

        # Set up the preconditions
        # ingestion configuration parameters
        self.exchange_point_id = "science_data"
        self.number_of_workers = 2
        self.hdf_storage = HdfStorage(relative_path="ingest")
        self.couch_storage = CouchStorage(datastore_name="test_datastore")
        self.XP = "science_data"
        self.exchange_name = "ingestion_queue"

        # -------------------------------
        # Create ingestion configuration and activate it
        # -------------------------------
        ingestion_configuration_id = self.ingestclient.create_ingestion_configuration(
            exchange_point_id=self.exchange_point_id,
            couch_storage=self.couch_storage,
            hdf_storage=self.hdf_storage,
            number_of_workers=self.number_of_workers,
        )
        print "test_deployAsPrimaryDevice: ingestion_configuration_id", ingestion_configuration_id

        # activate an ingestion configuration
        ret = self.ingestclient.activate_ingestion_configuration(ingestion_configuration_id)

        # -------------------------------
        # Create InstrumentModel
        # -------------------------------
        instModel_obj = IonObject(
            RT.InstrumentModel, name="SBE37IMModel", description="SBE37IMModel", model_label="SBE37IMModel"
        )
        try:
            instModel_id = self.imsclient.create_instrument_model(instModel_obj)
        except BadRequest as ex:
            self.fail("failed to create new InstrumentModel: %s" % ex)
        print "test_deployAsPrimaryDevice: new InstrumentModel id = ", instModel_id

        # -------------------------------
        # Create InstrumentAgent
        # -------------------------------
        instAgent_obj = IonObject(
            RT.InstrumentAgent,
            name="agent007",
            description="SBE37IMAgent",
            driver_module="ion.agents.instrument.instrument_agent",
            driver_class="InstrumentAgent",
        )
        try:
            instAgent_id = self.imsclient.create_instrument_agent(instAgent_obj)
        except BadRequest as ex:
            self.fail("failed to create new InstrumentAgent: %s" % ex)
        print "test_deployAsPrimaryDevice: new InstrumentAgent id = ", instAgent_id

        self.imsclient.assign_instrument_model_to_instrument_agent(instModel_id, instAgent_id)

        # -------------------------------
        # Create Instrument Site
        # -------------------------------
        instrumentSite_obj = IonObject(RT.InstrumentSite, name="instrumentSite1", description="SBE37IMInstrumentSite")
        try:
            instrumentSite_id = self.omsclient.create_instrument_site(instrument_site=instrumentSite_obj, parent_id="")
        except BadRequest as ex:
            self.fail("failed to create new InstrumentSite: %s" % ex)
        print "test_deployAsPrimaryDevice: new instrumentSite id = ", instrumentSite_id

        self.omsclient.assign_instrument_model_to_instrument_site(instModel_id, instrumentSite_id)

        # -------------------------------
        # Create Old InstrumentDevice
        # -------------------------------
        instDevice_obj = IonObject(
            RT.InstrumentDevice,
            name="SBE37IMDeviceYear1",
            description="SBE37IMDevice for the FIRST year of deployment",
            serial_number="12345",
        )
        try:
            oldInstDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj)
            self.imsclient.assign_instrument_model_to_instrument_device(instModel_id, oldInstDevice_id)
        except BadRequest as ex:
            self.fail("failed to create new InstrumentDevice: %s" % ex)

        print "test_deployAsPrimaryDevice: new Year 1 InstrumentDevice id = ", oldInstDevice_id

        # deploy this device to the logical slot
        self.omsclient.deploy_instrument_device_to_instrument_site(oldInstDevice_id, instrumentSite_id)

        self.rrclient.execute_lifecycle_transition(oldInstDevice_id, LCE.DEPLOY)
        self.rrclient.execute_lifecycle_transition(oldInstDevice_id, LCE.ENABLE)

        # set this device as the current primary device
        self.omsclient.deploy_as_primary_instrument_device_to_instrument_site(oldInstDevice_id, instrumentSite_id)

        # -------------------------------
        # Create InstrumentAgentInstance for OldInstrumentDevice to hold configuration information
        # cmd_port=5556, evt_port=5557, comms_method="ethernet", comms_device_address=CFG.device.sbe37.host, comms_device_port=CFG.device.sbe37.port,
        # -------------------------------
        instAgentInstance_obj = IonObject(
            RT.InstrumentAgentInstance,
            name="SBE37IMAgentInstanceYear1",
            description="SBE37IMAgentInstance Year 1",
            svr_addr="localhost",
            driver_module="ion.agents.instrument.drivers.sbe37.sbe37_driver",
            driver_class="SBE37Driver",
            cmd_port=5556,
            evt_port=5557,
            comms_method="ethernet",
            comms_device_address="localhost",
            comms_device_port=4001,
            comms_server_address="localhost",
            comms_server_port=8888,
        )
        oldInstAgentInstance_id = self.imsclient.create_instrument_agent_instance(
            instAgentInstance_obj, instAgent_id, oldInstDevice_id
        )

        # -------------------------------
        # Create CTD Parsed as the Year 1 data product
        # -------------------------------
        # create a stream definition for the data from the ctd simulator
        ctd_stream_def = SBE37_CDM_stream_definition()
        ctd_stream_def_id = self.pubsubclient.create_stream_definition(container=ctd_stream_def)

        print "test_deployAsPrimaryDevice: new Stream Definition id = ", ctd_stream_def_id

        print "Creating new CDM data product with a stream definition"
        dp_obj = IonObject(RT.DataProduct, name="ctd_parsed_year1", description="ctd stream test year 1")
        try:
            ctd_parsed_data_product_year1 = self.dataproductclient.create_data_product(dp_obj, ctd_stream_def_id)
        except BadRequest as ex:
            self.fail("failed to create new data product: %s" % ex)
        print "new ctd_parsed_data_product_id = ", ctd_parsed_data_product_year1

        self.damsclient.assign_data_product(
            input_resource_id=oldInstDevice_id, data_product_id=ctd_parsed_data_product_year1
        )

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product_year1, PRED.hasStream, None, True)
        print "test_deployAsPrimaryDevice: Data product streams1 = ", stream_ids

        # -------------------------------
        # Create New InstrumentDevice
        # -------------------------------
        instDevice_obj_2 = IonObject(
            RT.InstrumentDevice,
            name="SBE37IMDeviceYear2",
            description="SBE37IMDevice for the SECOND year of deployment",
            serial_number="67890",
        )
        try:
            newInstDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj_2)
            self.imsclient.assign_instrument_model_to_instrument_device(instModel_id, newInstDevice_id)
        except BadRequest as ex:
            self.fail("failed to create new InstrumentDevice: %s" % ex)

        print "test_deployAsPrimaryDevice: new  Year 2 InstrumentDevice id = ", newInstDevice_id

        # deploy this device to the logical slot
        self.omsclient.deploy_instrument_device_to_instrument_site(newInstDevice_id, instrumentSite_id)

        # set the LCSTATE
        self.rrclient.execute_lifecycle_transition(newInstDevice_id, LCE.DEPLOY)
        self.rrclient.execute_lifecycle_transition(newInstDevice_id, LCE.ENABLE)

        instDevice_obj_2 = self.rrclient.read(newInstDevice_id)
        log.debug("test_deployAsPrimaryDevice: Create New InstrumentDevice LCSTATE: %s ", str(instDevice_obj_2.lcstate))

        # -------------------------------
        # Create InstrumentAgentInstance for NewInstrumentDevice to hold configuration information
        # -------------------------------
        instAgentInstance_new__obj = IonObject(
            RT.InstrumentAgentInstance,
            name="SBE37IMAgentInstanceYear2",
            description="SBE37IMAgentInstance Year 2",
            svr_addr="localhost",
            driver_module="ion.agents.instrument.drivers.sbe37.sbe37_driver",
            driver_class="SBE37Driver",
            cmd_port=5556,
            evt_port=5557,
            comms_method="ethernet",
            comms_device_address="localhost",
            comms_device_port=4002,
            comms_server_address="localhost",
            comms_server_port=8888,
        )
        newInstAgentInstance_id = self.imsclient.create_instrument_agent_instance(
            instAgentInstance_new__obj, instAgent_id, newInstDevice_id
        )

        # -------------------------------
        # Create CTD Parsed as the Year 2 data product
        # -------------------------------
        # create a stream definition for the data from the ctd simulator
        #        ctd_stream_def = SBE37_CDM_stream_definition()
        #        ctd_stream_def_id = self.pubsubclient.create_stream_definition(container=ctd_stream_def)

        print "test_deployAsPrimaryDevice: new Stream Definition id = ", ctd_stream_def_id

        print "Creating new CDM data product with a stream definition"
        dp_obj = IonObject(RT.DataProduct, name="ctd_parsed_year2", description="ctd stream test year 2")
        try:
            ctd_parsed_data_product_year2 = self.dataproductclient.create_data_product(dp_obj, ctd_stream_def_id)
        except BadRequest as ex:
            self.fail("failed to create new data product: %s" % ex)
        print "new ctd_parsed_data_product_id = ", ctd_parsed_data_product_year2

        self.damsclient.assign_data_product(
            input_resource_id=newInstDevice_id, data_product_id=ctd_parsed_data_product_year2
        )

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product_year2, PRED.hasStream, None, True)
        print "test_deployAsPrimaryDevice: Data product streams2 = ", stream_ids

        # -------------------------------
        # Logical Data Product: Data Process Definition
        # -------------------------------
        #        log.debug(" test_deployAsPrimaryDevice: create data process definition logical_transform")
        #        dpd_obj = IonObject(RT.DataProcessDefinition,
        #                            name='logical_transform',
        #                            description='send the packet from the in stream to the out stream unchanged',
        #                            module='ion.processes.data.transforms.logical_transform',
        #                            class_name='logical_transform',
        #                            process_source='some_source_reference')
        #        try:
        #            logical_transform_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)
        #        except BadRequest as ex:
        #            self.fail("failed to create new ctd_L0_all data process definition: %s" %ex)

        # -------------------------------
        # L0 Conductivity - Temperature - Pressure: Data Process Definition
        # -------------------------------
        log.debug("test_deployAsPrimaryDevice: create data process definition ctd_L0_all")
        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name="ctd_L0_all",
            description="transform ctd package into three separate L0 streams",
            module="ion.processes.data.transforms.ctd.ctd_L0_all",
            class_name="ctd_L0_all",
            process_source="some_source_reference",
        )
        try:
            ctd_L0_all_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)
        except BadRequest as ex:
            self.fail("failed to create new ctd_L0_all data process definition: %s" % ex)

        # -------------------------------
        # Logical Transform: Output Data Products
        # -------------------------------
        #        outgoing_logical_stream_def = SBE37_CDM_stream_definition()
        #        outgoing_logical_stream_def_id = self.pubsubclient.create_stream_definition(container=outgoing_logical_stream_def)
        #        self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_logical_stream_def_id, logical_transform_dprocdef_id )
        #
        #        log.debug("test_deployAsPrimaryDevice: create output parsed data product for Logical Instrument")
        #        ctd_logical_output_dp_obj = IonObject(RT.DataProduct, name='ctd_parsed_logical',description='ctd parsed from the logical instrument')
        #        instrument_site_output_dp_id = self.dataproductclient.create_data_product(ctd_logical_output_dp_obj, outgoing_logical_stream_def_id)
        #        self.dataproductclient.activate_data_product_persistence(data_product_id=instrument_site_output_dp_id, persist_data=True, persist_metadata=True)

        # -------------------------------
        # L0 Conductivity - Temperature - Pressure: Output Data Products
        # -------------------------------

        outgoing_stream_l0_conductivity = L0_conductivity_stream_definition()
        outgoing_stream_l0_conductivity_id = self.pubsubclient.create_stream_definition(
            container=outgoing_stream_l0_conductivity, name="L0_Conductivity"
        )
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            outgoing_stream_l0_conductivity_id, ctd_L0_all_dprocdef_id
        )

        outgoing_stream_l0_pressure = L0_pressure_stream_definition()
        outgoing_stream_l0_pressure_id = self.pubsubclient.create_stream_definition(
            container=outgoing_stream_l0_pressure, name="L0_Pressure"
        )
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            outgoing_stream_l0_pressure_id, ctd_L0_all_dprocdef_id
        )

        outgoing_stream_l0_temperature = L0_temperature_stream_definition()
        outgoing_stream_l0_temperature_id = self.pubsubclient.create_stream_definition(
            container=outgoing_stream_l0_temperature, name="L0_Temperature"
        )
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            outgoing_stream_l0_temperature_id, ctd_L0_all_dprocdef_id
        )
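
        # The three blocks above repeat one pattern: build an L0 stream definition,
        # register it with pubsub, and bind it to the ctd_L0_all process definition.
        # A loop along these lines (a hypothetical rewrite, not part of the original
        # test) would express the same wiring more compactly:
        #
        #   for name, builder in [("L0_Conductivity", L0_conductivity_stream_definition),
        #                         ("L0_Pressure", L0_pressure_stream_definition),
        #                         ("L0_Temperature", L0_temperature_stream_definition)]:
        #       stream_def_id = self.pubsubclient.create_stream_definition(
        #           container=builder(), name=name)
        #       self.dataprocessclient.assign_stream_definition_to_data_process_definition(
        #           stream_def_id, ctd_L0_all_dprocdef_id)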

        self.output_products = {}
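        # Maps transform output names to data product ids; create_data_process
        # below uses this dict to wire each L0 output stream to its data product.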
        log.debug("test_deployAsPrimaryDevice: create output data product L0 conductivity")
        ctd_l0_conductivity_output_dp_obj = IonObject(
            RT.DataProduct, name="L0_Conductivity", description="transform output conductivity"
        )
        ctd_l0_conductivity_output_dp_id = self.dataproductclient.create_data_product(
            ctd_l0_conductivity_output_dp_obj, outgoing_stream_l0_conductivity_id
        )
        self.output_products["conductivity"] = ctd_l0_conductivity_output_dp_id
        # self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l0_conductivity_output_dp_id, persist_data=True, persist_metadata=True)

        log.debug("test_deployAsPrimaryDevice: create output data product L0 pressure")
        ctd_l0_pressure_output_dp_obj = IonObject(
            RT.DataProduct, name="L0_Pressure", description="transform output pressure"
        )
        ctd_l0_pressure_output_dp_id = self.dataproductclient.create_data_product(
            ctd_l0_pressure_output_dp_obj, outgoing_stream_l0_pressure_id
        )
        self.output_products["pressure"] = ctd_l0_pressure_output_dp_id
        # self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l0_pressure_output_dp_id, persist_data=True, persist_metadata=True)

        log.debug("test_deployAsPrimaryDevice: create output data product L0 temperature")
        ctd_l0_temperature_output_dp_obj = IonObject(
            RT.DataProduct, name="L0_Temperature", description="transform output temperature"
        )
        ctd_l0_temperature_output_dp_id = self.dataproductclient.create_data_product(
            ctd_l0_temperature_output_dp_obj, outgoing_stream_l0_temperature_id
        )
        self.output_products["temperature"] = ctd_l0_temperature_output_dp_id
        # self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l0_temperature_output_dp_id, persist_data=True, persist_metadata=True)

        # -------------------------------
        # CTD Logical: Create the data process
        # -------------------------------
        #        log.debug("test_deployAsPrimaryDevice: create ctd_parsed logical  data_process start")
        #        try:
        #            ctd_parsed_logical_data_process_id = self.dataprocessclient.create_data_process(logical_transform_dprocdef_id, ctd_parsed_data_product_year1, {'output':instrument_site_output_dp_id})
        #            self.dataprocessclient.activate_data_process(ctd_parsed_logical_data_process_id)
        #        except BadRequest as ex:
        #            self.fail("failed to create new data process: %s" %ex)
        #        log.debug("test_deployAsPrimaryDevice: create L0 all data_process return")

        # -------------------------------
        # L0 Conductivity - Temperature - Pressure: Create the data process, listening to  Sim1   (later: logical instrument output product)
        # -------------------------------
        log.debug("test_deployAsPrimaryDevice: create L0 all data_process start")
        try:
            ctd_l0_all_data_process_id = self.dataprocessclient.create_data_process(
                ctd_L0_all_dprocdef_id, [ctd_parsed_data_product_year1], self.output_products
            )
            self.dataprocessclient.activate_data_process(ctd_l0_all_data_process_id)
        except BadRequest as ex:
            self.fail("failed to create new data process: %s" % ex)
        log.debug("test_deployAsPrimaryDevice: create L0 all data_process return")

        # -------------------------------
        # Launch InstrumentAgentInstance Sim1, connect to the resource agent client
        # -------------------------------
        self.imsclient.start_instrument_agent_instance(instrument_agent_instance_id=oldInstAgentInstance_id)

        inst_agent1_instance_obj = self.imsclient.read_instrument_agent_instance(oldInstAgentInstance_id)
        print "test_deployAsPrimaryDevice: Instrument agent instance obj: = ", inst_agent1_instance_obj

        # Start a resource agent client to talk with the instrument agent.
        self._ia_client_sim1 = ResourceAgentClient(
            "iaclient Sim1", name=inst_agent1_instance_obj.agent_process_id, process=FakeProcess()
        )
        print "activate_instrument: got _ia_client_sim1 %s", self._ia_client_sim1
        log.debug(" test_deployAsPrimaryDevice:: got _ia_client_sim1 %s", str(self._ia_client_sim1))

        # -------------------------------
        # Launch InstrumentAgentInstance Sim2, connect to the resource agent client
        # -------------------------------
        self.imsclient.start_instrument_agent_instance(instrument_agent_instance_id=newInstAgentInstance_id)

        inst_agent2_instance_obj = self.imsclient.read_instrument_agent_instance(newInstAgentInstance_id)
        print "test_deployAsPrimaryDevice: Instrument agent instance obj: = ", inst_agent2_instance_obj

        # Start a resource agent client to talk with the instrument agent.
        self._ia_client_sim2 = ResourceAgentClient(
            "iaclient Sim2", name=inst_agent2_instance_obj.agent_process_id, process=FakeProcess()
        )
        print "activate_instrument: got _ia_client_sim2 %s", self._ia_client_sim2
        log.debug(" test_deployAsPrimaryDevice:: got _ia_client_sim2 %s", str(self._ia_client_sim2))

        # -------------------------------
        # Streaming Sim1 (old instrument)
        # -------------------------------

        cmd = AgentCommand(command="initialize")
        retval = self._ia_client_sim1.execute_agent(cmd)
        print retval
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim1 initialize %s", str(retval))

        time.sleep(2)

        cmd = AgentCommand(command="go_active")
        reply = self._ia_client_sim1.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim1 go_active %s", str(reply))
        time.sleep(2)

        cmd = AgentCommand(command="run")
        reply = self._ia_client_sim1.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim1 run %s", str(reply))
        time.sleep(2)

        log.debug("test_activateInstrument: calling go_streaming ")
        cmd = AgentCommand(command="go_streaming")
        reply = self._ia_client_sim1.execute(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim1 go_streaming %s", str(reply))

        # -------------------------------
        # Streaming Sim 2 (new instrument)
        # -------------------------------

        cmd = AgentCommand(command="initialize")
        retval = self._ia_client_sim2.execute_agent(cmd)
        print retval
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim2 initialize %s", str(retval))

        time.sleep(2)

        cmd = AgentCommand(command="go_active")
        reply = self._ia_client_sim2.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim2 go_active %s", str(reply))
        time.sleep(2)

        cmd = AgentCommand(command="run")
        reply = self._ia_client_sim2.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim2 run %s", str(reply))
        time.sleep(2)

        log.debug("test_activateInstrument: calling go_streaming ")
        cmd = AgentCommand(command="go_streaming")
        reply = self._ia_client_sim2.execute(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim2 go_streaming %s", str(reply))

        time.sleep(20)

        # -------------------------------
        # Shutdown Sim1 (old instrument)
        # -------------------------------
        log.debug("test_activateInstrument: calling go_observatory")
        cmd = AgentCommand(command="go_observatory")
        reply = self._ia_client_sim1.execute(cmd)
        log.debug("test_activateInstrument: _ia_client_sim1 return from go_observatory   %s", str(reply))
        time.sleep(5)

        log.debug("test_deployAsPrimaryDevice:: calling go_inactive ")
        cmd = AgentCommand(command="go_inactive")
        reply = self._ia_client_sim1.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim1 return from go_inactive %s", str(reply))
        time.sleep(2)

        log.debug("test_deployAsPrimaryDevice:: calling reset ")
        cmd = AgentCommand(command="reset")
        reply = self._ia_client_sim1.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim1 return from reset %s", str(reply))
        time.sleep(2)

        # -------------------------------
        # Shutdown Sim2 (new instrument)
        # -------------------------------
        log.debug("test_activateInstrument: calling go_observatory")
        cmd = AgentCommand(command="go_observatory")
        reply = self._ia_client_sim2.execute(cmd)
        log.debug("test_activateInstrument: _ia_client_sim2 return from go_observatory   %s", str(reply))
        time.sleep(8)

        log.debug("test_deployAsPrimaryDevice:: calling go_inactive ")
        cmd = AgentCommand(command="go_inactive")
        reply = self._ia_client_sim2.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim2 return from go_inactive %s", str(reply))
        time.sleep(2)

        log.debug("test_deployAsPrimaryDevice:: calling reset ")
        cmd = AgentCommand(command="reset")
        reply = self._ia_client_sim1.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim1 return from reset %s", str(reply))
        time.sleep(2)

        log.debug("test_deployAsPrimaryDevice:: calling reset ")
        cmd = AgentCommand(command="reset")
        reply = self._ia_client_sim2.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim2 return from reset %s", str(reply))
        time.sleep(2)

        self.imsclient.stop_instrument_agent_instance(instrument_agent_instance_id=oldInstAgentInstance_id)
        self.imsclient.stop_instrument_agent_instance(instrument_agent_instance_id=newInstAgentInstance_id)
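
    # The initialize/go_active/run and go_observatory/go_inactive/reset sequences
    # above are repeated verbatim for both simulators. A hypothetical helper along
    # the lines of this sketch (not part of the original test; it assumes the same
    # AgentCommand semantics used above) would remove the duplication:
    def _run_agent_commands(self, ia_client, commands, pause=2, resource=False):
        """Run a sequence of agent commands in order, pausing between each."""
        for name in commands:
            cmd = AgentCommand(command=name)
            # Resource commands such as go_streaming go through execute();
            # agent commands such as initialize, run and reset go through
            # execute_agent().
            reply = ia_client.execute(cmd) if resource else ia_client.execute_agent(cmd)
            log.debug("agent command %s returned %s", name, str(reply))
            time.sleep(pause)
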
    def test_replay_integration(self):
        '''
        test_replay_integration
        '''
        import numpy as np
        # Keep the import; it's used in the vector comparison below even though PyCharm says it's unused.

        cc = self.container
        XP = self.XP
        assertions = self.assertTrue

        ### Everything below here can be run as a script:
        log.debug('Got it')

        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(
            node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(
            node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)

        datastore_name = 'dm_test_replay_integration'

        producer = Publisher(name=(XP, 'stream producer'))

        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id=XP,
            couch_storage=CouchStorage(datastore_name=datastore_name,
                                       datastore_profile='SCIDATA'),
            hdf_storage=HdfStorage(),
            number_of_workers=1)

        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        definition = SBE37_CDM_stream_definition()
        data_stream_id = definition.data_stream_id
        encoding_id = definition.identifiables[data_stream_id].encoding_id
        element_count_id = definition.identifiables[
            data_stream_id].element_count_id

        stream_def_id = pubsub_management_service.create_stream_definition(
            container=definition)
        stream_id = pubsub_management_service.create_stream(
            stream_definition_id=stream_def_id)

        dataset_id = dataset_management_service.create_dataset(
            stream_id=stream_id,
            datastore_name=datastore_name,
            view_name='datasets/dataset_by_id')
        ingestion_management_service.create_dataset_configuration(
            dataset_id=dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id)
        definition.stream_resource_id = stream_id

        packet = _create_packet(definition)
        input_file = FileSystem.mktemp()
        input_file.write(packet.identifiables[data_stream_id].values)
        input_file_path = input_file.name
        input_file.close()

        fields = [
            'conductivity', 'height', 'latitude', 'longitude', 'pressure',
            'temperature', 'time'
        ]

        input_vectors = acquire_data([input_file_path], fields, 2).next()

        producer.publish(msg=packet, to_name=(XP, '%s.data' % stream_id))

        replay_id, replay_stream_id = data_retriever_service.define_replay(
            dataset_id)
        ar = gevent.event.AsyncResult()
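        # One-shot synchronization point: the subscriber callback below validates
        # the replayed granule and then sets this AsyncResult; the main greenlet
        # blocks on ar.get(timeout=10) until that happens or the test times out.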

        def sub_listen(msg, headers):

            assertions(isinstance(msg, StreamGranuleContainer),
                       'replayed message is not a granule.')
            hdf_string = msg.identifiables[data_stream_id].values
            sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
            assertions(sha1 == msg.identifiables[encoding_id].sha1,
                       'Checksum failed.')
            assertions(
                msg.identifiables[element_count_id].value == 1,
                'record replay count is incorrect %d.' %
                msg.identifiables[element_count_id].value)
            output_file = FileSystem.mktemp()
            output_file.write(msg.identifiables[data_stream_id].values)
            output_file_path = output_file.name
            output_file.close()
            output_vectors = acquire_data([output_file_path], fields, 2).next()
            for field in fields:
                comparison = (input_vectors[field]['values'] ==
                              output_vectors[field]['values'])
                assertions(
                    comparison.all(), 'vector mismatch: %s vs %s' %
                    (input_vectors[field]['values'],
                     output_vectors[field]['values']))
            FileSystem.unlink(output_file_path)
            ar.set(True)

        subscriber = Subscriber(name=(XP, 'replay listener'),
                                callback=sub_listen)

        g = gevent.Greenlet(subscriber.listen,
                            binding='%s.data' % replay_stream_id)
        g.start()

        data_retriever_service.start_replay(replay_id)

        ar.get(timeout=10)

        FileSystem.unlink(input_file_path)

class TestIMSDeployAsPrimaryDevice(IonIntegrationTestCase):

    def setUp(self):
        # Start container
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        print 'started services'

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.pubsubclient =  PubsubManagementServiceClient(node=self.container.node)
        self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.datasetclient =  DatasetManagementServiceClient(node=self.container.node)
        self.omsclient = ObservatoryManagementServiceClient(node=self.container.node)


    def cleanupprocs(self):
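        # Kill any port-logger or ZMQ-driver processes left over from a previous
        # run, so the simulators can rebind their ports cleanly.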
        stm = os.popen('ps -e | grep ion.agents.port.logger_process')
        procs = stm.read()
        if len(procs) > 0:
            procs = procs.split()
            if procs[0].isdigit():
                pid = int(procs[0])
                os.kill(pid, signal.SIGKILL)
        stm = os.popen('ps -e | grep ion.agents.instrument.zmq_driver_process')
        procs = stm.read()
        if len(procs) > 0:
            procs = procs.split()
            if procs[0].isdigit():
                pid = int(procs[0])
                os.kill(pid, signal.SIGKILL)
#       stm = os.popen('rm /tmp/*.pid.txt')


    @unittest.skip("Deprecated by IngestionManagement refactor, timeout on start inst agent?")
    def test_deploy_activate_full(self):

        # ensure no processes or pids are left around by agents or Sims
        self.cleanupprocs()

        # Set up the preconditions
        # ingestion configuration parameters
        exchange_point_id = 'science_data'
        exchange_name = 'ingestion_queue'

        #-------------------------------
        # Create ingestion configuration and activate it
        #-------------------------------
        q = IngestionQueue()
        ingestion_configuration_id = self.ingestclient.create_ingestion_configuration(
            name=exchange_name,
            exchange_point_id=exchange_point_id,
            queues=[q])

        print 'test_deployAsPrimaryDevice: ingestion_configuration_id', ingestion_configuration_id

        # activate an ingestion configuration
        # self.ingestclient.activate_ingestion_configuration(ingestion_configuration_id)

        #-------------------------------
        # Create InstrumentModel
        #-------------------------------
        instModel_obj = IonObject(RT.InstrumentModel, name='SBE37IMModel', description="SBE37IMModel", model="SBE37IMModel" )
        try:
            instModel_id = self.imsclient.create_instrument_model(instModel_obj)
        except BadRequest as ex:
            self.fail("failed to create new InstrumentModel: %s" %ex)
        print 'test_deployAsPrimaryDevice: new InstrumentModel id = ', instModel_id


        #-------------------------------
        # Create InstrumentAgent
        #-------------------------------
        instAgent_obj = IonObject(RT.InstrumentAgent, name='agent007', description="SBE37IMAgent", driver_module="ion.agents.instrument.instrument_agent", driver_class="InstrumentAgent" )
        try:
            instAgent_id = self.imsclient.create_instrument_agent(instAgent_obj)
        except BadRequest as ex:
            self.fail("failed to create new InstrumentAgent: %s" %ex)
        print 'test_deployAsPrimaryDevice: new InstrumentAgent id = ', instAgent_id

        self.imsclient.assign_instrument_model_to_instrument_agent(instModel_id, instAgent_id)



        #-------------------------------
        # Create Instrument Site
        #-------------------------------
        instrumentSite_obj = IonObject(RT.InstrumentSite, name='instrumentSite1', description="SBE37IMInstrumentSite" )
        try:
            instrumentSite_id = self.omsclient.create_instrument_site(instrument_site=instrumentSite_obj, parent_id='')
        except BadRequest as ex:
            self.fail("failed to create new InstrumentSite: %s" %ex)
        print 'test_deployAsPrimaryDevice: new instrumentSite id = ', instrumentSite_id

        self.omsclient.assign_instrument_model_to_instrument_site(instModel_id, instrumentSite_id)



        #-------------------------------
        # Logical Transform: Output Data Products
        #-------------------------------
        # create a stream definition for the data from the ctd simulator
        ctd_stream_def = SBE37_CDM_stream_definition()
        ctd_stream_def_id = self.pubsubclient.create_stream_definition(container=ctd_stream_def)

        log.debug("test_deployAsPrimaryDevice: create output parsed data product for Logical Instrument")

        craft = CoverageCraft
        sdom, tdom = craft.create_domains()
        sdom = sdom.dump()
        tdom = tdom.dump()
        parameter_dictionary = craft.create_parameters()
        parameter_dictionary = parameter_dictionary.dump()

        ctd_logical_output_dp_obj = IonObject(RT.DataProduct,
            name='ctd_parsed_logical',
            description='ctd parsed from the logical instrument',
            temporal_domain = tdom,
            spatial_domain = sdom)

        instrument_site_output_dp_id = self.dataproductclient.create_data_product(ctd_logical_output_dp_obj,
                                                                                  ctd_stream_def_id,
                                                                                  parameter_dictionary)
        self.dataproductclient.activate_data_product_persistence(data_product_id=instrument_site_output_dp_id)

        self.omsclient.create_site_data_product(instrumentSite_id, instrument_site_output_dp_id)

        #-------------------------------
        # Create Old InstrumentDevice
        #-------------------------------
        instDevice_obj = IonObject(RT.InstrumentDevice, name='SBE37IMDeviceYear1',
                                   description="SBE37IMDevice for the FIRST year of deployment",
                                   serial_number="12345" )
        try:
            oldInstDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj)
            self.imsclient.assign_instrument_model_to_instrument_device(instModel_id, oldInstDevice_id)
        except BadRequest as ex:
            self.fail("failed to create new InstrumentDevice: %s" %ex)

        print 'test_deployAsPrimaryDevice: new Year 1 InstrumentDevice id = ', oldInstDevice_id

        self.rrclient.execute_lifecycle_transition(oldInstDevice_id, LCE.DEPLOY)
        self.rrclient.execute_lifecycle_transition(oldInstDevice_id, LCE.ENABLE)


        #-------------------------------
        # Create Old Deployment
        #-------------------------------
        deployment_obj = IonObject(RT.Deployment, name='first deployment')

        oldDeployment_id = self.omsclient.create_deployment(deployment_obj)

        # deploy this device to the logical slot
        self.imsclient.deploy_instrument_device(oldInstDevice_id, oldDeployment_id)
        self.omsclient.deploy_instrument_site(instrumentSite_id, oldDeployment_id)



        #-------------------------------
        # Create InstrumentAgentInstance for OldInstrumentDevice to hold configuration information
        # cmd_port=5556, evt_port=5557, comms_method="ethernet", comms_device_address=CFG.device.sbe37.host, comms_device_port=CFG.device.sbe37.port,
        #-------------------------------
        instAgentInstance_obj = IonObject(RT.InstrumentAgentInstance,
                                          name='SBE37IMAgentInstanceYear1',
                                          description="SBE37IMAgentInstance Year 1",
                                          svr_addr="localhost",
                                          driver_module="mi.instrument.seabird.sbe37smb.ooicore.driver",
                                          driver_class="SBE37Driver",
                                          cmd_port=5556,
                                          evt_port=5557,
                                          comms_method="ethernet",
                                          comms_device_address="localhost",
                                          comms_device_port=4001,
                                          comms_server_address="localhost",
                                          comms_server_port=8888)
        oldInstAgentInstance_id = self.imsclient.create_instrument_agent_instance(instAgentInstance_obj,
                                                                                  instAgent_id,
                                                                                  oldInstDevice_id)



        #-------------------------------
        # Create CTD Parsed as the Year 1 data product and attach to instrument
        #-------------------------------


        print 'test_deployAsPrimaryDevice: new Stream Definition id = ', ctd_stream_def_id

        print 'Creating new CDM data product with a stream definition'

        dp_obj = IonObject(RT.DataProduct,
            name='ctd_parsed_year1',
            description='ctd stream test year 1',
            temporal_domain = tdom,
            spatial_domain = sdom)

        ctd_parsed_data_product_year1 = self.dataproductclient.create_data_product(dp_obj, ctd_stream_def_id, parameter_dictionary)

        print 'new ctd_parsed_data_product_id = ', ctd_parsed_data_product_year1

        self.damsclient.assign_data_product(input_resource_id=oldInstDevice_id,
                                            data_product_id=ctd_parsed_data_product_year1)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product_year1, PRED.hasStream, None, True)
        print 'test_deployAsPrimaryDevice: Data product streams1 = ', stream_ids

        #-------------------------------
        # Create New InstrumentDevice
        #-------------------------------
        instDevice_obj_2 = IonObject(RT.InstrumentDevice,
                                     name='SBE37IMDeviceYear2',
                                     description="SBE37IMDevice for the SECOND year of deployment",
                                     serial_number="67890" )
        try:
            newInstDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj_2)
            self.imsclient.assign_instrument_model_to_instrument_device(instModel_id, newInstDevice_id)
        except BadRequest as ex:
            self.fail("failed to create new InstrumentDevice: %s" %ex)

        print 'test_deployAsPrimaryDevice: new Year 2 InstrumentDevice id = ', newInstDevice_id

        #set the LCSTATE
        self.rrclient.execute_lifecycle_transition(newInstDevice_id, LCE.DEPLOY)
        self.rrclient.execute_lifecycle_transition(newInstDevice_id, LCE.ENABLE)

        instDevice_obj_2 = self.rrclient.read(newInstDevice_id)
        log.debug("test_deployAsPrimaryDevice: Create New InstrumentDevice LCSTATE: %s ",
                  str(instDevice_obj_2.lcstate))




        #-------------------------------
        # Create New Deployment
        #-------------------------------
        deployment_obj = IonObject(RT.Deployment, name='second deployment')

        newDeployment_id = self.omsclient.create_deployment(deployment_obj)

        # deploy this device to the logical slot
        self.imsclient.deploy_instrument_device(newInstDevice_id, newDeployment_id)
        self.omsclient.deploy_instrument_site(instrumentSite_id, newDeployment_id)


        #-------------------------------
        # Create InstrumentAgentInstance for NewInstrumentDevice to hold configuration information
        #-------------------------------
        instAgentInstance_new__obj = IonObject(RT.InstrumentAgentInstance,
                                               name='SBE37IMAgentInstanceYear2',
                                               description="SBE37IMAgentInstance Year 2",
                                               svr_addr="localhost",
                                               driver_module="mi.instrument.seabird.sbe37smb.ooicore.driver",
                                               driver_class="SBE37Driver",
                                               cmd_port=5556, evt_port=5557,
                                               comms_method="ethernet",
                                               comms_device_address="localhost",
                                               comms_device_port=4002,
                                               comms_server_address="localhost",
                                               comms_server_port=8888)
        newInstAgentInstance_id = self.imsclient.create_instrument_agent_instance(instAgentInstance_new__obj,
                                                                                  instAgent_id,
                                                                                  newInstDevice_id)


        #-------------------------------
        # Create CTD Parsed as the Year 2 data product
        #-------------------------------
        # create a stream definition for the data from the ctd simulator
#        ctd_stream_def = SBE37_CDM_stream_definition()
#        ctd_stream_def_id = self.pubsubclient.create_stream_definition(container=ctd_stream_def)

        print 'test_deployAsPrimaryDevice: new Stream Definition id = ', ctd_stream_def_id

        print 'Creating new CDM data product with a stream definition'

        dp_obj = IonObject(RT.DataProduct,
            name='ctd_parsed_year2',
            description='ctd stream test year 2',
            temporal_domain = tdom,
            spatial_domain = sdom)

        ctd_parsed_data_product_year2 = self.dataproductclient.create_data_product(dp_obj, ctd_stream_def_id, parameter_dictionary)

        print 'new ctd_parsed_data_product_id = ', ctd_parsed_data_product_year2

        self.damsclient.assign_data_product(input_resource_id=newInstDevice_id,
                                            data_product_id=ctd_parsed_data_product_year2)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product_year2, PRED.hasStream, None, True)
        print 'test_deployAsPrimaryDevice: Data product streams2 = ', stream_ids


        #-------------------------------
        # L0 Conductivity - Temperature - Pressure: Data Process Definition
        #-------------------------------
        log.debug("test_deployAsPrimaryDevice: create data process definition ctd_L0_all")
        dpd_obj = IonObject(RT.DataProcessDefinition,
                            name='ctd_L0_all',
                            description='transform ctd package into three separate L0 streams',
                            module='ion.processes.data.transforms.ctd.ctd_L0_all',
                            class_name='ctd_L0_all',
                            process_source='some_source_reference')
        try:
            ctd_L0_all_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)
        except BadRequest as ex:
            self.fail("failed to create new ctd_L0_all data process definition: %s" %ex)


        #-------------------------------
        # L0 Conductivity - Temperature - Pressure: Output Data Products
        #-------------------------------

        outgoing_stream_l0_conductivity = L0_conductivity_stream_definition()
        outgoing_stream_l0_conductivity_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_l0_conductivity, name='L0_Conductivity')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_conductivity_id, ctd_L0_all_dprocdef_id )

        outgoing_stream_l0_pressure = L0_pressure_stream_definition()
        outgoing_stream_l0_pressure_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_l0_pressure, name='L0_Pressure')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_pressure_id, ctd_L0_all_dprocdef_id )

        outgoing_stream_l0_temperature = L0_temperature_stream_definition()
        outgoing_stream_l0_temperature_id = self.pubsubclient.create_stream_definition(container=outgoing_stream_l0_temperature, name='L0_Temperature')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(outgoing_stream_l0_temperature_id, ctd_L0_all_dprocdef_id )


        self.output_products={}
        log.debug("test_deployAsPrimaryDevice: create output data product L0 conductivity")

        ctd_l0_conductivity_output_dp_obj = IonObject(RT.DataProduct,
            name='L0_Conductivity',
            description='transform output conductivity',
            temporal_domain = tdom,
            spatial_domain = sdom)

        ctd_l0_conductivity_output_dp_id = self.dataproductclient.create_data_product(  ctd_l0_conductivity_output_dp_obj,
                                                                                        outgoing_stream_l0_conductivity_id,
                                                                                        parameter_dictionary)
        self.output_products['conductivity'] = ctd_l0_conductivity_output_dp_id
        #self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l0_conductivity_output_dp_id)


        log.debug("test_deployAsPrimaryDevice: create output data product L0 pressure")

        ctd_l0_pressure_output_dp_obj = IonObject(  RT.DataProduct,
                                                    name='L0_Pressure',
                                                    description='transform output pressure',
                                                    temporal_domain = tdom,
                                                    spatial_domain = sdom)

        ctd_l0_pressure_output_dp_id = self.dataproductclient.create_data_product(  ctd_l0_pressure_output_dp_obj,
                                                                                    outgoing_stream_l0_pressure_id,
                                                                                    parameter_dictionary )
        self.output_products['pressure'] = ctd_l0_pressure_output_dp_id
        #self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l0_pressure_output_dp_id)

        log.debug("test_deployAsPrimaryDevice: create output data product L0 temperature")

        ctd_l0_temperature_output_dp_obj = IonObject(   RT.DataProduct,
                                                        name='L0_Temperature',
                                                        description='transform output temperature',
                                                        temporal_domain = tdom,
                                                        spatial_domain = sdom)

        ctd_l0_temperature_output_dp_id = self.dataproductclient.create_data_product(   ctd_l0_temperature_output_dp_obj,
                                                                                        outgoing_stream_l0_temperature_id,
                                                                                        parameter_dictionary)
        self.output_products['temperature'] = ctd_l0_temperature_output_dp_id
        #self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l0_temperature_output_dp_id)



        #-------------------------------
        # L0 Conductivity - Temperature - Pressure: Create the data process, listening to  Sim1   (later: logical instrument output product)
        #-------------------------------
        log.debug("test_deployAsPrimaryDevice: create L0 all data_process start")
        try:
            ctd_l0_all_data_process_id = self.dataprocessclient.create_data_process(ctd_L0_all_dprocdef_id, [ctd_parsed_data_product_year1], self.output_products)
            self.dataprocessclient.activate_data_process(ctd_l0_all_data_process_id)
        except BadRequest as ex:
            self.fail("failed to create new data process: %s" %ex)
        log.debug("test_deployAsPrimaryDevice: create L0 all data_process return")

        #--------------------------------
        # Activate the deployment
        #--------------------------------
        self.omsclient.activate_deployment(oldDeployment_id)
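        # Activating the deployment binds the deployed year-1 device to the site;
        # up to this point the resources were only created and associated.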


        #-------------------------------
        # Launch InstrumentAgentInstance Sim1, connect to the resource agent client
        #-------------------------------
        self.imsclient.start_instrument_agent_instance(instrument_agent_instance_id=oldInstAgentInstance_id)

        inst_agent1_instance_obj= self.imsclient.read_instrument_agent_instance(oldInstAgentInstance_id)
        print 'test_deployAsPrimaryDevice: Instrument agent instance obj: = ', inst_agent1_instance_obj

        # Start a resource agent client to talk with the instrument agent.
        self._ia_client_sim1 = ResourceAgentClient('iaclient Sim1', name=inst_agent1_instance_obj.agent_process_id,  process=FakeProcess())
        print 'test_deployAsPrimaryDevice: got _ia_client_sim1 %s' % self._ia_client_sim1
        log.debug(" test_deployAsPrimaryDevice:: got _ia_client_sim1 %s", str(self._ia_client_sim1))


        #-------------------------------
        # Launch InstrumentAgentInstance Sim2, connect to the resource agent client
        #-------------------------------
        self.imsclient.start_instrument_agent_instance(instrument_agent_instance_id=newInstAgentInstance_id)

        inst_agent2_instance_obj= self.imsclient.read_instrument_agent_instance(newInstAgentInstance_id)
        print 'test_deployAsPrimaryDevice: Instrument agent instance obj: = ', inst_agent2_instance_obj

        # Start a resource agent client to talk with the instrument agent.
        self._ia_client_sim2 = ResourceAgentClient('iaclient Sim2', name=inst_agent2_instance_obj.agent_process_id,  process=FakeProcess())
        print 'test_deployAsPrimaryDevice: got _ia_client_sim2 %s' % self._ia_client_sim2
        log.debug(" test_deployAsPrimaryDevice:: got _ia_client_sim2 %s", str(self._ia_client_sim2))


        #-------------------------------
        # Streaming Sim1 (old instrument)
        #-------------------------------

        cmd = AgentCommand(command='initialize')
        retval = self._ia_client_sim1.execute_agent(cmd)
        print retval
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim1 initialize %s", str(retval))

        time.sleep(2)

        cmd = AgentCommand(command='go_active')
        reply = self._ia_client_sim1.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim1 go_active %s", str(reply))
        time.sleep(2)

        cmd = AgentCommand(command='run')
        reply = self._ia_client_sim1.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim1 run %s", str(reply))
        time.sleep(2)

        log.debug("test_activateInstrument: calling go_streaming ")
        cmd = AgentCommand(command='go_streaming')
        reply = self._ia_client_sim1.execute(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim1 go_streaming %s", str(reply))




        #-------------------------------
        # Streaming Sim 2 (new instrument)
        #-------------------------------

        cmd = AgentCommand(command='initialize')
        retval = self._ia_client_sim2.execute_agent(cmd)
        print retval
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim2 initialize %s", str(retval))

        time.sleep(2)

        cmd = AgentCommand(command='go_active')
        reply = self._ia_client_sim2.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim2 go_active %s", str(reply))
        time.sleep(2)

        cmd = AgentCommand(command='run')
        reply = self._ia_client_sim2.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim2 run %s", str(reply))
        time.sleep(2)

        log.debug("test_activateInstrument: calling go_streaming ")
        cmd = AgentCommand(command='go_streaming')
        reply = self._ia_client_sim2.execute(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim2 go_streaming %s", str(reply))




        time.sleep(20)


        #-------------------------------
        # Shutdown Sim1 (old instrument)
        #-------------------------------
        log.debug("test_activateInstrument: calling go_observatory")
        cmd = AgentCommand(command='go_observatory')
        reply = self._ia_client_sim1.execute(cmd)
        log.debug("test_activateInstrument: _ia_client_sim1 return from go_observatory   %s", str(reply))
        time.sleep(5)


        log.debug("test_deployAsPrimaryDevice:: calling go_inactive ")
        cmd = AgentCommand(command='go_inactive')
        reply = self._ia_client_sim1.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim1 return from go_inactive %s", str(reply))
        time.sleep(2)

        log.debug("test_deployAsPrimaryDevice:: calling reset ")
        cmd = AgentCommand(command='reset')
        reply = self._ia_client_sim1.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim1 return from reset %s", str(reply))
        time.sleep(2)


        #-------------------------------
        # Shutdown Sim2 (new instrument)
        #-------------------------------
        log.debug("test_activateInstrument: calling go_observatory")
        cmd = AgentCommand(command='go_observatory')
        reply = self._ia_client_sim2.execute(cmd)
        log.debug("test_activateInstrument: _ia_client_sim2 return from go_observatory   %s", str(reply))
        time.sleep(8)


        log.debug("test_deployAsPrimaryDevice:: calling go_inactive ")
        cmd = AgentCommand(command='go_inactive')
        reply = self._ia_client_sim2.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim2 return from go_inactive %s", str(reply))
        time.sleep(2)





        log.debug("test_deployAsPrimaryDevice:: calling reset ")
        cmd = AgentCommand(command='reset')
        reply = self._ia_client_sim1.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim1 return from reset %s", str(reply))
        time.sleep(2)

        log.debug("test_deployAsPrimaryDevice:: calling reset ")
        cmd = AgentCommand(command='reset')
        reply = self._ia_client_sim2.execute_agent(cmd)
        log.debug("test_deployAsPrimaryDevice:: _ia_client_sim2 return from reset %s", str(reply))
        time.sleep(2)

        self.imsclient.stop_instrument_agent_instance(instrument_agent_instance_id=oldInstAgentInstance_id)
        self.imsclient.stop_instrument_agent_instance(instrument_agent_instance_id=newInstAgentInstance_id)

    def test_usgs_integration(self):
        '''
        test_usgs_integration
        Test full DM Services Integration using usgs
        '''
        cc = self.container
        assertions = self.assertTrue

        #-----------------------------
        # Copy below here
        #-----------------------------
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)
        transform_management_service = TransformManagementServiceClient(node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        process_list = []
        datasets = []

        datastore_name = 'test_usgs_integration'


        #---------------------------
        # Set up ingestion
        #---------------------------
        # Configure ingestion using eight workers, ingesting to the test_usgs_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,datastore_profile='SCIDATA'),
            number_of_workers=8
        )
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        usgs_stream_def = USGS_stream_definition()

        stream_def_id = pubsub_management_service.create_stream_definition(container=usgs_stream_def, name='Junk definition')


        #---------------------------
        # Set up the producers (USGS simulators)
        #---------------------------
        # Launch two simulated USGS producers
        for iteration in xrange(2):
            # Make a stream to output on

            stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)

            #---------------------------
            # Set up the datasets
            #---------------------------
            dataset_id = dataset_management_service.create_dataset(
                stream_id=stream_id,
                datastore_name=datastore_name,
                view_name='datasets/stream_join_granule'
            )
            # Keep track of the datasets
            datasets.append(dataset_id)

            stream_policy_id = ingestion_management_service.create_dataset_configuration(
                dataset_id = dataset_id,
                archive_data = True,
                archive_metadata = True,
                ingestion_configuration_id = ingestion_configuration_id
            )
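            # The dataset is now registered with the ingestion configuration, so
            # both granule data and metadata will be archived as they arrive.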


            producer_definition = ProcessDefinition()
            producer_definition.executable = {
                'module':'eoi.agent.handler.usgs_stream_publisher',
                'class':'UsgsPublisher'
            }
            configuration = {
                'process':{
                    'stream_id':stream_id,
                    }
            }
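            # Each spawned UsgsPublisher receives its stream_id through the process
            # configuration, so every producer publishes to its own stream.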
            procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)
            log.debug('LUKE_DEBUG: procdef_id: %s', procdef_id)
            pid = process_dispatcher.schedule_process(process_definition_id=procdef_id, configuration=configuration)


            # Keep track, we'll kill 'em later.
            process_list.append(pid)
        # Get about 4 seconds of data
        time.sleep(4)

        #---------------------------
        # Stop producing data
        #---------------------------

        for process in process_list:
            process_dispatcher.cancel_process(process)

        #----------------------------------------------
        # The replay and the transform, a love story.
        #----------------------------------------------
        # Happy Valentines to the clever coder who catches the above!

        transform_definition = ProcessDefinition()
        transform_definition.executable = {
            'module':'ion.processes.data.transforms.transform_example',
            'class':'TransformCapture'
        }
        transform_definition_id = process_dispatcher.create_process_definition(process_definition=transform_definition)

        dataset_id = datasets.pop() # Just need one for now
        replay_id, stream_id = data_retriever_service.define_replay(dataset_id=dataset_id)

        #--------------------------------------------
        # I'm Selling magazine subscriptions here!
        #--------------------------------------------

        subscription = pubsub_management_service.create_subscription(query=StreamQuery(stream_ids=[stream_id]),
            exchange_name='transform_capture_point')

        #--------------------------------------------
        # Start the transform (capture)
        #--------------------------------------------
        transform_id = transform_management_service.create_transform(
            name='capture_transform',
            in_subscription_id=subscription,
            process_definition_id=transform_definition_id
        )

        transform_management_service.activate_transform(transform_id=transform_id)

        #--------------------------------------------
        # BEGIN REPLAY!
        #--------------------------------------------

        data_retriever_service.start_replay(replay_id=replay_id)

        #--------------------------------------------
        # Let's get some boundaries
        #--------------------------------------------

        bounds = dataset_management_service.get_dataset_bounds(dataset_id=dataset_id)
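The excerpt above ends right after starting the replay, so the bounds it fetches may still be filling in. A minimal polling sketch a test could use before trusting them; the helper name, timeout, and interval here are assumptions, not part of the original suite:

import time

def wait_until(predicate, timeout=10.0, interval=0.5):
    # Poll predicate() until it returns something truthy or the timeout elapses.
    deadline = time.time() + timeout
    while time.time() < deadline:
        if predicate():
            return True
        time.sleep(interval)
    return False

# e.g. (assuming the bounds dict carries a 'time_bounds' key):
# wait_until(lambda: dataset_management_service.get_dataset_bounds(
#     dataset_id=dataset_id).get('time_bounds'))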
Example #17
class IngestionManagementIntTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.ingestion_management = IngestionManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.ingest_name = 'basic'
        self.exchange = 'testdata'

    @staticmethod
    def clean_subscriptions():
        ingestion_management = IngestionManagementServiceClient()
        pubsub = PubsubManagementServiceClient()
        rr = ResourceRegistryServiceClient()
        ingestion_config_ids = ingestion_management.list_ingestion_configurations(
            id_only=True)
        for ic in ingestion_config_ids:
            subscription_ids, assocs = rr.find_objects(
                subject=ic, predicate=PRED.hasSubscription, id_only=True)
            for subscription_id, assoc in zip(subscription_ids, assocs):
                rr.delete_association(assoc)
                try:
                    pubsub.deactivate_subscription(subscription_id)
                except Exception:
                    log.exception("Unable to deactivate subscription: %s",
                                  subscription_id)
                pubsub.delete_subscription(subscription_id)
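
    # Hedged usage note (an assumption, not part of the original suite): a test
    # could register this helper so stray subscriptions are torn down even when
    # an assertion fails, e.g.
    #     self.addCleanup(IngestionManagementIntTest.clean_subscriptions)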

    def create_ingest_config(self):
        self.queue = IngestionQueue(name='test', type='testdata')

        # Create the ingestion config
        ingestion_config_id = self.ingestion_management.create_ingestion_configuration(
            name=self.ingest_name,
            exchange_point_id=self.exchange,
            queues=[self.queue])
        return ingestion_config_id

    def test_ingestion_config_crud(self):
        ingestion_config_id = self.create_ingest_config()

        ingestion_config = self.ingestion_management.read_ingestion_configuration(
            ingestion_config_id)
        self.assertEqual(ingestion_config.name, self.ingest_name)
        self.assertEqual(ingestion_config.queues[0].name, 'test')
        self.assertEqual(ingestion_config.queues[0].type, 'testdata')

        ingestion_config.name = 'another'

        self.ingestion_management.update_ingestion_configuration(
            ingestion_config)

        # Create an association just to make sure that it will delete them
        sub = Subscription()
        sub_id, _ = self.resource_registry.create(sub)
        assoc_id, _ = self.resource_registry.create_association(
            subject=ingestion_config_id,
            predicate=PRED.hasSubscription,
            object=sub_id)

        self.ingestion_management.delete_ingestion_configuration(
            ingestion_config_id)

        with self.assertRaises(NotFound):
            self.resource_registry.read(assoc_id)

    def test_list_ingestion(self):

        # Create the ingest_config
        config_id = self.create_ingest_config()

        retval = self.ingestion_management.list_ingestion_configurations(
            id_only=True)
        # Membership (rather than equality with the full list) keeps this assertion valid even if the deployment adds its own ingest_config
        self.assertTrue(config_id in retval)
Example #18
    def test_dm_integration(self):
        '''
        test_dm_integration
        Test full DM Services Integration (CTD simulator -> ingestion -> salinity transform)
        '''
        cc = self.container
        assertions = self.assertTrue

        #-----------------------------
        # Copy below here to run as a script (don't forget the imports of course!)
        #-----------------------------

        # Create some service clients...
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(
            node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(
            node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)
        transform_management_service = TransformManagementServiceClient(
            node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        # declare some handy variables

        datastore_name = 'test_dm_integration'

        ###
        ### In the beginning there were two stream definitions...
        ###
        # create a stream definition for the data from the ctd simulator
        ctd_stream_def = SBE37_CDM_stream_definition()
        ctd_stream_def_id = pubsub_management_service.create_stream_definition(
            container=ctd_stream_def, name='Simulated CTD data')

        # create a stream definition for the data from the salinity Transform
        sal_stream_def_id = pubsub_management_service.create_stream_definition(
            container=SalinityTransform.outgoing_stream_def,
            name='Scalar Salinity data stream')

        ###
        ### And two process definitions...
        ###
        # one for the ctd simulator...
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.processes.data.ctd_stream_publisher',
            'class': 'SimpleCtdPublisher'
        }

        ctd_sim_procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)

        # one for the salinity transform
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'class': 'SalinityTransform'
        }

        salinity_transform_procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)

        #---------------------------
        # Set up ingestion - this is an operator concern - not done by SA in a deployed system
        #---------------------------
        # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,
                                       datastore_profile='SCIDATA'),
            number_of_workers=1)
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        #---------------------------
        # Set up the producer (CTD Simulator)
        #---------------------------

        # Create the stream
        ctd_stream_id = pubsub_management_service.create_stream(
            stream_definition_id=ctd_stream_def_id)

        # Set up the datasets
        ctd_dataset_id = dataset_management_service.create_dataset(
            stream_id=ctd_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule')

        # Configure ingestion of this dataset
        ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=ctd_dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
        )
        # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        #---------------------------
        # Set up the salinity transform
        #---------------------------

        # Create the stream
        sal_stream_id = pubsub_management_service.create_stream(
            stream_definition_id=sal_stream_def_id)

        # Set up the datasets
        sal_dataset_id = dataset_management_service.create_dataset(
            stream_id=sal_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule')

        # Configure ingestion of the salinity as a dataset
        sal_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=sal_dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
        )
        # Hold onto sal_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        # Create a subscription as input to the transform
        sal_transform_input_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery(stream_ids=[
                ctd_stream_id,
            ]),
            exchange_name='salinity_transform_input'
        )  # how do we make these names??? i.e. Should they be anonymous?

        # create the salinity transform
        sal_transform_id = transform_management_service.create_transform(
            name='example salinity transform',
            in_subscription_id=sal_transform_input_subscription_id,
            out_streams={
                'output': sal_stream_id,
            },
            process_definition_id=salinity_transform_procdef_id,
            # no configuration needed at this time...
        )
        # start the transform - for a test case it makes sense to do it before starting the producer but it is not required
        transform_management_service.activate_transform(
            transform_id=sal_transform_id)

        # Start the ctd simulator to produce some data
        configuration = {
            'process': {
                'stream_id': ctd_stream_id,
            }
        }
        ctd_sim_pid = process_dispatcher.schedule_process(
            process_definition_id=ctd_sim_procdef_id,
            configuration=configuration)

        ###
        ### Make a subscriber in the test to listen for salinity data
        ###
        salinity_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery([
                sal_stream_id,
            ]),
            exchange_name='salinity_test',
            name="test salinity subscription",
        )

        pid = cc.spawn_process(name='dummy_process_for_test',
                               module='pyon.ion.process',
                               cls='SimpleProcess',
                               config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process,
                                                         node=cc.node)

        result = gevent.event.AsyncResult()
        results = []

        def message_received(message, headers):
            # Count incoming salinity granules; signal success once more than three arrive
            log.warn('Salinity data received!')
            results.append(message)
            if len(results) > 3:
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(
            exchange_name='salinity_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        pubsub_management_service.activate_subscription(
            subscription_id=salinity_subscription_id)

        # Assert that we have received data
        assertions(result.get(timeout=10))

        # stop the flow and parse the messages...
        process_dispatcher.cancel_process(
            ctd_sim_pid
        )  # kill the ctd simulator process - that is enough data

        import numpy  # hoisted out of the per-message loop; needed for the array checks below

        for message in results:

            psd = PointSupplementStreamParser(
                stream_definition=SalinityTransform.outgoing_stream_def,
                stream_granule=message)

            # Test the handy info method for the names of fields in the stream def
            assertions('salinity' in psd.list_field_names())

            # you have to know the name of the coverage in stream def
            salinity = psd.get_values('salinity')

            assertions(isinstance(salinity, numpy.ndarray))

            assertions(numpy.nanmin(salinity) >
                       0.0)  # salinity should always be greater than 0
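The nan-aware minimum in the final assertion matters because granules can carry fill values for missing samples. A standalone sketch of the same check; the helper name is an assumption, not project API:

import numpy

def plausible_salinity(values):
    # NaNs mark missing samples, so use the nan-aware reduction
    # before insisting that practical salinity stays positive.
    arr = numpy.asarray(values, dtype=float)
    return bool(numpy.nanmin(arr) > 0.0)

assert plausible_salinity([34.5, float('nan'), 35.1])
assert not plausible_salinity([-1.0, 35.0])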
Example #19
    def test_usgs_integration(self):
        '''
        test_usgs_integration
        Test full DM Services Integration using usgs
        '''
        cc = self.container
        assertions = self.assertTrue

        #-----------------------------
        # Copy below here
        #-----------------------------
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)
        transform_management_service = TransformManagementServiceClient(node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        process_list = []
        datasets = []

        datastore_name = 'test_usgs_integration'


        #---------------------------
        # Set up ingestion
        #---------------------------
        # Configure ingestion using eight workers, ingesting to the test_usgs_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,datastore_profile='SCIDATA'),
            number_of_workers=8
        )
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        usgs_stream_def = USGS_stream_definition()

        stream_def_id = pubsub_management_service.create_stream_definition(container=usgs_stream_def, name='Junk definition')


        #---------------------------
        # Set up the producers (USGS publishers)
        #---------------------------
        # Launch two simulated USGS producers, one per loop iteration
        for iteration in xrange(2):
            # Make a stream to output on

            stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)

            #---------------------------
            # Set up the datasets
            #---------------------------
            dataset_id = dataset_management_service.create_dataset(
                stream_id=stream_id,
                datastore_name=datastore_name,
                view_name='datasets/stream_join_granule'
            )
            # Keep track of the datasets
            datasets.append(dataset_id)

            stream_policy_id = ingestion_management_service.create_dataset_configuration(
                dataset_id=dataset_id,
                archive_data=True,
                archive_metadata=True,
                ingestion_configuration_id=ingestion_configuration_id
            )


            producer_definition = ProcessDefinition()
            producer_definition.executable = {
                'module':'ion.agents.eoi.handler.usgs_stream_publisher',
                'class':'UsgsPublisher'
            }
            configuration = {
                'process':{
                    'stream_id':stream_id,
                    }
            }
            procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)
            log.debug('LUKE_DEBUG: procdef_id: %s', procdef_id)
            pid = process_dispatcher.schedule_process(process_definition_id=procdef_id, configuration=configuration)


            # Keep track, we'll kill 'em later.
            process_list.append(pid)

        # Let the producers run long enough to get about 4 seconds of data
        time.sleep(4)

        #---------------------------
        # Stop producing data
        #---------------------------

        for process in process_list:
            process_dispatcher.cancel_process(process)

        #----------------------------------------------
        # The replay and the transform, a love story.
        #----------------------------------------------
        # Happy Valentine's to the clever coder who catches the above!

        transform_definition = ProcessDefinition()
        transform_definition.executable = {
            'module':'ion.processes.data.transforms.transform_example',
            'class':'TransformCapture'
        }
        transform_definition_id = process_dispatcher.create_process_definition(process_definition=transform_definition)

        dataset_id = datasets.pop() # Just need one for now
        replay_id, stream_id = data_retriever_service.define_replay(dataset_id=dataset_id)

        #--------------------------------------------
        # I'm selling magazine subscriptions here!
        #--------------------------------------------

        subscription = pubsub_management_service.create_subscription(query=StreamQuery(stream_ids=[stream_id]),
            exchange_name='transform_capture_point')

        #--------------------------------------------
        # Start the transform (capture)
        #--------------------------------------------
        transform_id = transform_management_service.create_transform(
            name='capture_transform',
            in_subscription_id=subscription,
            process_definition_id=transform_definition_id
        )

        transform_management_service.activate_transform(transform_id=transform_id)

        #--------------------------------------------
        # BEGIN REPLAY!
        #--------------------------------------------

        data_retriever_service.start_replay(replay_id=replay_id)

        #--------------------------------------------
        # Let's get some boundaries
        #--------------------------------------------

        bounds = dataset_management_service.get_dataset_bounds(dataset_id=dataset_id)
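Both USGS excerpts repeat the same schedule/sleep/cancel dance for their publishers. A hedged refactoring sketch of that loop; the helper and its defaults are assumptions, not project API:

import time

def run_publishers_briefly(process_dispatcher, procdef_id, stream_ids, seconds=4):
    # Schedule one publisher per stream, let them produce for a few seconds,
    # then cancel them all; the pids are returned for bookkeeping.
    pids = [process_dispatcher.schedule_process(
                process_definition_id=procdef_id,
                configuration={'process': {'stream_id': sid}})
            for sid in stream_ids]
    time.sleep(seconds)
    for pid in pids:
        process_dispatcher.cancel_process(pid)
    return pids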
Example #20
    def test_raw_stream_integration(self):
        cc = self.container
        assertions = self.assertTrue

        # -----------------------------
        # Copy below here to run as a script (don't forget the imports of course!)
        # -----------------------------

        # Create some service clients...
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        # declare some handy variables

        datastore_name = "test_dm_integration"
        datastore = cc.datastore_manager.get_datastore(datastore_name, profile=DataStore.DS_PROFILE.SCIDATA)

        ###
        ### One process definition, for the example data producer...
        ###
        producer_definition = ProcessDefinition(name="Example Data Producer")
        producer_definition.executable = {
            "module": "ion.processes.data.example_data_producer",
            "class": "ExampleDataProducer",
        }

        producer_procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)

        # ---------------------------
        # Set up ingestion - this is an operator concern - not done by SA in a deployed system
        # ---------------------------
        # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
        log.debug("Calling create_ingestion_configuration")
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id="science_data",
            couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile="SCIDATA"),
            number_of_workers=1,
        )
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id
        )

        # ---------------------------
        # Set up the producer (Example Data Producer)
        # ---------------------------

        # Create the stream
        stream_id = pubsub_management_service.create_stream(name="A data stream")

        # Set up the datasets
        dataset_id = dataset_management_service.create_dataset(
            stream_id=stream_id, datastore_name=datastore_name, view_name="Undefined!"
        )

        # Configure ingestion of this dataset
        dataset_ingest_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
        )
        # Hold onto dataset_ingest_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        # Start the example data producer to generate some data
        configuration = {"process": {"stream_id": stream_id}}
        producer_pid = process_dispatcher.schedule_process(
            process_definition_id=producer_procdef_id, configuration=configuration
        )

        found = False
        processes = cc.proc_manager.procs.values()
        for proc in processes:
            if isinstance(proc, IngestionWorker):
                found = True
                break
        self.assertTrue(found, "%s" % cc.proc_manager.procs)

        done = False
        while not done:
            results = datastore.query_view("manifest/by_dataset")
            if len(results) >= 5:
                done = True
            else:
                time.sleep(0.5)  # avoid a busy-wait while ingestion catches up