Ejemplo n.º 1
0
    def setUp(self):
        # Start container

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.pubsubclient = PubsubManagementServiceClient(
            node=self.container.node)
        self.ingestclient = IngestionManagementServiceClient(
            node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(
            node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(
            node=self.container.node)
        self.datasetclient = DatasetManagementServiceClient(
            node=self.container.node)
        self.workflowclient = WorkflowManagementServiceClient(
            node=self.container.node)
        self.process_dispatcher = ProcessDispatcherServiceClient(
            node=self.container.node)

        self.ctd_stream_def = SBE37_CDM_stream_definition()
    def setUp(self):
        # Start container
        #print 'instantiating container'
        self._start_container()
        #container = Container()
        #print 'starting container'
        #container.start()
        #print 'started container

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        print 'started services'

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.pubsubclient = PubsubManagementServiceClient(
            node=self.container.node)
        self.ingestclient = IngestionManagementServiceClient(
            node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(
            node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(
            node=self.container.node)
        self.datasetclient = DatasetManagementServiceClient(
            node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(
            node=self.container.node)
        self.dataset_management = self.datasetclient
Ejemplo n.º 3
0
    def setUp(self):
        # Start container
        #print 'instantiating container'
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.pubsubclient = PubsubManagementServiceClient(
            node=self.container.node)
        self.ingestclient = IngestionManagementServiceClient(
            node=self.container.node)
        self.dpmsclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(
            node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(
            node=self.container.node)
        self.omsclient = ObservatoryManagementServiceClient(
            node=self.container.node)
        self.process_dispatcher = ProcessDispatcherServiceClient()

        self.dataset_management = DatasetManagementServiceClient()

        # deactivate all data processes when tests are complete
        def killAllDataProcesses():
            for proc_id in self.rrclient.find_resources(
                    RT.DataProcess, None, None, True)[0]:
                self.dataprocessclient.deactivate_data_process(proc_id)
                self.dataprocessclient.delete_data_process(proc_id)

        self.addCleanup(killAllDataProcesses)
    def setUp(self):
        # Start container
        #print 'instantiating container'
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.pubsubclient =  PubsubManagementServiceClient(node=self.container.node)
        self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
        self.dpmsclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
        self.omsclient = ObservatoryManagementServiceClient(node=self.container.node)
        self.process_dispatcher   = ProcessDispatcherServiceClient()

        self.dataset_management = DatasetManagementServiceClient()

        # create missing data process definition
        dpd_obj = IonObject(RT.DataProcessDefinition,
                            name=LOGICAL_TRANSFORM_DEFINITION_NAME,
                            description="normally in preload",
                            module='ion.processes.data.transforms.logical_transform',
                            class_name='logical_transform')
        self.dataprocessclient.create_data_process_definition(dpd_obj)

        # deactivate all data processes when tests are complete
        def killAllDataProcesses():
            for proc_id in self.rrclient.find_resources(RT.DataProcess, None, None, True)[0]:
                self.dataprocessclient.deactivate_data_process(proc_id)
                self.dataprocessclient.delete_data_process(proc_id)
        self.addCleanup(killAllDataProcesses)
    def setUp(self):
        # Start container
        #print 'instantiating container'
        self._start_container()
        #container = Container()
        #print 'starting container'
        #container.start()
        #print 'started container'

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        print 'started services'

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.pubsubcli = PubsubManagementServiceClient(
            node=self.container.node)
        self.ingestclient = IngestionManagementServiceClient(
            node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(
            node=self.container.node)
        self.dpclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.datasetclient = DatasetManagementServiceClient(
            node=self.container.node)

        #setup listerner vars
        self._data_greenlets = []
        self._no_samples = None
        self._samples_received = []
Ejemplo n.º 6
0
    def setUp(self):
        # Start container
        #print 'instantiating container'
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dpsc_cli = DataProductManagementServiceClient()
        self.rrclient = ResourceRegistryServiceClient()
        self.damsclient = DataAcquisitionManagementServiceClient()
        self.pubsubcli = PubsubManagementServiceClient()
        self.ingestclient = IngestionManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.unsc = UserNotificationServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        #------------------------------------------
        # Create the environment
        #------------------------------------------

        datastore_name = CACHE_DATASTORE_NAME
        self.db = self.container.datastore_manager.get_datastore(
            datastore_name)
        self.stream_def_id = self.pubsubcli.create_stream_definition(
            name='SBE37_CDM')

        self.process_definitions = {}
        ingestion_worker_definition = ProcessDefinition(
            name='ingestion worker')
        ingestion_worker_definition.executable = {
            'module':
            'ion.processes.data.ingestion.science_granule_ingestion_worker',
            'class': 'ScienceGranuleIngestionWorker'
        }
        process_definition_id = self.process_dispatcher.create_process_definition(
            process_definition=ingestion_worker_definition)
        self.process_definitions['ingestion_worker'] = process_definition_id

        self.pids = []
        self.exchange_points = []
        self.exchange_names = []

        #------------------------------------------------------------------------------------------------
        # First launch the ingestors
        #------------------------------------------------------------------------------------------------
        self.exchange_space = 'science_granule_ingestion'
        self.exchange_point = 'science_data'
        config = DotDict()
        config.process.datastore_name = 'datasets'
        config.process.queue_name = self.exchange_space

        self.exchange_names.append(self.exchange_space)
        self.exchange_points.append(self.exchange_point)

        pid = self.process_dispatcher.schedule_process(
            self.process_definitions['ingestion_worker'], configuration=config)
        log.debug("the ingestion worker process id: %s", pid)
        self.pids.append(pid)
Ejemplo n.º 7
0
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.ingestion_management = IngestionManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.ingest_name = 'basic'
        self.exchange = 'testdata'
    def __init__(self):
        self.ingestion_management = IngestionManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self._paused_streams = []
        self._w_covs = {}
        self._ro_covs = {}

        self._context_managed = False
Ejemplo n.º 9
0
    def setUp(self):
        import couchdb
        super(DatasetManagementIntTest,self).setUp()
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2dm.yml')


        self.db = self.container.datastore_manager.get_datastore('scidata', DataStore.DS_PROFILE.SCIDATA)
        self.db_raw = self.db.server

        self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node)
        self.ingestion_client = IngestionManagementServiceClient(node=self.container.node)
Ejemplo n.º 10
0
 def clean_subscriptions():
     ingestion_management = IngestionManagementServiceClient()
     pubsub = PubsubManagementServiceClient()
     rr     = ResourceRegistryServiceClient()
     ingestion_config_ids = ingestion_management.list_ingestion_configurations(id_only=True)
     for ic in ingestion_config_ids:
         subscription_ids, assocs = rr.find_objects(subject=ic, predicate=PRED.hasSubscription, id_only=True)
         for subscription_id, assoc in zip(subscription_ids, assocs):
             rr.delete_association(assoc)
             try:
                 pubsub.deactivate_subscription(subscription_id)
             except:
                 log.exception("Unable to decativate subscription: %s", subscription_id)
             pubsub.delete_subscription(subscription_id)
    def setUp(self):
        # Start container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.pubsubclient =  PubsubManagementServiceClient(node=self.container.node)
        self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.datasetclient =  DatasetManagementServiceClient(node=self.container.node)
Ejemplo n.º 12
0
    def setUp(self): # Love the non pep-8 convention
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.process_dispatcher   = ProcessDispatcherServiceClient()
        self.pubsub_management    = PubsubManagementServiceClient()
        self.resource_registry    = ResourceRegistryServiceClient()
        self.dataset_management   = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever       = DataRetrieverServiceClient()
        self.event                = Event()
        self.exchange_space_name  = 'test_granules'
        self.exchange_point_name  = 'science_data'       
        self.i                    = 0
        self.cci                  = 0
Ejemplo n.º 13
0
    def setUp(self):
        # Start container
        #print 'instantiating container'
        self._start_container()

        log.debug("Start rel from url")
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.DPMS = DataProductManagementServiceClient()
        self.RR = ResourceRegistryServiceClient()
        self.RR2 = EnhancedResourceRegistryClient(self.RR)
        self.DAMS = DataAcquisitionManagementServiceClient()
        self.PSMS = PubsubManagementServiceClient()
        self.ingestclient = IngestionManagementServiceClient()
        self.PD = ProcessDispatcherServiceClient()
        self.DSMS = DatasetManagementServiceClient()
        self.unsc = UserNotificationServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        #------------------------------------------
        # Create the environment
        #------------------------------------------
        log.debug("get datastore")
        datastore_name = CACHE_DATASTORE_NAME
        self.db = self.container.datastore_manager.get_datastore(
            datastore_name)
        self.stream_def_id = self.PSMS.create_stream_definition(
            name='SBE37_CDM')

        self.process_definitions = {}
        ingestion_worker_definition = ProcessDefinition(
            name='ingestion worker')
        ingestion_worker_definition.executable = {
            'module':
            'ion.processes.data.ingestion.science_granule_ingestion_worker',
            'class': 'ScienceGranuleIngestionWorker'
        }
        process_definition_id = self.PD.create_process_definition(
            process_definition=ingestion_worker_definition)
        self.process_definitions['ingestion_worker'] = process_definition_id

        self.pids = []
        self.exchange_points = []
        self.exchange_names = []

        self.addCleanup(self.cleaning_up)
Ejemplo n.º 14
0
    def setUp(self):
        # Start container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        log.debug("TestExternalDatasetAgentMgmt: started services")

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.pubsubcli = PubsubManagementServiceClient(
            node=self.container.node)
        self.ingestclient = IngestionManagementServiceClient(
            node=self.container.node)
        self.dpclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.datasetclient = DatasetManagementServiceClient(
            node=self.container.node)
Ejemplo n.º 15
0
    def on_start(self):
        super(IngestionLauncher, self).on_start()

        exchange_point = self.CFG.get_safe('ingestion.exchange_point',
                                           'science_data')
        couch_opts = self.CFG.get_safe('ingestion.couch_storage', {})
        couch_storage = CouchStorage(**couch_opts)
        hdf_opts = self.CFG.get_safe('ingestion.hdf_storage', {})
        hdf_storage = HdfStorage(**hdf_opts)
        number_of_workers = self.CFG.get_safe('ingestion.number_of_workers', 2)

        ingestion_management_service = IngestionManagementServiceClient(
            node=self.container.node)
        ingestion_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id=exchange_point,
            couch_storage=couch_storage,
            hdf_storage=hdf_storage,
            number_of_workers=number_of_workers)
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_id)
Ejemplo n.º 16
0
 def setUp(self):
     # Start container
     self._start_container()
     self.container.start_rel_from_url('res/deploy/r2deploy.yml')
     self.ingestion_management = IngestionManagementServiceClient()
     self.rr = self.container.resource_registry
Ejemplo n.º 17
0
    def test_usgs_integration(self):
        '''
        test_usgs_integration
        Test full DM Services Integration using usgs
        '''
        cc = self.container
        assertions = self.assertTrue

        #-----------------------------
        # Copy below here
        #-----------------------------
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)
        transform_management_service = TransformManagementServiceClient(node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        process_list = []
        datasets = []

        datastore_name = 'test_usgs_integration'


        #---------------------------
        # Set up ingestion
        #---------------------------
        # Configure ingestion using eight workers, ingesting to test_dm_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,datastore_profile='SCIDATA'),
            number_of_workers=8
        )
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        usgs_stream_def = USGS_stream_definition()

        stream_def_id = pubsub_management_service.create_stream_definition(container=usgs_stream_def, name='Junk definition')


        #---------------------------
        # Set up the producers (CTD Simulators)
        #---------------------------
        # Launch five simulated CTD producers
        for iteration in xrange(2):
            # Make a stream to output on

            stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)

            #---------------------------
            # Set up the datasets
            #---------------------------
            dataset_id = dataset_management_service.create_dataset(
                stream_id=stream_id,
                datastore_name=datastore_name,
                view_name='datasets/stream_join_granule'
            )
            # Keep track of the datasets
            datasets.append(dataset_id)

            stream_policy_id = ingestion_management_service.create_dataset_configuration(
                dataset_id = dataset_id,
                archive_data = True,
                archive_metadata = True,
                ingestion_configuration_id = ingestion_configuration_id
            )


            producer_definition = ProcessDefinition()
            producer_definition.executable = {
                'module':'eoi.agent.handler.usgs_stream_publisher',
                'class':'UsgsPublisher'
            }
            configuration = {
                'process':{
                    'stream_id':stream_id,
                    }
            }
            procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)
            log.debug('LUKE_DEBUG: procdef_id: %s', procdef_id)
            pid = process_dispatcher.schedule_process(process_definition_id=procdef_id, configuration=configuration)


            # Keep track, we'll kill 'em later.
            process_list.append(pid)
            # Get about 4 seconds of data
        time.sleep(4)

        #---------------------------
        # Stop producing data
        #---------------------------

        for process in process_list:
            process_dispatcher.cancel_process(process)

        #----------------------------------------------
        # The replay and the transform, a love story.
        #----------------------------------------------
        # Happy Valentines to the clever coder who catches the above!

        transform_definition = ProcessDefinition()
        transform_definition.executable = {
            'module':'ion.processes.data.transforms.transform_example',
            'class':'TransformCapture'
        }
        transform_definition_id = process_dispatcher.create_process_definition(process_definition=transform_definition)

        dataset_id = datasets.pop() # Just need one for now
        replay_id, stream_id = data_retriever_service.define_replay(dataset_id=dataset_id)

        #--------------------------------------------
        # I'm Selling magazine subscriptions here!
        #--------------------------------------------

        subscription = pubsub_management_service.create_subscription(query=StreamQuery(stream_ids=[stream_id]),
            exchange_name='transform_capture_point')

        #--------------------------------------------
        # Start the transform (capture)
        #--------------------------------------------
        transform_id = transform_management_service.create_transform(
            name='capture_transform',
            in_subscription_id=subscription,
            process_definition_id=transform_definition_id
        )

        transform_management_service.activate_transform(transform_id=transform_id)

        #--------------------------------------------
        # BEGIN REPLAY!
        #--------------------------------------------

        data_retriever_service.start_replay(replay_id=replay_id)

        #--------------------------------------------
        # Lets get some boundaries
        #--------------------------------------------

        bounds = dataset_management_service.get_dataset_bounds(dataset_id=dataset_id)
    def test_replay_integration(self):
        '''
        test_replay_integration
        '''
        import numpy as np
        # Keep the import it's used in the vector comparison below even though pycharm says its unused.

        cc = self.container
        XP = self.XP
        assertions = self.assertTrue

        ### Every thing below here can be run as a script:
        log.debug('Got it')

        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(
            node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(
            node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)

        datastore_name = 'dm_test_replay_integration'

        producer = Publisher(name=(XP, 'stream producer'))

        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id=XP,
            couch_storage=CouchStorage(datastore_name=datastore_name,
                                       datastore_profile='SCIDATA'),
            hdf_storage=HdfStorage(),
            number_of_workers=1)

        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        definition = SBE37_CDM_stream_definition()
        data_stream_id = definition.data_stream_id
        encoding_id = definition.identifiables[data_stream_id].encoding_id
        element_count_id = definition.identifiables[
            data_stream_id].element_count_id

        stream_def_id = pubsub_management_service.create_stream_definition(
            container=definition)
        stream_id = pubsub_management_service.create_stream(
            stream_definition_id=stream_def_id)

        dataset_id = dataset_management_service.create_dataset(
            stream_id=stream_id,
            datastore_name=datastore_name,
            view_name='datasets/dataset_by_id')
        ingestion_management_service.create_dataset_configuration(
            dataset_id=dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id)
        definition.stream_resource_id = stream_id

        packet = _create_packet(definition)
        input_file = FileSystem.mktemp()
        input_file.write(packet.identifiables[data_stream_id].values)
        input_file_path = input_file.name
        input_file.close()

        fields = [
            'conductivity', 'height', 'latitude', 'longitude', 'pressure',
            'temperature', 'time'
        ]

        input_vectors = acquire_data([input_file_path], fields, 2).next()

        producer.publish(msg=packet, to_name=(XP, '%s.data' % stream_id))

        replay_id, replay_stream_id = data_retriever_service.define_replay(
            dataset_id)
        ar = gevent.event.AsyncResult()

        def sub_listen(msg, headers):

            assertions(isinstance(msg, StreamGranuleContainer),
                       'replayed message is not a granule.')
            hdf_string = msg.identifiables[data_stream_id].values
            sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
            assertions(sha1 == msg.identifiables[encoding_id].sha1,
                       'Checksum failed.')
            assertions(
                msg.identifiables[element_count_id].value == 1,
                'record replay count is incorrect %d.' %
                msg.identifiables[element_count_id].value)
            output_file = FileSystem.mktemp()
            output_file.write(msg.identifiables[data_stream_id].values)
            output_file_path = output_file.name
            output_file.close()
            output_vectors = acquire_data([output_file_path], fields, 2).next()
            for field in fields:
                comparison = (input_vectors[field]['values'] ==
                              output_vectors[field]['values'])
                assertions(
                    comparison.all(), 'vector mismatch: %s vs %s' %
                    (input_vectors[field]['values'],
                     output_vectors[field]['values']))
            FileSystem.unlink(output_file_path)
            ar.set(True)

        subscriber = Subscriber(name=(XP, 'replay listener'),
                                callback=sub_listen)

        g = gevent.Greenlet(subscriber.listen,
                            binding='%s.data' % replay_stream_id)
        g.start()

        data_retriever_service.start_replay(replay_id)

        ar.get(timeout=10)

        FileSystem.unlink(input_file_path)
Ejemplo n.º 19
0
    def test_raw_stream_integration(self):
        cc = self.container
        assertions = self.assertTrue

        #-----------------------------
        # Copy below here to run as a script (don't forget the imports of course!)
        #-----------------------------

        # Create some service clients...
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(
            node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(
            node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        # declare some handy variables

        datastore_name = 'test_dm_integration'

        ###
        ### In the beginning there was one stream definitions...
        ###
        # create a stream definition for the data from the ctd simulator
        raw_ctd_stream_def = SBE37_RAW_stream_definition()
        raw_ctd_stream_def_id = pubsub_management_service.create_stream_definition(
            container=raw_ctd_stream_def, name='Simulated RAW CTD data')

        ###
        ### And two process definitions...
        ###
        # one for the ctd simulator...
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.processes.data.raw_stream_publisher',
            'class': 'RawStreamPublisher'
        }

        raw_ctd_sim_procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)

        #---------------------------
        # Set up ingestion - this is an operator concern - not done by SA in a deployed system
        #---------------------------
        # Configure ingestion using eight workers, ingesting to test_dm_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,
                                       datastore_profile='SCIDATA'),
            number_of_workers=1)
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        #---------------------------
        # Set up the producer (CTD Simulator)
        #---------------------------

        # Create the stream
        raw_ctd_stream_id = pubsub_management_service.create_stream(
            stream_definition_id=raw_ctd_stream_def_id)

        # Set up the datasets
        raw_ctd_dataset_id = dataset_management_service.create_dataset(
            stream_id=raw_ctd_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule')

        # Configure ingestion of this dataset
        raw_ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=raw_ctd_dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=
            ingestion_configuration_id,  # you need to know the ingestion configuration id!
        )
        # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        # Start the ctd simulator to produce some data
        configuration = {
            'process': {
                'stream_id': raw_ctd_stream_id,
            }
        }
        raw_sim_pid = process_dispatcher.schedule_process(
            process_definition_id=raw_ctd_sim_procdef_id,
            configuration=configuration)

        ###
        ### Make a subscriber in the test to listen for salinity data
        ###
        raw_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery([
                raw_ctd_stream_id,
            ]),
            exchange_name='raw_test',
            name="test raw subscription",
        )

        # this is okay - even in cei mode!
        pid = cc.spawn_process(name='dummy_process_for_test',
                               module='pyon.ion.process',
                               cls='SimpleProcess',
                               config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process,
                                                         node=cc.node)

        result = gevent.event.AsyncResult()
        results = []

        def message_received(message, headers):
            # Heads
            log.warn('Raw data received!')
            results.append(message)
            if len(results) > 3:
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(
            exchange_name='raw_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        pubsub_management_service.activate_subscription(
            subscription_id=raw_subscription_id)

        # Assert that we have received data
        assertions(result.get(timeout=10))

        # stop the flow parse the messages...
        process_dispatcher.cancel_process(
            raw_sim_pid
        )  # kill the ctd simulator process - that is enough data

        gevent.sleep(1)

        for message in results:

            sha1 = message.identifiables['stream_encoding'].sha1

            data = message.identifiables['data_stream'].values

            filename = FileSystem.get_hierarchical_url(FS.CACHE, sha1, ".raw")

            with open(filename, 'r') as f:

                assertions(data == f.read())
Ejemplo n.º 20
0
    def on_start(self):

        log.debug("VizStreamProducer start")
        self.data_source_name = self.CFG.get('name')
        self.dataset = self.CFG.get('dataset')

        # create a pubsub client and a resource registry client
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.pubsubclient = PubsubManagementServiceClient(
            node=self.container.node)

        # Dummy instrument related clients
        self.imsclient = InstrumentManagementServiceClient(
            node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.dpclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.IngestClient = IngestionManagementServiceClient(
            node=self.container.node)

        # create the pubsub client
        self.pubsubclient = PubsubManagementServiceClient(
            node=self.container.node)

        # Additional code for creating a dummy instrument
        """
        # Set up the preconditions. Look for an existing ingestion config
        while True:
            log.info("VisStreamLauncher:on_start: Waiting for an ingestion configuration to be available.")
            ingestion_cfgs, _ = self.rrclient.find_resources(RT.IngestionConfiguration, None, None, True)

            if len(ingestion_cfgs) > 0:
                break
            else:
                gevent.sleep(1)
        """

        # Check to see if the data_product already exists in the system (for e.g re launching the code after a crash)
        dp_ids, _ = self.rrclient.find_resources(RT.DataProduct, None,
                                                 self.data_source_name, True)
        if len(dp_ids) > 0:
            data_product_id = dp_ids[0]
            print '>>>>>>>>>>>>> Found dp_id = ', data_product_id
        else:
            # Create InstrumentModel
            instModel_obj = IonObject(RT.InstrumentModel,
                                      name=self.data_source_name,
                                      description=self.data_source_name,
                                      model_label=self.data_source_name)
            instModel_id = self.imsclient.create_instrument_model(
                instModel_obj)

            # Create InstrumentDevice
            instDevice_obj = IonObject(RT.InstrumentDevice,
                                       name=self.data_source_name,
                                       description=self.data_source_name,
                                       serial_number="12345")

            instDevice_id = self.imsclient.create_instrument_device(
                instrument_device=instDevice_obj)
            self.imsclient.assign_instrument_model_to_instrument_device(
                instModel_id, instDevice_id)

            # create a stream definition for the data from the ctd simulator
            ctd_stream_def = SBE37_CDM_stream_definition()
            ctd_stream_def_id = self.pubsubclient.create_stream_definition(
                container=ctd_stream_def)

            print 'Creating new CDM data product with a stream definition'
            dp_obj = IonObject(RT.DataProduct,
                               name=self.data_source_name,
                               description='ctd stream test')
            data_product_id = self.dpclient.create_data_product(
                dp_obj, ctd_stream_def_id)

            self.damsclient.assign_data_product(
                input_resource_id=instDevice_id,
                data_product_id=data_product_id)
            self.dpclient.activate_data_product_persistence(
                data_product_id=data_product_id,
                persist_data=True,
                persist_metadata=True)

            print '>>>>>>>>>>>> New dp_id = ', data_product_id

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id,
                                                   PRED.hasStream, None, True)

        if self.dataset == 'sinusoidal':
            pid = self.container.spawn_process(
                name='ctd_test.' + self.data_source_name,
                module='ion.processes.data.sinusoidal_stream_publisher',
                cls='SinusoidalCtdPublisher',
                config={'process': {
                    'stream_id': stream_ids[0]
                }})
        else:
            pid = self.container.spawn_process(
                name='ctd_test.' + self.data_source_name,
                module='ion.processes.data.ctd_stream_publisher',
                cls='SimpleCtdPublisher',
                config={'process': {
                    'stream_id': stream_ids[0]
                }})
Ejemplo n.º 21
0
    def test_dm_integration(self):
        '''
        test_salinity_transform
        Test full DM Services Integration
        '''
        cc = self.container
        assertions = self.assertTrue

        #-----------------------------
        # Copy below here to run as a script (don't forget the imports of course!)
        #-----------------------------

        # Create some service clients...
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(
            node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(
            node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)
        transform_management_service = TransformManagementServiceClient(
            node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        # declare some handy variables

        datastore_name = 'test_dm_integration'

        ###
        ### In the beginning there were two stream definitions...
        ###
        # create a stream definition for the data from the ctd simulator
        ctd_stream_def = SBE37_CDM_stream_definition()
        ctd_stream_def_id = pubsub_management_service.create_stream_definition(
            container=ctd_stream_def, name='Simulated CTD data')

        # create a stream definition for the data from the salinity Transform
        sal_stream_def_id = pubsub_management_service.create_stream_definition(
            container=SalinityTransform.outgoing_stream_def,
            name='Scalar Salinity data stream')

        ###
        ### And two process definitions...
        ###
        # one for the ctd simulator...
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.processes.data.ctd_stream_publisher',
            'class': 'SimpleCtdPublisher'
        }

        ctd_sim_procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)

        # one for the salinity transform
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'class': 'SalinityTransform'
        }

        salinity_transform_procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)

        #---------------------------
        # Set up ingestion - this is an operator concern - not done by SA in a deployed system
        #---------------------------
        # Configure ingestion using eight workers, ingesting to test_dm_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,
                                       datastore_profile='SCIDATA'),
            number_of_workers=1)
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        #---------------------------
        # Set up the producer (CTD Simulator)
        #---------------------------

        # Create the stream
        ctd_stream_id = pubsub_management_service.create_stream(
            stream_definition_id=ctd_stream_def_id)

        # Set up the datasets
        ctd_dataset_id = dataset_management_service.create_dataset(
            stream_id=ctd_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule')

        # Configure ingestion of this dataset
        ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=ctd_dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=
            ingestion_configuration_id,  # you need to know the ingestion configuration id!
        )
        # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        #---------------------------
        # Set up the salinity transform
        #---------------------------

        # Create the stream
        sal_stream_id = pubsub_management_service.create_stream(
            stream_definition_id=sal_stream_def_id)

        # Set up the datasets
        sal_dataset_id = dataset_management_service.create_dataset(
            stream_id=sal_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule')

        # Configure ingestion of the salinity as a dataset
        sal_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=sal_dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=
            ingestion_configuration_id,  # you need to know the ingestion configuration id!
        )
        # Hold onto sal_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        # Create a subscription as input to the transform
        sal_transform_input_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery(stream_ids=[
                ctd_stream_id,
            ]),
            exchange_name='salinity_transform_input'
        )  # how do we make these names??? i.e. Should they be anonymous?

        # create the salinity transform
        sal_transform_id = transform_management_service.create_transform(
            name='example salinity transform',
            in_subscription_id=sal_transform_input_subscription_id,
            out_streams={
                'output': sal_stream_id,
            },
            process_definition_id=salinity_transform_procdef_id,
            # no configuration needed at this time...
        )
        # start the transform - for a test case it makes sense to do it before starting the producer but it is not required
        transform_management_service.activate_transform(
            transform_id=sal_transform_id)

        # Start the ctd simulator to produce some data
        configuration = {
            'process': {
                'stream_id': ctd_stream_id,
            }
        }
        ctd_sim_pid = process_dispatcher.schedule_process(
            process_definition_id=ctd_sim_procdef_id,
            configuration=configuration)

        ###
        ### Make a subscriber in the test to listen for salinity data
        ###
        salinity_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery([
                sal_stream_id,
            ]),
            exchange_name='salinity_test',
            name="test salinity subscription",
        )

        pid = cc.spawn_process(name='dummy_process_for_test',
                               module='pyon.ion.process',
                               cls='SimpleProcess',
                               config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process,
                                                         node=cc.node)

        result = gevent.event.AsyncResult()
        results = []

        def message_received(message, headers):
            # Heads
            log.warn('Salinity data received!')
            results.append(message)
            if len(results) > 3:
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(
            exchange_name='salinity_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        pubsub_management_service.activate_subscription(
            subscription_id=salinity_subscription_id)

        # Assert that we have received data
        assertions(result.get(timeout=10))

        # stop the flow parse the messages...
        process_dispatcher.cancel_process(
            ctd_sim_pid
        )  # kill the ctd simulator process - that is enough data

        for message in results:

            psd = PointSupplementStreamParser(
                stream_definition=SalinityTransform.outgoing_stream_def,
                stream_granule=message)

            # Test the handy info method for the names of fields in the stream def
            assertions('salinity' in psd.list_field_names())

            # you have to know the name of the coverage in stream def
            salinity = psd.get_values('salinity')

            import numpy

            assertions(isinstance(salinity, numpy.ndarray))

            assertions(numpy.nanmin(salinity) >
                       0.0)  # salinity should always be greater than 0
Ejemplo n.º 22
0
    def on_start(self):

        log.debug("VizStreamProducer start")

        self.data_source_name = self.CFG.get_safe('name',
                                                  'sine_wave_generator')
        self.dataset = self.CFG.get_safe('dataset', 'sinusoidal')

        # create a pubsub client and a resource registry client
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.pubsubclient = PubsubManagementServiceClient(
            node=self.container.node)

        # Dummy instrument related clients
        self.imsclient = InstrumentManagementServiceClient(
            node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.dpclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.ingestclient = IngestionManagementServiceClient(
            node=self.container.node)
        self.dataset_management = DatasetManagementServiceClient()

        # create the pubsub client
        self.pubsubclient = PubsubManagementServiceClient(
            node=self.container.node)

        # Additional code for creating a dummy instrument
        """
        # Set up the preconditions. Look for an existing ingestion config
        while True:
            log.info("VisStreamLauncher:on_start: Waiting for an ingestion configuration to be available.")
            ingestion_cfgs, _ = self.rrclient.find_resources(RT.IngestionConfiguration, None, None, True)

            if len(ingestion_cfgs) > 0:
                break
            else:
                gevent.sleep(1)
        """

        # Check to see if the data_product already exists in the system (for e.g re launching the code after a crash)
        dp_ids, _ = self.rrclient.find_resources(RT.DataProduct, None,
                                                 self.data_source_name, True)
        if len(dp_ids) > 0:
            data_product_id = dp_ids[0]
        else:
            # Create InstrumentModel
            instModel_obj = IonObject(RT.InstrumentModel,
                                      name=self.data_source_name,
                                      description=self.data_source_name)
            instModel_id = self.imsclient.create_instrument_model(
                instModel_obj)

            # Create InstrumentDevice
            instDevice_obj = IonObject(RT.InstrumentDevice,
                                       name=self.data_source_name,
                                       description=self.data_source_name,
                                       serial_number="12345")

            instDevice_id = self.imsclient.create_instrument_device(
                instrument_device=instDevice_obj)
            self.imsclient.assign_instrument_model_to_instrument_device(
                instModel_id, instDevice_id)

            # create a stream definition for the data from the ctd simulator
            pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
                'ctd_parsed_param_dict', id_only=True)
            ctd_stream_def_id = self.pubsubclient.create_stream_definition(
                name="SBE37_CDM",
                description="SBE37_CDM",
                parameter_dictionary_id=pdict_id)

            tdom, sdom = time_series_domain()
            sdom = sdom.dump()
            tdom = tdom.dump()

            dp_obj = IonObject(RT.DataProduct,
                               name=self.data_source_name,
                               description='Example ctd stream',
                               temporal_domain=tdom,
                               spatial_domain=sdom)

            data_product_id = self.dpclient.create_data_product(
                dp_obj, stream_definition_id=ctd_stream_def_id)

            self.damsclient.assign_data_product(
                input_resource_id=instDevice_id,
                data_product_id=data_product_id)
            self.dpclient.activate_data_product_persistence(
                data_product_id=data_product_id)

        print ">>>>>>>>>>>>> Dataproduct for sine wave generator : ", data_product_id

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id,
                                                   PRED.hasStream, None, True)

        if self.dataset == 'sinusoidal':
            self.container.spawn_process(
                name='ctd_test.' + self.data_source_name,
                module='ion.processes.data.sinusoidal_stream_publisher',
                cls='SinusoidalCtdPublisher',
                config={'process': {
                    'stream_id': stream_ids[0]
                }})
        else:
            self.container.spawn_process(
                name='ctd_test.' + self.data_source_name,
                module='ion.processes.data.ctd_stream_publisher',
                cls='SimpleCtdPublisher',
                config={'process': {
                    'stream_id': stream_ids[0]
                }})
        """