def process(self, packet):
    log.debug('(%s): Received Viz Data Packet' % self.name)
    #log.debug('(%s): - Processing: %s' % (self.name, packet))

    # parse the incoming data
    psd = PointSupplementStreamParser(stream_definition=self.stream_def.container,
                                      stream_granule=packet)

    # re-arrange incoming data into an easy to parse dictionary
    vardict = {}
    arrLen = None
    for varname in psd.list_field_names():
        vardict[varname] = psd.get_values(varname)
        arrLen = len(vardict[varname])

    if self.initDataFlag:
        # look at the incoming packet and store
        for varname in psd.list_field_names():
            with self.lock:
                self.graph_data[varname] = []

        self.initDataFlag = False

    # If code reached here, the graph data storage has been initialized.
    # Just add values to the list
    with self.lock:
        for varname in psd.list_field_names():
            self.graph_data[varname].extend(vardict[varname])

def execute(self, granule): log.debug("Matplotlib transform: Received Viz Data Packet") # parse the incoming data psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=granule) # re-arrange incoming data into an easy to parse dictionary vardict = {} arrLen = None for varname in psd.list_field_names(): vardict[varname] = psd.get_values(varname) arrLen = len(vardict[varname]) if self.initDataFlag: # look at the incoming packet and store for varname in psd.list_field_names(): self.graph_data[varname] = [] self.initDataFlag = False # If code reached here, the graph data storage has been initialized. Just add values # to the list for varname in psd.list_field_names(): self.graph_data[varname].extend(vardict[varname]) if (time.time() - self.lastRenderTime) > self.renderTimeThreshold: self.lastRenderTime = time.time() self.render_graphs() return self.out_granule
def _validate_messages(self, results):
    cc = self.container
    assertions = self.assertTrue

    first_salinity_values = None

    for message in results:
        try:
            psd = PointSupplementStreamParser(stream_definition=self.ctd_stream_def,
                                              stream_granule=message)
            temp = psd.get_values('temperature')
            log.info(psd.list_field_names())
        except KeyError:
            temp = None

        if temp is not None:
            assertions(isinstance(temp, numpy.ndarray))
            log.info('temperature=' + str(numpy.nanmin(temp)))
            first_salinity_values = None
        else:
            psd = PointSupplementStreamParser(
                stream_definition=SalinityTransform.outgoing_stream_def,
                stream_granule=message)
            log.info(psd.list_field_names())

            # Test the handy info method for the names of fields in the stream def
            assertions('salinity' in psd.list_field_names())

            # you have to know the name of the coverage in stream def
            salinity = psd.get_values('salinity')
            log.info('salinity=' + str(numpy.nanmin(salinity)))

            assertions(isinstance(salinity, numpy.ndarray))
            assertions(numpy.nanmin(salinity) > 0.0)  # salinity should always be greater than 0

            if first_salinity_values is None:
                first_salinity_values = salinity.tolist()
            else:
                second_salinity_values = salinity.tolist()
                assertions(len(first_salinity_values) == len(second_salinity_values))
                for idx in range(0, len(first_salinity_values)):
                    assertions(first_salinity_values[idx] * 2.0 == second_salinity_values[idx])

def message_received(granule, h):
    stream_id = granule.stream_resource_id

    data_stream_id = granule.data_stream_id
    data_stream = granule.identifiables[data_stream_id]

    tstamp = get_datetime(data_stream.timestamp.value)

    records = granule.identifiables['record_count'].value

    log.info('Received a message from stream %s with time stamp %s and %d records'
             % (stream_id, tstamp, records))

    if stream_id not in stream_defs:
        stream_defs[stream_id] = pubsub_cli.find_stream_definition(stream_id,
                                                                   id_only=False).container
    stream_def = stream_defs.get(stream_id)

    sp = PointSupplementStreamParser(stream_definition=stream_def, stream_granule=granule)

    last_data = {}
    for field in sp.list_field_names():
        last_data[field] = sp.get_values(field)[-1]

    log.info('Last values in the message: %s' % str(last_data))

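# Sketch of how a callback like message_received() above gets attached to a
# stream; this mirrors the StreamSubscriberRegistrar pattern used in the
# integration tests later in this file. The exchange name and subscription_id
# are placeholders, not values defined in the snippet above.
subscriber = subscriber_registrar.create_subscriber(exchange_name='last_value_test',
                                                    callback=message_received)
subscriber.start()
# after the queue has been created it is safe to activate the subscription
pubsub_cli.activate_subscription(subscription_id=subscription_id)
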
def get_last_value(self, granule):
    stream_resource_id = granule.stream_resource_id

    if stream_resource_id not in self.def_cache:
        stream_def = self.ps_cli.find_stream_definition(stream_id=stream_resource_id,
                                                        id_only=False)
        self.def_cache[stream_resource_id] = stream_def.container

    definition = self.def_cache[stream_resource_id]

    psp = PointSupplementStreamParser(stream_definition=definition, stream_granule=granule)

    fields = psp.list_field_names()

    lu = LastUpdate()
    lu.timestamp = granule.identifiables[granule.data_stream_id].timestamp.value

    for field in fields:
        range_id = definition.identifiables[field].range_id
        lu.variables[field] = Variable()
        if field in definition.identifiables:
            lu.variables[field].definition = definition.identifiables[field].definition
        if range_id in definition.identifiables:
            lu.variables[field].units = definition.identifiables[range_id].unit_of_measure.code
        lu.variables[field].value = float(psp.get_values(field_name=field)[-1])

    return lu

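# Hedged usage sketch for get_last_value() above: feeding granules from a
# subscriber callback through it and logging each variable. The `handler`
# object and its wiring are assumptions for illustration only.
def on_granule(granule, headers):
    lu = handler.get_last_value(granule)
    for name, var in lu.variables.iteritems():
        log.info('%s = %s %s', name, var.value, var.units)
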
def test_dm_integration(self):
    '''
    test_salinity_transform
    Test full DM Services Integration
    '''
    cc = self.container
    assertions = self.assertTrue

    #-----------------------------
    # Copy below here to run as a script (don't forget the imports of course!)
    #-----------------------------

    # Create some service clients...
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)
    transform_management_service = TransformManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    # declare some handy variables
    datastore_name = 'test_dm_integration'

    ###
    ### In the beginning there were two stream definitions...
    ###
    # create a stream definition for the data from the ctd simulator
    ctd_stream_def = SBE37_CDM_stream_definition()
    ctd_stream_def_id = pubsub_management_service.create_stream_definition(
        container=ctd_stream_def, name='Simulated CTD data')

    # create a stream definition for the data from the salinity Transform
    sal_stream_def_id = pubsub_management_service.create_stream_definition(
        container=SalinityTransform.outgoing_stream_def, name='Scalar Salinity data stream')

    ###
    ### And two process definitions...
    ###
    # one for the ctd simulator...
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.ctd_stream_publisher',
        'class': 'SimpleCtdPublisher'
    }
    ctd_sim_procdef_id = process_dispatcher.create_process_definition(
        process_definition=producer_definition)

    # one for the salinity transform
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.transforms.ctd.ctd_L2_salinity',
        'class': 'SalinityTransform'
    }
    salinity_transform_procdef_id = process_dispatcher.create_process_definition(
        process_definition=producer_definition)

    #---------------------------
    # Set up ingestion - this is an operator concern - not done by SA in a deployed system
    #---------------------------
    # Configure ingestion using one worker, ingesting to the test_dm_integration
    # datastore with the SCIDATA profile
    log.debug('Calling create_ingestion_configuration')
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        number_of_workers=1
    )
    #
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    #---------------------------
    # Set up the producer (CTD Simulator)
    #---------------------------
    # Create the stream
    ctd_stream_id = pubsub_management_service.create_stream(stream_definition_id=ctd_stream_def_id)

    # Set up the datasets
    ctd_dataset_id = dataset_management_service.create_dataset(
        stream_id=ctd_stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule'
    )

    # Configure ingestion of this dataset
    ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=ctd_dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
    )
    # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

    #---------------------------
    # Set up the salinity transform
    #---------------------------
    # Create the stream
    sal_stream_id = pubsub_management_service.create_stream(stream_definition_id=sal_stream_def_id)

    # Set up the datasets
    sal_dataset_id = dataset_management_service.create_dataset(
        stream_id=sal_stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule'
    )

    # Configure ingestion of the salinity as a dataset
    sal_dataset_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=sal_dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
    )
    # Hold onto sal_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

    # Create a subscription as input to the transform
    sal_transform_input_subscription_id = pubsub_management_service.create_subscription(
        query=StreamQuery(stream_ids=[ctd_stream_id, ]),
        exchange_name='salinity_transform_input')  # how do we make these names??? i.e. Should they be anonymous?

    # create the salinity transform
    sal_transform_id = transform_management_service.create_transform(
        name='example salinity transform',
        in_subscription_id=sal_transform_input_subscription_id,
        out_streams={'output': sal_stream_id, },
        process_definition_id=salinity_transform_procdef_id,
        # no configuration needed at this time...
    )
    # start the transform - for a test case it makes sense to do it before
    # starting the producer, but it is not required
    transform_management_service.activate_transform(transform_id=sal_transform_id)

    # Start the ctd simulator to produce some data
    configuration = {
        'process': {
            'stream_id': ctd_stream_id,
        }
    }
    ctd_sim_pid = process_dispatcher.schedule_process(
        process_definition_id=ctd_sim_procdef_id, configuration=configuration)

    ###
    ### Make a subscriber in the test to listen for salinity data
    ###
    salinity_subscription_id = pubsub_management_service.create_subscription(
        query=StreamQuery([sal_stream_id, ]),
        exchange_name='salinity_test',
        name="test salinity subscription",
    )

    pid = cc.spawn_process(name='dummy_process_for_test',
                           module='pyon.ion.process',
                           cls='SimpleProcess',
                           config={})
    dummy_process = cc.proc_manager.procs[pid]

    subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node)

    result = gevent.event.AsyncResult()
    results = []

    def message_received(message, headers):
        log.warn('Salinity data received!')
        results.append(message)
        if len(results) > 3:
            result.set(True)

    subscriber = subscriber_registrar.create_subscriber(exchange_name='salinity_test',
                                                        callback=message_received)
    subscriber.start()

    # after the queue has been created it is safe to activate the subscription
    pubsub_management_service.activate_subscription(subscription_id=salinity_subscription_id)

    # Assert that we have received data
    assertions(result.get(timeout=10))

    # stop the flow and parse the messages...
    process_dispatcher.cancel_process(ctd_sim_pid)  # kill the ctd simulator process - that is enough data

    for message in results:
        psd = PointSupplementStreamParser(
            stream_definition=SalinityTransform.outgoing_stream_def,
            stream_granule=message)

        # Test the handy info method for the names of fields in the stream def
        assertions('salinity' in psd.list_field_names())

        # you have to know the name of the coverage in stream def
        salinity = psd.get_values('salinity')

        import numpy
        assertions(isinstance(salinity, numpy.ndarray))
        assertions(numpy.nanmin(salinity) > 0.0)  # salinity should always be greater than 0

def process(self, packet):
    log.debug('(%s): Received Viz Data Packet' % self.name)

    element_count_id = 0
    expected_range = []

    psd = PointSupplementStreamParser(stream_definition=self.stream_def,
                                      stream_granule=packet)
    vardict = {}
    arrLen = None
    for varname in psd.list_field_names():
        vardict[varname] = psd.get_values(varname)
        arrLen = len(vardict[varname])

    # if it's the first time, init the dataTable
    if self.initDataTableFlag:
        # create data description from the variables in the message
        self.dataDescription = [('time', 'datetime', 'time')]

        # split the data string to extract variable names
        for varname in psd.list_field_names():
            if varname == 'time':
                continue
            self.dataDescription.append((varname, 'number', varname))

        self.initDataTableFlag = False

    # Add the records to the datatable
    for i in xrange(arrLen):
        varTuple = []
        for varname, _, _ in self.dataDescription:
            val = float(vardict[varname][i])
            if varname == 'time':
                varTuple.append(datetime.fromtimestamp(val))
            else:
                varTuple.append(val)

        # Append the tuples to the data table
        self.dataTableContent.append(varTuple)

    if self.realtime_flag:
        # Maintain a sliding window for realtime transform processes
        realtime_window_size = 100
        if len(self.dataTableContent) > realtime_window_size:
            # always pop the first element till window size is what we want
            while len(self.dataTableContent) > realtime_window_size:
                self.dataTableContent.pop(0)

    if not self.realtime_flag:
        # This is the historical view part. Make a note of how many records were received
        data_stream_id = self.stream_def.data_stream_id
        element_count_id = self.stream_def.identifiables[data_stream_id].element_count_id

        # From each granule you can check the constraint on the number of records
        expected_range = packet.identifiables[element_count_id].constraint.intervals[0]

        # The number of records in a given packet is:
        self.total_num_of_records_recvd += packet.identifiables[element_count_id].value

    # submit the Json version of the datatable to the viz service
    if self.realtime_flag:
        # create the google viz data table
        data_table = gviz_api.DataTable(self.dataDescription)
        data_table.LoadData(self.dataTableContent)

        # submit resulting table back using the out stream publisher
        msg = {"viz_product_type": "google_realtime_dt",
               "data_product_id": self.data_product_id,
               "data_table": data_table.ToJSonResponse()}
        self.out_stream_pub.publish(msg)
    else:
        # Submit table back to the service if we received all the replay data
        if self.total_num_of_records_recvd == (expected_range[1] + 1):
            # If the datatable received was too big, decimate on the fly to a fixed size.
            # Note: divide by a float so math.ceil rounds up instead of truncating.
            max_google_dt_len = 1024
            if len(self.dataTableContent) > max_google_dt_len:
                decimation_factor = int(math.ceil(len(self.dataTableContent) / float(max_google_dt_len)))

                tempDataTableContent = []
                for i in xrange(0, len(self.dataTableContent), decimation_factor):
                    # check limits
                    if i >= len(self.dataTableContent):
                        break
                    tempDataTableContent.append(self.dataTableContent[i])

                self.dataTableContent = tempDataTableContent

            data_table = gviz_api.DataTable(self.dataDescription)
            data_table.LoadData(self.dataTableContent)

            # submit resulting table back using the out stream publisher
            msg = {"viz_product_type": "google_dt",
                   "data_product_id_token": self.data_product_id_token,
                   "data_table": data_table.ToJSonResponse()}
            self.out_stream_pub.publish(msg)

    return

def execute(self, granule):
    log.debug('(Google DT transform): Received Viz Data Packet')

    self.dataDescription = []
    self.dataTableContent = []
    element_count_id = 0
    expected_range = []

    # NOTE: Detect somehow that this is a replay stream with a set number of expected
    # granules. Based on this, calculate the number of expected records and set
    # self.realtime_window_size bigger or equal to this number.

    psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def,
                                      stream_granule=granule)
    vardict = {}
    arrLen = None
    for varname in psd.list_field_names():
        vardict[varname] = psd.get_values(varname)
        arrLen = len(vardict[varname])

    # init the dataTable
    # create data description from the variables in the message
    self.dataDescription = [('time', 'datetime', 'time')]

    # split the data string to extract variable names
    for varname in psd.list_field_names():
        if varname == 'time':
            continue
        self.dataDescription.append((varname, 'number', varname))

    # Add the records to the datatable
    for i in xrange(arrLen):
        varTuple = []
        for varname, _, _ in self.dataDescription:
            val = float(vardict[varname][i])
            if varname == 'time':
                #varTuple.append(datetime.fromtimestamp(val))
                varTuple.append(val)
            else:
                varTuple.append(val)

        # Append the tuples to the data table
        self.dataTableContent.append(varTuple)

    # Maintain a sliding window for realtime transform processes
    if len(self.dataTableContent) > self.realtime_window_size:
        # always pop the first element till window size is what we want
        while len(self.dataTableContent) > self.realtime_window_size:
            self.dataTableContent.pop(0)

    """
    To Do: Do we need to figure out how many granules have been received for a replay stream?

    if not self.realtime_flag:
        # This is the historical view part. Make a note of how many records were received
        in_data_stream_id = self.incoming_stream_def.data_stream_id
        element_count_id = self.incoming_stream_def.identifiables[in_data_stream_id].element_count_id
        # From each granule you can check the constraint on the number of records
        expected_range = granule.identifiables[element_count_id].constraint.intervals[0]
        # The number of records in a given packet is:
        self.total_num_of_records_recvd += packet.identifiables[element_count_id].value
    """

    # define an output container of data
    # submit the partial datatable to the viz service
    rdt = RecordDictionaryTool(taxonomy=tx)

    # submit resulting table back using the out stream publisher. The data_product_id
    # is the input dp_id responsible for the incoming data
    msg = {"viz_product_type": "google_realtime_dt",
           "data_product_id": "FAKE_DATAPRODUCT_ID_0000",
           "data_table_description": self.dataDescription,
           "data_table_content": self.dataTableContent}

    rdt['google_dt_components'] = numpy.array([msg])

    log.debug('Google DT transform: Sending a granule')
    out_granule = build_granule(data_producer_id='google_dt_transform',
                                taxonomy=tx,
                                record_dictionary=rdt)

    #self.publish(out_granule)

    return out_granule

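# Sketch of the consuming side of the transform above: rebuilding a google
# visualization DataTable from the published message dict. It mirrors the
# gviz_api calls already used by the process() methods in this file; `msg`
# here stands for the dict extracted from the received granule.
data_table = gviz_api.DataTable(msg['data_table_description'])
data_table.LoadData(msg['data_table_content'])
json_response = data_table.ToJSonResponse()  # ready to hand to a google chart client
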
def test_workflow_components(self):
    cc = self.container
    assertions = self.assertTrue

    #-------------------------------
    # Create CTD Parsed as the initial data product
    #-------------------------------
    # create a stream definition for the data from the ctd simulator
    ctd_stream_def = SBE37_CDM_stream_definition()
    ctd_stream_def_id = self.pubsubclient.create_stream_definition(container=ctd_stream_def,
                                                                   name='Simulated CTD data')

    print 'Creating new CDM data product with a stream definition'
    dp_obj = IonObject(RT.DataProduct, name='ctd_parsed', description='ctd stream test')
    try:
        ctd_parsed_data_product = self.dataproductclient.create_data_product(dp_obj, ctd_stream_def_id)
    except Exception as ex:
        self.fail("failed to create new data product: %s" % ex)
    print 'new ctd_parsed_data_product_id = ', ctd_parsed_data_product

    instDevice_obj = IonObject(RT.InstrumentDevice,
                               name='SBE37IMDevice',
                               description="SBE37IMDevice",
                               serial_number="12345")
    instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj)
    self.damsclient.assign_data_product(input_resource_id=instDevice_id,
                                        data_product_id=ctd_parsed_data_product)

    self.dataproductclient.activate_data_product_persistence(
        data_product_id=ctd_parsed_data_product, persist_data=True, persist_metadata=True)

    # Retrieve the id of the OUTPUT stream from the out Data Product
    stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product, PRED.hasStream, None, True)
    assertions(len(stream_ids) > 0)
    ctd_stream_id = stream_ids[0]

    ###
    ### Setup the first transformation
    ###
    # Salinity: Data Process Definition
    log.debug("Create data process definition SalinityTransform")
    dpd_obj = IonObject(RT.DataProcessDefinition,
                        name='ctd_salinity',
                        description='create a salinity data product',
                        module='ion.processes.data.transforms.ctd.ctd_L2_salinity',
                        class_name='SalinityTransform',
                        process_source='SalinityTransform source code here...')
    try:
        ctd_L2_salinity_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)
    except Exception as ex:
        self.fail("failed to create new SalinityTransform data process definition: %s" % ex)

    # create a stream definition for the data from the salinity Transform
    sal_stream_def_id = self.pubsubclient.create_stream_definition(
        container=SalinityTransform.outgoing_stream_def, name='L2_salinity')
    self.dataprocessclient.assign_stream_definition_to_data_process_definition(
        sal_stream_def_id, ctd_L2_salinity_dprocdef_id)

    # Create the output data product of the transform
    log.debug("create output data product L2 Salinity")
    ctd_l2_salinity_output_dp_obj = IonObject(RT.DataProduct,
                                              name='L2_Salinity',
                                              description='transform output L2 salinity')
    ctd_l2_salinity_output_dp_id = self.dataproductclient.create_data_product(
        ctd_l2_salinity_output_dp_obj, sal_stream_def_id)
    self.dataproductclient.activate_data_product_persistence(
        data_product_id=ctd_l2_salinity_output_dp_id, persist_data=True, persist_metadata=True)

    # Create the Salinity transform data process
    log.debug("create L2_salinity data_process and start it")
    try:
        l2_salinity_all_data_process_id = self.dataprocessclient.create_data_process(
            ctd_L2_salinity_dprocdef_id, ctd_parsed_data_product,
            {'output': ctd_l2_salinity_output_dp_id})
        self.dataprocessclient.activate_data_process(l2_salinity_all_data_process_id)
    except BadRequest as ex:
        self.fail("failed to create new data process: %s" % ex)

    log.debug("test_createTransformsThenActivateInstrument: create L2_salinity data_process return")

    ###
    ### Setup the second transformation
    ###
    # Salinity Doubler: Data Process Definition
    log.debug("Create data process definition SalinityDoublerTransform")
    dpd_obj = IonObject(RT.DataProcessDefinition,
                        name='salinity_doubler',
                        description='create a salinity doubler data product',
                        module='ion.processes.data.transforms.example_double_salinity',
                        class_name='SalinityDoubler',
                        process_source='SalinityDoubler source code here...')
    try:
        salinity_doubler_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)
    except Exception as ex:
        self.fail("failed to create new SalinityDoubler data process definition: %s" % ex)

    # create a stream definition for the data from the salinity doubler transform
    salinity_double_stream_def_id = self.pubsubclient.create_stream_definition(
        container=SalinityDoubler.outgoing_stream_def, name='SalinityDoubler')
    self.dataprocessclient.assign_stream_definition_to_data_process_definition(
        salinity_double_stream_def_id, salinity_doubler_dprocdef_id)

    # Create the output data product of the transform
    log.debug("create output data product SalinityDoubler")
    salinity_doubler_output_dp_obj = IonObject(RT.DataProduct,
                                               name='SalinityDoubler',
                                               description='transform output salinity doubler')
    salinity_doubler_output_dp_id = self.dataproductclient.create_data_product(
        salinity_doubler_output_dp_obj, salinity_double_stream_def_id)
    self.dataproductclient.activate_data_product_persistence(
        data_product_id=salinity_doubler_output_dp_id, persist_data=True, persist_metadata=True)

    # Create the SalinityDoubler transform data process
    log.debug("create SalinityDoubler data_process and start it")
    try:
        salinity_double_data_process_id = self.dataprocessclient.create_data_process(
            salinity_doubler_dprocdef_id, ctd_l2_salinity_output_dp_id,
            {'output': salinity_doubler_output_dp_id})
        self.dataprocessclient.activate_data_process(salinity_double_data_process_id)
    except BadRequest as ex:
        self.fail("failed to create new data process: %s" % ex)

    log.debug("test_createTransformsThenActivateInstrument: create SalinityDoubler data_process return")

    ###
    ### Start the process for producing the CTD data
    ###
    # process definition for the ctd simulator...
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.ctd_stream_publisher',
        'class': 'SimpleCtdPublisher'
    }
    ctd_sim_procdef_id = self.process_dispatcher.create_process_definition(
        process_definition=producer_definition)

    # Start the ctd simulator to produce some data
    configuration = {
        'process': {
            'stream_id': ctd_stream_id,
        }
    }
    ctd_sim_pid = self.process_dispatcher.schedule_process(
        process_definition_id=ctd_sim_procdef_id, configuration=configuration)

    ## get the stream id for the transform outputs
    stream_ids, _ = self.rrclient.find_objects(ctd_l2_salinity_output_dp_id, PRED.hasStream, None, True)
    assertions(len(stream_ids) > 0)
    sal_stream_id = stream_ids[0]

    stream_ids, _ = self.rrclient.find_objects(salinity_doubler_output_dp_id, PRED.hasStream, None, True)
    assertions(len(stream_ids) > 0)
    sal_dbl_stream_id = stream_ids[0]

    ###
    ### Make a subscriber in the test to listen for transformed data
    ###
    salinity_subscription_id = self.pubsubclient.create_subscription(
        query=StreamQuery([ctd_stream_id, sal_stream_id, sal_dbl_stream_id]),
        exchange_name='salinity_test',
        name="test salinity subscription",
    )

    pid = cc.spawn_process(name='dummy_process_for_test',
                           module='pyon.ion.process',
                           cls='SimpleProcess',
                           config={})
    dummy_process = cc.proc_manager.procs[pid]

    subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node)

    result = gevent.event.AsyncResult()
    results = []

    def message_received(message, headers):
        log.warn(' data received!')
        results.append(message)
        if len(results) > 15:
            result.set(True)

    subscriber = subscriber_registrar.create_subscriber(exchange_name='salinity_test',
                                                        callback=message_received)
    subscriber.start()

    # after the queue has been created it is safe to activate the subscription
    self.pubsubclient.activate_subscription(subscription_id=salinity_subscription_id)

    # Assert that we have received data
    assertions(result.get(timeout=20))

    # Stop the transform processes
    # stop the flow and parse the messages...
    self.process_dispatcher.cancel_process(ctd_sim_pid)  # kill the ctd simulator process - that is enough data

    first_salinity_values = None

    for message in results:
        try:
            psd = PointSupplementStreamParser(stream_definition=ctd_stream_def,
                                              stream_granule=message)
            temp = psd.get_values('temperature')
            print psd.list_field_names()
        except KeyError:
            temp = None

        if temp is not None:
            assertions(isinstance(temp, numpy.ndarray))
            print 'temperature=' + str(numpy.nanmin(temp))
            first_salinity_values = None
        else:
            psd = PointSupplementStreamParser(
                stream_definition=SalinityTransform.outgoing_stream_def,
                stream_granule=message)
            print psd.list_field_names()

            # Test the handy info method for the names of fields in the stream def
            assertions('salinity' in psd.list_field_names())

            # you have to know the name of the coverage in stream def
            salinity = psd.get_values('salinity')
            print 'salinity=' + str(numpy.nanmin(salinity))

            assertions(isinstance(salinity, numpy.ndarray))
            assertions(numpy.nanmin(salinity) > 0.0)  # salinity should always be greater than 0

            if first_salinity_values is None:
                first_salinity_values = salinity.tolist()
            else:
                second_salinity_values = salinity.tolist()
                assertions(len(first_salinity_values) == len(second_salinity_values))
                for idx in range(0, len(first_salinity_values)):
                    assertions(first_salinity_values[idx] * 2.0 == second_salinity_values[idx])

def process(self, packet):
    log.debug('(%s): Received Viz Data Packet' % self.name)

    element_count_id = 0
    expected_range = []

    psd = PointSupplementStreamParser(stream_definition=self.stream_def,
                                      stream_granule=packet)
    vardict = {}
    arrLen = None
    for varname in psd.list_field_names():
        vardict[varname] = psd.get_values(varname)
        arrLen = len(vardict[varname])

    # if it's the first time, init the dataTable
    if self.initDataTableFlag:
        # create data description from the variables in the message
        self.dataDescription = [('time', 'datetime', 'time')]

        # split the data string to extract variable names
        for varname in psd.list_field_names():
            if varname == 'time':
                continue
            self.dataDescription.append((varname, 'number', varname))

        self.initDataTableFlag = False

    # Add the records to the datatable
    for i in xrange(arrLen):
        varTuple = []
        for varname, _, _ in self.dataDescription:
            val = float(vardict[varname][i])
            if varname == 'time':
                varTuple.append(datetime.fromtimestamp(val))
            else:
                varTuple.append(val)

        # Append the tuples to the data table
        self.dataTableContent.append(varTuple)

    if self.realtime_flag:
        # Maintain a sliding window for realtime transform processes
        realtime_window_size = 100
        if len(self.dataTableContent) > realtime_window_size:
            # always pop the first element till window size is what we want
            while len(self.dataTableContent) > realtime_window_size:
                self.dataTableContent.pop(0)

    if not self.realtime_flag:
        # This is the historical view part. Make a note of how many records were received
        data_stream_id = self.stream_def.data_stream_id
        element_count_id = self.stream_def.identifiables[data_stream_id].element_count_id

        # From each granule you can check the constraint on the number of records
        expected_range = packet.identifiables[element_count_id].constraint.intervals[0]

        # The number of records in a given packet is:
        self.total_num_of_records_recvd += packet.identifiables[element_count_id].value

    # submit the Json version of the datatable to the viz service
    if self.realtime_flag:
        # create the google viz data table
        data_table = gviz_api.DataTable(self.dataDescription)
        data_table.LoadData(self.dataTableContent)

        # submit resulting table back using the out stream publisher
        msg = {"viz_product_type": "google_realtime_dt",
               "data_product_id": self.data_product_id,
               "data_table": data_table.ToJSonResponse()}
        self.out_stream_pub.publish(msg)
    else:
        # Submit table back to the service if we received all the replay data
        if self.total_num_of_records_recvd == (expected_range[1] + 1):
            # If the datatable received was too big, decimate on the fly to a fixed size.
            # Note: divide by a float so math.ceil rounds up instead of truncating.
            max_google_dt_len = 1024
            if len(self.dataTableContent) > max_google_dt_len:
                decimation_factor = int(math.ceil(len(self.dataTableContent) / float(max_google_dt_len)))

                # drop every row whose index is not a multiple of the decimation
                # factor, walking backwards so pops do not shift unvisited indices
                for i in xrange(len(self.dataTableContent) - 1, 0, -1):
                    if i % decimation_factor == 0:
                        continue
                    self.dataTableContent.pop(i)

            data_table = gviz_api.DataTable(self.dataDescription)
            data_table.LoadData(self.dataTableContent)

            # submit resulting table back using the out stream publisher
            msg = {"viz_product_type": "google_dt",
                   "data_product_id_token": self.data_product_id_token,
                   "data_table": data_table.ToJSonResponse()}
            self.out_stream_pub.publish(msg)

    return

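# The two decimation loops above (copy-and-skip and reverse pop) both keep
# every decimation_factor-th row. An equivalent slice does the same in one
# statement and avoids quadratic pops; shown here as a standalone sketch over
# a plain list, not a drop-in patch for the methods above.
import math

def decimate(dataTableContent, max_google_dt_len=1024):
    if len(dataTableContent) > max_google_dt_len:
        decimation_factor = int(math.ceil(len(dataTableContent) / float(max_google_dt_len)))
        dataTableContent = dataTableContent[::decimation_factor]
    return dataTableContent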