def message_received(granule, h):

            stream_id = granule.stream_resource_id

            data_stream_id = granule.data_stream_id
            data_stream = granule.identifiables[data_stream_id]

            tstamp = get_datetime(data_stream.timestamp.value)

            records = granule.identifiables['record_count'].value
            

            log.info('Received a message from stream %s with time stamp %s and %d records' % (stream_id, tstamp, records))


            if stream_id not in stream_defs:
                stream_defs[stream_id] = pubsub_cli.find_stream_definition(stream_id, id_only=False).container
            stream_def = stream_defs.get(stream_id)

            sp = PointSupplementStreamParser(stream_definition=stream_def, stream_granule=granule)

            last_data = {}
            for field in sp.list_field_names():
                last_data[field] = sp.get_values(field)[-1]

            log.info('Last values in the message: %s' % str(last_data))
Example #2
    def get_last_value(self, granule):

        stream_resource_id = granule.stream_resource_id
        if stream_resource_id not in self.def_cache:
            stream_def = self.ps_cli.find_stream_definition(
                stream_id=stream_resource_id, id_only=False)
            self.def_cache[stream_resource_id] = stream_def.container

        definition = self.def_cache[stream_resource_id]

        psp = PointSupplementStreamParser(stream_definition=definition,
                                          stream_granule=granule)
        fields = psp.list_field_names()

        lu = LastUpdate()
        lu.timestamp = granule.identifiables[
            granule.data_stream_id].timestamp.value
        for field in fields:
            range_id = definition.identifiables[field].range_id
            lu.variables[field] = Variable()
            if field in definition.identifiables:
                lu.variables[field].definition = definition.identifiables[
                    field].definition
            if range_id in definition.identifiables:
                lu.variables[field].units = definition.identifiables[
                    range_id].unit_of_measure.code
            lu.variables[field].value = float(
                psp.get_values(field_name=field)[-1])
        return lu
    def process(self, packet):
        log.debug('(%s): Received Viz Data Packet' % self.name )
        #log.debug('(%s):   - Processing: %s' % (self.name,packet))

        # parse the incoming data
        psd = PointSupplementStreamParser(stream_definition=self.stream_def.container, stream_granule=packet)

        # re-arrange incoming data into an easy to parse dictionary
        vardict = {}
        arrLen = None
        for varname in psd.list_field_names():
            vardict[varname] = psd.get_values(varname)
            arrLen = len(vardict[varname])

        if self.initDataFlag:
            # look at the incoming packet and store
            for varname in psd.list_field_names():
                with self.lock:
                    self.graph_data[varname] = []

            self.initDataFlag = False

        # If code reached here, the graph data storage has been initialized. Just add values
        # to the list
        with self.lock:
            for varname in psd.list_field_names():
                self.graph_data[varname].extend(vardict[varname])
    def execute(self, granule):
        log.debug("Matplotlib transform: Received Viz Data Packet")

        # parse the incoming data
        psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=granule)

        # re-arrange incoming data into an easy to parse dictionary
        vardict = {}
        arrLen = None
        for varname in psd.list_field_names():
            vardict[varname] = psd.get_values(varname)
            arrLen = len(vardict[varname])

        if self.initDataFlag:
            # look at the incoming packet and store
            for varname in psd.list_field_names():
                self.graph_data[varname] = []

            self.initDataFlag = False

        # If code reached here, the graph data storage has been initialized. Just add values
        # to the list
        for varname in psd.list_field_names():
            self.graph_data[varname].extend(vardict[varname])

        if (time.time() - self.lastRenderTime) > self.renderTimeThreshold:
            self.lastRenderTime = time.time()
            self.render_graphs()

        return self.out_granule
        def message_received(granule, h):

            stream_id = granule.stream_resource_id

            data_stream_id = granule.data_stream_id
            data_stream = granule.identifiables[data_stream_id]

            tstamp = get_datetime(data_stream.timestamp.value)

            records = granule.identifiables['record_count'].value

            log.info(
                'Received a message from stream %s with time stamp %s and %d records'
                % (stream_id, tstamp, records))

            if stream_id not in stream_defs:
                stream_defs[stream_id] = pubsub_cli.find_stream_definition(
                    stream_id, id_only=False).container
            stream_def = stream_defs.get(stream_id)

            sp = PointSupplementStreamParser(stream_definition=stream_def,
                                             stream_granule=granule)

            last_data = {}
            for field in sp.list_field_names():
                last_data[field] = sp.get_values(field)[-1]

            log.info('Last values in the message: %s' % str(last_data))
Example #6
    def _validate_messages(self, results):

        cc = self.container
        assertions = self.assertTrue

        first_salinity_values = None

        for message in results:

            try:
                psd = PointSupplementStreamParser(
                    stream_definition=self.ctd_stream_def,
                    stream_granule=message)
                temp = psd.get_values('temperature')
                log.info(psd.list_field_names())
            except KeyError as ke:
                temp = None

            if temp is not None:
                assertions(isinstance(temp, numpy.ndarray))

                log.info('temperature=' + str(numpy.nanmin(temp)))

                first_salinity_values = None

            else:
                psd = PointSupplementStreamParser(
                    stream_definition=SalinityTransform.outgoing_stream_def,
                    stream_granule=message)
                log.info(psd.list_field_names())

                # Test the handy info method for the names of fields in the stream def
                assertions('salinity' in psd.list_field_names())

                # you have to know the name of the coverage in stream def
                salinity = psd.get_values('salinity')
                log.info('salinity=' + str(numpy.nanmin(salinity)))

                assertions(isinstance(salinity, numpy.ndarray))

                assertions(numpy.nanmin(salinity) >
                           0.0)  # salinity should always be greater than 0

                if first_salinity_values is None:
                    first_salinity_values = salinity.tolist()
                else:
                    second_salinity_values = salinity.tolist()
                    assertions(
                        len(first_salinity_values) == len(
                            second_salinity_values))
                    for idx in range(0, len(first_salinity_values)):
                        assertions(first_salinity_values[idx] *
                                   2.0 == second_salinity_values[idx])
    def execute(self, granule):
        """
        Example process to double the salinity value
        """

        #  pull data from a granule
        psd = PointSupplementStreamParser(
            stream_definition=self.incoming_stream_def, stream_granule=granule)

        longitude = psd.get_values('longitude')
        latitude = psd.get_values('latitude')
        height = psd.get_values('height')
        time = psd.get_values('time')

        salinity = psd.get_values('salinity')

        salinity *= 2.0

        print('Doubled salinity: %s' % str(salinity))

        # Use the constructor to put data into a granule
        psc = PointSupplementConstructor(
            point_definition=self.outgoing_stream_def,
            stream_id=self.streams['output'])

        for i in xrange(len(salinity)):
            point_id = psc.add_point(time=time[i],
                                     location=(longitude[i], latitude[i],
                                               height[i]))
            psc.add_scalar_point_coverage(point_id=point_id,
                                          coverage_id='salinity',
                                          value=salinity[i])

        return psc.close_stream_granule()
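The doubling above is a plain element-wise scaling of a numpy array; the validation test later asserts that every value in the doubled stream equals twice the corresponding input. A minimal, dependency-free sketch of just that core step (plain numpy, no granule machinery; the helper name is illustrative):

    import numpy as np

    def double_salinity(salinity):
        # element-wise doubling, same as the salinity *= 2.0 step above
        return np.asarray(salinity, dtype=float) * 2.0

    original = np.array([33.1, 33.2, 33.3])
    doubled = double_salinity(original)
    assert (doubled == original * 2.0).all()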
Example #8
    def process(self, packet):

        """Processes incoming data!!!!
        """

        # Use the PointSupplementStreamParser to pull data from a granule
        psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=packet)


        conductivity = psd.get_values('conductivity')
        pressure = psd.get_values('pressure')
        temperature = psd.get_values('temperature')

        longitude = psd.get_values('longitude')
        latitude = psd.get_values('latitude')
        height = psd.get_values('height')
        time = psd.get_values('time')

        log.warn('Got conductivity: %s' % str(conductivity))
        log.warn('Got pressure: %s' % str(pressure))
        log.warn('Got temperature: %s' % str(temperature))

    

        # do L0 scaling here.....


        # Use the constructor to put data into a granule

        psc_conductivity = PointSupplementConstructor(point_definition=self.outgoing_stream_conductivity, stream_id=self.streams['conductivity'])

        psc_pressure = PointSupplementConstructor(point_definition=self.outgoing_stream_pressure, stream_id=self.streams['pressure'])

        psc_temperature = PointSupplementConstructor(point_definition=self.outgoing_stream_temperature, stream_id=self.streams['temperature'])

        ### The stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
        ### application level like this!

        for i in xrange(len(conductivity)):
            point_id = psc_conductivity.add_point(time=time[i],location=(longitude[i],latitude[i],height[i]))
            psc_conductivity.add_scalar_point_coverage(point_id=point_id, coverage_id='conductivity', value=conductivity[i])
        self.conductivity.publish(psc_conductivity.close_stream_granule())

        for i in xrange(len(pressure)):
            point_id = psc_pressure.add_point(time=time[i],location=(longitude[i],latitude[i],height[i]))
            psc_pressure.add_scalar_point_coverage(point_id=point_id, coverage_id='pressure', value=pressure[i])
        self.pressure.publish(psc_pressure.close_stream_granule())

        for i in xrange(len(temperature)):
            point_id = psc_temperature.add_point(time=time[i],location=(longitude[i],latitude[i],height[i]))
            psc_temperature.add_scalar_point_coverage(point_id=point_id, coverage_id='temperature', value=temperature[i])
        self.temperature.publish(psc_temperature.close_stream_granule())

        return
Example #9
    def execute(self, granule):
        """Processes incoming data!!!!
        """

        # Use the deconstructor to pull data from a granule
        psd = PointSupplementStreamParser(
            stream_definition=self.incoming_stream_def, stream_granule=granule)

        conductivity = psd.get_values('conductivity')
        pressure = psd.get_values('pressure')
        temperature = psd.get_values('temperature')

        longitude = psd.get_values('longitude')
        latitude = psd.get_values('latitude')
        height = psd.get_values('height')
        time = psd.get_values('time')

        log.warn('Got conductivity: %s' % str(conductivity))
        log.warn('Got pressure: %s' % str(pressure))
        log.warn('Got temperature: %s' % str(temperature))

        sp = SP_from_cndr(r=conductivity / cte.C3515,
                          t=temperature,
                          p=pressure)

        sa = SA_from_SP(sp, pressure, longitude, latitude)

        density = rho(sa, temperature, pressure)

        log.warn('Got density: %s' % str(density))

        # Use the constructor to put data into a granule
        psc = PointSupplementConstructor(
            point_definition=self.outgoing_stream_def,
            stream_id=self.streams['output'])
        ### Assumes the config argument for output streams is known and there is only one 'output'.
        ### the stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
        ### application level like this!

        for i in xrange(len(density)):
            point_id = psc.add_point(time=time[i],
                                     location=(longitude[i], latitude[i],
                                               height[i]))
            psc.add_scalar_point_coverage(point_id=point_id,
                                          coverage_id='density',
                                          value=density[i])

        return psc.close_stream_granule()
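The density transform above chains practical salinity from conductivity, absolute salinity from practical salinity, and in-situ density. A rough standalone sketch of the same chain using the GSW-Python (TEOS-10) package as a stand-in for the seawater routines the transform imports; the gsw function names, argument order, and units here are assumptions about that library rather than part of the original code:

    import gsw  # assumed: GSW-Python (TEOS-10) as a stand-in for SP_from_cndr / SA_from_SP / rho

    def density_from_ctd(conductivity_mS_cm, temperature_C, pressure_dbar, lon, lat):
        sp = gsw.SP_from_C(conductivity_mS_cm, temperature_C, pressure_dbar)  # practical salinity
        sa = gsw.SA_from_SP(sp, pressure_dbar, lon, lat)                      # absolute salinity (g/kg)
        ct = gsw.CT_from_t(sa, temperature_C, pressure_dbar)                  # conservative temperature
        return gsw.rho(sa, ct, pressure_dbar)                                 # in-situ density (kg/m^3)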
    def execute(self, granule):
        """
        Example process to double the salinity value
        """

        #  pull data from a granule
        psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=granule)

        longitude = psd.get_values('longitude')
        latitude = psd.get_values('latitude')
        height = psd.get_values('height')
        time = psd.get_values('time')

        salinity = psd.get_values('salinity')

        salinity *= 2.0

        print ('Doubled salinity: %s' % str(salinity))


        # Use the constructor to put data into a granule
        psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])

        for i in xrange(len(salinity)):
            point_id = psc.add_point(time=time[i],location=(longitude[i],latitude[i],height[i]))
            psc.add_scalar_point_coverage(point_id=point_id, coverage_id='salinity', value=salinity[i])

        return psc.close_stream_granule()
    def execute(self, granule):
        """Processes incoming data!!!!
        """
        # Use the deconstructor to pull data from a granule
        psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=granule)

        conductivity = psd.get_values('conductivity')

        longitude = psd.get_values('longitude')
        latitude = psd.get_values('latitude')
        height = psd.get_values('height')
        time = psd.get_values('time')

        log.warn('Got conductivity: %s' % str(conductivity))


        # The L1 conductivity data product algorithm takes the L0 conductivity data product and converts it
        # into Siemens per meter (S/m)
        #    SBE 37IM Output Format 0
        #    1) Standard conversion from 5-character hex string to decimal
        #    2)Scaling
        # Use the constructor to put data into a granule
        psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])
        ### Assumes the config argument for output streams is known and there is only one 'output'.
        ### the stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
        ### application level like this!

        for i in xrange(len(conductivity)):
            scaled_conductivity =  ( conductivity[i] / 100000.0 ) - 0.5
            point_id = psc.add_point(time=time[i],location=(longitude[i],latitude[i],height[i]))
            psc.add_scalar_point_coverage(point_id=point_id, coverage_id='conductivity', value=scaled_conductivity)

        return psc.close_stream_granule()
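The comments above describe the L0 -> L1 conductivity conversion as a hex-to-decimal step followed by scaling, and the loop applies counts / 100000.0 - 0.5. A small sketch combining both steps in one standalone function; the hex-decoding step is inferred from the comment (the L0 values handled by the loop above are already decimal):

    def l1_conductivity_from_hex(c_hex):
        # e.g. l1_conductivity_from_hex('0A53E') -> about -0.077 (illustrative value)
        counts = int(c_hex, 16)           # 5-character hex string -> decimal counts
        return (counts / 100000.0) - 0.5  # scale to Siemens per meter (S/m)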
    def process(self, packet):
        log.debug('(%s): Received Viz Data Packet' % self.name)
        #log.debug('(%s):   - Processing: %s' % (self.name,packet))

        # parse the incoming data
        psd = PointSupplementStreamParser(
            stream_definition=self.stream_def.container, stream_granule=packet)

        # re-arrange incoming data into an easy to parse dictionary
        vardict = {}
        arrLen = None
        for varname in psd.list_field_names():
            vardict[varname] = psd.get_values(varname)
            arrLen = len(vardict[varname])

        if self.initDataFlag:
            # look at the incoming packet and store
            for varname in psd.list_field_names():
                with self.lock:
                    self.graph_data[varname] = []

            self.initDataFlag = False

        # If code reached here, the graph data storage has been initialized. Just add values
        # to the list
        with self.lock:
            for varname in psd.list_field_names():
                self.graph_data[varname].extend(vardict[varname])
Example #13
    def get_last_value(self,granule):

        stream_resource_id = granule.stream_resource_id
        if stream_resource_id not in self.def_cache:
            stream_def = self.ps_cli.find_stream_definition(stream_id=stream_resource_id, id_only=False)
            self.def_cache[stream_resource_id] = stream_def.container

        definition = self.def_cache[stream_resource_id]

        psp = PointSupplementStreamParser(stream_definition=definition, stream_granule=granule)
        fields = psp.list_field_names()


        lu = LastUpdate()
        lu.timestamp = granule.identifiables[granule.data_stream_id].timestamp.value
        for field in fields:
            range_id = definition.identifiables[field].range_id
            lu.variables[field] = Variable()
            if field in definition.identifiables:
                lu.variables[field].definition = definition.identifiables[field].definition
            if range_id in definition.identifiables:
                lu.variables[field].units = definition.identifiables[range_id].unit_of_measure.code
            lu.variables[field].value = float(psp.get_values(field_name=field)[-1])
        return lu
Example #14
    def execute(self, granule):
        """Processes incoming data!!!!
        """

        # Use the deconstructor to pull data from a granule
        psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=granule)


        conductivity = psd.get_values('conductivity')
        pressure = psd.get_values('pressure')
        temperature = psd.get_values('temperature')

        longitude = psd.get_values('longitude')
        latitude = psd.get_values('latitude')
        height = psd.get_values('height')
        time = psd.get_values('time')



        log.warn('Got conductivity: %s' % str(conductivity))
        log.warn('Got pressure: %s' % str(pressure))
        log.warn('Got temperature: %s' % str(temperature))


        sp = SP_from_cndr(r=conductivity/cte.C3515, t=temperature, p=pressure)

        sa = SA_from_SP(sp, pressure, longitude, latitude)

        density = rho(sa, temperature, pressure)

        log.warn('Got density: %s' % str(density))

        # Use the constructor to put data into a granule
        psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])
        ### Assumes the config argument for output streams is known and there is only one 'output'.
        ### the stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
        ### application level like this!

        for i in xrange(len(density)):
            point_id = psc.add_point(time=time[i],location=(longitude[i],latitude[i],height[i]))
            psc.add_scalar_point_coverage(point_id=point_id, coverage_id='density', value=density[i])

        return psc.close_stream_granule()
Example #15
    def execute(self, granule):
        """Processes incoming data!!!!
        """

        # Use the deconstructor to pull data from a granule
        psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=granule)


        conductivity = psd.get_values('conductivity')
        pressure = psd.get_values('pressure')
        temperature = psd.get_values('temperature')

        longitude = psd.get_values('longitude')
        latitude = psd.get_values('latitude')
        height = psd.get_values('height')
        time = psd.get_values('time')



        log.warn('Got conductivity: %s' % str(conductivity))
        log.warn('Got pressure: %s' % str(pressure))
        log.warn('Got temperature: %s' % str(temperature))


        salinity = SP_from_cndr(r=conductivity/cte.C3515, t=temperature, p=pressure)

        log.warn('Got salinity: %s' % str(salinity))


        # Use the constructor to put data into a granule
        psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])

        for i in xrange(len(salinity)):
            point_id = psc.add_point(time=time[i],location=(longitude[i],latitude[i],height[i]))
            psc.add_scalar_point_coverage(point_id=point_id, coverage_id='salinity', value=salinity[i])

        return psc.close_stream_granule()
Example #16
    def execute(self, granule):
        """Processes incoming data!!!!
        """

        # Use the deconstructor to pull data from a granule
        psd = PointSupplementStreamParser(
            stream_definition=self.incoming_stream_def, stream_granule=granule)

        pressure = psd.get_values('pressure')

        longitude = psd.get_values('longitude')
        latitude = psd.get_values('latitude')
        height = psd.get_values('height')
        time = psd.get_values('time')

        log.warn('Got pressure: %s' % str(pressure))

        # L1
        # 1) The algorithm input is the L0 pressure data product (p_hex) and, in the case of the SBE 37IM, the pressure range (P_rng) from metadata.
        # 2) Convert the hexadecimal string to a decimal string
        # 3) For the SBE 37IM only, convert the pressure range (P_rng) from psia to dbar SBE 37IM
        #    Convert P_rng (input from metadata) from psia to dbar
        # 4) Perform scaling operation
        #    SBE 37IM
        #    L1 pressure data product (in dbar):

        # Use the constructor to put data into a granule
        psc = PointSupplementConstructor(
            point_definition=self.outgoing_stream_def,
            stream_id=self.streams['output'])
        ### Assumes the config argument for output streams is known and there is only one 'output'.
        ### the stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
        ### application level like this!

        for i in xrange(len(pressure)):
            #todo: get pressure range from metadata (if present) and include in calc
            scaled_pressure = (pressure[i])
            point_id = psc.add_point(time=time[i],
                                     location=(longitude[i], latitude[i],
                                               height[i]))
            psc.add_scalar_point_coverage(point_id=point_id,
                                          coverage_id='pressure',
                                          value=scaled_pressure)

        return psc.close_stream_granule()
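The comments above call for converting the SBE 37IM pressure range (P_rng) from psia to dbar before scaling, but the loop is currently a pass-through because the range is not yet read from metadata (see the todo). A sketch of what that unit conversion might look like once the range is available; p_rng_psia is a hypothetical input and this step is an assumption based on the comment, not code from the transform:

    PSIA_TO_DBAR = 0.689476  # 1 psi is approximately 0.689476 dbar

    def pressure_range_dbar(p_rng_psia):
        # convert an SBE 37IM pressure range from psia to dbar
        return p_rng_psia * PSIA_TO_DBAR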
Example #17
    def _validate_messages(self, results):

        cc = self.container
        assertions = self.assertTrue

        first_salinity_values = None

        for message in results:

            try:
                psd = PointSupplementStreamParser(stream_definition=self.ctd_stream_def, stream_granule=message)
                temp = psd.get_values('temperature')
                log.info(psd.list_field_names())
            except KeyError as ke:
                temp = None

            if temp is not None:
                assertions(isinstance(temp, numpy.ndarray))

                log.info( 'temperature=' + str(numpy.nanmin(temp)))

                first_salinity_values = None

            else:
                psd = PointSupplementStreamParser(stream_definition=SalinityTransform.outgoing_stream_def, stream_granule=message)
                log.info( psd.list_field_names())

                # Test the handy info method for the names of fields in the stream def
                assertions('salinity' in psd.list_field_names())

                # you have to know the name of the coverage in stream def
                salinity = psd.get_values('salinity')
                log.info( 'salinity=' + str(numpy.nanmin(salinity)))

                assertions(isinstance(salinity, numpy.ndarray))

                assertions(numpy.nanmin(salinity) > 0.0) # salinity should always be greater than 0

                if first_salinity_values is None:
                    first_salinity_values = salinity.tolist()
                else:
                    second_salinity_values = salinity.tolist()
                    assertions(len(first_salinity_values) == len(second_salinity_values))
                    for idx in range(0,len(first_salinity_values)):
                        assertions(first_salinity_values[idx]*2.0 == second_salinity_values[idx])
Example #18
    def execute(self, granule):
        """Processes incoming data!!!!
        """

        # Use the deconstructor to pull data from a granule
        psd = PointSupplementStreamParser(
            stream_definition=self.incoming_stream_def, stream_granule=granule)

        temperature = psd.get_values('temperature')

        longitude = psd.get_values('longitude')
        latitude = psd.get_values('latitude')
        height = psd.get_values('height')
        time = psd.get_values('time')

        log.warn('Got temperature: %s' % str(temperature))

        # The L1 temperature data product algorithm takes the L0 temperature data product and converts it into Celsius.
        # Once the hexadecimal string is converted to decimal, only scaling (dividing by a factor and adding an offset) is
        # required to produce the correct decimal representation of the data in Celsius.
        # The scaling function differs by CTD make/model as described below.
        #    SBE 37IM, Output Format 0
        #    1) Standard conversion from 5-character hex string (Thex) to decimal (tdec)
        #    2) Scaling: T [C] = (tdec / 10,000) - 10

        # Use the constructor to put data into a granule
        psc = PointSupplementConstructor(
            point_definition=self.outgoing_stream_def,
            stream_id=self.streams['output'])
        ### Assumes the config argument for output streams is known and there is only one 'output'.
        ### the stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
        ### application level like this!

        for i in xrange(len(temperature)):
            scaled_temperature = (temperature[i] / 10000.0) - 10
            point_id = psc.add_point(time=time[i],
                                     location=(longitude[i], latitude[i],
                                               height[i]))
            psc.add_scalar_point_coverage(point_id=point_id,
                                          coverage_id='temperature',
                                          value=scaled_temperature)

        return psc.close_stream_granule()
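The two steps spelled out in the comments above (hex string to decimal counts, then T [C] = tdec / 10000 - 10) can be combined into a single standalone function. The hex-decoding step is inferred from the comment; the loop above starts from decimal counts:

    def l1_temperature_from_hex(t_hex):
        # e.g. l1_temperature_from_hex('30D40') -> 10.0 (degrees Celsius, illustrative)
        tdec = int(t_hex, 16)         # 5-character hex string -> decimal counts
        return (tdec / 10000.0) - 10  # scale to degrees Celsius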
    def execute(self, granule):
        """Processes incoming data!!!!
        """
        # Use the deconstructor to pull data from a granule
        psd = PointSupplementStreamParser(
            stream_definition=self.incoming_stream_def, stream_granule=granule)

        conductivity = psd.get_values('conductivity')

        longitude = psd.get_values('longitude')
        latitude = psd.get_values('latitude')
        height = psd.get_values('height')
        time = psd.get_values('time')

        log.warn('Got conductivity: %s' % str(conductivity))

        # The L1 conductivity data product algorithm takes the L0 conductivity data product and converts it
        # into Siemens per meter (S/m)
        #    SBE 37IM Output Format 0
        #    1) Standard conversion from 5-character hex string to decimal
        #    2)Scaling
        # Use the constructor to put data into a granule
        psc = PointSupplementConstructor(
            point_definition=self.outgoing_stream_def,
            stream_id=self.streams['output'])
        ### Assumes the config argument for output streams is known and there is only one 'output'.
        ### the stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
        ### application level like this!

        for i in xrange(len(conductivity)):
            scaled_conductivity = (conductivity[i] / 100000.0) - 0.5
            point_id = psc.add_point(time=time[i],
                                     location=(longitude[i], latitude[i],
                                               height[i]))
            psc.add_scalar_point_coverage(point_id=point_id,
                                          coverage_id='conductivity',
                                          value=scaled_conductivity)

        return psc.close_stream_granule()
Example #20
    def execute(self, granule):
        """Processes incoming data!!!!
        """

        # Use the deconstructor to pull data from a granule
        psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=granule)


        pressure = psd.get_values('pressure')

        longitude = psd.get_values('longitude')
        latitude = psd.get_values('latitude')
        height = psd.get_values('height')
        time = psd.get_values('time')

        log.warn('Got pressure: %s' % str(pressure))


        # L1
        # 1) The algorithm input is the L0 pressure data product (p_hex) and, in the case of the SBE 37IM, the pressure range (P_rng) from metadata.
        # 2) Convert the hexadecimal string to a decimal string
        # 3) For the SBE 37IM only, convert the pressure range (P_rng) from psia to dbar SBE 37IM
        #    Convert P_rng (input from metadata) from psia to dbar
        # 4) Perform scaling operation
        #    SBE 37IM
        #    L1 pressure data product (in dbar):


        # Use the constructor to put data into a granule
        psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])
        ### Assumes the config argument for output streams is known and there is only one 'output'.
        ### the stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
        ### application level like this!

        for i in xrange(len(pressure)):
            #todo: get pressure range from metadata (if present) and include in calc
            scaled_pressure = ( pressure[i])
            point_id = psc.add_point(time=time[i],location=(longitude[i],latitude[i],height[i]))
            psc.add_scalar_point_coverage(point_id=point_id, coverage_id='pressure', value=scaled_pressure)

        return psc.close_stream_granule()
    def execute(self, granule):
        """Processes incoming data!!!!
        """

        # Use the deconstructor to pull data from a granule
        psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=granule)


        temperature = psd.get_values('temperature')

        longitude = psd.get_values('longitude')
        latitude = psd.get_values('latitude')
        height = psd.get_values('height')
        time = psd.get_values('time')

        log.warn('Got temperature: %s' % str(temperature))


        # The L1 temperature data product algorithm takes the L0 temperature data product and converts it into Celsius.
        # Once the hexadecimal string is converted to decimal, only scaling (dividing by a factor and adding an offset) is
        # required to produce the correct decimal representation of the data in Celsius.
        # The scaling function differs by CTD make/model as described below.
        #    SBE 37IM, Output Format 0
        #    1) Standard conversion from 5-character hex string (Thex) to decimal (tdec)
        #    2) Scaling: T [C] = (tdec / 10,000) - 10

        # Use the constructor to put data into a granule
        psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])
        ### Assumes the config argument for output streams is known and there is only one 'output'.
        ### the stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
        ### application level like this!

        for i in xrange(len(temperature)):
            scaled_temperature = ( temperature[i] / 10000.0) - 10
            point_id = psc.add_point(time=time[i],location=(longitude[i],latitude[i],height[i]))
            psc.add_scalar_point_coverage(point_id=point_id, coverage_id='temperature', value=scaled_temperature)

        return psc.close_stream_granule()
Example #22
    def execute(self, granule):

        log.debug('(Google DT transform): Received Viz Data Packet' )

        self.dataDescription = []
        self.dataTableContent = []
        element_count_id = 0
        expected_range = []

        # NOTE : Detect somehow that this is a replay stream with a set number of expected granules. Based on this
        #       calculate the number of expected records and set the self.realtime_window_size bigger or equal to this
        #       number.



        psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=granule)
        vardict = {}
        arrLen = None
        for varname in psd.list_field_names():
            vardict[varname] = psd.get_values(varname)
            arrLen = len(vardict[varname])


        # init the dataTable
        # create data description from the variables in the message
        self.dataDescription = [('time', 'datetime', 'time')]

        # split the data string to extract variable names
        for varname in psd.list_field_names():
            if varname == 'time':
                continue

            self.dataDescription.append((varname, 'number', varname))


        # Add the records to the datatable
        for i in xrange(arrLen):
            varTuple = []

            for varname,_,_ in self.dataDescription:
                val = float(vardict[varname][i])
                if varname == 'time':
                    #varTuple.append(datetime.fromtimestamp(val))
                    varTuple.append(val)
                else:
                    varTuple.append(val)

            # Append the tuples to the data table
            self.dataTableContent.append (varTuple)

            # Maintain a sliding window for realtime transform processes
            if len(self.dataTableContent) > self.realtime_window_size:
                # always pop the first element till window size is what we want
                while len(self.dataTableContent) > self.realtime_window_size:
                    self.dataTableContent.pop(0)


        """ To Do : Do we need to figure out the how many granules have been received for a replay stream ??

        if not self.realtime_flag:
            # This is the historical view part. Make a note of how many records were received
            in_data_stream_id = self.incoming_stream_def.data_stream_id
            element_count_id = self.incoming_stream_def.identifiables[in_data_stream_id].element_count_id
            # From each granule you can check the constraint on the number of records
            expected_range = granule.identifiables[element_count_id].constraint.intervals[0]

            # The number of records in a given packet is:
            self.total_num_of_records_recvd += packet.identifiables[element_count_id].value

        """

        # define an output container of data


        # submit the partial datatable to the viz service
        rdt = RecordDictionaryTool(taxonomy=tx)

        # submit resulting table back using the out stream publisher. The data_product_id is the input dp_id
        # responsible for the incoming data
        msg = {"viz_product_type": "google_realtime_dt",
               "data_product_id": "FAKE_DATAPRODUCT_ID_0000",
               "data_table_description": self.dataDescription,
               "data_table_content": self.dataTableContent}

        rdt['google_dt_components'] = numpy.array([msg])

        log.debug('Google DT transform: Sending a granule')
        out_granule = build_granule(data_producer_id='google_dt_transform', taxonomy=tx, record_dictionary=rdt)

        #self.publish(out_granule)

        # clear the tuple for future use
        self.varTuple[:] = []

        return out_granule
Example #23
    def process(self, packet):
        """Processes incoming data!!!!
        """

        # Use the PointSupplementStreamParser to pull data from a granule
        psd = PointSupplementStreamParser(
            stream_definition=self.incoming_stream_def, stream_granule=packet)

        conductivity = psd.get_values('conductivity')
        pressure = psd.get_values('pressure')
        temperature = psd.get_values('temperature')

        longitude = psd.get_values('longitude')
        latitude = psd.get_values('latitude')
        height = psd.get_values('height')
        time = psd.get_values('time')

        log.warn('Got conductivity: %s' % str(conductivity))
        log.warn('Got pressure: %s' % str(pressure))
        log.warn('Got temperature: %s' % str(temperature))

        # do L0 scaling here.....

        # Use the constructor to put data into a granule

        psc_conductivity = PointSupplementConstructor(
            point_definition=self.outgoing_stream_conductivity,
            stream_id=self.streams['conductivity'])

        psc_pressure = PointSupplementConstructor(
            point_definition=self.outgoing_stream_pressure,
            stream_id=self.streams['pressure'])

        psc_temperature = PointSupplementConstructor(
            point_definition=self.outgoing_stream_temperature,
            stream_id=self.streams['temperature'])

        ### The stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
        ### application level like this!

        for i in xrange(len(conductivity)):
            point_id = psc_conductivity.add_point(time=time[i],
                                                  location=(longitude[i],
                                                            latitude[i],
                                                            height[i]))
            psc_conductivity.add_scalar_point_coverage(
                point_id=point_id,
                coverage_id='conductivity',
                value=conductivity[i])
        self.conductivity.publish(psc_conductivity.close_stream_granule())

        for i in xrange(len(pressure)):
            point_id = psc_pressure.add_point(time=time[i],
                                              location=(longitude[i],
                                                        latitude[i],
                                                        height[i]))
            psc_pressure.add_scalar_point_coverage(point_id=point_id,
                                                   coverage_id='pressure',
                                                   value=pressure[i])
        self.pressure.publish(psc_pressure.close_stream_granule())

        for i in xrange(len(temperature)):
            point_id = psc_temperature.add_point(time=time[i],
                                                 location=(longitude[i],
                                                           latitude[i],
                                                           height[i]))
            psc_temperature.add_scalar_point_coverage(
                point_id=point_id,
                coverage_id='temperature',
                value=temperature[i])
        self.temperature.publish(psc_temperature.close_stream_granule())

        return
    def test_dm_integration(self):
        '''
        test_salinity_transform
        Test full DM Services Integration
        '''
        cc = self.container
        assertions = self.assertTrue


        #-----------------------------
        # Copy below here to run as a script (don't forget the imports of course!)
        #-----------------------------


        # Create some service clients...
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)
        transform_management_service = TransformManagementServiceClient(node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        # declare some handy variables

        datastore_name = 'test_dm_integration'



        ###
        ### In the beginning there were two stream definitions...
        ###
        # create a stream definition for the data from the ctd simulator
        ctd_stream_def = SBE37_CDM_stream_definition()
        ctd_stream_def_id = pubsub_management_service.create_stream_definition(container=ctd_stream_def, name='Simulated CTD data')

        # create a stream definition for the data from the salinity Transform
        sal_stream_def_id = pubsub_management_service.create_stream_definition(container=SalinityTransform.outgoing_stream_def, name='Scalar Salinity data stream')



        ###
        ### And two process definitions...
        ###
        # one for the ctd simulator...
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module':'ion.processes.data.ctd_stream_publisher',
            'class':'SimpleCtdPublisher'
        }

        ctd_sim_procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)

        # one for the salinity transform
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module':'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'class':'SalinityTransform'
        }

        salinity_transform_procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)



        #---------------------------
        # Set up ingestion - this is an operator concern - not done by SA in a deployed system
        #---------------------------
        # Configure ingestion using eight workers, ingesting to test_dm_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,datastore_profile='SCIDATA'),
            number_of_workers=1
        )
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)



        #---------------------------
        # Set up the producer (CTD Simulator)
        #---------------------------

        # Create the stream
        ctd_stream_id = pubsub_management_service.create_stream(stream_definition_id=ctd_stream_def_id)


        # Set up the datasets
        ctd_dataset_id = dataset_management_service.create_dataset(
            stream_id=ctd_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule'
        )

        # Configure ingestion of this dataset
        ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id = ctd_dataset_id,
            archive_data = True,
            archive_metadata = True,
            ingestion_configuration_id = ingestion_configuration_id, # you need to know the ingestion configuration id!
        )
        # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        #---------------------------
        # Set up the salinity transform
        #---------------------------


        # Create the stream
        sal_stream_id = pubsub_management_service.create_stream(stream_definition_id=sal_stream_def_id)


        # Set up the datasets
        sal_dataset_id = dataset_management_service.create_dataset(
            stream_id=sal_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule'
        )

        # Configure ingestion of the salinity as a dataset
        sal_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id = sal_dataset_id,
            archive_data = True,
            archive_metadata = True,
            ingestion_configuration_id = ingestion_configuration_id, # you need to know the ingestion configuration id!
        )
        # Hold onto sal_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service



        # Create a subscription as input to the transform
        sal_transform_input_subscription_id = pubsub_management_service.create_subscription(
            query = StreamQuery(stream_ids=[ctd_stream_id,]),
            exchange_name='salinity_transform_input') # how do we make these names??? i.e. Should they be anonymous?

        # create the salinity transform
        sal_transform_id = transform_management_service.create_transform(
            name='example salinity transform',
            in_subscription_id=sal_transform_input_subscription_id,
            out_streams={'output':sal_stream_id,},
            process_definition_id = salinity_transform_procdef_id,
            # no configuration needed at this time...
            )
        # start the transform - for a test case it makes sense to do it before starting the producer but it is not required
        transform_management_service.activate_transform(transform_id=sal_transform_id)



        # Start the ctd simulator to produce some data
        configuration = {
            'process':{
                'stream_id':ctd_stream_id,
            }
        }
        ctd_sim_pid = process_dispatcher.schedule_process(process_definition_id=ctd_sim_procdef_id, configuration=configuration)


        ###
        ### Make a subscriber in the test to listen for salinity data
        ###
        salinity_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery([sal_stream_id,]),
            exchange_name = 'salinity_test',
            name = "test salinity subscription",
            )

        pid = cc.spawn_process(name='dummy_process_for_test',
            module='pyon.ion.process',
            cls='SimpleProcess',
            config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node)

        result = gevent.event.AsyncResult()
        results = []
        def message_received(message, headers):
            # Heads
            log.warn('Salinity data received!')
            results.append(message)
            if len(results) >3:
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(exchange_name='salinity_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        pubsub_management_service.activate_subscription(subscription_id=salinity_subscription_id)


        # Assert that we have received data
        assertions(result.get(timeout=10))

        # stop the flow parse the messages...
        process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data



        for message in results:

            psd = PointSupplementStreamParser(stream_definition=SalinityTransform.outgoing_stream_def, stream_granule=message)

            # Test the handy info method for the names of fields in the stream def
            assertions('salinity' in psd.list_field_names())


            # you have to know the name of the coverage in stream def
            salinity = psd.get_values('salinity')

            import numpy

            assertions(isinstance(salinity, numpy.ndarray))

            assertions(numpy.nanmin(salinity) > 0.0) # salinity should always be greater than 0
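The test above gates on a gevent AsyncResult that the subscriber callback sets once it has collected more than three messages. A minimal sketch of just that gating pattern, with the ION subscriber replaced by directly spawned greenlets so it runs on its own:

    import gevent
    import gevent.event

    result = gevent.event.AsyncResult()
    results = []

    def message_received(message, headers):
        results.append(message)
        if len(results) > 3:
            result.set(True)

    # simulate four deliveries; get() yields to the greenlets and then returns
    for msg in ['granule'] * 4:
        gevent.spawn(message_received, msg, {})
    assert result.get(timeout=10)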
    def process(self, packet):

        log.debug('(%s): Received Viz Data Packet' % (self.name) )

        element_count_id = 0
        expected_range = []

        psd = PointSupplementStreamParser(stream_definition=self.stream_def, stream_granule=packet)
        vardict = {}
        arrLen = None
        for varname in psd.list_field_names():
            vardict[varname] = psd.get_values(varname)
            arrLen = len(vardict[varname])


        # if it's the first time, init the dataTable
        if self.initDataTableFlag:
            # create data description from the variables in the message
            self.dataDescription = [('time', 'datetime', 'time')]

            # split the data string to extract variable names
            for varname in psd.list_field_names():
                if varname == 'time':
                    continue

                self.dataDescription.append((varname, 'number', varname))

            self.initDataTableFlag = False


        # Add the records to the datatable
        for i in xrange(arrLen):
            varTuple = []

            for varname,_,_ in self.dataDescription:
                val = float(vardict[varname][i])
                if varname == 'time':
                    varTuple.append(datetime.fromtimestamp(val))
                else:
                    varTuple.append(val)

            # Append the tuples to the data table
            self.dataTableContent.append (varTuple)

            if self.realtime_flag:
                # Maintain a sliding window for realtime transform processes
                realtime_window_size = 100
                if len(self.dataTableContent) > realtime_window_size:
                    # always pop the first element till window size is what we want
                    while len(self.dataTableContent) > realtime_window_size:
                        self.dataTableContent.pop(0)


        if not self.realtime_flag:
            # This is the historical view part. Make a note of how many records were received
            data_stream_id = self.stream_def.data_stream_id
            element_count_id = self.stream_def.identifiables[data_stream_id].element_count_id
            # From each granule you can check the constraint on the number of records
            expected_range = packet.identifiables[element_count_id].constraint.intervals[0]

            # The number of records in a given packet is:
            self.total_num_of_records_recvd += packet.identifiables[element_count_id].value


        # submit the Json version of the datatable to the viz service
        if self.realtime_flag:
            # create the google viz data table
            data_table = gviz_api.DataTable(self.dataDescription)
            data_table.LoadData(self.dataTableContent)

            # submit resulting table back using the out stream publisher
            msg = {"viz_product_type": "google_realtime_dt",
                   "data_product_id": self.data_product_id,
                   "data_table": data_table.ToJSonResponse() }
            self.out_stream_pub.publish(msg)
        else:
            # Submit table back to the service if we received all the replay data
            if self.total_num_of_records_recvd == (expected_range[1] + 1):
                # If the datatable received was too big, decimate on the fly to a fixed size
                max_google_dt_len = 1024
                if len(self.dataTableContent) > max_google_dt_len:
                    decimation_factor = int(math.ceil(len(self.dataTableContent) / float(max_google_dt_len)))

                    for i in xrange(len(self.dataTableContent) - 1, 0, -1):

                        if i % decimation_factor == 0:
                            continue
                        self.dataTableContent.pop(i)

                data_table = gviz_api.DataTable(self.dataDescription)
                data_table.LoadData(self.dataTableContent)

                # submit resulting table back using the out stream publisher
                msg = {"viz_product_type": "google_dt",
                       "data_product_id_token": self.data_product_id_token,
                       "data_table": data_table.ToJSonResponse() }
                self.out_stream_pub.publish(msg)
                return


        # clear the tuple for future use
        self.varTuple[:] = []
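The backwards pop-loop above keeps every decimation_factor-th row, which is the same as taking a stride slice of the table content; the thinned rows then load into a gviz_api DataTable exactly as in the realtime branch. A small sketch of that path using the same gviz_api calls already used above; the column description and sample rows are illustrative:

    import math
    import gviz_api

    def decimate_rows(rows, max_len=1024):
        # keep every factor-th row so the table stays under max_len entries
        if len(rows) <= max_len:
            return rows
        factor = int(math.ceil(len(rows) / float(max_len)))
        return rows[::factor]

    description = [('time', 'number', 'time'), ('salinity', 'number', 'salinity')]
    rows = [(float(i), 30.0 + i * 0.01) for i in range(5000)]

    data_table = gviz_api.DataTable(description)
    data_table.LoadData(decimate_rows(rows))
    json_response = data_table.ToJSonResponse()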
Example #26
    def test_workflow_components(self):

        cc = self.container
        assertions = self.assertTrue


        #-------------------------------
        # Create CTD Parsed as the initial data product
        #-------------------------------
        # create a stream definition for the data from the ctd simulator
        ctd_stream_def = SBE37_CDM_stream_definition()
        ctd_stream_def_id = self.pubsubclient.create_stream_definition(container=ctd_stream_def, name='Simulated CTD data')


        print 'Creating new CDM data product with a stream definition'
        dp_obj = IonObject(RT.DataProduct,name='ctd_parsed',description='ctd stream test')
        try:
            ctd_parsed_data_product = self.dataproductclient.create_data_product(dp_obj, ctd_stream_def_id)
        except Exception as ex:
            self.fail("failed to create new data product: %s" %ex)

        print 'new ctd_parsed_data_product_id = ', ctd_parsed_data_product

        instDevice_obj = IonObject(RT.InstrumentDevice, name='SBE37IMDevice', description="SBE37IMDevice", serial_number="12345" )
        instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj)


        self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=ctd_parsed_data_product)

        self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_parsed_data_product, persist_data=True, persist_metadata=True)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product, PRED.hasStream, None, True)
        assertions(len(stream_ids) > 0 )
        ctd_stream_id = stream_ids[0]

        ###
        ###  Setup the first transformation
        ###

        # Salinity: Data Process Definition
        log.debug("Create data process definition SalinityTransform")
        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='ctd_salinity',
            description='create a salinity data product',
            module='ion.processes.data.transforms.ctd.ctd_L2_salinity',
            class_name='SalinityTransform',
            process_source='SalinityTransform source code here...')
        try:
            ctd_L2_salinity_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)
        except Exception as ex:
            self.fail("failed to create new SalinityTransform data process definition: %s" %ex)


        # create a stream definition for the data from the salinity Transform
        sal_stream_def_id = self.pubsubclient.create_stream_definition(container=SalinityTransform.outgoing_stream_def,  name='L2_salinity')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(sal_stream_def_id, ctd_L2_salinity_dprocdef_id )

        # Create the output data product of the transform
        log.debug("create output data product L2 Salinity")
        ctd_l2_salinity_output_dp_obj = IonObject(RT.DataProduct, name='L2_Salinity',description='transform output L2 salinity')
        ctd_l2_salinity_output_dp_id = self.dataproductclient.create_data_product(ctd_l2_salinity_output_dp_obj, sal_stream_def_id)
        self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_l2_salinity_output_dp_id, persist_data=True, persist_metadata=True)


        # Create the Salinity transform data process
        log.debug("create L2_salinity data_process and start it")
        try:
            l2_salinity_all_data_process_id = self.dataprocessclient.create_data_process(ctd_L2_salinity_dprocdef_id, ctd_parsed_data_product, {'output':ctd_l2_salinity_output_dp_id})
            self.dataprocessclient.activate_data_process(l2_salinity_all_data_process_id)
        except BadRequest as ex:
            self.fail("failed to create new data process: %s" %ex)

        log.debug("test_createTransformsThenActivateInstrument: create L2_salinity data_process return")



        ###
        ###  Setup the second transformation
        ###

        # Salinity: Data Process Definition
        log.debug("Create data process definition SalinityDoublerTransform")
        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='salinity_doubler',
            description='create a salinity doubler data product',
            module='ion.processes.data.transforms.example_double_salinity',
            class_name='SalinityDoubler',
            process_source='SalinityDoubler source code here...')
        try:
            salinity_doubler_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)
        except Exception as ex:
            self.fail("failed to create new SalinityDoubler data process definition: %s" %ex)



        # create a stream definition for the data from the salinity doubler Transform
        salinity_double_stream_def_id = self.pubsubclient.create_stream_definition(container=SalinityDoubler.outgoing_stream_def,  name='SalinityDoubler')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(salinity_double_stream_def_id, salinity_doubler_dprocdef_id )

        # Create the output data product of the transform
        log.debug("create output data product SalinityDoubler")
        salinity_doubler_output_dp_obj = IonObject(RT.DataProduct, name='SalinityDoubler',description='transform output salinity doubler')
        salinity_doubler_output_dp_id = self.dataproductclient.create_data_product(salinity_doubler_output_dp_obj, salinity_double_stream_def_id)
        self.dataproductclient.activate_data_product_persistence(data_product_id=salinity_doubler_output_dp_id, persist_data=True, persist_metadata=True)


        # Create the SalinityDoubler transform data process
        log.debug("create salinity_doubler data_process and start it")
        try:
            salinity_double_data_process_id = self.dataprocessclient.create_data_process(salinity_doubler_dprocdef_id, ctd_l2_salinity_output_dp_id, {'output':salinity_doubler_output_dp_id})
            self.dataprocessclient.activate_data_process(salinity_double_data_process_id)
        except BadRequest as ex:
            self.fail("failed to create new data process: %s" %ex)

        log.debug("test_createTransformsThenActivateInstrument: create L2_salinity data_process return")









        ###
        ### Start the process for producing the CTD data
        ###
        # process definition for the ctd simulator...
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module':'ion.processes.data.ctd_stream_publisher',
            'class':'SimpleCtdPublisher'
        }

        ctd_sim_procdef_id = self.process_dispatcher.create_process_definition(process_definition=producer_definition)

        # Start the ctd simulator to produce some data
        configuration = {
            'process':{
                'stream_id':ctd_stream_id,
                }
        }
        ctd_sim_pid = self.process_dispatcher.schedule_process(process_definition_id=ctd_sim_procdef_id, configuration=configuration)

        ## get the stream id for the transform outputs
        stream_ids, _ = self.rrclient.find_objects(ctd_l2_salinity_output_dp_id, PRED.hasStream, None, True)
        assertions(len(stream_ids) > 0 )
        sal_stream_id = stream_ids[0]

        stream_ids, _ = self.rrclient.find_objects(salinity_doubler_output_dp_id, PRED.hasStream, None, True)
        assertions(len(stream_ids) > 0 )
        sal_dbl_stream_id = stream_ids[0]
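        # Note (not part of the original test): both lookups above repeat the same
        # resource-registry pattern and could be folded into a small hypothetical helper:
        #
        #     def first_stream_id(data_product_id):
        #         ids, _ = self.rrclient.find_objects(data_product_id, PRED.hasStream, None, True)
        #         assertions(len(ids) > 0)
        #         return ids[0]
        #
        #     sal_stream_id = first_stream_id(ctd_l2_salinity_output_dp_id)
        #     sal_dbl_stream_id = first_stream_id(salinity_doubler_output_dp_id)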


        ###
        ### Make a subscriber in the test to listen for transformed data
        ###
        salinity_subscription_id = self.pubsubclient.create_subscription(
            query=StreamQuery([ctd_stream_id, sal_stream_id,sal_dbl_stream_id]),
            exchange_name = 'salinity_test',
            name = "test salinity subscription",
        )

        pid = cc.spawn_process(name='dummy_process_for_test',
            module='pyon.ion.process',
            cls='SimpleProcess',
            config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node)

        result = gevent.event.AsyncResult()
        results = []
        def message_received(message, headers):
            log.warn('Data received!')
            results.append(message)
            if len(results) > 15:
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(exchange_name='salinity_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        self.pubsubclient.activate_subscription(subscription_id=salinity_subscription_id)


        # Assert that we have received data
        assertions(result.get(timeout=20))

        #Stop the transform process

        # stop the flow parse the messages...
        self.process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data


        first_salinity_values = None

        for message in results:

            try:
                psd = PointSupplementStreamParser(stream_definition=ctd_stream_def, stream_granule=message)
                temp = psd.get_values('temperature')
                print psd.list_field_names()
            except KeyError as ke:
                temp = None

            if temp is not None:
                assertions(isinstance(temp, numpy.ndarray))

                print 'temperature=' + str(numpy.nanmin(temp))

                first_salinity_values = None

            else:
                psd = PointSupplementStreamParser(stream_definition=SalinityTransform.outgoing_stream_def, stream_granule=message)
                print psd.list_field_names()

                # Test the handy info method for the names of fields in the stream def
                assertions('salinity' in psd.list_field_names())

                # you have to know the name of the coverage in stream def
                salinity = psd.get_values('salinity')
                print 'salinity=' + str(numpy.nanmin(salinity))

                assertions(isinstance(salinity, numpy.ndarray))

                assertions(numpy.nanmin(salinity) > 0.0) # salinity should always be greater than 0

                if first_salinity_values is None:
                    first_salinity_values = salinity.tolist()
                else:
                    second_salinity_values = salinity.tolist()
                    assertions(len(first_salinity_values) == len(second_salinity_values))
                    for idx in range(0,len(first_salinity_values)):
                        assertions(first_salinity_values[idx]*2.0 == second_salinity_values[idx])
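If the transform ever introduces rounding (for example by recomputing salinity rather than doubling it in place), the exact equality check above could become brittle; a tolerant variant of the doubling assertion might look like the sketch below (the helper name is hypothetical, only numpy is assumed):

import numpy

def assert_doubled(first_values, second_values, rel_tol=1e-9):
    # Check that the doubler output is element-wise twice the L2 salinity input,
    # allowing for tiny floating-point rounding introduced by the transform.
    first = numpy.asarray(first_values, dtype=float)
    second = numpy.asarray(second_values, dtype=float)
    assert first.shape == second.shape, 'salinity arrays differ in length'
    assert numpy.allclose(first * 2.0, second, rtol=rel_tol)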
Example #27
0
    def test_dm_integration(self):
        '''
        Test full DM Services Integration using the salinity transform
        '''
        cc = self.container
        assertions = self.assertTrue

        #-----------------------------
        # Copy below here to run as a script (don't forget the imports of course!)
        #-----------------------------

        # Create some service clients...
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(
            node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(
            node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)
        transform_management_service = TransformManagementServiceClient(
            node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        # declare some handy variables

        datastore_name = 'test_dm_integration'

        ###
        ### In the beginning there were two stream definitions...
        ###
        # create a stream definition for the data from the ctd simulator
        ctd_stream_def = SBE37_CDM_stream_definition()
        ctd_stream_def_id = pubsub_management_service.create_stream_definition(
            container=ctd_stream_def, name='Simulated CTD data')

        # create a stream definition for the data from the salinity Transform
        sal_stream_def_id = pubsub_management_service.create_stream_definition(
            container=SalinityTransform.outgoing_stream_def,
            name='Scalar Salinity data stream')

        ###
        ### And two process definitions...
        ###
        # one for the ctd simulator...
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.processes.data.ctd_stream_publisher',
            'class': 'SimpleCtdPublisher'
        }

        ctd_sim_procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)

        # one for the salinity transform
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'class': 'SalinityTransform'
        }

        salinity_transform_procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)

        #---------------------------
        # Set up ingestion - this is an operator concern - not done by SA in a deployed system
        #---------------------------
        # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,
                                       datastore_profile='SCIDATA'),
            number_of_workers=1)
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        #---------------------------
        # Set up the producer (CTD Simulator)
        #---------------------------

        # Create the stream
        ctd_stream_id = pubsub_management_service.create_stream(
            stream_definition_id=ctd_stream_def_id)

        # Set up the datasets
        ctd_dataset_id = dataset_management_service.create_dataset(
            stream_id=ctd_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule')

        # Configure ingestion of this dataset
        ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=ctd_dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=
            ingestion_configuration_id,  # you need to know the ingestion configuration id!
        )
        # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        #---------------------------
        # Set up the salinity transform
        #---------------------------

        # Create the stream
        sal_stream_id = pubsub_management_service.create_stream(
            stream_definition_id=sal_stream_def_id)

        # Set up the datasets
        sal_dataset_id = dataset_management_service.create_dataset(
            stream_id=sal_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule')

        # Configure ingestion of the salinity as a dataset
        sal_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=sal_dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=
            ingestion_configuration_id,  # you need to know the ingestion configuration id!
        )
        # Hold onto sal_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        # Create a subscription as input to the transform
        sal_transform_input_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery(stream_ids=[
                ctd_stream_id,
            ]),
            exchange_name='salinity_transform_input'
        )  # how do we make these names??? i.e. Should they be anonymous?

        # create the salinity transform
        sal_transform_id = transform_management_service.create_transform(
            name='example salinity transform',
            in_subscription_id=sal_transform_input_subscription_id,
            out_streams={
                'output': sal_stream_id,
            },
            process_definition_id=salinity_transform_procdef_id,
            # no configuration needed at this time...
        )
        # start the transform - for a test case it makes sense to do it before starting the producer but it is not required
        transform_management_service.activate_transform(
            transform_id=sal_transform_id)

        # Start the ctd simulator to produce some data
        configuration = {
            'process': {
                'stream_id': ctd_stream_id,
            }
        }
        ctd_sim_pid = process_dispatcher.schedule_process(
            process_definition_id=ctd_sim_procdef_id,
            configuration=configuration)

        ###
        ### Make a subscriber in the test to listen for salinity data
        ###
        salinity_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery([
                sal_stream_id,
            ]),
            exchange_name='salinity_test',
            name="test salinity subscription",
        )

        pid = cc.spawn_process(name='dummy_process_for_test',
                               module='pyon.ion.process',
                               cls='SimpleProcess',
                               config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process,
                                                         node=cc.node)

        result = gevent.event.AsyncResult()
        results = []

        def message_received(message, headers):
            log.warn('Salinity data received!')
            results.append(message)
            if len(results) > 3:
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(
            exchange_name='salinity_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        pubsub_management_service.activate_subscription(
            subscription_id=salinity_subscription_id)

        # Assert that we have received data
        assertions(result.get(timeout=10))

        # stop the flow parse the messages...
        process_dispatcher.cancel_process(
            ctd_sim_pid
        )  # kill the ctd simulator process - that is enough data

        import numpy

        for message in results:

            psd = PointSupplementStreamParser(
                stream_definition=SalinityTransform.outgoing_stream_def,
                stream_granule=message)

            # Test the handy info method for the names of fields in the stream def
            assertions('salinity' in psd.list_field_names())

            # you have to know the name of the coverage in stream def
            salinity = psd.get_values('salinity')

            assertions(isinstance(salinity, numpy.ndarray))

            assertions(numpy.nanmin(salinity) >
                       0.0)  # salinity should always be greater than 0
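As a possible follow-on check (not part of the original test), the salinity values from every received granule could be pooled into a single array before asserting, which makes a single bad granule easier to localize. The helper name is hypothetical; the parser calls mirror those used above:

import numpy

def pooled_salinity(messages, stream_def):
    # Parse each granule with the same stream definition and concatenate the salinity values.
    values = []
    for granule in messages:
        parser = PointSupplementStreamParser(stream_definition=stream_def,
                                             stream_granule=granule)
        values.append(parser.get_values('salinity'))
    return numpy.concatenate(values)

# Example usage inside the test:
#   salinity_all = pooled_salinity(results, SalinityTransform.outgoing_stream_def)
#   assertions(numpy.nanmin(salinity_all) > 0.0)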
    def process(self, packet):

        log.debug('(%s): Received Viz Data Packet' % (self.name))

        element_count_id = 0
        expected_range = []

        psd = PointSupplementStreamParser(stream_definition=self.stream_def,
                                          stream_granule=packet)
        vardict = {}
        arrLen = None
        for varname in psd.list_field_names():
            vardict[varname] = psd.get_values(varname)
            arrLen = len(vardict[varname])

        # if it's the first time, init the dataTable
        if self.initDataTableFlag:
            # create data description from the variables in the message
            self.dataDescription = [('time', 'datetime', 'time')]

            # split the data string to extract variable names
            for varname in psd.list_field_names():
                if varname == 'time':
                    continue

                self.dataDescription.append((varname, 'number', varname))

            self.initDataTableFlag = False

        # Add the records to the datatable
        for i in xrange(arrLen):
            varTuple = []

            for varname, _, _ in self.dataDescription:
                val = float(vardict[varname][i])
                if varname == 'time':
                    varTuple.append(datetime.fromtimestamp(val))
                else:
                    varTuple.append(val)

            # Append the tuples to the data table
            self.dataTableContent.append(varTuple)

            if self.realtime_flag:
                # Maintain a sliding window for realtime transform processes
                realtime_window_size = 100
                if len(self.dataTableContent) > realtime_window_size:
                    # always pop the first element till window size is what we want
                    while len(self.dataTableContent) > realtime_window_size:
                        self.dataTableContent.pop(0)

        if not self.realtime_flag:
            # This is the historical view part. Make a note of how many records were received
            data_stream_id = self.stream_def.data_stream_id
            element_count_id = self.stream_def.identifiables[
                data_stream_id].element_count_id
            # From each granule you can check the constraint on the number of records
            expected_range = packet.identifiables[
                element_count_id].constraint.intervals[0]

            # The number of records in a given packet is:
            self.total_num_of_records_recvd += packet.identifiables[
                element_count_id].value

        # submit the Json version of the datatable to the viz service
        if self.realtime_flag:
            # create the google viz data table
            data_table = gviz_api.DataTable(self.dataDescription)
            data_table.LoadData(self.dataTableContent)

            # submit resulting table back using the out stream publisher
            msg = {
                "viz_product_type": "google_realtime_dt",
                "data_product_id": self.data_product_id,
                "data_table": data_table.ToJSonResponse()
            }
            self.out_stream_pub.publish(msg)
        else:
            # Submit table back to the service if we received all the replay data
            if self.total_num_of_records_recvd == (expected_range[1] + 1):
                # If the datatable received was too big, decimate on the fly to a fixed size
                max_google_dt_len = 1024
                if len(self.dataTableContent) > max_google_dt_len:
                    # use float division so the ceiling has an effect under Python 2
                    decimation_factor = int(
                        math.ceil(
                            len(self.dataTableContent) / float(max_google_dt_len)))

                    tempDataTableContent = []
                    for i in xrange(0, len(self.dataTableContent),
                                    decimation_factor):
                        # check limits
                        if i >= len(self.dataTableContent):
                            break

                        tempDataTableContent.append(self.dataTableContent[i])

                    self.dataTableContent = tempDataTableContent

                data_table = gviz_api.DataTable(self.dataDescription)
                data_table.LoadData(self.dataTableContent)

                # submit resulting table back using the out stream publisher
                msg = {
                    "viz_product_type": "google_dt",
                    "data_product_id_token": self.data_product_id_token,
                    "data_table": data_table.ToJSonResponse()
                }
                self.out_stream_pub.publish(msg)
                return

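The realtime and historical branches above boil down to two list-management patterns: a bounded sliding window for realtime data and on-the-fly decimation for oversized replay tables. A minimal standalone sketch of both, with illustrative names that are not part of the transform API:

import math

def trim_to_window(rows, window_size=100):
    # Realtime case: keep only the most recent window_size rows.
    while len(rows) > window_size:
        rows.pop(0)
    return rows

def decimate(rows, max_len=1024):
    # Replay case: keep every step-th row so roughly max_len rows remain.
    if len(rows) <= max_len:
        return rows
    step = int(math.ceil(len(rows) / float(max_len)))
    return rows[::step]

trim_to_window mirrors the realtime_window_size handling above; decimate mirrors the max_google_dt_len handling, using float division so the ceiling actually has an effect under Python 2.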